mirror of
https://gitee.com/openharmony/third_party_rust_syn
synced 2024-11-23 07:50:04 +00:00
Merge pull request #1223 from CAD97/flat-tokenbuffer
Use a flat layout for TokenBuffer
This commit is contained in:
commit
91f01419b7
197
src/buffer.rs
197
src/buffer.rs
@ -16,21 +16,17 @@ use crate::Lifetime;
|
||||
use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
|
||||
use std::hint;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::ptr;
|
||||
use std::slice;
|
||||
|
||||
/// Internal type which is used instead of `TokenTree` to represent a token tree
|
||||
/// within a `TokenBuffer`.
|
||||
enum Entry {
|
||||
// Mimicking types from proc-macro.
|
||||
Group(Group, TokenBuffer),
|
||||
// Group entries contain the offset to the matching End entry.
|
||||
Group(Group, usize),
|
||||
Ident(Ident),
|
||||
Punct(Punct),
|
||||
Literal(Literal),
|
||||
// End entries contain a raw pointer to the entry from the containing
|
||||
// token tree, or null if this is the outermost level.
|
||||
End(*const Entry),
|
||||
End,
|
||||
}
|
||||
|
||||
/// A buffer that can be efficiently traversed multiple times, unlike
|
||||
@ -39,102 +35,34 @@ enum Entry {
|
||||
///
|
||||
/// *This type is available only if Syn is built with the `"parsing"` feature.*
|
||||
pub struct TokenBuffer {
|
||||
// NOTE: Do not implement clone on this - there are raw pointers inside
|
||||
// these entries which will be messed up. Moving the `TokenBuffer` itself is
|
||||
// safe as the data pointed to won't be moved.
|
||||
ptr: *const Entry,
|
||||
len: usize,
|
||||
// NOTE: Do not implement clone on this - while the current design could be
|
||||
// cloned, other designs which could be desirable may not be cloneable.
|
||||
entries: Box<[Entry]>,
|
||||
}
|
||||
|
||||
// Keep the explicit impl for backwards compatibility.
|
||||
impl Drop for TokenBuffer {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
let slice = slice::from_raw_parts_mut(self.ptr as *mut Entry, self.len);
|
||||
let _ = Box::from_raw(slice);
|
||||
}
|
||||
}
|
||||
fn drop(&mut self) {}
|
||||
}
|
||||
|
||||
impl TokenBuffer {
|
||||
// NOTE: Do not mutate the Vec returned from this function once it returns;
|
||||
// the address of its backing memory must remain stable.
|
||||
fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer {
|
||||
let iterator = stream.into_iter();
|
||||
let mut entries = Vec::with_capacity(iterator.size_hint().0 + 1);
|
||||
let mut next_index_after_last_group = 0;
|
||||
for tt in iterator {
|
||||
fn new_inner(entries: &mut Vec<Entry>, stream: TokenStream) {
|
||||
for tt in stream {
|
||||
match tt {
|
||||
TokenTree::Ident(ident) => {
|
||||
entries.push(Entry::Ident(ident));
|
||||
}
|
||||
TokenTree::Punct(punct) => {
|
||||
entries.push(Entry::Punct(punct));
|
||||
}
|
||||
TokenTree::Literal(literal) => {
|
||||
entries.push(Entry::Literal(literal));
|
||||
}
|
||||
TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
|
||||
TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
|
||||
TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
|
||||
TokenTree::Group(group) => {
|
||||
// We cannot fill in a real `End` pointer until `entries` is
|
||||
// finished growing and getting potentially reallocated.
|
||||
// Instead, we temporarily coopt the spot where the end
|
||||
// pointer would go, and use it to string together an
|
||||
// intrusive linked list of all the Entry::Group entries in
|
||||
// the vector. Later after `entries` is done growing, we'll
|
||||
// traverse the linked list and fill in all the end
|
||||
// pointers with a correct value.
|
||||
let group_up =
|
||||
ptr::null::<u8>().wrapping_add(next_index_after_last_group) as *const Entry;
|
||||
|
||||
let inner = Self::inner_new(group.stream(), group_up);
|
||||
entries.push(Entry::Group(group, inner));
|
||||
next_index_after_last_group = entries.len();
|
||||
let group_start_index = entries.len();
|
||||
entries.push(Entry::End); // we replace this below
|
||||
Self::new_inner(entries, group.stream());
|
||||
let group_end_index = entries.len();
|
||||
entries.push(Entry::End);
|
||||
let group_end_offset = group_end_index - group_start_index;
|
||||
entries[group_start_index] = Entry::Group(group, group_end_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add an `End` entry to the end with a reference to the enclosing token
|
||||
// stream which was passed in.
|
||||
entries.push(Entry::End(up));
|
||||
|
||||
// NOTE: This is done to ensure that we don't accidentally modify the
|
||||
// length of the backing buffer. The backing buffer must remain at a
|
||||
// constant address after this point, as we are going to store a raw
|
||||
// pointer into it.
|
||||
let entries = entries.into_boxed_slice();
|
||||
let len = entries.len();
|
||||
|
||||
// Convert boxed slice into a pointer to the first element early, to
|
||||
// avoid invalidating pointers into this slice when we move the Box.
|
||||
// See https://github.com/rust-lang/unsafe-code-guidelines/issues/326
|
||||
let entries = Box::into_raw(entries) as *mut Entry;
|
||||
|
||||
// Traverse intrusive linked list of Entry::Group entries and fill in
|
||||
// correct End pointers.
|
||||
while let Some(idx) = next_index_after_last_group.checked_sub(1) {
|
||||
// We know that idx refers to one of the Entry::Group entries, and
|
||||
// that the very last entry is an Entry::End, so the next index
|
||||
// after any group entry is a valid index.
|
||||
let group_up = unsafe { entries.add(next_index_after_last_group) };
|
||||
|
||||
// Linked list only takes us to entries which are of type Group.
|
||||
let token_buffer = match unsafe { &*entries.add(idx) } {
|
||||
Entry::Group(_group, token_buffer) => token_buffer,
|
||||
_ => unsafe { hint::unreachable_unchecked() },
|
||||
};
|
||||
|
||||
// Last entry in any TokenBuffer is of type End.
|
||||
let buffer_ptr = token_buffer.ptr as *mut Entry;
|
||||
let last_entry = unsafe { &mut *buffer_ptr.add(token_buffer.len - 1) };
|
||||
let end_ptr_slot = match last_entry {
|
||||
Entry::End(end_ptr_slot) => end_ptr_slot,
|
||||
_ => unsafe { hint::unreachable_unchecked() },
|
||||
};
|
||||
|
||||
// Step to next element in linked list.
|
||||
next_index_after_last_group = mem::replace(end_ptr_slot, group_up) as usize;
|
||||
}
|
||||
|
||||
TokenBuffer { ptr: entries, len }
|
||||
}
|
||||
|
||||
/// Creates a `TokenBuffer` containing all the tokens from the input
|
||||
@ -153,13 +81,19 @@ impl TokenBuffer {
|
||||
/// Creates a `TokenBuffer` containing all the tokens from the input
|
||||
/// `proc_macro2::TokenStream`.
|
||||
pub fn new2(stream: TokenStream) -> Self {
|
||||
Self::inner_new(stream, ptr::null())
|
||||
let mut entries = Vec::new();
|
||||
Self::new_inner(&mut entries, stream);
|
||||
entries.push(Entry::End);
|
||||
Self {
|
||||
entries: entries.into_boxed_slice(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a cursor referencing the first token in the buffer and able to
|
||||
/// traverse until the end of the buffer.
|
||||
pub fn begin(&self) -> Cursor {
|
||||
unsafe { Cursor::create(self.ptr, self.ptr.add(self.len - 1)) }
|
||||
let ptr = self.entries.as_ptr();
|
||||
unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
|
||||
}
|
||||
}
|
||||
|
||||
@ -179,7 +113,7 @@ impl TokenBuffer {
|
||||
pub struct Cursor<'a> {
|
||||
// The current entry which the `Cursor` is pointing at.
|
||||
ptr: *const Entry,
|
||||
// This is the only `Entry::End(..)` object which this cursor is allowed to
|
||||
// This is the only `Entry::End` object which this cursor is allowed to
|
||||
// point at. All other `End` objects are skipped over in `Cursor::create`.
|
||||
scope: *const Entry,
|
||||
// Cursor is covariant in 'a. This field ensures that our pointers are still
|
||||
@ -199,7 +133,7 @@ impl<'a> Cursor<'a> {
|
||||
// object in global storage.
|
||||
struct UnsafeSyncEntry(Entry);
|
||||
unsafe impl Sync for UnsafeSyncEntry {}
|
||||
static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry));
|
||||
static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End);
|
||||
|
||||
Cursor {
|
||||
ptr: &EMPTY_ENTRY.0,
|
||||
@ -212,15 +146,15 @@ impl<'a> Cursor<'a> {
|
||||
/// `None`-delimited scopes when the cursor reaches the end of them,
|
||||
/// allowing for them to be treated transparently.
|
||||
unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
|
||||
// NOTE: If we're looking at a `End(..)`, we want to advance the cursor
|
||||
// NOTE: If we're looking at a `End`, we want to advance the cursor
|
||||
// past it, unless `ptr == scope`, which means that we're at the edge of
|
||||
// our cursor's scope. We should only have `ptr != scope` at the exit
|
||||
// from None-delimited groups entered with `ignore_none`.
|
||||
while let Entry::End(exit) = *ptr {
|
||||
while let Entry::End = *ptr {
|
||||
if ptr == scope {
|
||||
break;
|
||||
}
|
||||
ptr = exit;
|
||||
ptr = ptr.add(1);
|
||||
}
|
||||
|
||||
Cursor {
|
||||
@ -238,24 +172,46 @@ impl<'a> Cursor<'a> {
|
||||
/// Bump the cursor to point at the next token after the current one. This
|
||||
/// is undefined behavior if the cursor is currently looking at an
|
||||
/// `Entry::End`.
|
||||
unsafe fn bump(self) -> Cursor<'a> {
|
||||
///
|
||||
/// If the cursor is looking at an `Entry::Group`, the bumped cursor will
|
||||
/// point at the first token in the group (with the same scope end).
|
||||
unsafe fn bump_ignore_group(self) -> Cursor<'a> {
|
||||
Cursor::create(self.ptr.offset(1), self.scope)
|
||||
}
|
||||
|
||||
/// Bump the cursor to point at the next token after the current one. This
|
||||
/// is undefined behavior if the cursor is currently looking at an
|
||||
/// `Entry::End`.
|
||||
///
|
||||
/// If the cursor is looking at an `Entry::Group`, the bumped cursor will
|
||||
/// point at the first token after the group.
|
||||
unsafe fn bump_over_group(self) -> Cursor<'a> {
|
||||
match self.entry() {
|
||||
&Entry::Group(_, end_offset) => Cursor::create(self.ptr.add(end_offset), self.scope),
|
||||
_ => self.bump_ignore_group(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Bump the cursor to enter the current group. This is undefined behavior
|
||||
/// if the cursor is not currently looking at an `Entry::Group`.
|
||||
unsafe fn bump_into_group(self) -> Cursor<'a> {
|
||||
match self.entry() {
|
||||
&Entry::Group(_, end_offset) => {
|
||||
Cursor::create(self.ptr.add(1), self.ptr.add(end_offset))
|
||||
}
|
||||
_ => hint::unreachable_unchecked(),
|
||||
}
|
||||
}
|
||||
|
||||
/// While the cursor is looking at a `None`-delimited group, move it to look
|
||||
/// at the first token inside instead. If the group is empty, this will move
|
||||
/// the cursor past the `None`-delimited group.
|
||||
///
|
||||
/// WARNING: This mutates its argument.
|
||||
fn ignore_none(&mut self) {
|
||||
while let Entry::Group(group, buf) = self.entry() {
|
||||
while let Entry::Group(group, _) = self.entry() {
|
||||
if group.delimiter() == Delimiter::None {
|
||||
// NOTE: We call `Cursor::create` here to make sure that
|
||||
// situations where we should immediately exit the span after
|
||||
// entering it are handled correctly.
|
||||
unsafe {
|
||||
*self = Cursor::create(buf.ptr, self.scope);
|
||||
}
|
||||
unsafe { *self = self.bump_ignore_group() };
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
@ -279,9 +235,10 @@ impl<'a> Cursor<'a> {
|
||||
self.ignore_none();
|
||||
}
|
||||
|
||||
if let Entry::Group(group, buf) = self.entry() {
|
||||
if let Entry::Group(group, _) = self.entry() {
|
||||
if group.delimiter() == delim {
|
||||
return Some((buf.begin(), group.span(), unsafe { self.bump() }));
|
||||
let (into, over) = unsafe { (self.bump_into_group(), self.bump_over_group()) };
|
||||
return Some((into, group.span(), over));
|
||||
}
|
||||
}
|
||||
|
||||
@ -293,7 +250,7 @@ impl<'a> Cursor<'a> {
|
||||
pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
|
||||
self.ignore_none();
|
||||
match self.entry() {
|
||||
Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump() })),
|
||||
Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@ -304,7 +261,7 @@ impl<'a> Cursor<'a> {
|
||||
self.ignore_none();
|
||||
match self.entry() {
|
||||
Entry::Punct(punct) if punct.as_char() != '\'' => {
|
||||
Some((punct.clone(), unsafe { self.bump() }))
|
||||
Some((punct.clone(), unsafe { self.bump_ignore_group() }))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
@ -315,7 +272,7 @@ impl<'a> Cursor<'a> {
|
||||
pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
|
||||
self.ignore_none();
|
||||
match self.entry() {
|
||||
Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump() })),
|
||||
Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@ -326,7 +283,7 @@ impl<'a> Cursor<'a> {
|
||||
self.ignore_none();
|
||||
match self.entry() {
|
||||
Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
|
||||
let next = unsafe { self.bump() };
|
||||
let next = unsafe { self.bump_ignore_group() };
|
||||
match next.ident() {
|
||||
Some((ident, rest)) => {
|
||||
let lifetime = Lifetime {
|
||||
@ -367,10 +324,10 @@ impl<'a> Cursor<'a> {
|
||||
Entry::Literal(literal) => literal.clone().into(),
|
||||
Entry::Ident(ident) => ident.clone().into(),
|
||||
Entry::Punct(punct) => punct.clone().into(),
|
||||
Entry::End(..) => return None,
|
||||
Entry::End => return None,
|
||||
};
|
||||
|
||||
Some((tree, unsafe { self.bump() }))
|
||||
Some((tree, unsafe { self.bump_over_group() }))
|
||||
}
|
||||
|
||||
/// Returns the `Span` of the current token, or `Span::call_site()` if this
|
||||
@ -381,7 +338,7 @@ impl<'a> Cursor<'a> {
|
||||
Entry::Literal(literal) => literal.span(),
|
||||
Entry::Ident(ident) => ident.span(),
|
||||
Entry::Punct(punct) => punct.span(),
|
||||
Entry::End(..) => Span::call_site(),
|
||||
Entry::End => Span::call_site(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -391,17 +348,17 @@ impl<'a> Cursor<'a> {
|
||||
/// This method treats `'lifetimes` as a single token.
|
||||
pub(crate) fn skip(self) -> Option<Cursor<'a>> {
|
||||
match self.entry() {
|
||||
Entry::End(..) => None,
|
||||
Entry::End => None,
|
||||
|
||||
// Treat lifetimes as a single tt for the purposes of 'skip'.
|
||||
Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
|
||||
let next = unsafe { self.bump() };
|
||||
let next = unsafe { self.bump_ignore_group() };
|
||||
match next.entry() {
|
||||
Entry::Ident(_) => Some(unsafe { next.bump() }),
|
||||
Entry::Ident(_) => Some(unsafe { next.bump_ignore_group() }),
|
||||
_ => Some(next),
|
||||
}
|
||||
}
|
||||
_ => Some(unsafe { self.bump() }),
|
||||
_ => Some(unsafe { self.bump_over_group() }),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user