Merge pull request #1223 from CAD97/flat-tokenbuffer

Use a flat layout for TokenBuffer
This commit is contained in:
David Tolnay 2022-09-28 16:55:27 -07:00 committed by GitHub
commit 91f01419b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -16,21 +16,17 @@ use crate::Lifetime;
use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
use std::hint;
use std::marker::PhantomData;
use std::mem;
use std::ptr;
use std::slice;
/// Internal type which is used instead of `TokenTree` to represent a token tree
/// within a `TokenBuffer`.
enum Entry {
// Mimicking types from proc-macro.
Group(Group, TokenBuffer),
// Group entries contain the offset to the matching End entry.
Group(Group, usize),
Ident(Ident),
Punct(Punct),
Literal(Literal),
// End entries contain a raw pointer to the entry from the containing
// token tree, or null if this is the outermost level.
End(*const Entry),
End,
}
/// A buffer that can be efficiently traversed multiple times, unlike
@ -39,102 +35,34 @@ enum Entry {
///
/// *This type is available only if Syn is built with the `"parsing"` feature.*
pub struct TokenBuffer {
// NOTE: Do not implement clone on this - there are raw pointers inside
// these entries which will be messed up. Moving the `TokenBuffer` itself is
// safe as the data pointed to won't be moved.
ptr: *const Entry,
len: usize,
// NOTE: Do not implement clone on this - while the current design could be
// cloned, other designs which could be desirable may not be cloneable.
entries: Box<[Entry]>,
}
// Keep the explicit impl for backwards compatibility.
impl Drop for TokenBuffer {
fn drop(&mut self) {
unsafe {
let slice = slice::from_raw_parts_mut(self.ptr as *mut Entry, self.len);
let _ = Box::from_raw(slice);
}
}
fn drop(&mut self) {}
}
impl TokenBuffer {
// NOTE: Do not mutate the Vec returned from this function once it returns;
// the address of its backing memory must remain stable.
fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer {
let iterator = stream.into_iter();
let mut entries = Vec::with_capacity(iterator.size_hint().0 + 1);
let mut next_index_after_last_group = 0;
for tt in iterator {
fn new_inner(entries: &mut Vec<Entry>, stream: TokenStream) {
for tt in stream {
match tt {
TokenTree::Ident(ident) => {
entries.push(Entry::Ident(ident));
}
TokenTree::Punct(punct) => {
entries.push(Entry::Punct(punct));
}
TokenTree::Literal(literal) => {
entries.push(Entry::Literal(literal));
}
TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
TokenTree::Group(group) => {
// We cannot fill in a real `End` pointer until `entries` is
// finished growing and getting potentially reallocated.
// Instead, we temporarily coopt the spot where the end
// pointer would go, and use it to string together an
// intrusive linked list of all the Entry::Group entries in
// the vector. Later after `entries` is done growing, we'll
// traverse the linked list and fill in all the end
// pointers with a correct value.
let group_up =
ptr::null::<u8>().wrapping_add(next_index_after_last_group) as *const Entry;
let inner = Self::inner_new(group.stream(), group_up);
entries.push(Entry::Group(group, inner));
next_index_after_last_group = entries.len();
let group_start_index = entries.len();
entries.push(Entry::End); // we replace this below
Self::new_inner(entries, group.stream());
let group_end_index = entries.len();
entries.push(Entry::End);
let group_end_offset = group_end_index - group_start_index;
entries[group_start_index] = Entry::Group(group, group_end_offset);
}
}
}
// Add an `End` entry to the end with a reference to the enclosing token
// stream which was passed in.
entries.push(Entry::End(up));
// NOTE: This is done to ensure that we don't accidentally modify the
// length of the backing buffer. The backing buffer must remain at a
// constant address after this point, as we are going to store a raw
// pointer into it.
let entries = entries.into_boxed_slice();
let len = entries.len();
// Convert boxed slice into a pointer to the first element early, to
// avoid invalidating pointers into this slice when we move the Box.
// See https://github.com/rust-lang/unsafe-code-guidelines/issues/326
let entries = Box::into_raw(entries) as *mut Entry;
// Traverse intrusive linked list of Entry::Group entries and fill in
// correct End pointers.
while let Some(idx) = next_index_after_last_group.checked_sub(1) {
// We know that idx refers to one of the Entry::Group entries, and
// that the very last entry is an Entry::End, so the next index
// after any group entry is a valid index.
let group_up = unsafe { entries.add(next_index_after_last_group) };
// Linked list only takes us to entries which are of type Group.
let token_buffer = match unsafe { &*entries.add(idx) } {
Entry::Group(_group, token_buffer) => token_buffer,
_ => unsafe { hint::unreachable_unchecked() },
};
// Last entry in any TokenBuffer is of type End.
let buffer_ptr = token_buffer.ptr as *mut Entry;
let last_entry = unsafe { &mut *buffer_ptr.add(token_buffer.len - 1) };
let end_ptr_slot = match last_entry {
Entry::End(end_ptr_slot) => end_ptr_slot,
_ => unsafe { hint::unreachable_unchecked() },
};
// Step to next element in linked list.
next_index_after_last_group = mem::replace(end_ptr_slot, group_up) as usize;
}
TokenBuffer { ptr: entries, len }
}
/// Creates a `TokenBuffer` containing all the tokens from the input
@ -153,13 +81,19 @@ impl TokenBuffer {
/// Creates a `TokenBuffer` containing all the tokens from the input
/// `proc_macro2::TokenStream`.
pub fn new2(stream: TokenStream) -> Self {
Self::inner_new(stream, ptr::null())
let mut entries = Vec::new();
Self::new_inner(&mut entries, stream);
entries.push(Entry::End);
Self {
entries: entries.into_boxed_slice(),
}
}
/// Creates a cursor referencing the first token in the buffer and able to
/// traverse until the end of the buffer.
pub fn begin(&self) -> Cursor {
unsafe { Cursor::create(self.ptr, self.ptr.add(self.len - 1)) }
let ptr = self.entries.as_ptr();
unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
}
}
@ -179,7 +113,7 @@ impl TokenBuffer {
pub struct Cursor<'a> {
// The current entry which the `Cursor` is pointing at.
ptr: *const Entry,
// This is the only `Entry::End(..)` object which this cursor is allowed to
// This is the only `Entry::End` object which this cursor is allowed to
// point at. All other `End` objects are skipped over in `Cursor::create`.
scope: *const Entry,
// Cursor is covariant in 'a. This field ensures that our pointers are still
@ -199,7 +133,7 @@ impl<'a> Cursor<'a> {
// object in global storage.
struct UnsafeSyncEntry(Entry);
unsafe impl Sync for UnsafeSyncEntry {}
static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry));
static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End);
Cursor {
ptr: &EMPTY_ENTRY.0,
@ -212,15 +146,15 @@ impl<'a> Cursor<'a> {
/// `None`-delimited scopes when the cursor reaches the end of them,
/// allowing for them to be treated transparently.
unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
// NOTE: If we're looking at a `End(..)`, we want to advance the cursor
// NOTE: If we're looking at a `End`, we want to advance the cursor
// past it, unless `ptr == scope`, which means that we're at the edge of
// our cursor's scope. We should only have `ptr != scope` at the exit
// from None-delimited groups entered with `ignore_none`.
while let Entry::End(exit) = *ptr {
while let Entry::End = *ptr {
if ptr == scope {
break;
}
ptr = exit;
ptr = ptr.add(1);
}
Cursor {
@ -238,24 +172,46 @@ impl<'a> Cursor<'a> {
/// Bump the cursor to point at the next token after the current one. This
/// is undefined behavior if the cursor is currently looking at an
/// `Entry::End`.
unsafe fn bump(self) -> Cursor<'a> {
///
/// If the cursor is looking at an `Entry::Group`, the bumped cursor will
/// point at the first token in the group (with the same scope end).
unsafe fn bump_ignore_group(self) -> Cursor<'a> {
Cursor::create(self.ptr.offset(1), self.scope)
}
/// Bump the cursor to point at the next token after the current one. This
/// is undefined behavior if the cursor is currently looking at an
/// `Entry::End`.
///
/// If the cursor is looking at an `Entry::Group`, the bumped cursor will
/// point at the first token after the group.
unsafe fn bump_over_group(self) -> Cursor<'a> {
match self.entry() {
&Entry::Group(_, end_offset) => Cursor::create(self.ptr.add(end_offset), self.scope),
_ => self.bump_ignore_group(),
}
}
/// Bump the cursor to enter the current group. This is undefined behavior
/// if the cursor is not currently looking at an `Entry::Group`.
unsafe fn bump_into_group(self) -> Cursor<'a> {
match self.entry() {
&Entry::Group(_, end_offset) => {
Cursor::create(self.ptr.add(1), self.ptr.add(end_offset))
}
_ => hint::unreachable_unchecked(),
}
}
/// While the cursor is looking at a `None`-delimited group, move it to look
/// at the first token inside instead. If the group is empty, this will move
/// the cursor past the `None`-delimited group.
///
/// WARNING: This mutates its argument.
fn ignore_none(&mut self) {
while let Entry::Group(group, buf) = self.entry() {
while let Entry::Group(group, _) = self.entry() {
if group.delimiter() == Delimiter::None {
// NOTE: We call `Cursor::create` here to make sure that
// situations where we should immediately exit the span after
// entering it are handled correctly.
unsafe {
*self = Cursor::create(buf.ptr, self.scope);
}
unsafe { *self = self.bump_ignore_group() };
} else {
break;
}
@ -279,9 +235,10 @@ impl<'a> Cursor<'a> {
self.ignore_none();
}
if let Entry::Group(group, buf) = self.entry() {
if let Entry::Group(group, _) = self.entry() {
if group.delimiter() == delim {
return Some((buf.begin(), group.span(), unsafe { self.bump() }));
let (into, over) = unsafe { (self.bump_into_group(), self.bump_over_group()) };
return Some((into, group.span(), over));
}
}
@ -293,7 +250,7 @@ impl<'a> Cursor<'a> {
pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
self.ignore_none();
match self.entry() {
Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump() })),
Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
_ => None,
}
}
@ -304,7 +261,7 @@ impl<'a> Cursor<'a> {
self.ignore_none();
match self.entry() {
Entry::Punct(punct) if punct.as_char() != '\'' => {
Some((punct.clone(), unsafe { self.bump() }))
Some((punct.clone(), unsafe { self.bump_ignore_group() }))
}
_ => None,
}
@ -315,7 +272,7 @@ impl<'a> Cursor<'a> {
pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
self.ignore_none();
match self.entry() {
Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump() })),
Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
_ => None,
}
}
@ -326,7 +283,7 @@ impl<'a> Cursor<'a> {
self.ignore_none();
match self.entry() {
Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
let next = unsafe { self.bump() };
let next = unsafe { self.bump_ignore_group() };
match next.ident() {
Some((ident, rest)) => {
let lifetime = Lifetime {
@ -367,10 +324,10 @@ impl<'a> Cursor<'a> {
Entry::Literal(literal) => literal.clone().into(),
Entry::Ident(ident) => ident.clone().into(),
Entry::Punct(punct) => punct.clone().into(),
Entry::End(..) => return None,
Entry::End => return None,
};
Some((tree, unsafe { self.bump() }))
Some((tree, unsafe { self.bump_over_group() }))
}
/// Returns the `Span` of the current token, or `Span::call_site()` if this
@ -381,7 +338,7 @@ impl<'a> Cursor<'a> {
Entry::Literal(literal) => literal.span(),
Entry::Ident(ident) => ident.span(),
Entry::Punct(punct) => punct.span(),
Entry::End(..) => Span::call_site(),
Entry::End => Span::call_site(),
}
}
@ -391,17 +348,17 @@ impl<'a> Cursor<'a> {
/// This method treats `'lifetimes` as a single token.
pub(crate) fn skip(self) -> Option<Cursor<'a>> {
match self.entry() {
Entry::End(..) => None,
Entry::End => None,
// Treat lifetimes as a single tt for the purposes of 'skip'.
Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
let next = unsafe { self.bump() };
let next = unsafe { self.bump_ignore_group() };
match next.entry() {
Entry::Ident(_) => Some(unsafe { next.bump() }),
Entry::Ident(_) => Some(unsafe { next.bump_ignore_group() }),
_ => Some(next),
}
}
_ => Some(unsafe { self.bump() }),
_ => Some(unsafe { self.bump_over_group() }),
}
}
}