Auto merge of #219 - emilio:macros, r=fitzgen

Parse macro expressions.

Clang is trolling me really hard so I'm going to see if the extra token I'm always getting is LLVM 3.9 specific.
This commit is contained in:
bors-servo 2016-11-08 19:35:27 -06:00 committed by GitHub
commit 8d83bdfb48
11 changed files with 264 additions and 83 deletions

View File

@ -31,6 +31,7 @@ env_logger = "0.3"
rustc-serialize = "0.3.19"
syntex_syntax = "0.44"
regex = "0.1"
cexpr = "0.2"
[dependencies.aster]
features = ["with-syntex"]

14
src/chooser.rs Normal file
View File

@ -0,0 +1,14 @@
//! A public API for more fine-grained customization of bindgen behavior.
pub use ir::int::IntKind;
use std::fmt;
/// A trait to allow configuring different kinds of types in different
/// situations.
///
/// Implementors must also implement `fmt::Debug`. `int_macro` comes with a
/// default implementation, so an implementor only needs to override it when
/// it wants to deviate from the default choice.
pub trait TypeChooser: fmt::Debug {
/// The integer kind an integer macro should have, given a name and the
/// value of that macro, or `None` if you want the default to be chosen.
///
/// The provided implementation ignores both arguments and always returns
/// `None`.
fn int_macro(&self, _name: &str, _value: i64) -> Option<IntKind> {
None
}
}

View File

@ -4,8 +4,9 @@
#![allow(non_upper_case_globals, dead_code)]
use cexpr;
use clangll::*;
use std::{mem, ptr};
use std::{mem, ptr, slice};
use std::ffi::{CStr, CString};
use std::fmt;
use std::hash::Hash;
@ -1050,18 +1051,18 @@ impl TranslationUnit {
let range = cursor.extent();
let mut tokens = vec![];
unsafe {
let mut token_ptr = ::std::ptr::null_mut();
let mut token_ptr = ptr::null_mut();
let mut num_tokens: c_uint = 0;
clang_tokenize(self.x, range, &mut token_ptr, &mut num_tokens);
if token_ptr.is_null() {
return None;
}
let token_array = ::std::slice::from_raw_parts(token_ptr,
num_tokens as usize);
let token_array = slice::from_raw_parts(token_ptr,
num_tokens as usize);
for &token in token_array.iter() {
let kind = clang_getTokenKind(token);
let spelling: String = clang_getTokenSpelling(self.x, token)
.into();
let spelling = clang_getTokenSpelling(self.x, token).into();
tokens.push(Token {
kind: kind,
@ -1072,6 +1073,62 @@ impl TranslationUnit {
}
Some(tokens)
}
/// Build the list of `cexpr` tokens corresponding to the clang tokens found
/// under `cursor`, so they can be handed to the `cexpr` parser.
///
/// Returns `None` when `self.tokens(cursor)` returns `None`. Comment tokens
/// are left out of the result, and a trailing `#` punctuation token or
/// trailing keyword token is discarded before conversion (see the FIXME
/// below).
///
/// # Panics
///
/// Panics if clang reports a token kind other than punctuation, literal,
/// identifier, keyword or comment.
pub fn cexpr_tokens(&self,
                    cursor: &Cursor)
                    -> Option<Vec<cexpr::token::Token>> {
    use cexpr::token;

    let mut clang_tokens = match self.tokens(cursor) {
        None => return None,
        Some(found) => found,
    };

    // FIXME(emilio): LLVM 3.9 at least always includes an extra token for no
    // good reason (except if we're at EOF). So we do this kind of hack,
    // where we skip trailing punctuation and trailing keywords that are
    // known to cause problems.
    //
    // This is sort of unfortunate, though :(.
    //
    // I'll try to get it fixed in LLVM if I have the time to submit a
    // patch.
    let drop_trailing = clang_tokens.last().map_or(false, |last| {
        // Either the `#` that starts the next macro...
        let starts_next_macro = last.kind == CXToken_Punctuation &&
                                last.spelling == "#";
        // ...or a following keyword of any kind, like a following
        // declaration.
        starts_next_macro || last.kind == CXToken_Keyword
    });
    if drop_trailing {
        clang_tokens.pop();
    }

    let mut converted = Vec::new();
    for clang_token in clang_tokens {
        let kind = match clang_token.kind {
            // NB: cexpr is not too happy about comments inside
            // expressions, so we strip them down here.
            CXToken_Comment => continue,
            CXToken_Punctuation => token::Kind::Punctuation,
            CXToken_Literal => token::Kind::Literal,
            CXToken_Identifier => token::Kind::Identifier,
            CXToken_Keyword => token::Kind::Keyword,
            _ => panic!("Found unexpected token kind: {}", clang_token.kind),
        };

        converted.push(token::Token {
            kind: kind,
            raw: clang_token.spelling.into_bytes().into_boxed_slice(),
        });
    }

    Some(converted)
}
}
impl Drop for TranslationUnit {

View File

@ -1587,8 +1587,19 @@ impl ToRustTy for Type {
IntKind::ULong => raw!(c_ulong),
IntKind::LongLong => raw!(c_longlong),
IntKind::ULongLong => raw!(c_ulonglong),
IntKind::I8 => aster::ty::TyBuilder::new().i8(),
IntKind::U8 => aster::ty::TyBuilder::new().u8(),
IntKind::I16 => aster::ty::TyBuilder::new().i16(),
IntKind::U16 => aster::ty::TyBuilder::new().u16(),
IntKind::I32 => aster::ty::TyBuilder::new().i32(),
IntKind::U32 => aster::ty::TyBuilder::new().u32(),
IntKind::I64 => aster::ty::TyBuilder::new().i64(),
IntKind::U64 => aster::ty::TyBuilder::new().u64(),
IntKind::Custom { name, .. } => {
let ident = ctx.rust_ident_raw(name);
quote_ty!(ctx.ext_cx(), $ident)
}
// FIXME: This doesn't generate the proper alignment, but we
// can't do better right now. We should be able to use
// i128/u128 when they're available.

View File

@ -1,10 +1,11 @@
//! Common context that is passed around during parsing and codegen.
use BindgenOptions;
use cexpr;
use clang::{self, Cursor};
use parse::ClangItemParser;
use std::borrow::{Borrow, Cow};
use std::collections::{HashMap, HashSet, hash_map};
use std::borrow::Cow;
use std::collections::{HashMap, hash_map};
use std::collections::btree_map::{self, BTreeMap};
use std::fmt;
use super::int::IntKind;
@ -77,8 +78,9 @@ pub struct BindgenContext<'ctx> {
pub currently_parsed_types: Vec<(Cursor, ItemId)>,
/// A HashSet with all the already parsed macro names. This is done to avoid
/// hard errors while parsing duplicated macros.
parsed_macros: HashSet<String>,
/// hard errors while parsing duplicated macros, as well as to allow macro
/// expression parsing.
parsed_macros: HashMap<Vec<u8>, cexpr::expr::EvalResult>,
/// The active replacements collected from replaces="xxx" annotations.
replacements: HashMap<String, ItemId>,
@ -243,7 +245,7 @@ impl<'ctx> BindgenContext<'ctx> {
/// Returns a mangled name as a rust identifier.
pub fn rust_ident_raw(&self, name: &str) -> Ident {
self.ext_cx().ident_of(name.borrow())
self.ext_cx().ident_of(name)
}
/// Iterate over all items that have been defined.
@ -715,14 +717,21 @@ impl<'ctx> BindgenContext<'ctx> {
}
/// Have we parsed the macro named `macro_name` already?
pub fn parsed_macro(&self, macro_name: &str) -> bool {
self.parsed_macros.contains(macro_name)
pub fn parsed_macro(&self, macro_name: &[u8]) -> bool {
self.parsed_macros.contains_key(macro_name)
}
/// Get the currently parsed macros.
///
/// The returned map is keyed by the macro identifier as raw bytes and maps
/// to the `cexpr` evaluation result recorded for that macro.
///
/// In debug builds this asserts that we are not in the codegen phase.
pub fn parsed_macros(&self) -> &HashMap<Vec<u8>, cexpr::expr::EvalResult> {
debug_assert!(!self.in_codegen_phase());
&self.parsed_macros
}
/// Mark the macro named `macro_name` as parsed.
pub fn note_parsed_macro(&mut self, macro_name: String) {
debug_assert!(!self.parsed_macros.contains(&macro_name));
self.parsed_macros.insert(macro_name);
pub fn note_parsed_macro(&mut self,
id: Vec<u8>,
value: cexpr::expr::EvalResult) {
self.parsed_macros.insert(id, value);
}
/// Are we in the codegen phase?

View File

@ -36,18 +36,44 @@ pub enum IntKind {
/// An `unsigned long long`.
ULongLong,
/// An 8-bit signed integer.
I8,
/// An 8-bit unsigned integer.
U8,
/// A 16-bit signed integer.
I16,
/// Either a `char16_t` or a `wchar_t`.
U16,
/// A `char32_t`.
/// A 32-bit signed integer.
I32,
/// A 32-bit unsigned integer.
U32,
/// A 64-bit signed integer.
I64,
/// A 64-bit unsigned integer.
U64,
/// An `int128_t`
I128,
/// A `uint128_t`.
U128, /* Though now we're at it we could add equivalents for the rust
* types... */
U128,
/// A custom integer type, used to allow custom macro types depending on
/// range.
Custom {
/// The name of the type, which would be used without modification.
name: &'static str,
/// Whether the type is signed or not.
is_signed: bool,
},
}
impl IntKind {
@ -55,10 +81,13 @@ impl IntKind {
pub fn is_signed(&self) -> bool {
use self::IntKind::*;
match *self {
Bool | UChar | UShort | UInt | ULong | ULongLong | U16 | U32 |
U128 => false,
Bool | UChar | UShort | UInt | ULong | ULongLong | U8 | U16 |
U32 | U64 | U128 => false,
Char | Short | Int | Long | LongLong | I128 => true,
Char | Short | Int | Long | LongLong | I8 | I16 | I32 | I64 |
I128 => true,
Custom { is_signed, .. } => is_signed,
}
}
}

View File

@ -1,7 +1,9 @@
//! Intermediate representation of variables.
use cexpr;
use clang;
use parse::{ClangItemParser, ClangSubItemParser, ParseError, ParseResult};
use std::num::Wrapping;
use super::context::BindgenContext;
use super::function::cursor_mangling;
use super::int::IntKind;
@ -73,43 +75,65 @@ impl ClangSubItemParser for Var {
ctx: &mut BindgenContext)
-> Result<ParseResult<Self>, ParseError> {
use clangll::*;
use cexpr::expr::EvalResult;
match cursor.kind() {
CXCursor_MacroDefinition => {
let value = parse_int_literal_tokens(&cursor,
ctx.translation_unit());
let value = parse_macro(ctx, &cursor, ctx.translation_unit());
let value = match value {
let (id, value) = match value {
Some(v) => v,
None => return Err(ParseError::Continue),
};
let name = cursor.spelling();
if name.is_empty() {
warn!("Empty macro name?");
return Err(ParseError::Continue);
}
assert!(!id.is_empty(), "Empty macro name?");
if ctx.parsed_macro(&name) {
if ctx.parsed_macro(&id) {
let name = String::from_utf8(id).unwrap();
warn!("Duplicated macro definition: {}", name);
return Err(ParseError::Continue);
}
ctx.note_parsed_macro(name.clone());
let ty = if value < 0 {
Item::builtin_type(TypeKind::Int(IntKind::Int), true, ctx)
} else if value.abs() > u32::max_value() as i64 {
Item::builtin_type(TypeKind::Int(IntKind::ULongLong),
true,
ctx)
} else {
Item::builtin_type(TypeKind::Int(IntKind::UInt), true, ctx)
// NB: It's important to "note" the macro even if the result is
// not an integer, otherwise we might lose other kinds of
// derived macros.
ctx.note_parsed_macro(id.clone(), value.clone());
// NOTE: Unwrapping, here and above, is safe, because the
// identifier of a token comes straight from clang, and we
// enforce utf8 there, so we should have already panicked at
// this point.
let name = String::from_utf8(id).unwrap();
let (int_kind, val) = match value {
// TODO(emilio): Handle the non-invalid ones!
EvalResult::Float(..) |
EvalResult::Char(..) |
EvalResult::Str(..) |
EvalResult::Invalid => return Err(ParseError::Continue),
EvalResult::Int(Wrapping(value)) => {
let kind = ctx.options().type_chooser.as_ref()
.and_then(|c| c.int_macro(&name, value))
.unwrap_or_else(|| {
if value < 0 {
if value < i32::min_value() as i64 {
IntKind::LongLong
} else {
IntKind::Int
}
} else if value > u32::max_value() as i64 {
IntKind::ULongLong
} else {
IntKind::UInt
}
});
(kind, value)
}
};
Ok(ParseResult::New(Var::new(name,
None,
ty,
Some(value),
true),
let ty = Item::builtin_type(TypeKind::Int(int_kind), true, ctx);
Ok(ParseResult::New(Var::new(name, None, ty, Some(val), true),
Some(cursor)))
}
CXCursor_VarDecl => {
@ -153,49 +177,43 @@ impl ClangSubItemParser for Var {
}
}
/// Try and parse the immediately found tokens from an unit (if any) to integers
fn parse_int_literal_tokens(cursor: &clang::Cursor,
unit: &clang::TranslationUnit)
-> Option<i64> {
use clangll::{CXToken_Literal, CXToken_Punctuation};
/// Try and parse a macro using all the macros parsed until now.
fn parse_macro(ctx: &BindgenContext,
cursor: &clang::Cursor,
unit: &clang::TranslationUnit)
-> Option<(Vec<u8>, cexpr::expr::EvalResult)> {
use cexpr::{expr, nom};
let tokens = match unit.tokens(cursor) {
let cexpr_tokens = match unit.cexpr_tokens(cursor) {
None => return None,
Some(tokens) => tokens,
};
let mut literal = None;
let mut negate = false;
for token in tokens.into_iter() {
match token.kind {
CXToken_Punctuation if token.spelling == "-" => {
negate = !negate;
}
CXToken_Literal => {
literal = Some(token.spelling);
break;
}
_ => {
// Reset values if we found anything else
negate = false;
literal = None;
}
}
}
let parser = expr::IdentifierParser::new(ctx.parsed_macros());
let result = parser.macro_definition(&cexpr_tokens);
literal.and_then(|lit| {
if lit.starts_with("0x") {
// TODO: try to preserve hex literals?
i64::from_str_radix(&lit[2..], 16).ok()
} else if lit == "0" {
Some(0)
} else if lit.starts_with("0") {
i64::from_str_radix(&lit[1..], 8).ok()
} else {
lit.parse().ok()
}
})
.map(|lit| if negate { -lit } else { lit })
match result {
nom::IResult::Done(_, (id, val)) => Some((id.into(), val)),
_ => None,
}
}
/// Evaluate the tokens found under `cursor` as a C expression and return its
/// value when the evaluation yields an integer.
///
/// Returns `None` if no tokens could be obtained for the cursor, if the
/// expression parser does not finish successfully, or if the result is not
/// an integer.
fn parse_int_literal_tokens(cursor: &clang::Cursor,
                            unit: &clang::TranslationUnit)
                            -> Option<i64> {
    use cexpr::{expr, nom};
    use cexpr::expr::EvalResult;

    let tokens = match unit.cexpr_tokens(cursor) {
        Some(found) => found,
        None => return None,
    };

    // TODO(emilio): We can try to parse other kinds of literals.
    let evaluated = expr::expr(&tokens);
    if let nom::IResult::Done(_, EvalResult::Int(Wrapping(int))) = evaluated {
        Some(int)
    } else {
        None
    }
}
fn get_integer_literal_from_cursor(cursor: &clang::Cursor,

View File

@ -24,6 +24,7 @@
#[macro_use]
extern crate cfg_if;
extern crate cexpr;
extern crate syntex_syntax as syntax;
extern crate aster;
extern crate quasi;
@ -62,6 +63,8 @@ mod parse;
mod regex_set;
mod uses;
pub mod chooser;
#[cfg(rustfmt)]
mod codegen;
@ -237,6 +240,13 @@ impl Builder {
self
}
/// Allows configuring types in different situations, see the `TypeChooser`
/// documentation.
///
/// Stores `cb` in the builder's options, replacing any chooser that was set
/// before, and returns the builder so calls can be chained.
pub fn type_chooser(mut self, cb: Box<chooser::TypeChooser>) -> Self {
self.options.type_chooser = Some(cb);
self
}
/// Generate the Rust bindings using the options built up thus far.
pub fn generate<'ctx>(self) -> Result<Bindings<'ctx>, ()> {
Bindings::generate(self.options, None)
@ -331,6 +341,10 @@ pub struct BindgenOptions {
/// Generate a dummy C/C++ file that includes the header and has dummy uses
/// of all types defined therein. See the `uses` module for more.
pub dummy_uses: Option<String>,
/// A user-provided type chooser to allow customizing different kinds of
/// situations.
pub type_chooser: Option<Box<chooser::TypeChooser>>,
}
impl Default for BindgenOptions {
@ -359,6 +373,7 @@ impl Default for BindgenOptions {
clang_args: vec![],
input_header: None,
dummy_uses: None,
type_chooser: None,
}
}
}

View File

@ -24,8 +24,9 @@ impl <T> ::std::clone::Clone for __BindgenUnionField<T> {
fn clone(&self) -> Self { Self::new() }
}
impl <T> ::std::marker::Copy for __BindgenUnionField<T> { }
pub const JSVAL_ALIGNMENT: ::std::os::raw::c_uint = 8;
pub const JSVAL_TAG_SHIFT: ::std::os::raw::c_uint = 47;
pub const JSVAL_PAYLOAD_MASK: ::std::os::raw::c_ulonglong = 140737488355327;
pub const JSVAL_TAG_MASK: ::std::os::raw::c_longlong = -140737488355328;
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum JSValueType {

View File

@ -0,0 +1,14 @@
/* automatically generated by rust-bindgen */
#![allow(non_snake_case)]
pub const FOO: ::std::os::raw::c_uint = 1;
pub const BAR: ::std::os::raw::c_uint = 4;
pub const BAZ: ::std::os::raw::c_uint = 5;
pub const BARR: ::std::os::raw::c_uint = 1;
pub const BAZZ: ::std::os::raw::c_uint = 7;
pub const I_RAN_OUT_OF_DUMB_NAMES: ::std::os::raw::c_uint = 7;
pub const HAZ_A_COMMENT: ::std::os::raw::c_uint = 1;
pub const HAZ_A_COMMENT_INSIDE: ::std::os::raw::c_uint = 2;

View File

@ -0,0 +1,12 @@
#define FOO 1
#define BAR 4
#define BAZ (FOO + BAR)
#define BARR (1 << 0)
#define BAZZ ((1 << 1) + BAZ)
#define I_RAN_OUT_OF_DUMB_NAMES (BARR | BAZZ)
/* I haz a comment */
#define HAZ_A_COMMENT BARR
#define HAZ_A_COMMENT_INSIDE (/* comment for real */ BARR + FOO)