22: Upgrade to nom 5 r=jethrogb a=jonhoo

This patchset builds on #21, and bumps the dependency on `nom` to version 5. The change is a fairly major one, since `nom` moved away from macro-based parsers to combinators based on function callbacks.

Fixes #19 

---

**Old obsolete text:**

The current state of affairs is that this _almost_ compiles. The biggest remaining issue is how to pass the methods of `expr::PRef` in the new function-passing style. The errors should pretty quickly make it obvious what's going on. I'm honestly not sure what the right way to go about this is. The methods that call them may need to be re-written in the sort of "linear" style that `nom 5` favors. [`nom-methods`](https://docs.rs/nom-methods/) may provide some inspiration here. The new `&mut self` style also makes it awkward to have a combinator that may call multiple different methods on `self`.  It may be that we want to return to the old "consume self return Self" strategy to get around this.

I don't know that I'll have the time to finish this PR up any time soon, so @jethrogb if you want to pick this up and run with it, feel free!

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/jethrogb/rust-cexpr/22)
<!-- Reviewable:end -->


Co-authored-by: Jon Gjengset <jon@thesquareplanet.com>
This commit is contained in:
bors[bot] 2020-03-09 16:52:34 +00:00 committed by GitHub
commit c257faa225
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 545 additions and 461 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "cexpr"
version = "0.3.6"
version = "0.4.0"
edition = "2018"
authors = ["Jethro Beekman <jethro@jbeekman.nl>"]
license = "Apache-2.0/MIT"
@ -13,7 +13,7 @@ keywords = ["C","expression","parser"]
travis-ci = { repository = "jethrogb/rust-cexpr" }
[dependencies]
nom = {version = "^4", features = ["verbose-errors"] }
nom = "5"
[dev-dependencies]
clang-sys = ">= 0.13.0, < 0.29.0"

View File

@ -27,8 +27,13 @@ use std::ops::{
};
use crate::literal::{self, CChar};
use crate::nom_crate::*;
use crate::token::{Kind as TokenKind, Token};
use crate::ToCexprResult;
use nom::branch::alt;
use nom::combinator::{complete, map, map_opt};
use nom::multi::{fold_many0, many0, separated_list};
use nom::sequence::{delimited, pair, preceded};
use nom::*;
/// Expression parser/evaluator that supports identifiers.
#[derive(Debug)]
@ -38,10 +43,12 @@ pub struct IdentifierParser<'ident> {
#[derive(Copy, Clone)]
struct PRef<'a>(&'a IdentifierParser<'a>);
pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error>;
/// A shorthand for the type of cexpr expression evaluation results.
pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>;
/// The result of parsing a literal or evaluating an expression.
#[derive(Debug, Clone, PartialEq)]
#[allow(missing_docs)]
pub enum EvalResult {
Int(Wrapping<i64>),
Float(f64),
@ -53,6 +60,7 @@ pub enum EvalResult {
macro_rules! result_opt (
(fn $n:ident: $e:ident -> $t:ty) => (
#[allow(dead_code)]
#[allow(clippy::wrong_self_convention)]
fn $n(self) -> Option<$t> {
if let EvalResult::$e(v) = self {
Some(v)
@ -69,6 +77,7 @@ impl EvalResult {
result_opt!(fn as_char: Char -> CChar);
result_opt!(fn as_str: Str -> Vec<u8>);
#[allow(clippy::wrong_self_convention)]
fn as_numeric(self) -> Option<EvalResult> {
match self {
EvalResult::Int(_) | EvalResult::Float(_) => Some(self),
@ -88,89 +97,76 @@ impl From<Vec<u8>> for EvalResult {
// ===========================================
macro_rules! exact_token (
($i:expr, $k:ident, $c:expr) => ({
if $i.is_empty() {
let res: CResult<'_, &[u8]> = Err(crate::nom_crate::Err::Incomplete(Needed::Size($c.len())));
($k:ident, $c:expr) => ({
move |input: &[Token]| {
if input.is_empty() {
let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::Size($c.len())));
res
} else {
if $i[0].kind==TokenKind::$k && &$i[0].raw[..]==$c {
Ok((&$i[1..], &$i[0].raw[..]))
if input[0].kind==TokenKind::$k && &input[0].raw[..]==$c {
Ok((&input[1..], &input[0].raw[..]))
} else {
Err(crate::nom_crate::Err::Error(error_position!($i, ErrorKind::Custom(crate::Error::ExactToken(TokenKind::$k,$c)))))
Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k,$c)).into()))
}
}
}
});
);
macro_rules! typed_token (
($i:expr, $k:ident) => ({
if $i.is_empty() {
let res: CResult<'_, &[u8]> = Err(crate::nom_crate::Err::Incomplete(Needed::Size(1)));
($k:ident) => ({
move |input: &[Token]| {
if input.is_empty() {
let res: CResult<'_, &[u8]> = Err(nom::Err::Incomplete(Needed::Size(1)));
res
} else {
if $i[0].kind==TokenKind::$k {
Ok((&$i[1..], &$i[0].raw[..]))
if input[0].kind==TokenKind::$k {
Ok((&input[1..], &input[0].raw[..]))
} else {
Err(Err::Error(error_position!($i, ErrorKind::Custom(crate::Error::TypedToken(TokenKind::$k)))))
Err(crate::nom::Err::Error((input, crate::ErrorKind::TypedToken(TokenKind::$k)).into()))
}
}
}
});
);
#[allow(unused_macros)]
macro_rules! any_token (
($i:expr,) => ({
if $i.is_empty() {
let res: CResult<'_, &Token> = Err(::nom_crate::Err::Incomplete(Needed::Size(1)));
res
} else {
Ok((&$i[1..], &$i[0]))
}
});
);
/// Consumes exactly one token of any kind and returns a reference to it.
///
/// Reports `Incomplete(Needed::Size(1))` when the input is empty.
#[allow(dead_code)]
fn any_token(input: &[Token]) -> CResult<'_, &Token> {
    match input.split_first() {
        Some((first, rest)) => Ok((rest, first)),
        None => Err(crate::nom::Err::Incomplete(Needed::Size(1))),
    }
}
macro_rules! p (
($i:expr, $c:expr) => (exact_token!($i,Punctuation,$c.as_bytes()))
);
/// Builds a parser that accepts exactly one `Punctuation` token whose raw
/// bytes equal `c`, yielding those bytes.
///
/// This is `exact_token!` specialised to punctuation tokens.
fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
exact_token!(Punctuation, c.as_bytes())
}
macro_rules! one_of_punctuation (
($i:expr, $c:expr) => ({
if $i.is_empty() {
let min = $c.iter().map(|opt|opt.len()).min().expect("at least one option");
let res: CResult<'_, &[u8]> = Err(crate::nom_crate::Err::Incomplete(Needed::Size(min)));
res
} else {
if $i[0].kind==TokenKind::Punctuation && $c.iter().any(|opt|opt.as_bytes()==&$i[0].raw[..]) {
Ok((&$i[1..], &$i[0].raw[..]))
} else {
const VALID_VALUES: &'static [&'static str] = &$c;
Err(Err::Error(error_position!($i, ErrorKind::Custom(crate::Error::ExactTokens(TokenKind::Punctuation,VALID_VALUES)))))
}
}
});
);
/// equivalent to nom's complete! macro, but adds the custom error type
#[macro_export]
macro_rules! comp (
($i:expr, $submac:ident!( $($args:tt)* )) => (
{
use crate::nom_crate::lib::std::result::Result::*;
use crate::nom_crate::{Err,ErrorKind};
let i_ = $i.clone();
match $submac!(i_, $($args)*) {
Err(Err::Incomplete(_)) => {
Err(Err::Error(error_position!($i, ErrorKind::Complete::<crate::Error>)))
},
rest => rest
}
}
);
($i:expr, $f:expr) => (
comp!($i, call!($f));
);
);
fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
move |input| {
if input.is_empty() {
let min = c
.iter()
.map(|opt| opt.len())
.min()
.expect("at least one option");
Err(crate::nom::Err::Incomplete(Needed::Size(min)))
} else if input[0].kind == TokenKind::Punctuation
&& c.iter().any(|opt| opt.as_bytes() == &input[0].raw[..])
{
Ok((&input[1..], &input[0].raw[..]))
} else {
Err(crate::nom::Err::Error(
(
input,
crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c),
)
.into(),
))
}
}
}
// ==================================================
// ============= Numeric expressions ================
@ -297,120 +293,121 @@ fn unary_op(input: (&[u8], EvalResult)) -> Option<EvalResult> {
}
}
macro_rules! numeric (
($i:expr, $submac:ident!( $($args:tt)* )) => (map_opt!($i,$submac!($($args)*),EvalResult::as_numeric));
($i:expr, $f:expr ) => (map_opt!($i,call!($f),EvalResult::as_numeric));
);
fn numeric<I: Clone, E: nom::error::ParseError<I>, F>(
f: F,
) -> impl Fn(I) -> nom::IResult<I, EvalResult, E>
where
F: Fn(I) -> nom::IResult<I, EvalResult, E>,
{
nom::combinator::map_opt(f, EvalResult::as_numeric)
}
impl<'a> PRef<'a> {
method!(unary<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
alt!(
delimited!(p!("("),call_m!(self.numeric_expr),p!(")")) |
numeric!(call_m!(self.literal)) |
numeric!(call_m!(self.identifier)) |
map_opt!(pair!(one_of_punctuation!(["+", "-", "~"]),call_m!(self.unary)),unary_op)
)
);
/// Parses and evaluates a unary-level operand.
///
/// The alternatives are tried in order: a parenthesised numeric expression,
/// a literal accepted by `numeric`, an identifier accepted by `numeric`, or
/// one of the prefix operators `+`, `-`, `~` followed by another unary
/// operand, combined through `unary_op`.
fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
alt((
delimited(p("("), |i| self.numeric_expr(i), p(")")),
numeric(|i| self.literal(i)),
numeric(|i| self.identifier(i)),
map_opt(
pair(one_of_punctuation(&["+", "-", "~"][..]), |i| self.unary(i)),
unary_op,
),
))(input)
}
method!(mul_div_rem<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
do_parse!(
acc: call_m!(self.unary) >>
res: fold_many0!(
pair!(comp!(one_of_punctuation!(["*", "/", "%"])), call_m!(self.unary)),
acc,
|mut acc, (op, val): (&[u8], EvalResult)| {
match op[0] as char {
'*' => acc *= &val,
'/' => acc /= &val,
'%' => acc %= &val,
_ => unreachable!()
};
acc
}
) >> (res)
)
);
/// Parses a chain of `*`, `/` and `%` operations over unary operands.
///
/// The first operand seeds the accumulator; every following
/// `(operator, operand)` pair is folded into it from left to right.
fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
let (input, acc) = self.unary(input)?;
fold_many0(
// `complete` turns "ran out of tokens while looking for an operator" into
// an ordinary error, which ends the fold instead of reporting `Incomplete`.
pair(complete(one_of_punctuation(&["*", "/", "%"][..])), |i| {
self.unary(i)
}),
acc,
|mut acc, (op, val): (&[u8], EvalResult)| {
match op[0] as char {
'*' => acc *= &val,
'/' => acc /= &val,
'%' => acc %= &val,
// `one_of_punctuation` only lets the three operators above through.
_ => unreachable!(),
};
acc
},
)(input)
}
method!(add_sub<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
do_parse!(
acc: call_m!(self.mul_div_rem) >>
res: fold_many0!(
pair!(comp!(one_of_punctuation!(["+", "-"])), call_m!(self.mul_div_rem)),
acc,
|mut acc, (op, val): (&[u8], EvalResult)| {
match op[0] as char {
'+' => acc += &val,
'-' => acc -= &val,
_ => unreachable!()
};
acc
}
) >> (res)
)
);
/// Parses a chain of `+` and `-` operations over `mul_div_rem` operands.
///
/// The first operand seeds the accumulator; every following
/// `(operator, operand)` pair is folded into it from left to right.
fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
let (input, acc) = self.mul_div_rem(input)?;
fold_many0(
// `complete` makes a missing operator at end of input end the fold rather
// than surface as `Incomplete`.
pair(complete(one_of_punctuation(&["+", "-"][..])), |i| {
self.mul_div_rem(i)
}),
acc,
|mut acc, (op, val): (&[u8], EvalResult)| {
match op[0] as char {
'+' => acc += &val,
'-' => acc -= &val,
// `one_of_punctuation` only lets the two operators above through.
_ => unreachable!(),
};
acc
},
)(input)
}
method!(shl_shr<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
numeric!(do_parse!(
acc: call_m!(self.add_sub) >>
res: fold_many0!(
pair!(comp!(one_of_punctuation!(["<<", ">>"])), call_m!(self.add_sub)),
acc,
|mut acc, (op, val): (&[u8], EvalResult)| {
match op {
b"<<" => acc <<= &val,
b">>" => acc >>= &val,
_ => unreachable!()
};
acc
}
) >> (res)
))
);
/// Parses a chain of `<<` and `>>` operations over `add_sub` operands.
///
/// The folded result is passed through `numeric`, so a non-numeric value is
/// rejected even when no shift operator follows the first operand.
fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
let (input, acc) = self.add_sub(input)?;
numeric(fold_many0(
// `complete` makes a missing operator at end of input end the fold rather
// than surface as `Incomplete`.
pair(complete(one_of_punctuation(&["<<", ">>"][..])), |i| {
self.add_sub(i)
}),
acc,
|mut acc, (op, val): (&[u8], EvalResult)| {
match op {
b"<<" => acc <<= &val,
b">>" => acc >>= &val,
// `one_of_punctuation` only lets the two operators above through.
_ => unreachable!(),
};
acc
},
))(input)
}
method!(and<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
numeric!(do_parse!(
acc: call_m!(self.shl_shr) >>
res: fold_many0!(
preceded!(comp!(p!("&")), call_m!(self.shl_shr)),
acc,
|mut acc, val: EvalResult| {
acc &= &val;
acc
}
) >> (res)
))
);
/// Parses a chain of bitwise `&` operations over `shl_shr` operands.
///
/// Operands are folded left to right with `&=`; the final value must pass
/// `numeric`.
fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
let (input, acc) = self.shl_shr(input)?;
numeric(fold_many0(
// `complete` makes a missing `&` at end of input end the fold rather than
// surface as `Incomplete`.
preceded(complete(p("&")), |i| self.shl_shr(i)),
acc,
|mut acc, val: EvalResult| {
acc &= &val;
acc
},
))(input)
}
method!(xor<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
numeric!(do_parse!(
acc: call_m!(self.and) >>
res: fold_many0!(
preceded!(comp!(p!("^")), call_m!(self.and)),
acc,
|mut acc, val: EvalResult| {
acc ^= &val;
acc
}
) >> (res)
))
);
/// Parses a chain of bitwise `^` operations over `and` operands.
///
/// Operands are folded left to right with `^=`; the final value must pass
/// `numeric`.
fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
let (input, acc) = self.and(input)?;
numeric(fold_many0(
// `complete` makes a missing `^` at end of input end the fold rather than
// surface as `Incomplete`.
preceded(complete(p("^")), |i| self.and(i)),
acc,
|mut acc, val: EvalResult| {
acc ^= &val;
acc
},
))(input)
}
method!(or<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
numeric!(do_parse!(
acc: call_m!(self.xor) >>
res: fold_many0!(
preceded!(comp!(p!("|")), call_m!(self.xor)),
acc,
|mut acc, val: EvalResult| {
acc |= &val;
acc
}
) >> (res)
))
);
/// Parses a chain of bitwise `|` operations over `xor` operands.
///
/// Operands are folded left to right with `|=`; the final value must pass
/// `numeric`.
fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
let (input, acc) = self.xor(input)?;
numeric(fold_many0(
// `complete` makes a missing `|` at end of input end the fold rather than
// surface as `Incomplete`.
preceded(complete(p("|")), |i| self.xor(i)),
acc,
|mut acc, val: EvalResult| {
acc |= &val;
acc
},
))(input)
}
#[inline(always)]
fn numeric_expr(self, input: &[Token]) -> (Self, CResult<'_, EvalResult>) {
/// Entry point for numeric expressions.
///
/// Delegates to `or`, the outermost level of the operator chain
/// (`or` -> `xor` -> `and` -> `shl_shr` -> `add_sub` -> `mul_div_rem` -> `unary`).
fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
self.or(input)
}
}
@ -420,89 +417,87 @@ impl<'a> PRef<'a> {
// =======================================================
impl<'a> PRef<'a> {
fn identifier(self, input: &[Token]) -> (Self, CResult<'_, EvalResult>) {
(
self,
match input.split_first() {
None => Err(Err::Incomplete(Needed::Size(1))),
Some((
&Token {
kind: TokenKind::Identifier,
ref raw,
},
rest,
)) => {
if let Some(r) = self.identifiers.get(&raw[..]) {
Ok((rest, r.clone()))
} else {
Err(Err::Error(error_position!(
input,
ErrorKind::Custom(crate::Error::UnknownIdentifier)
)))
}
}
Some(_) => Err(Err::Error(error_position!(
input,
ErrorKind::Custom(crate::Error::TypedToken(TokenKind::Identifier))
))),
},
)
}
fn literal(self, input: &[Token]) -> (Self, CResult<'_, EvalResult>) {
(
self,
match input.split_first() {
None => Err(Err::Incomplete(Needed::Size(1))),
Some((
&Token {
kind: TokenKind::Literal,
ref raw,
},
rest,
)) => match literal::parse(raw) {
Ok((_, result)) => Ok((rest, result)),
_ => Err(Err::Error(error_position!(
input,
ErrorKind::Custom(crate::Error::InvalidLiteral)
))),
fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
match input.split_first() {
None => Err(Err::Incomplete(Needed::Size(1))),
Some((
&Token {
kind: TokenKind::Identifier,
ref raw,
},
Some(_) => Err(Err::Error(error_position!(
input,
ErrorKind::Custom(crate::Error::TypedToken(TokenKind::Literal))
))),
},
)
rest,
)) => {
if let Some(r) = self.identifiers.get(&raw[..]) {
Ok((rest, r.clone()))
} else {
Err(Err::Error(
(input, crate::ErrorKind::UnknownIdentifier).into(),
))
}
}
Some(_) => Err(Err::Error(
(input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(),
)),
}
}
method!(string<PRef<'a>,&[Token],Vec<u8>,crate::Error>, mut self,
alt!(
map_opt!(call_m!(self.literal),EvalResult::as_str) |
map_opt!(call_m!(self.identifier),EvalResult::as_str)
)
);
fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
match input.split_first() {
None => Err(Err::Incomplete(Needed::Size(1))),
Some((
&Token {
kind: TokenKind::Literal,
ref raw,
},
rest,
)) => match literal::parse(raw) {
Ok((_, result)) => Ok((rest, result)),
_ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())),
},
Some(_) => Err(Err::Error(
(input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(),
)),
}
}
/// Parses one token that evaluates to a string and returns its bytes.
///
/// Accepts either a literal or a known identifier, provided the resulting
/// value converts through `EvalResult::as_str`.
fn string(self, input: &'_ [Token]) -> CResult<'_, Vec<u8>> {
alt((
map_opt(|i| self.literal(i), EvalResult::as_str),
map_opt(|i| self.identifier(i), EvalResult::as_str),
))(input)
// Convert the combinator's error into this crate's `Error` type.
.to_cexpr_result()
}
// "string1" "string2" etc...
method!(concat_str<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
map!(
pair!(call_m!(self.string),many0!(comp!(call_m!(self.string)))),
|(first,v)| Vec::into_iter(v).fold(first,|mut s,elem|{Vec::extend_from_slice(&mut s,Vec::<u8>::as_slice(&elem));s}).into()
)
);
/// Parses one or more adjacent strings and concatenates their bytes into a
/// single value.
fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
map(
// One mandatory string followed by any number of further strings;
// `complete` lets the repetition stop cleanly at the end of the input.
pair(|i| self.string(i), many0(complete(|i| self.string(i)))),
|(first, v)| {
// Append every following string to the first one, in order.
Vec::into_iter(v)
.fold(first, |mut s, elem| {
Vec::extend_from_slice(&mut s, Vec::<u8>::as_slice(&elem));
s
})
.into()
},
)(input)
// Convert the combinator's error into this crate's `Error` type.
.to_cexpr_result()
}
method!(expr<PRef<'a>,&[Token],EvalResult,crate::Error>, mut self,
alt!(
call_m!(self.numeric_expr) |
delimited!(p!("("),call_m!(self.expr),p!(")")) |
call_m!(self.concat_str) |
call_m!(self.literal) |
call_m!(self.identifier)
)
);
/// Parses and evaluates a full expression.
///
/// The alternatives are tried in order: a numeric expression, a
/// parenthesised expression, a string concatenation, a lone literal, and
/// finally a lone identifier.
fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
alt((
|i| self.numeric_expr(i),
delimited(p("("), |i| self.expr(i), p(")")),
|i| self.concat_str(i),
|i| self.literal(i),
|i| self.identifier(i),
))(input)
// Convert the combinator's error into this crate's `Error` type.
.to_cexpr_result()
}
method!(macro_definition<PRef<'a>,&[Token],(&[u8],EvalResult),crate::Error>, mut self,
pair!(typed_token!(Identifier),call_m!(self.expr))
);
/// Parses an identifier token followed by an expression, returning the
/// identifier's raw bytes together with the evaluated expression.
fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
pair(typed_token!(Identifier), |i| self.expr(i))(input)
}
}
impl<'a> ::std::ops::Deref for PRef<'a> {
@ -521,9 +516,7 @@ impl<'ident> IdentifierParser<'ident> {
/// a known identifier is encountered during parsing, it is substituted
/// for the value specified.
pub fn new(identifiers: &HashMap<Vec<u8>, EvalResult>) -> IdentifierParser<'_> {
IdentifierParser {
identifiers: identifiers,
}
IdentifierParser { identifiers }
}
/// Parse and evaluate an expression from a list of tokens.
@ -531,7 +524,7 @@ impl<'ident> IdentifierParser<'ident> {
/// Returns an error if the input is not a valid expression or if the token
/// stream contains comments, keywords or unknown identifiers.
pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> {
self.as_ref().expr(input).1
self.as_ref().expr(input)
}
/// Parse and evaluate a macro definition from a list of tokens.
@ -555,7 +548,7 @@ impl<'ident> IdentifierParser<'ident> {
/// #define NEGATIVE_THREE(IDENTIFIER) -3
/// ```
pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> {
crate::assert_full_parse(self.as_ref().macro_definition(input).1)
crate::assert_full_parse(self.as_ref().macro_definition(input))
}
}
@ -563,7 +556,7 @@ impl<'ident> IdentifierParser<'ident> {
///
/// Returns an error if the input is not a valid expression or if the token
/// stream contains comments, keywords or identifiers.
pub fn expr<'a>(input: &'a [Token]) -> CResult<'a, EvalResult> {
pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> {
IdentifierParser::new(&HashMap::new()).expr(input)
}
@ -575,11 +568,10 @@ pub fn expr<'a>(input: &'a [Token]) -> CResult<'a, EvalResult> {
/// Returns an error if the replacement is not a valid expression, if called
/// on a function-like macro, or if the token stream contains comments,
/// keywords or identifiers.
pub fn macro_definition<'a>(input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> {
pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
IdentifierParser::new(&HashMap::new()).macro_definition(input)
}
named_attr!(
/// Parse a functional macro declaration from a list of tokens.
///
/// Returns the identifier for the macro and the argument list (in order). The
@ -619,13 +611,13 @@ named_attr!(
/// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap();
/// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec()));
/// ```
,pub fn_macro_declaration<&[Token],(&[u8],Vec<&[u8]>),crate::Error>,
pair!(
typed_token!(Identifier),
delimited!(
p!("("),
separated_list!(p!(","), typed_token!(Identifier)),
p!(")")
)
)
);
pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> {
// The macro name, then a parenthesised, comma-separated list of identifier
// tokens naming the arguments.
pair(
typed_token!(Identifier),
delimited(
p("("),
separated_list(p(","), typed_token!(Identifier)),
p(")"),
),
)(input)
}

View File

@ -5,25 +5,29 @@
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! A C expression parser and evaluator.
//!
//! This crate provides methods for parsing and evaluating simple C expressions. In general, the
//! crate can handle most arithmetic expressions that would appear in macros or the definition of
//! constants, as well as string and character constants.
//!
//! The main entry point is [`token::parse`], which parses a byte string and returns its
//! evaluated value.
#![warn(rust_2018_idioms)]
#![warn(missing_docs)]
#![allow(deprecated)]
#[macro_use]
extern crate nom as nom_crate;
pub mod nom {
//! nom's result types, re-exported.
pub use crate::nom_crate::{Err, ErrorKind, IResult, Needed};
pub use nom::{error::ErrorKind, Err, IResult, Needed};
}
pub mod expr;
pub mod literal;
pub mod token;
use crate::nom::*;
#[derive(Debug)]
/// Parsing errors specific to C parsing
pub enum Error {
#[derive(Debug)]
pub enum ErrorKind {
/// Expected the specified token
ExactToken(token::Kind, &'static [u8]),
/// Expected one of the specified tokens
@ -39,35 +43,98 @@ pub enum Error {
InvalidLiteral,
/// A full parse was requested, but data was left over after parsing finished.
Partial,
/// An error occurred in an underlying nom parser.
Parser(nom::ErrorKind),
}
impl From<u32> for Error {
fn from(_: u32) -> Self {
Error::InvalidLiteral
// Lets a plain nom error kind be carried inside this crate's `ErrorKind`
// by wrapping it in the `Parser` variant.
impl From<nom::ErrorKind> for ErrorKind {
fn from(k: nom::ErrorKind) -> Self {
ErrorKind::Parser(k)
}
}
macro_rules! identity (
($i:expr,$e:expr) => ($e);
);
// Every numeric error code collapses to `InvalidLiteral`; the code itself is
// discarded.
// NOTE(review): presumably kept for parsers that used `u32` error codes
// before the nom 5 upgrade; confirm it is still needed.
impl From<u32> for ErrorKind {
fn from(_: u32) -> Self {
ErrorKind::InvalidLiteral
}
}
/// Parsing errors specific to C parsing.
///
/// This is a superset of `(I, nom::ErrorKind)` that includes the additional errors specified by
/// [`ErrorKind`].
#[derive(Debug)]
pub struct Error<I> {
/// The remainder of the input stream at the time of the error.
pub input: I,
/// The error that occurred.
pub error: ErrorKind,
}
impl<I> From<(I, nom::ErrorKind)> for Error<I> {
fn from(e: (I, nom::ErrorKind)) -> Self {
Self::from((e.0, ErrorKind::from(e.1)))
}
}
impl<I> From<(I, ErrorKind)> for Error<I> {
fn from(e: (I, ErrorKind)) -> Self {
Self {
input: e.0,
error: e.1,
}
}
}
// Makes `Error` usable as the error type of nom combinators.
impl<I> ::nom::error::ParseError<I> for Error<I> {
// Records the input position and wraps the nom kind in `ErrorKind::Parser`.
fn from_error_kind(input: I, kind: nom::ErrorKind) -> Self {
Self {
input,
error: kind.into(),
}
}
// No error chain is kept: the new position and kind are dropped and the
// already-recorded error is returned unchanged.
fn append(_: I, _: nom::ErrorKind, other: Self) -> Self {
other
}
}
// in lieu of https://github.com/Geal/nom/issues/1010
/// Crate-private helper for converting a nom result with some other error
/// type into one whose error type is this crate's `Error<I>`.
trait ToCexprResult<I, O> {
/// Returns the same result with its error converted to `Error<I>`.
fn to_cexpr_result(self) -> nom::IResult<I, O, Error<I>>;
}
impl<I, O, E> ToCexprResult<I, O> for nom::IResult<I, O, E>
where
Error<I>: From<E>,
{
fn to_cexpr_result(self) -> nom::IResult<I, O, Error<I>> {
match self {
Ok(v) => Ok(v),
Err(nom::Err::Incomplete(n)) => Err(nom::Err::Incomplete(n)),
Err(nom::Err::Error(e)) => Err(nom::Err::Error(e.into())),
Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(e.into())),
}
}
}
/// If the input result indicates a successful parse, but there is data left,
/// return an `ErrorKind::Partial` error instead.
pub fn assert_full_parse<I, O, E>(result: IResult<&[I], O, E>) -> IResult<&[I], O, crate::Error>
pub fn assert_full_parse<'i, I: 'i, O, E>(
result: nom::IResult<&'i [I], O, E>,
) -> nom::IResult<&'i [I], O, Error<&'i [I]>>
where
Error: From<E>,
Error<&'i [I]>: From<E>,
{
match fix_error!((), crate::Error, identity!(result)) {
match result.to_cexpr_result() {
Ok((rem, output)) => {
if rem.len() == 0 {
if rem.is_empty() {
Ok((rem, output))
} else {
Err(Err::Error(error_position!(
rem,
ErrorKind::Custom(crate::Error::Partial)
)))
Err(nom::Err::Error((rem, ErrorKind::Partial).into()))
}
}
r => r,
Err(nom::Err::Incomplete(n)) => Err(nom::Err::Incomplete(n)),
Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(e)),
Err(nom::Err::Error(e)) => Err(nom::Err::Error(e)),
}
}

View File

@ -39,9 +39,17 @@
use std::char;
use std::str::{self, FromStr};
use crate::nom_crate::*;
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::character::complete::{char, one_of};
use nom::combinator::{complete, map, map_opt, opt, recognize};
use nom::multi::{fold_many0, many0, many1, many_m_n};
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::*;
use crate::expr::EvalResult;
use crate::ToCexprResult;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
/// Representation of a C character
@ -80,56 +88,61 @@ impl Into<Vec<u8>> for CChar {
}
/// ensures the child parser consumes the whole input
#[macro_export]
macro_rules! full (
($i: expr, $submac:ident!( $($args:tt)* )) => (
{
use crate::nom_crate::lib::std::result::Result::*;
let res = $submac!($i, $($args)*);
match res {
Ok((i, o)) => if i.len() == 0 {
Ok((i, o))
} else {
Err(crate::nom_crate::Err::Error(error_position!(i, crate::nom_crate::ErrorKind::Custom(42))))
},
r => r,
}
}
);
($i:expr, $f:ident) => (
full!($i, call!($f));
);
);
// ====================================================
// ======== macros that shouldn't be necessary ========
// ====================================================
macro_rules! force_type (
($input:expr,IResult<$i:ty,$o:ty,$e:ty>) => (Err::<($i,$o),Err<$i,$e>>(crate::nom_crate::Err::Error(error_position!($input, ErrorKind::Fix))))
);
pub fn full<I: Clone, O, E: From<nom::error::ErrorKind>, F>(
f: F,
) -> impl Fn(I) -> nom::IResult<I, O, (I, E)>
where
I: nom::InputLength,
F: Fn(I) -> nom::IResult<I, O, (I, E)>,
{
move |input| {
let res = f(input);
match res {
Ok((i, o)) => {
if i.input_len() == 0 {
Ok((i, o))
} else {
Err(nom::Err::Error((i, nom::error::ErrorKind::Complete.into())))
}
}
r => r,
}
}
}
// =================================
// ======== matching digits ========
// =================================
macro_rules! byte (
($i:expr, $($p: pat)|* ) => ({
match $i.split_first() {
$(Some((&c @ $p,rest)))|* => Ok::<(&[_],u8),crate::nom_crate::Err<&[_],u32>>((rest,c)),
Some(_) => Err(crate::nom_crate::Err::Error(error_position!($i, ErrorKind::OneOf))),
None => Err(crate::nom_crate::Err::Incomplete(Needed::Size(1))),
}
})
);
macro_rules! byte {
($($p: pat)|* ) => {{
fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> {
match i.split_first() {
$(Some((&c @ $p,rest)))|* => Ok((rest,c)),
Some(_) => Err(nom::Err::Error((i, nom::error::ErrorKind::OneOf))),
None => Err(nom::Err::Incomplete(Needed::Size(1))),
}
}
named!(binary<u8>, byte!(b'0'..=b'1'));
named!(octal<u8>, byte!(b'0'..=b'7'));
named!(decimal<u8>, byte!(b'0'..=b'9'));
named!(
hexadecimal<u8>,
byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F')
);
parser
}}
}
/// Parses a single binary digit (`0` or `1`) and returns that byte.
fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> {
byte!(b'0'..=b'1')(i)
}
/// Parses a single octal digit (`0` through `7`) and returns that byte.
fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> {
byte!(b'0'..=b'7')(i)
}
/// Parses a single decimal digit (`0` through `9`) and returns that byte.
fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
byte!(b'0'..=b'9')(i)
}
/// Parses a single hexadecimal digit (`0`-`9`, `a`-`f` or `A`-`F`) and
/// returns that byte.
fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> {
byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F')(i)
}
// ========================================
// ======== characters and strings ========
@ -166,62 +179,60 @@ fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
.map(CChar::Char)
}
named!(
escaped_char<CChar>,
preceded!(
complete!(char!('\\')),
alt_complete!(
map!(one_of!(r#"'"?\"#), CChar::Char)
| map!(one_of!("abfnrtv"), escape2char)
| map_opt!(many_m_n!(1, 3, octal), |v| c_raw_escape(v, 8))
| map_opt!(
preceded!(char!('x'), many1!(hexadecimal)),
|v| c_raw_escape(v, 16)
)
| map_opt!(
preceded!(char!('u'), many_m_n!(4, 4, hexadecimal)),
c_unicode_escape
)
| map_opt!(
preceded!(char!('U'), many_m_n!(8, 8, hexadecimal)),
c_unicode_escape
)
)
)
);
named!(
c_width_prefix,
alt!(tag!("u8") | tag!("u") | tag!("U") | tag!("L"))
);
named!(
c_char<CChar>,
delimited!(
terminated!(opt!(c_width_prefix), char!('\'')),
alt!(escaped_char | map!(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from)),
char!('\'')
)
);
named!(
c_string<Vec<u8>>,
delimited!(
alt!(preceded!(c_width_prefix, char!('"')) | char!('"')),
fold_many0!(
alt!(
map!(escaped_char, |c: CChar| c.into())
| map!(is_not!([b'\\', b'"']), |c: &[u8]| c.into())
/// Parses a backslash escape sequence and returns the character it denotes.
///
/// After the leading `\`, the alternatives are tried in order: a quote,
/// question mark or backslash taken literally; one of `abfnrtv` translated by
/// `escape2char`; one to three octal digits; `x` followed by one or more
/// hexadecimal digits; `u` followed by exactly four hexadecimal digits; and
/// `U` followed by exactly eight hexadecimal digits.
fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
preceded(
char('\\'),
alt((
map(one_of(r#"'"?\"#), CChar::Char),
map(one_of("abfnrtv"), escape2char),
map_opt(many_m_n(1, 3, octal), |v| c_raw_escape(v, 8)),
map_opt(preceded(char('x'), many1(hexadecimal)), |v| {
c_raw_escape(v, 16)
}),
map_opt(
preceded(char('u'), many_m_n(4, 4, hexadecimal)),
c_unicode_escape,
),
map_opt(
preceded(char('U'), many_m_n(8, 8, hexadecimal)),
c_unicode_escape,
),
)),
)(i)
}
/// Recognises a character/string width prefix: `u8`, `u`, `U` or `L`.
fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> {
// `u8` is listed before `u` because `alt` takes the first alternative that
// matches, and `u` alone would otherwise match the start of `u8`.
alt((tag("u8"), tag("u"), tag("U"), tag("L")))(i)
}
/// Parses a character literal: an optional width prefix, an opening `'`,
/// either an escape sequence or any single byte other than a backslash, and
/// a closing `'`.
fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> {
delimited(
terminated(opt(c_width_prefix), char('\'')),
alt((
escaped_char,
map(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from),
)),
char('\''),
)(i)
}
fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec<u8>> {
delimited(
alt((preceded(c_width_prefix, char('"')), char('"'))),
fold_many0(
alt((
map(escaped_char, |c: CChar| c.into()),
map(is_not([b'\\', b'"']), |c: &[u8]| c.into()),
)),
Vec::new(),
|mut v: Vec<u8>, res: Vec<u8>| {
v.extend_from_slice(&res);
v
}
},
),
char!('"')
)
);
char('"'),
)(i)
}
// ================================
// ======== parse integers ========
@ -241,100 +252,110 @@ fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> {
}
}
named!(
c_int<i64>,
map!(
terminated!(
alt_complete!(
map_opt!(preceded!(tag!("0x"), many1!(complete!(hexadecimal))), |v| {
fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> {
map(
terminated(
alt((
map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), |v| {
c_int_radix(v, 16)
}) | map_opt!(preceded!(tag!("0X"), many1!(complete!(hexadecimal))), |v| {
}),
map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), |v| {
c_int_radix(v, 16)
}) | map_opt!(preceded!(tag!("0b"), many1!(complete!(binary))), |v| {
}),
map_opt(preceded(tag("0b"), many1(complete(binary))), |v| {
c_int_radix(v, 2)
}) | map_opt!(preceded!(tag!("0B"), many1!(complete!(binary))), |v| {
}),
map_opt(preceded(tag("0B"), many1(complete(binary))), |v| {
c_int_radix(v, 2)
}) | map_opt!(preceded!(char!('0'), many1!(complete!(octal))), |v| {
}),
map_opt(preceded(char('0'), many1(complete(octal))), |v| {
c_int_radix(v, 8)
}) | map_opt!(many1!(complete!(decimal)), |v| c_int_radix(v, 10))
| force_type!(IResult<_, _, u32>)
),
opt!(take_ul)
}),
map_opt(many1(complete(decimal)), |v| c_int_radix(v, 10)),
|input| Err(crate::nom::Err::Error((input, crate::nom::ErrorKind::Fix))),
)),
opt(take_ul),
),
|i| i as i64
)
);
|i| i as i64,
)(i)
}
// ==============================
// ======== parse floats ========
// ==============================
named!(float_width<u8>, complete!(byte!(b'f' | b'l' | b'F' | b'L')));
named!(
float_exp<(Option<u8>, Vec<u8>)>,
preceded!(
byte!(b'e' | b'E'),
pair!(opt!(byte!(b'-' | b'+')), many1!(complete!(decimal)))
)
);
/// Parses a floating-point suffix byte: `f`, `l`, `F` or `L`.
///
/// Wrapped in `complete` so that running out of input is reported as an
/// error rather than `Incomplete`.
fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> {
nom::combinator::complete(byte!(b'f' | b'l' | b'F' | b'L'))(i)
}
named!(
c_float<f64>,
map_opt!(
alt!(
terminated!(
recognize!(tuple!(
many1!(complete!(decimal)),
/// Parses a float exponent: `e` or `E`, an optional sign, then one or more
/// decimal digits. Returns the sign byte (if any) and the digit bytes.
fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option<u8>, Vec<u8>)> {
preceded(
byte!(b'e' | b'E'),
pair(opt(byte!(b'-' | b'+')), many1(complete(decimal))),
)(i)
}
fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> {
map_opt(
alt((
terminated(
recognize(tuple((
many1(complete(decimal)),
byte!(b'.'),
many0!(complete!(decimal))
)),
opt!(float_width)
) | terminated!(
recognize!(tuple!(
many0!(complete!(decimal)),
many0(complete(decimal)),
))),
opt(float_width),
),
terminated(
recognize(tuple((
many0(complete(decimal)),
byte!(b'.'),
many1!(complete!(decimal))
)),
opt!(float_width)
) | terminated!(
recognize!(tuple!(
many0!(complete!(decimal)),
opt!(byte!(b'.')),
many1!(complete!(decimal)),
float_exp
)),
opt!(float_width)
) | terminated!(
recognize!(tuple!(
many1!(complete!(decimal)),
opt!(byte!(b'.')),
many0!(complete!(decimal)),
float_exp
)),
opt!(float_width)
) | terminated!(recognize!(many1!(complete!(decimal))), float_width)
),
|v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok())
)
);
many1(complete(decimal)),
))),
opt(float_width),
),
terminated(
recognize(tuple((
many0(complete(decimal)),
opt(byte!(b'.')),
many1(complete(decimal)),
float_exp,
))),
opt(float_width),
),
terminated(
recognize(tuple((
many1(complete(decimal)),
opt(byte!(b'.')),
many0(complete(decimal)),
float_exp,
))),
opt(float_width),
),
terminated(recognize(many1(complete(decimal))), float_width),
)),
|v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()),
)(i)
}
// ================================
// ======== main interface ========
// ================================
named!(one_literal<&[u8],EvalResult,crate::Error>,
fix_error!(crate::Error,alt_complete!(
map!(full!(c_char),EvalResult::Char) |
map!(full!(c_int),|i|EvalResult::Int(::std::num::Wrapping(i))) |
map!(full!(c_float),EvalResult::Float) |
map!(full!(c_string),EvalResult::Str)
))
);
/// Parses `input` as a single literal, trying character, integer, float and
/// string forms in that order.
///
/// Each alternative is wrapped in `full`, so it only matches when it
/// consumes the entire input.
fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
alt((
map(full(c_char), EvalResult::Char),
map(full(c_int), |i| EvalResult::Int(::std::num::Wrapping(i))),
map(full(c_float), EvalResult::Float),
map(full(c_string), EvalResult::Str),
))(input)
// Convert nom's plain `(input, kind)` error into this crate's `Error`.
.to_cexpr_result()
}
/// Parse a C literal.
///
/// The input must contain exactly the representation of a single literal
/// token, and in particular no whitespace or sign prefixes.
pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error> {
pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> {
// `assert_full_parse` turns a successful parse that leaves input behind
// into an `ErrorKind::Partial` error.
crate::assert_full_parse(one_literal(input))
}

View File

@ -10,6 +10,7 @@
//! This is designed to map onto a libclang CXToken.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum Kind {
Punctuation,
Keyword,
@ -18,16 +19,19 @@ pub enum Kind {
Comment,
}
/// A single token in a C expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
/// The type of this token.
pub kind: Kind,
/// The bytes that make up the token.
pub raw: Box<[u8]>,
}
impl<'a> From<(Kind, &'a [u8])> for Token {
fn from((kind, value): (Kind, &'a [u8])) -> Token {
Token {
kind: kind,
kind,
raw: value.to_owned().into_boxed_slice(),
}
}

View File

@ -235,7 +235,7 @@ fn file_visit_macros<F: FnMut(Vec<u8>, Vec<Token>)>(
let tu = {
let index = clang_createIndex(true as _, false as _);
let cfile = ffi::CString::new(file).unwrap();
let mut tu = mem::uninitialized();
let mut tu = mem::MaybeUninit::uninit();
assert!(
clang_parseTranslationUnit2(
index,
@ -245,12 +245,12 @@ fn file_visit_macros<F: FnMut(Vec<u8>, Vec<Token>)>(
ptr::null_mut(),
0,
CXTranslationUnit_DetailedPreprocessingRecord,
&mut tu
&mut *tu.as_mut_ptr()
) == CXError_Success,
"Failure reading test case {}",
file
);
tu
tu.assume_init()
};
visit_children(clang_getTranslationUnitCursor(tu), |cur, _parent| {
if cur.kind == CXCursor_MacroDefinition {
@ -308,8 +308,8 @@ fn fix_bug_9069() -> bool {
token_sets[0] != token_sets[1]
}
use std::sync::atomic::{AtomicBool, Ordering, ATOMIC_BOOL_INIT};
use std::sync::{Once, ONCE_INIT};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Once;
static CHECK_FIX: Once = Once::new();
static FIX: AtomicBool = AtomicBool::new(false);