commit 936c6939d80a9b14decb5b1191c64e713efa9310 Author: Jethro Beekman Date: Tue Jul 12 22:37:23 2016 -0700 version 0.1.0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a9d37c5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..213ab53 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "cexpr" +version = "0.1.0" +authors = ["Jethro Beekman "] +license = "Apache-2.0/MIT" +description = "A C expression parser and evaluator" +documentation = "https://jethrogb.github.io/rust-cexpr/cexpr" +repository = "https://github.com/jethrogb/rust-cexpr" +keywords = ["C","expression","parser"] + +[dependencies] +nom = { version = "^1", features = ["regexp","regexp_macros"] } +regex = "^0.1.56" # version from nom +lazy_static = "^0.1.15" # version from nom + +[dev-dependencies] +clang-sys = "0.7.0" diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..ed958e7 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +(C) Copyright 2016 Jethro G. Beekman + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/src/expr.rs b/src/expr.rs new file mode 100644 index 0000000..0926c37 --- /dev/null +++ b/src/expr.rs @@ -0,0 +1,474 @@ +// (C) Copyright 2016 Jethro G. 
Beekman +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +//! Evaluating C expressions from tokens. +//! +//! Numerical operators are supported. All numerical values are treated as +//! `i64` or `f64`. Type casting is not supported. `i64` are converted to +//! `f64` when used in conjunction with a `f64`. +//! +//! The `sizeof` operator is not supported. +//! +//! String concatenation is supported, but width prefixes are ignored; all +//! strings are treated as narrow strings. +//! +//! Use the `IdentifierParser` to substitute identifiers found in expressions. + +use std::collections::HashMap; +use std::ops::{AddAssign,BitAndAssign,BitOrAssign,BitXorAssign,DivAssign,MulAssign,RemAssign,ShlAssign,ShrAssign,SubAssign}; + +use literal::{self,CChar}; +use token::{Token,Kind as TokenKind}; +use nom_crate::*; + +/// Expression parser/evaluator that supports identifiers. +#[derive(Debug)] +pub struct IdentifierParser<'ident> { + identifiers: &'ident HashMap,EvalResult>, +} +#[derive(Copy,Clone)] +struct PRef<'a>(&'a IdentifierParser<'a>); + +pub type CResult<'a,R:'a> = IResult<&'a [Token],R,::Error>; + +/// The result of parsing a literal or evaluating an expression. +#[derive(Debug,Clone,PartialEq)] +pub enum EvalResult { + Int(i64), + Float(f64), + Char(CChar), + Str(Vec), + Invalid, +} + +macro_rules! 
result_opt ( + (fn $n:ident: $e:ident -> $t:ty) => ( + #[allow(dead_code)] + fn $n(self) -> Option<$t> { + if let EvalResult::$e(v) = self { + Some(v) + } else { + None + } + } + ); +); + +impl EvalResult { + result_opt!(fn as_int: Int -> i64); + result_opt!(fn as_float: Float -> f64); + result_opt!(fn as_char: Char -> CChar); + result_opt!(fn as_str: Str -> Vec); + + fn as_numeric(self) -> Option { + match self { + EvalResult::Int(_) | EvalResult::Float(_) => Some(self), + _ => None, + } + } +} + +impl From> for EvalResult { + fn from(s: Vec) -> EvalResult { + EvalResult::Str(s) + } +} + +// =========================================== +// ============= Clang tokens ================ +// =========================================== + +macro_rules! exact_token ( + ($i:expr, $k: ident, $c: expr) => ({ + if $i.is_empty() { + let res: CResult<&[u8]> = IResult::Incomplete(Needed::Size(1)); + res + } else { + if $i[0].kind==TokenKind::$k && &$i[0].raw[..]==$c { + IResult::Done(&$i[1..], &$i[0].raw[..]) + } else { + IResult::Error(Err::Position(ErrorKind::Custom(::Error::ExactToken(TokenKind::$k,$c)), $i)) + } + } + }); +); + +macro_rules! typed_token ( + ($i:expr, $k: ident) => ({ + if $i.is_empty() { + let res: CResult<&[u8]> = IResult::Incomplete(Needed::Size(1)); + res + } else { + if $i[0].kind==TokenKind::$k { + IResult::Done(&$i[1..], &$i[0].raw[..]) + } else { + IResult::Error(Err::Position(ErrorKind::Custom(::Error::TypedToken(TokenKind::$k)), $i)) + } + } + }); +); + +macro_rules! any_token ( + ($i:expr,) => ({ + if $i.is_empty() { + let res: CResult<&Token> = IResult::Incomplete(Needed::Size(1)); + res + } else { + IResult::Done(&$i[1..], &$i[0]) + } + }); +); + +macro_rules! 
p ( + ($i:expr, $c: expr) => (exact_token!($i,Punctuation,$c.as_bytes())) +); + +// ================================================== +// ============= Numeric expressions ================ +// ================================================== + +impl<'a> AddAssign<&'a EvalResult> for EvalResult { + fn add_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a), &Int(b)) => Int(a+b), + (&Float(a),&Int(b)) => Float(a+(b as f64)), + (&Int(a), &Float(b)) => Float(a as f64+b), + (&Float(a),&Float(b)) => Float(a+b), + _ => Invalid + }; + } +} +impl<'a> BitAndAssign<&'a EvalResult> for EvalResult { + fn bitand_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a),&Int(b)) => Int(a&b), + _ => Invalid + }; } +} +impl<'a> BitOrAssign<&'a EvalResult> for EvalResult { + fn bitor_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a),&Int(b)) => Int(a|b), + _ => Invalid + }; + } +} +impl<'a> BitXorAssign<&'a EvalResult> for EvalResult { + fn bitxor_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a),&Int(b)) => Int(a^b), + _ => Invalid + }; + } +} +impl<'a> DivAssign<&'a EvalResult> for EvalResult { + fn div_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a), &Int(b)) => Int(a/b), + (&Float(a),&Int(b)) => Float(a/(b as f64)), + (&Int(a), &Float(b)) => Float(a as f64/b), + (&Float(a),&Float(b)) => Float(a/b), + _ => Invalid + }; + } +} +impl<'a> MulAssign<&'a EvalResult> for EvalResult { + fn mul_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a), &Int(b)) => Int(a*b), + (&Float(a),&Int(b)) => Float(a*(b as f64)), + (&Int(a), &Float(b)) => Float(a as f64*b), + (&Float(a),&Float(b)) => Float(a*b), + _ => Invalid + }; + } +} +impl<'a> RemAssign<&'a 
EvalResult> for EvalResult { + fn rem_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a), &Int(b)) => Int(a%b), + (&Float(a),&Int(b)) => Float(a%(b as f64)), + (&Int(a), &Float(b)) => Float(a as f64%b), + (&Float(a),&Float(b)) => Float(a%b), + _ => Invalid + }; + } +} +impl<'a> ShlAssign<&'a EvalResult> for EvalResult { + fn shl_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a),&Int(b)) => Int(a< Invalid + }; + } +} +impl<'a> ShrAssign<&'a EvalResult> for EvalResult { + fn shr_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a),&Int(b)) => Int(a>>b), + _ => Invalid + }; + } +} +impl<'a> SubAssign<&'a EvalResult> for EvalResult { + fn sub_assign(&mut self, rhs: &'a EvalResult) { + use self::EvalResult::*; + *self=match (&*self,rhs) { + (&Int(a), &Int(b)) => Int(a-b), + (&Float(a),&Int(b)) => Float(a-(b as f64)), + (&Int(a), &Float(b)) => Float(a as f64-b), + (&Float(a),&Float(b)) => Float(a-b), + _ => Invalid + }; + } +} + +fn unary_op(input: (&[u8],EvalResult)) -> Option { + use self::EvalResult::*; + assert_eq!(input.0.len(),1); + match (input.0[0],input.1) { + (b'+',i) => Some(i), + (b'-',Int(i)) => Some(Int(-i)), + (b'-',Float(i)) => Some(Float(-i)), + (b'-',_) => unreachable!("non-numeric unary op"), + (b'~',Int(i)) => Some(Int(!i)), + (b'~',Float(_)) => None, + (b'~',_) => unreachable!("non-numeric unary op"), + _ => unreachable!("invalid unary op"), + } +} + +macro_rules! 
numeric ( + ($i:expr, $submac:ident!( $($args:tt)* )) => (map_opt!($i,$submac!($($args)*),EvalResult::as_numeric)); + ($i:expr, $f:expr ) => (map_opt!($i,call!($f),EvalResult::as_numeric)); +); + +impl<'a> PRef<'a> { + method!(unary,&[Token],EvalResult,::Error>, mut self, + alt!( + delimited!(p!("("),call_m!(self.numeric_expr),p!(")")) | + numeric!(call_m!(self.literal)) | + numeric!(call_m!(self.identifier)) | + map_opt!(pair!(alt!( p!("+") | p!("-") | p!("~") ),call_m!(self.unary)),unary_op) + ) + ); + + method!(mul_div_rem,&[Token],EvalResult,::Error>, mut self, + chain!( + mut acc: call_m!(self.unary) ~ + many0!(alt!( + tap!(mul: preceded!(p!("*"), call_m!(self.unary)) => acc *= &mul) | + tap!(div: preceded!(p!("/"), call_m!(self.unary)) => acc /= &div) | + tap!(rem: preceded!(p!("%"), call_m!(self.unary)) => acc %= &rem) + )), + || { return acc } + ) + ); + + method!(add_sub,&[Token],EvalResult,::Error>, mut self, + chain!( + mut acc: call_m!(self.mul_div_rem) ~ + many0!(alt!( + tap!(add: preceded!(p!("+"), call_m!(self.mul_div_rem)) => acc += &add) | + tap!(sub: preceded!(p!("-"), call_m!(self.mul_div_rem)) => acc -= &sub) + )), + || { return acc } + ) + ); + + method!(shl_shr,&[Token],EvalResult,::Error>, mut self, + numeric!(chain!( + mut acc: call_m!(self.add_sub) ~ + many0!(alt!( + tap!(shl: preceded!(p!("<<"), call_m!(self.add_sub)) => acc <<= &shl) | + tap!(shr: preceded!(p!(">>"), call_m!(self.add_sub)) => acc >>= &shr) + )), + || { return acc } + )) + ); + + method!(and,&[Token],EvalResult,::Error>, mut self, + numeric!(chain!( + mut acc: call_m!(self.shl_shr) ~ + many0!( + tap!(and: preceded!(p!("&"), call_m!(self.shl_shr)) => acc &= &and) + ), + || { return acc } + )) + ); + + method!(xor,&[Token],EvalResult,::Error>, mut self, + numeric!(chain!( + mut acc: call_m!(self.and) ~ + many0!( + tap!(xor: preceded!(p!("^"), call_m!(self.and)) => acc ^= &xor) + ), + || { return acc } + )) + ); + + method!(or,&[Token],EvalResult,::Error>, mut self, + 
numeric!(chain!( + mut acc: call_m!(self.xor) ~ + many0!( + tap!(or: preceded!(p!("|"), call_m!(self.xor)) => acc |= &or) + ), + || { return acc } + )) + ); + + #[inline(always)] + fn numeric_expr(self, input: &[Token]) -> (Self,CResult) { + self.or(input) + } +} + +// ======================================================= +// ============= Literals and identifiers ================ +// ======================================================= + +impl<'a> PRef<'a> { + fn identifier(self, input: &[Token]) -> (Self,CResult) { + (self,match input.split_first() { + None => + IResult::Incomplete(Needed::Size(1)), + Some((&Token{kind:TokenKind::Identifier,ref raw},rest)) => { + if let Some(r) = self.identifiers.get(&raw[..]) { + IResult::Done(rest, r.clone()) + } else { + IResult::Error(Err::Position(ErrorKind::Custom(::Error::UnknownIdentifier), input)) + } + }, + Some(_) => + IResult::Error(Err::Position(ErrorKind::Custom(::Error::TypedToken(TokenKind::Identifier)), input)), + }) + } + + fn literal(self, input: &[Token]) -> (Self,CResult) { + (self,match input.split_first() { + None => + IResult::Incomplete(Needed::Size(1)), + Some((&Token{kind:TokenKind::Literal,ref raw},rest)) => + match literal::parse(raw) { + IResult::Done(_,result) => IResult::Done(rest, result), + _ => IResult::Error(Err::Position(ErrorKind::Custom(::Error::InvalidLiteral), input)) + }, + Some(_) => + IResult::Error(Err::Position(ErrorKind::Custom(::Error::TypedToken(TokenKind::Literal)), input)), + }) + } + + method!(string,&[Token],Vec,::Error>, mut self, + alt!( + map_opt!(call_m!(self.literal),EvalResult::as_str) | + map_opt!(call_m!(self.identifier),EvalResult::as_str) + ) + ); + + // "string1" "string2" etc... 
+ method!(concat_str,&[Token],EvalResult,::Error>, mut self, + map!( + pair!(call_m!(self.string),many0!(call_m!(self.string))), + |(first,v)| Vec::into_iter(v).fold(first,|mut s,elem|{Vec::extend_from_slice(&mut s,Vec::::as_slice(&elem));s}).into() + ) + ); + + method!(expr,&[Token],EvalResult,::Error>, mut self, + alt!( + delimited!(p!("("),call_m!(self.expr),p!(")")) | + call_m!(self.numeric_expr) | + call_m!(self.concat_str) | + call_m!(self.literal) | + call_m!(self.identifier) + ) + ); + + method!(macro_definition,&[Token],(&[u8],EvalResult),::Error>, mut self, + pair!(typed_token!(Identifier),call_m!(self.expr)) + ); +} + +impl<'a> ::std::ops::Deref for PRef<'a> { + type Target=IdentifierParser<'a>; + fn deref(&self) -> &IdentifierParser<'a> { + self.0 + } +} + +impl<'ident> IdentifierParser<'ident> { + fn as_ref(&self) -> PRef { + PRef(self) + } + + /// Create a new `IdentifierParser` with a set of known identifiers. When + /// a known identifier is encountered during parsing, it is substituted + /// for the value specified. + pub fn new(identifiers: &HashMap,EvalResult>) -> IdentifierParser { + IdentifierParser{identifiers:identifiers} + } + + /// Parse and evaluate an expression of a list of tokens. + /// + /// Returns an error if the input is not a valid expression or if the token + /// stream contains comments, keywords or unknown identifiers. + pub fn expr<'a>(&self,input: &'a [Token]) -> CResult<'a,EvalResult> { + self.as_ref().expr(input).1 + } + + /// Parse and evaluate a macro definition from a list of tokens. + /// + /// Returns the identifier for the macro and its replacement evaluated as an + /// expression. The input should not include `#define`. + /// + /// Returns an error if the replacement is not a valid expression, if called + /// on most function-like macros, or if the token stream contains comments, + /// keywords or unknown identifiers. + /// + /// N.B. 
This is intended to fail on function-like macros, but if the + /// macro takes a single argument, the argument name is defined as an + /// identifier, and the macro otherwise parses as an expression, it will + /// return a result even on function-like macros. + /// + /// ```ignore + /// // will evaluate into IDENTIFIER + /// #define DELETE(IDENTIFIER) + /// // will evaluate into IDENTIFIER-3 + /// #define NEGATIVE_THREE(IDENTIFIER) -3 + /// ``` + pub fn macro_definition<'a>(&self,input: &'a [Token]) -> CResult<'a,(&'a [u8],EvalResult)> { + ::assert_full_parse(self.as_ref().macro_definition(input).1) + } +} + +/// Parse and evaluate an expression of a list of tokens. +/// +/// Returns an error if the input is not a valid expression or if the token +/// stream contains comments, keywords or identifiers. +pub fn expr<'a>(input: &'a [Token]) -> CResult<'a,EvalResult> { + IdentifierParser::new(&HashMap::new()).expr(input) +} + +/// Parse and evaluate a macro definition from a list of tokens. +/// +/// Returns the identifier for the macro and its replacement evaluated as an +/// expression. The input should not include `#define`. +/// +/// Returns an error if the replacement is not a valid expression, if called +/// on a function-like macro, or if the token stream contains comments, +/// keywords or identifiers. +pub fn macro_definition<'a>(input: &'a [Token]) -> CResult<'a,(&'a [u8],EvalResult)> { + IdentifierParser::new(&HashMap::new()).macro_definition(input) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..15675a3 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,58 @@ +// (C) Copyright 2016 Jethro G. Beekman +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate regex; +#[macro_use] +extern crate nom as nom_crate; +#[macro_use] +extern crate lazy_static; + +pub mod nom { + //! 
nom's result types, re-exported. + pub use nom_crate::{IResult,Needed,Err,ErrorKind}; +} +pub mod literal; +pub mod expr; +pub mod token; + +use nom::*; + +#[derive(Debug)] +/// Parsing errors specific to C parsing +pub enum Error { + /// Expected the specified token + ExactToken(token::Kind,&'static [u8]), + /// Expected a token of the specified kind + TypedToken(token::Kind), + /// An unknown identifier was encountered + UnknownIdentifier, + /// An invalid literal was encountered. + /// + /// When encountered, this generally means a bug exists in the data that + /// was passed in or the parsing logic. + InvalidLiteral, + /// A full parse was requested, but data was left over after parsing finished. + Partial, +} + +macro_rules! identity ( + ($i:expr,$e:expr) => ($e); +); + +/// If the input result indicates a successful parse, but there is data left, +/// return an `Error::Partial` instead. +pub fn assert_full_parse(result: IResult<&[I],O,E>) -> IResult<&[I],O,::Error> { + match fix_error!((),::Error,identity!(result)) { + IResult::Done(rem,output) => if rem.len()==0 { + IResult::Done(rem, output) + } else { + IResult::Error(Err::Position(ErrorKind::Custom(::Error::Partial), rem)) + }, + r => r, + } +} diff --git a/src/literal.rs b/src/literal.rs new file mode 100644 index 0000000..1071580 --- /dev/null +++ b/src/literal.rs @@ -0,0 +1,191 @@ +// (C) Copyright 2016 Jethro G. Beekman +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +//! Parsing C literals from byte slices. +//! +//! This will parse a representation of a C literal into a Rust type. +//! +//! # characters +//! Character literals are stored into the `CChar` type, which can hold values +//! that are not valid Unicode code points. ASCII characters are represented as +//! `char`, literal bytes with the high bit set are converted into the raw +//! representation. 
Escape sequences are supported. If hex and octal escapes +//! map to an ASCII character, that is used, otherwise, the raw encoding is +//! used, including for values over 255. Unicode escapes are checked for +//! validity and mapped to `char`. Character sequences are not supported. Width +//! prefixes are ignored. +//! +//! # strings +//! Strings are interpreted as byte vectors. Escape sequences are supported. If +//! hex and octal escapes map onto multi-byte characters, they are truncated to +//! one 8-bit character. Unicode escapes are converted into their UTF-8 +//! encoding. Width prefixes are ignored. +//! +//! # integers +//! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are +//! all supported. If the literal value is between `i64::MAX` and `u64::MAX`, +//! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and +//! sign suffixes are ignored. Sign prefixes are not supported. +//! +//! # real numbers +//! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are +//! not supported in the significand. + +use std::char; +use std::str::{self,FromStr}; + +use nom_crate::*; + +use expr::EvalResult; + +#[derive(Debug,Copy,Clone,PartialEq,Eq)] +/// Representation of a C character +pub enum CChar { + /// A character that can be represented as a `char` + Char(char), + /// Any other character (8-bit characters, unicode surrogates, etc.) + Raw(u64), +} + +impl From for CChar { + fn from(i: u8) -> CChar { + match i { + 0 ... 0x7f => CChar::Char(i as u8 as char), + _ => CChar::Raw(i as u64), + } + } +} + +// A non-allocating version of this would be nice... 
+impl Into> for CChar { + fn into(self) -> Vec { + match self { + CChar::Char(c) => { + let mut s=String::with_capacity(4); + s.extend(&[c]); + s.into_bytes() + } + CChar::Raw(i) => { + let mut v=Vec::with_capacity(1); + v.push(i as u8); + v + } + } + } +} + +const OCTAL: &'static [u8]=b"01234567"; +const DECIMAL: &'static [u8]=b"0123456789"; +const HEX: &'static [u8]=b"0123456789abcdefABCDEF"; + +fn escape2char(c: char) -> CChar { + CChar::Char(match c { + 'a' => '\x07', + 'b' => '\x08', + 'f' => '\x0c', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'v' => '\x0b', + _ => unreachable!("invalid escape {}",c) + }) +} + +fn c_raw_escape(n: &[u8], radix: u32) -> Option { + str::from_utf8(n).ok() + .and_then(|i|u64::from_str_radix(i,radix).ok()) + .map(|i|match i { + 0 ... 0x7f => CChar::Char(i as u8 as char), + _ => CChar::Raw(i), + }) +} + +fn c_unicode_escape(n: Vec) -> Option { + u32::from_str_radix(String::as_str(&n.into_iter().collect()),16).ok().and_then(char::from_u32).map(CChar::Char) +} + +named!(escaped_char, + preceded!(char!('\\'),alt!( + map!(one_of!(br#"'"?\"#),CChar::Char) | + map!(one_of!(b"abfnrtv"),escape2char) | + map_opt!(re_bytes_find_static!(r"^[0-7]{1,3}"),|v|c_raw_escape(v,8)) | + map_opt!(preceded!(char!('x'),is_a!(HEX)),|v|c_raw_escape(v,16)) | + map_opt!(preceded!(char!('u'),many_m_n!(4,4,one_of!(HEX))),c_unicode_escape) | + map_opt!(preceded!(char!('U'),many_m_n!(8,8,one_of!(HEX))),c_unicode_escape) + )) +); + +named!(c_width_prefix, + alt!( + tag!("u8") | + tag!("u") | + tag!("U") | + tag!("L") + ) +); + +named!(c_char, + delimited!( + terminated!(opt!(c_width_prefix),char!('\'')), + alt!( escaped_char | map!(le_u8,CChar::from) ), + char!('\'') + ) +); + +fn empty_vec(input: &[u8]) -> IResult<&[u8],Vec> { + IResult::Done(input,vec![]) +} + +named!(c_string >, + delimited!( + alt!( preceded!(c_width_prefix,char!('"')) | char!('"') ), + chain!( + mut vec: empty_vec ~ + many0!(alt!( + map!(tap!(c: escaped_char => { let v: Vec=c.into(); 
vec.extend_from_slice(&v) } ),|_|()) | + map!(tap!(s: is_not!(b"\"") => vec.extend_from_slice(s) ),|_|()) + )), + ||{return vec} + ), + char!('"') + ) +); + +named!(c_int, + terminated!(alt_complete!( + map!(preceded!(tag!("0x"),is_a!(HEX)), + |v|str::from_utf8(v).ok().and_then(|i|u64::from_str_radix(i,16).ok().map(|i|i as i64)).unwrap()) | + map!(preceded!(tag!("0b"),is_a!(b"01")), + |v|str::from_utf8(v).ok().and_then(|i|i64::from_str_radix(i,2).ok()).unwrap()) | + map!(preceded!(char!('0'),is_a!(OCTAL)), + |v|str::from_utf8(v).ok().and_then(|i|i64::from_str_radix(i,8).ok()).unwrap_or(0/*empty match*/)) | + map!(is_a!(DECIMAL), + |v|str::from_utf8(v).ok().and_then(|i|i64::from_str_radix(i,10).ok()).unwrap()) + ),is_a!("ulUL")) +); + +named!(c_float, + map_opt!(terminated!(re_bytes_find_static!(r"^(\d*\.\d+|\d+\.?)(e[+-]?\d+)?"),opt!(complete!(one_of!("flFL")))), + |v|str::from_utf8(v).ok().and_then(|i|f64::from_str(i).ok())) +); + +named!(one_literal<&[u8],EvalResult,::Error>, + fix_error!(::Error,alt_complete!( + map!(c_char,EvalResult::Char) | + map!(c_int,EvalResult::Int) | + map!(c_float,EvalResult::Float) | + map!(c_string,EvalResult::Str) + )) +); + +/// Parse a C literal. +/// +/// The input must contain exactly the representation of a single literal +/// token, and in particular no whitespace or sign prefixes. +pub fn parse(input: &[u8]) -> IResult<&[u8],EvalResult,::Error> { + ::assert_full_parse(one_literal(input)) +} diff --git a/src/token.rs b/src/token.rs new file mode 100644 index 0000000..c38510e --- /dev/null +++ b/src/token.rs @@ -0,0 +1,31 @@ +// (C) Copyright 2016 Jethro G. Beekman +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +//! Representation of a C token +//! +//! This is designed to map onto a libclang CXToken. 
+ +#[derive(Debug,Copy,Clone,PartialEq,Eq)] +pub enum Kind { + Punctuation, + Keyword, + Identifier, + Literal, + Comment, +} + +#[derive(Debug,Clone,PartialEq,Eq)] +pub struct Token { + pub kind: Kind, + pub raw: Box<[u8]>, +} + +/// Remove all comment tokens from a vector of tokens +pub fn remove_comments(v: &mut Vec) -> &mut Vec { + v.retain(|t|t.kind!=Kind::Comment); + v +} diff --git a/tests/clang.rs b/tests/clang.rs new file mode 100644 index 0000000..ae5e2d1 --- /dev/null +++ b/tests/clang.rs @@ -0,0 +1,158 @@ +// (C) Copyright 2016 Jethro G. Beekman +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +extern crate cexpr; +extern crate clang_sys; + +use std::{ptr,mem,ffi,slice,char}; +use std::str::{self,FromStr}; +use std::collections::HashMap; + +use clang_sys::*; +use cexpr::token::Token; +use cexpr::expr::{IdentifierParser,EvalResult}; +use cexpr::literal::CChar; + +const TEST_HEADER: &'static str="tests/test.h"; + +// main testing routine +fn clang_test(ident: Vec, tokens: &[Token], idents: &mut HashMap,EvalResult>) -> bool { + use cexpr::expr::EvalResult::*; + + let display_name=String::from_utf8_lossy(&ident).into_owned(); + + let test={ + // Split name such as Str_test_string into (Str,test_string) + let pos=ident.iter().position(|c|*c==b'_').expect(&format!("Invalid definition in testcase: {}",display_name)); + let expected=&ident[..pos]; + let value=&ident[(pos+1)..]; + + if expected==b"Str" { + Some(Str(value.to_owned())) + } else if expected==b"Int" { + str::from_utf8(value).ok().map(|s|s.replace("n","-")).and_then(|v|i64::from_str(&v).ok()).map(Int) + } else if expected==b"Float" { + str::from_utf8(value).ok().map(|s|s.replace("n","-").replace("p",".")).and_then(|v|f64::from_str(&v).ok()).map(Float) + } else if expected==b"CharRaw" { + 
str::from_utf8(value).ok().and_then(|v|u64::from_str(v).ok()).map(CChar::Raw).map(Char) + } else if expected==b"CharChar" { + str::from_utf8(value).ok().and_then(|v|u32::from_str(v).ok()).and_then(char::from_u32).map(CChar::Char).map(Char) + } else { + Some(Invalid) + }.expect(&format!("Invalid definition in testcase: {}",display_name)) + }; + + match IdentifierParser::new(idents).macro_definition(&tokens) { + cexpr::nom::IResult::Done(_,(_,val)) => { + if val==test { + if let Some(_)=idents.insert(ident,val) { + panic!("Duplicate definition for testcase: {}",display_name); + } + true + } else { + println!("Failed test for {}, expected {:?}, got {:?}",display_name,test,val); + false + } + }, + e @ _ => { + if test==Invalid { + true + } else { + println!("Failed test for {}, expected {:?}, got {:?}",display_name,test,e); + false + } + } + } +} + +// support code for the clang lexer +unsafe fn clang_str_to_vec(s: CXString) -> Vec { + let vec=ffi::CStr::from_ptr(clang_getCString(s)).to_bytes().to_owned(); + clang_disposeString(s); + vec +} + +unsafe fn token_clang_to_cexpr(tu: CXTranslationUnit, orig: &CXToken) -> Token { + Token { + kind:match clang_getTokenKind(*orig) { + CXTokenKind::Comment => cexpr::token::Kind::Comment, + CXTokenKind::Identifier => cexpr::token::Kind::Identifier, + CXTokenKind::Keyword => cexpr::token::Kind::Keyword, + CXTokenKind::Literal => cexpr::token::Kind::Literal, + CXTokenKind::Punctuation => cexpr::token::Kind::Punctuation, + }, + raw:clang_str_to_vec(clang_getTokenSpelling(tu,*orig)).into_boxed_slice() + } +} + +extern "C" fn visit_children_thunk(cur: CXCursor, parent: CXCursor, closure: CXClientData) -> CXChildVisitResult + where F: FnMut(CXCursor,CXCursor) -> CXChildVisitResult +{ + unsafe{(&mut *(closure as *mut F))(cur,parent)} +} + +unsafe fn visit_children(cursor: CXCursor, mut f: F) + where F: FnMut(CXCursor,CXCursor) -> CXChildVisitResult +{ + clang_visitChildren(cursor, visit_children_thunk:: as _, &mut f as *mut F as 
CXClientData); +} + +unsafe fn location_in_scope(r: CXSourceRange) -> bool { + let start=clang_getRangeStart(r); + let mut file=CXFile(ptr::null_mut()); + clang_getSpellingLocation(start,&mut file,ptr::null_mut(),ptr::null_mut(),ptr::null_mut()); + clang_Location_isFromMainFile(start)!=0 + && clang_Location_isInSystemHeader(start)==0 + && file.0!=ptr::null_mut() +} + +#[test] +fn clang() { + let mut idents=HashMap::new(); + let mut all_succeeded=true; + unsafe { + let tu={ + let index=clang_createIndex(true as _, false as _); + let file=ffi::CString::new(TEST_HEADER).unwrap(); + let mut tu=mem::uninitialized(); + assert_eq!(clang_parseTranslationUnit2( + index, + file.as_ptr(), + ptr::null(),0, + ptr::null_mut(),0, + CXTranslationUnit_DetailedPreprocessingRecord, + &mut tu + ),CXErrorCode::Success); + tu + }; + visit_children(clang_getTranslationUnitCursor(tu),|cur,_parent| { + if cur.kind==CXCursorKind::MacroDefinition { + let mut range=clang_getCursorExtent(cur); + if !location_in_scope(range) { return CXChildVisitResult::Continue } + range.end_int_data-=1; // clang bug for macros only + let mut token_ptr=ptr::null_mut(); + let mut num=0; + clang_tokenize(tu,range,&mut token_ptr,&mut num); + if token_ptr!=ptr::null_mut() { + let tokens=slice::from_raw_parts(token_ptr,num as usize); + let tokens: Vec<_>=tokens.iter().filter_map(|t| + if clang_getTokenKind(*t)!=CXTokenKind::Comment { + Some(token_clang_to_cexpr(tu,t)) + } else { + None + } + ).collect(); + clang_disposeTokens(tu,token_ptr,num); + all_succeeded&=clang_test(clang_str_to_vec(clang_getCursorSpelling(cur)),&tokens,&mut idents); + } + } + CXChildVisitResult::Continue + }); + clang_disposeTranslationUnit(tu); + }; + if !all_succeeded { panic!("One or more tests failed") } +} diff --git a/tests/test.h b/tests/test.h new file mode 100644 index 0000000..5305560 --- /dev/null +++ b/tests/test.h @@ -0,0 +1,39 @@ +#define Int_456 456 +#define Int_0 0 +#define Int_1 0b1 +#define Int_2 0x2 +#define Int_63 077 
+#define Int_123 123 +#define Int_124 124u +#define Int_125 125uL +#define Int_126 126LuL +#define Int_n3 ((-3)) +#define Int_16 (((1)<<4ULL))/*comment*/ +#define Int_13 1|8^6&2<<1 +#define Int_n5 -3-2 + +#define CharChar_65 'A' +#define CharChar_127849 '\U0001f369' // 🍩 +#define CharRaw_255 U'\xff' + +#define Str_unicode u"unicode" +#define Str_long L"long" +#define Str_concat u"con" L"cat" +#define Str_concat_parens ("concat" U"_parens") +#define Str_concat_identifier (Str_concat L"_identifier") + +#define Float_0 0. +#define Float_1 1f +#define Float_p1 .1 +#define Float_2 2.0 +#define Float_1000 1e3 +#define Float_2000 2e+3 +#define Float_p001 1e-3 +#define Float_80 10.0*(1<<3) + +#define FAIL_1(x) 3 +#define FAIL_2 +#define FAIL_3 0b2 +#define FAIL_4 3<<1f +#define FAIL_5 UNKNOWN +#define FAIL_6 "test" Str_long Int_0