[syntax][pseudo] Introduce the C++ spec grammar.

Add a dummy clang-pseudo tool (right now it accepts and parses the
grammar file).

Differential Revision: https://reviews.llvm.org/D115856
This commit is contained in:
Haojian Wu 2022-01-20 11:02:50 +01:00
parent ffe8720aa0
commit e1db505b42
8 changed files with 810 additions and 1 deletions

View File

@ -92,7 +92,7 @@ public:
return It->second; return It->second;
}; };
for (const auto &Spec : Specs) { for (const auto &Spec : Specs) {
assert(Spec.Sequence.size() < Rule::MaxElements); assert(Spec.Sequence.size() <= Rule::MaxElements);
Symbols.clear(); Symbols.clear();
for (const RuleSpec::Element &Elt : Spec.Sequence) for (const RuleSpec::Element &Elt : Spec.Sequence)
Symbols.push_back(Lookup(Elt.Symbol)); Symbols.push_back(Lookup(Elt.Symbol));

View File

@ -0,0 +1,739 @@
# This is a C++ grammar from the C++ standard [1].
#
# The grammar is a superset of the true grammar requring semantic constraints to
# resolve ambiguties. The grammar is context-free and ambiguous (beyond the
# limit of LR(k)). We use general parsing algorithm (e.g GLR) to handle the
# grammar and generate a transition table which is used to drive the parsing.
#
# It aims to align with the ISO C++ grammar as much as possible. We adjust it
# to fit the need for the grammar-based parser:
# - attributes are omitted, which will be handled as comments;
# - we don't allow nullable non-terminal symbols. There are few nullable
# non-terminals in the spec grammar, they are adjusted to be non-nullable;
# - the file merely describes the core C++ grammar. Preprocessor directives and
# lexical conversions are omitted as we reuse clang's lexer and run a fake
# preprocessor;
#
# Guidelines:
# - non-terminals are lower_case; terminals (aka tokens) correspond to
# clang::TokenKind, written as "IDENTIFIER", "USING", "::" etc;
# - optional symbols are supported, with a _opt suffix;
#
# [1] https://isocpp.org/files/papers/N4860.pdf
#
#
# _ serves as a "fake" start symbol, coming with real grammar symbols.
_ := translation-unit
# gram.key
typedef-name := IDENTIFIER
typedef-name := simple-template-id
namespace-name := IDENTIFIER
namespace-name := namespace-alias
namespace-alias := IDENTIFIER
class-name := IDENTIFIER
class-name := simple-template-id
enum-name := IDENTIFIER
template-name := IDENTIFIER
# gram.basic
#! Custom modifications to eliminate optional declaration-seq
translation-unit := declaration-seq
translation-unit := global-module-fragment_opt module-declaration declaration-seq_opt private-module-fragment_opt
# gram.expr
# expr.prim
primary-expression := literal
primary-expression := THIS
primary-expression := ( expression )
primary-expression := id-expression
primary-expression := lambda-expression
primary-expression := fold-expression
primary-expression := requires-expression
id-expression := unqualified-id
id-expression := qualified-id
unqualified-id := IDENTIFIER
unqualified-id := operator-function-id
unqualified-id := conversion-function-id
unqualified-id := literal-operator-id
unqualified-id := ~ type-name
unqualified-id := ~ decltype-specifier
unqualified-id := template-id
qualified-id := nested-name-specifier TEMPLATE_opt unqualified-id
nested-name-specifier := ::
nested-name-specifier := type-name ::
nested-name-specifier := namespace-name ::
nested-name-specifier := decltype-specifier ::
nested-name-specifier := nested-name-specifier IDENTIFIER ::
nested-name-specifier := nested-name-specifier TEMPLATE_opt simple-template-id ::
lambda-expression := lambda-introducer lambda-declarator_opt compound-statement
lambda-expression := lambda-introducer < template-parameter-list > requires-clause_opt lambda-declarator_opt compound-statement
lambda-introducer := [ lambda-capture_opt ]
lambda-declarator := ( parameter-declaration-clause ) decl-specifier-seq_opt noexcept-specifier_opt trailing-return-type_opt requires-clause_opt
lambda-capture := capture-default
lambda-capture := capture-list
lambda-capture := capture-default , capture-list
capture-default := &
capture-default := =
capture-list := capture
capture-list := capture-list , capture
capture := simple-capture
capture := init-capture
simple-capture := IDENTIFIER ..._opt
simple-capture := & IDENTIFIER ..._opt
simple-capture := THIS
simple-capture := * THIS
init-capture := ..._opt IDENTIFIER initializer
init-capture := & ..._opt IDENTIFIER initializer
fold-expression := ( cast-expression fold-operator ... )
fold-expression := ( ... fold-operator cast-expression )
fold-expression := ( cast-expression fold-operator ... fold-operator cast-expression )
fold-operator := +
fold-operator := -
fold-operator := *
fold-operator := /
fold-operator := %
fold-operator := ^
fold-operator := |
fold-operator := <<
fold-operator := >>
fold-operator := +=
fold-operator := -=
fold-operator := *=
fold-operator := /=
fold-operator := %=
fold-operator := ^=
fold-operator := &=
fold-operator := |=
fold-operator := <<=
fold-operator := >>=
fold-operator := =
fold-operator := ==
fold-operator := !=
fold-operator := <
fold-operator := >
fold-operator := <=
fold-operator := >=
fold-operator := &&
fold-operator := ||
fold-operator := ,
fold-operator := .*
fold-operator := ->*
requires-expression := REQUIRES requirement-parameter-list_opt requirement-body
requirement-parameter-list := ( parameter-declaration-clause_opt )
requirement-body := { requirement-seq }
requirement-seq := requirement
requirement-seq := requirement-seq requirement
requirement := simple-requirement
requirement := type-requirement
requirement := compound-requirement
requirement := nested-requirement
simple-requirement := expression ;
type-requirement := TYPENAME nested-name-specifier_opt type-name ;
compound-requirement := { expression } NOEXCEPT_opt return-type-requirement_opt ;
return-type-requirement := -> type-constraint
nested-requirement := REQUIRES constraint-expression ;
# expr.post
postfix-expression := primary-expression
postfix-expression := postfix-expression [ expr-or-braced-init-list ]
postfix-expression := postfix-expression ( expression-list_opt )
postfix-expression := simple-type-specifier ( expression-list_opt )
postfix-expression := typename-specifier ( expression-list_opt )
postfix-expression := simple-type-specifier braced-init-list
postfix-expression := postfix-expression . TEMPLATE_opt id-expression
postfix-expression := postfix-expression -> TEMPLATE_opt id-expression
postfix-expression := postfix-expression ++
postfix-expression := postfix-expression --
postfix-expression := DYNAMIC_CAST < type-id > ( expression )
postfix-expression := STATIC_CAST < type-id > ( expression )
postfix-expression := REINTERPRET_CAST < type-id > ( expression )
postfix-expression := CONST_CAST < type-id > ( expression )
postfix-expression := TYPEID ( expression )
postfix-expression := TYPEID ( type-id )
expression-list := initializer-list
# expr.unary
unary-expression := postfix-expression
unary-expression := unary-operator cast-expression
unary-expression := ++ cast-expression
unary-expression := -- cast-expression
unary-expression := await-expression
unary-expression := SIZEOF unary-expression
unary-expression := SIZEOF ( type-id )
unary-expression := SIZEOF ... ( IDENTIFIER )
unary-expression := ALIGNOF ( type-id )
unary-expression := noexcept-expression
unary-expression := new-expression
unary-expression := delete-expression
unary-operator := *
unary-operator := &
unary-operator := +
unary-operator := -
unary-operator := !
unary-operator := ~
await-expression := CO_AWAIT cast-expression
noexcept-expression := NOEXCEPT ( expression )
new-expression := ::_opt NEW new-placement_opt new-type-id new-initializer_opt
new-expression := ::_opt NEW new-placement_opt ( type-id ) new-initializer_opt
new-placement := ( expression-list )
new-type-id := type-specifier-seq new-declarator_opt
new-declarator := ptr-operator new-declarator_opt
new-declarator := noptr-new-declarator
noptr-new-declarator := [ expression_opt ]
noptr-new-declarator := noptr-new-declarator [ constant-expression ]
new-initializer := ( expression-list_opt )
new-initializer := braced-init-list
delete-expression := ::_opt DELETE cast-expression
delete-expression := ::_opt DELETE [ ] cast-expression
cast-expression := unary-expression
cast-expression := ( type-id ) cast-expression
# expr.mptr.oper
pm-expression := cast-expression
pm-expression := pm-expression .* cast-expression
pm-expression := pm-expression ->* cast-expression
# expr.mul
multiplicative-expression := pm-expression
multiplicative-expression := multiplicative-expression * pm-expression
multiplicative-expression := multiplicative-expression / pm-expression
multiplicative-expression := multiplicative-expression % pm-expression
# expr.add
additive-expression := multiplicative-expression
additive-expression := additive-expression + multiplicative-expression
additive-expression := additive-expression - multiplicative-expression
# expr.shift
shift-expression := additive-expression
shift-expression := shift-expression << additive-expression
shift-expression := shift-expression >> additive-expression
# expr.spaceship
compare-expression := shift-expression
compare-expression := compare-expression <=> shift-expression
# expr.rel
relational-expression := compare-expression
relational-expression := relational-expression < compare-expression
relational-expression := relational-expression > compare-expression
relational-expression := relational-expression <= compare-expression
relational-expression := relational-expression >= compare-expression
# expr.eq
equality-expression := relational-expression
equality-expression := equality-expression == relational-expression
equality-expression := equality-expression != relational-expression
# expr.bit.and
and-expression := equality-expression
and-expression := and-expression & equality-expression
# expr.xor
exclusive-or-expression := and-expression
exclusive-or-expression := exclusive-or-expression ^ and-expression
# expr.or
inclusive-or-expression := exclusive-or-expression
inclusive-or-expression := inclusive-or-expression | exclusive-or-expression
# expr.log.and
logical-and-expression := inclusive-or-expression
logical-and-expression := logical-and-expression && inclusive-or-expression
# expr.log.or
logical-or-expression := logical-and-expression
logical-or-expression := logical-or-expression || logical-and-expression
# expr.cond
conditional-expression := logical-or-expression
conditional-expression := logical-or-expression ? expression : assignment-expression
# expr.ass
yield-expression := CO_YIELD assignment-expression
yield-expression := CO_YIELD braced-init-list
throw-expression := THROW assignment-expression_opt
assignment-expression := conditional-expression
assignment-expression := yield-expression
assignment-expression := throw-expression
assignment-expression := logical-or-expression assignment-operator initializer-clause
assignment-operator := =
assignment-operator := *=
assignment-operator := /=
assignment-operator := %=
assignment-operator := +=
assignment-operator := -=
assignment-operator := >>=
assignment-operator := <<=
assignment-operator := &=
assignment-operator := ^=
assignment-operator := |=
# expr.comma
expression := assignment-expression
expression := expression , assignment-expression
# expr.const
constant-expression := conditional-expression
# gram.stmt
statement := labeled-statement
statement := expression-statement
statement := compound-statement
statement := selection-statement
statement := iteration-statement
statement := jump-statement
statement := declaration-statement
statement := try-block
init-statement := expression-statement
init-statement := simple-declaration
condition := expression
condition := decl-specifier-seq declarator brace-or-equal-initializer
labeled-statement := IDENTIFIER : statement
labeled-statement := CASE constant-expression : statement
labeled-statement := DEFAULT : statement
expression-statement := expression_opt ;
compound-statement := { statement-seq_opt }
statement-seq := statement
statement-seq := statement-seq statement
selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement
selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement
selection-statement := SWITCH ( init-statement_opt condition ) statement
iteration-statement := WHILE ( condition ) statement
iteration-statement := DO statement WHILE ( expression ) ;
iteration-statement := FOR ( init-statement condition_opt ; expression_opt ) statement
iteration-statement := FOR ( init-statement_opt for-range-declaration : for-range-initializer ) statement
for-range-declaration := decl-specifier-seq declarator
for-range-declaration := decl-specifier-seq ref-qualifier_opt [ identifier-list ]
for-range-initializer := expr-or-braced-init-list
jump-statement := BREAK ;
jump-statement := CONTINUE ;
jump-statement := RETURN expr-or-braced-init-list_opt ;
jump-statement := coroutine-return-statement
jump-statement := GOTO IDENTIFIER ;
coroutine-return-statement := CO_RETURN expr-or-braced-init-list_opt ;
declaration-statement := block-declaration
# gram.dcl
declaration-seq := declaration
declaration-seq := declaration-seq declaration
declaration := block-declaration
declaration := nodeclspec-function-declaration
declaration := function-definition
declaration := template-declaration
declaration := deduction-guide
declaration := explicit-instantiation
declaration := explicit-specialization
declaration := export-declaration
declaration := linkage-specification
declaration := namespace-definition
declaration := empty-declaration
declaration := module-import-declaration
block-declaration := simple-declaration
block-declaration := asm-declaration
block-declaration := namespace-alias-definition
block-declaration := using-declaration
block-declaration := using-enum-declaration
block-declaration := using-directive
block-declaration := static_assert-declaration
block-declaration := alias-declaration
block-declaration := opaque-enum-declaration
nodeclspec-function-declaration := declarator ;
alias-declaration := USING IDENTIFIER = defining-type-id ;
simple-declaration := decl-specifier-seq init-declarator-list_opt ;
simple-declaration := decl-specifier-seq ref-qualifier_opt [ identifier-list ] initializer ;
static_assert-declaration := STATIC_ASSERT ( constant-expression ) ;
static_assert-declaration := STATIC_ASSERT ( constant-expression , string-literal ) ;
empty-declaration := ;
# dcl.spec
decl-specifier := storage-class-specifier
decl-specifier := defining-type-specifier
decl-specifier := function-specifier
decl-specifier := FRIEND
decl-specifier := TYPEDEF
decl-specifier := CONSTEXPR
decl-specifier := CONSTEVAL
decl-specifier := CONSTINIT
decl-specifier := INLINE
decl-specifier-seq := decl-specifier
decl-specifier-seq := decl-specifier decl-specifier-seq
storage-class-specifier := STATIC
storage-class-specifier := THREAD_LOCAL
storage-class-specifier := EXTERN
storage-class-specifier := MUTABLE
function-specifier := VIRTUAL
function-specifier := explicit-specifier
explicit-specifier := EXPLICIT ( constant-expression )
explicit-specifier := EXPLICIT
type-specifier := simple-type-specifier
type-specifier := elaborated-type-specifier
type-specifier := typename-specifier
type-specifier := cv-qualifier
type-specifier-seq := type-specifier
type-specifier-seq := type-specifier type-specifier-seq
defining-type-specifier := type-specifier
defining-type-specifier := class-specifier
defining-type-specifier := enum-specifier
defining-type-specifier-seq := defining-type-specifier
defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq
simple-type-specifier := nested-name-specifier_opt type-name
simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id
simple-type-specifier := decltype-specifier
simple-type-specifier := placeholder-type-specifier
simple-type-specifier := nested-name-specifier_opt template-name
simple-type-specifier := CHAR
simple-type-specifier := CHAR8_T
simple-type-specifier := CHAR16_T
simple-type-specifier := CHAR32_T
simple-type-specifier := WCHAR_T
simple-type-specifier := BOOL
simple-type-specifier := SHORT
simple-type-specifier := INT
simple-type-specifier := LONG
simple-type-specifier := SIGNED
simple-type-specifier := UNSIGNED
simple-type-specifier := FLOAT
simple-type-specifier := DOUBLE
simple-type-specifier := VOID
type-name := class-name
type-name := enum-name
type-name := typedef-name
elaborated-type-specifier := class-key nested-name-specifier_opt IDENTIFIER
elaborated-type-specifier := class-key simple-template-id
elaborated-type-specifier := class-key nested-name-specifier TEMPLATE_opt simple-template-id
elaborated-type-specifier := elaborated-enum-specifier
elaborated-enum-specifier := ENUM nested-name-specifier_opt IDENTIFIER
decltype-specifier := DECLTYPE ( expression )
placeholder-type-specifier := type-constraint_opt AUTO
placeholder-type-specifier := type-constraint_opt DECLTYPE ( AUTO )
init-declarator-list := init-declarator
init-declarator-list := init-declarator-list , init-declarator
init-declarator := declarator initializer_opt
init-declarator := declarator requires-clause
declarator := ptr-declarator
declarator := noptr-declarator parameters-and-qualifiers trailing-return-type
ptr-declarator := noptr-declarator
ptr-declarator := ptr-operator ptr-declarator
noptr-declarator := declarator-id
noptr-declarator := noptr-declarator parameters-and-qualifiers
noptr-declarator := noptr-declarator [ constant-expression_opt ]
noptr-declarator := ( ptr-declarator )
parameters-and-qualifiers := ( parameter-declaration-list_opt ) cv-qualifier-seq_opt ref-qualifier_opt noexcept-specifier_opt
trailing-return-type := -> type-id
ptr-operator := * cv-qualifier-seq_opt
ptr-operator := &
ptr-operator := &&
ptr-operator := nested-name-specifier * cv-qualifier-seq_opt
cv-qualifier-seq := cv-qualifier cv-qualifier-seq_opt
cv-qualifier := CONST
cv-qualifier := VOLATILE
ref-qualifier := &
ref-qualifier := &&
declarator-id := ..._opt id-expression
type-id := type-specifier-seq abstract-declarator_opt
defining-type-id := defining-type-specifier-seq abstract-declarator_opt
abstract-declarator := ptr-abstract-declarator
abstract-declarator := noptr-abstract-declarator_opt parameters-and-qualifiers trailing-return-type
abstract-declarator := abstract-pack-declarator
ptr-abstract-declarator := noptr-abstract-declarator
ptr-abstract-declarator := ptr-operator ptr-abstract-declarator_opt
noptr-abstract-declarator := noptr-abstract-declarator_opt parameters-and-qualifiers
noptr-abstract-declarator := noptr-abstract-declarator_opt [ constant-expression ]
noptr-abstract-declarator := ( ptr-abstract-declarator )
abstract-pack-declarator := noptr-abstract-pack-declarator
abstract-pack-declarator := ptr-operator abstract-pack-declarator
noptr-abstract-pack-declarator := noptr-abstract-pack-declarator parameters-and-qualifiers
noptr-abstract-pack-declarator := noptr-abstract-pack-declarator [ constant-expression_opt ]
noptr-abstract-pack-declarator := ...
#! Custom modifications to avoid nullable clause.
parameter-declaration-clause := parameter-declaration-list
parameter-declaration-clause := parameter-declaration-list_opt ...
parameter-declaration-clause := parameter-declaration-list , ...
parameter-declaration-list := parameter-declaration
parameter-declaration-list := parameter-declaration-list , parameter-declaration
parameter-declaration := decl-specifier-seq declarator
parameter-declaration := decl-specifier-seq declarator = initializer-clause
parameter-declaration := decl-specifier-seq abstract-declarator_opt
parameter-declaration := decl-specifier-seq abstract-declarator_opt = initializer-clause
# dcl.init
initializer := brace-or-equal-initializer
initializer := ( expression-list )
brace-or-equal-initializer := = initializer-clause
brace-or-equal-initializer := braced-init-list
initializer-clause := assignment-expression
initializer-clause := braced-init-list
braced-init-list := { initializer-list ,_opt }
braced-init-list := { designated-initializer-list ,_opt }
braced-init-list := { }
initializer-list := initializer-clause ..._opt
initializer-list := initializer-list , initializer-clause ..._opt
designated-initializer-list := designated-initializer-clause
designated-initializer-list := designated-initializer-list , designated-initializer-clause
designated-initializer-clause := designator brace-or-equal-initializer
designator := . IDENTIFIER
expr-or-braced-init-list := expression
expr-or-braced-init-list := braced-init-list
# dcl.fct
function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body
function-definition := decl-specifier-seq_opt declarator requires-clause function-body
function-body := ctor-initializer_opt compound-statement
function-body := function-try-block
function-body := = DEFAULT ;
function-body := = DELETE ;
# dcl.enum
enum-specifier := enum-head { enumerator-list_opt }
enum-specifier := enum-head { enumerator-list , }
enum-head := enum-key enum-head-name_opt enum-base_opt
enum-head-name := nested-name-specifier_opt IDENTIFIER
opaque-enum-declaration := enum-key enum-head-name enum-base_opt ;
enum-key := ENUM
enum-key := ENUM CLASS
enum-key := ENUM STRUCT
enum-base := : type-specifier-seq
enumerator-list := enumerator-definition
enumerator-list := enumerator-list , enumerator-definition
enumerator-definition := enumerator
enumerator-definition := enumerator = constant-expression
enumerator := IDENTIFIER
using-enum-declaration := USING elaborated-enum-specifier ;
# basic.namespace
namespace-definition := named-namespace-definition
namespace-definition := unnamed-namespace-definition
namespace-definition := nested-namespace-definition
named-namespace-definition := INLINE_opt NAMESPACE IDENTIFIER { namespace-body_opt }
unnamed-namespace-definition := INLINE_opt NAMESPACE { namespace-body_opt }
nested-namespace-definition := NAMESPACE enclosing-namespace-specifier :: INLINE_opt IDENTIFIER { namespace-body }
enclosing-namespace-specifier := IDENTIFIER
enclosing-namespace-specifier := enclosing-namespace-specifier :: INLINE_opt IDENTIFIER
#! Custom modification to avoid nullable namespace-body.
namespace-body := declaration-seq
namespace-alias-definition := NAMESPACE IDENTIFIER = qualified-namespace-specifier ;
qualified-namespace-specifier := nested-name-specifier_opt namespace-name
using-directive := USING NAMESPACE nested-name-specifier_opt namespace-name ;
using-declaration := USING using-declarator-list ;
using-declarator-list := using-declarator ..._opt
using-declarator-list := using-declarator-list , using-declarator ..._opt
using-declarator := TYPENAME_opt nested-name-specifier unqualified-id
# dcl.asm
asm-declaration := ASM ( string-literal ) ;
# dcl.link
linkage-specification := EXTERN string-literal { declaration-seq_opt }
linkage-specification := EXTERN string-literal declaration
# gram.module
module-declaration := export-keyword_opt module-keyword module-name module-partition_opt
module-name := module-name-qualifier_opt IDENTIFIER
module-partition := : module-name-qualifier_opt IDENTIFIER
module-name-qualifier := IDENTIFIER .
module-name-qualifier := module-name-qualifier IDENTIFIER .
export-declaration := EXPORT declaration
export-declaration := EXPORT ( declaration-seq_opt )
export-declaration := export-keyword module-import-declaration
module-import-declaration := import-keyword module-name
module-import-declaration := import-keyword module-partition
# FIXME: we don't have header-name in the grammar. Handle these in PP?
# module-import-declaration := import-keyword header-name
global-module-fragment := module-keyword ; declaration-seq_opt
private-module-fragment := module-keyword : PRIVATE ; declaration-seq_opt
# gram.class
class-specifier := class-head { member-specification_opt }
class-head := class-key class-head-name class-virt-specifier_opt base-clause_opt
class-head := class-key base-clause_opt
class-head-name := nested-name-specifier_opt class-name
class-virt-specifier := contextual-final
class-key := CLASS
class-key := STRUCT
class-key := UNION
member-specification := member-declaration member-specification_opt
member-specification := access-specifier : member-declaration member-specification_opt
member-declaration := decl-specifier-seq_opt member-declarator-list_opt ;
member-declaration := function-definition
member-declaration := using-declaration
member-declaration := using-enum-declaration
member-declaration := static_assert-declaration
member-declaration := template-declaration
member-declaration := explicit-specialization
member-declaration := deduction-guide
member-declaration := alias-declaration
member-declaration := opaque-enum-declaration
member-declaration := empty-declaration
member-declarator-list := member-declarator
member-declarator-list := member-declarator-list , member-declarator
member-declarator := declarator virt-specifier-seq_opt pure-specifier_opt
member-declarator := declarator requires-clause
member-declarator := declarator brace-or-equal-initializer
member-declarator := IDENTIFIER_opt : constant-expression brace-or-equal-initializer_opt
virt-specifier-seq := virt-specifier
virt-specifier-seq := virt-specifier-seq virt-specifier
virt-specifier := contextual-override
virt-specifier := contextual-final
pure-specifier := = contextual-zero
conversion-function-id := OPERATOR conversion-type-id
conversion-type-id := type-specifier-seq conversion-declarator_opt
conversion-declarator := ptr-operator conversion-declarator_opt
base-clause := : base-specifier-list
base-specifier-list := base-specifier ..._opt
base-specifier-list := base-specifier-list , base-specifier ..._opt
base-specifier := class-or-decltype
base-specifier := VIRTUAL access-specifier_opt class-or-decltype
base-specifier := access-specifier VIRTUAL_opt class-or-decltype
class-or-decltype := nested-name-specifier_opt type-name
class-or-decltype := nested-name-specifier TEMPLATE simple-template-id
class-or-decltype := decltype-specifier
access-specifier := PRIVATE
access-specifier := PROTECTED
access-specifier := PUBLIC
ctor-initializer := : mem-initializer-list
mem-initializer-list := mem-initializer ..._opt
mem-initializer-list := mem-initializer-list , mem-initializer ..._opt
mem-initializer := mem-initializer-id ( expression-list_opt )
mem-initializer := mem-initializer-id braced-init-list
mem-initializer-id := class-or-decltype
mem-initializer-id := IDENTIFIER
# gram.over
operator-function-id := OPERATOR operator-name
operator-name := NEW
operator-name := DELETE
operator-name := NEW [ ]
operator-name := DELETE [ ]
operator-name := CO_AWAIT
operator-name := ( )
operator-name := [ ]
operator-name := ->
operator-name := ->*
operator-name := ~
operator-name := !
operator-name := +
operator-name := -
operator-name := *
operator-name := /
operator-name := %
operator-name := ^
operator-name := &
operator-name := |
operator-name := =
operator-name := +=
operator-name := -=
operator-name := *=
operator-name := /=
operator-name := %=
operator-name := ^=
operator-name := &=
operator-name := |=
operator-name := ==
operator-name := !=
operator-name := <
operator-name := >
operator-name := <=
operator-name := >=
operator-name := <=>
operator-name := ^^
operator-name := ||
operator-name := <<
operator-name := >>
operator-name := <<=
operator-name := >>=
operator-name := ++
operator-name := --
operator-name := ,
literal-operator-id := OPERATOR string-literal IDENTIFIER
literal-operator-id := OPERATOR user-defined-string-literal
# gram.temp
template-declaration := template-head declaration
template-declaration := template-head concept-definition
template-head := TEMPLATE < template-parameter-list > requires-clause_opt
template-parameter-list := template-parameter
template-parameter-list := template-parameter-list , template-parameter
requires-clause := REQUIRES constraint-logical-or-expression
constraint-logical-or-expression := constraint-logical-and-expression
constraint-logical-or-expression := constraint-logical-or-expression || constraint-logical-and-expression
constraint-logical-and-expression := primary-expression
constraint-logical-and-expression := constraint-logical-and-expression && primary-expression
template-parameter := type-parameter
template-parameter := parameter-declaration
type-parameter := type-parameter-key ..._opt IDENTIFIER
type-parameter := type-parameter-key IDENTIFIER_opt = type-id
type-parameter := type-constraint ..._opt IDENTIFIER_opt
type-parameter := type-constraint IDENTIFIER_opt = type-id
type-parameter := template-head type-parameter-key ..._opt IDENTIFIER_opt
type-parameter := template-head type-parameter-key IDENTIFIER_opt = id-expression
type-parameter-key := CLASS
type-parameter-key := TYPENAME
type-constraint := nested-name-specifier_opt concept-name
type-constraint := nested-name-specifier_opt concept-name < template-argument-list_opt >
simple-template-id := template-name < template-argument-list_opt >
template-id := simple-template-id
template-id := operator-function-id < template-argument-list_opt >
template-id := literal-operator-id < template-argument-list_opt >
template-argument-list := template-argument ..._opt
template-argument-list := template-argument-list , template-argument ..._opt
template-argument := constant-expression
template-argument := type-id
template-argument := id-expression
constraint-expression := logical-or-expression
deduction-guide := explicit-specifier_opt template-name ( parameter-declaration-list_opt ) -> simple-template-id ;
concept-definition := CONCEPT concept-name = constraint-expression ;
concept-name := IDENTIFIER
typename-specifier := TYPENAME nested-name-specifier IDENTIFIER
typename-specifier := TYPENAME nested-name-specifier TEMPLATE_opt simple-template-id
explicit-instantiation := EXTERN_opt TEMPLATE declaration
explicit-specialization := TEMPLATE < > declaration
# gram.except
try-block := TRY compound-statement handler-seq
function-try-block := TRY ctor-initializer_opt compound-statement handler-seq
handler-seq := handler handler-seq_opt
handler := CATCH ( exception-declaration ) compound-statement
exception-declaration := type-specifier-seq declarator
exception-declaration := type-specifier-seq abstract-declarator_opt
noexcept-specifier := NOEXCEPT ( constant-expression )
noexcept-specifier := NOEXCEPT
# gram.cpp
identifier-list := IDENTIFIER
identifier-list := identifier-list , IDENTIFIER
# gram.lex
#! As we use clang lexer, most of lexical symbols are not needed, we only add
#! needed literals.
literal := integer-literal
literal := character-literal
literal := floating-point-literal
literal := string-literal
literal := boolean-literal
literal := pointer-literal
literal := user-defined-literal
integer-literal := NUMERIC_CONSTANT
character-literal := CHAR_CONSTANT
character-literal := WIDE_CHAR_CONSTANT
character-literal := UTF8_CHAR_CONSTANT
character-literal := UTF16_CHAR_CONSTANT
character-literal := UTF32_CHAR_CONSTANT
floating-point-literal := NUMERIC_CONSTANT
string-literal-chunk := STRING_LITERAL
string-literal-chunk := WIDE_STRING_LITERAL
string-literal-chunk := UTF8_STRING_LITERAL
string-literal-chunk := UTF16_STRING_LITERAL
string-literal-chunk := UTF32_STRING_LITERAL
#! Technically, string concatenation happens at phase 6 which is before parsing,
#! so it doesn't belong to the grammar. However, we extend the grammar to
#! support it, to make the pseudo parser fully functional on practical code.
string-literal := string-literal-chunk
string-literal := string-literal string-literal-chunk
user-defined-literal := user-defined-integer-literal
user-defined-literal := user-defined-floating-point-literal
user-defined-literal := user-defined-string-literal
user-defined-literal := user-defined-character-literal
user-defined-integer-literal := NUMERIC_CONSTANT
user-defined-string-literal-chunk := STRING_LITERAL
user-defined-string-literal-chunk := WIDE_STRING_LITERAL
user-defined-string-literal-chunk := UTF8_STRING_LITERAL
user-defined-string-literal-chunk := UTF16_STRING_LITERAL
user-defined-string-literal-chunk := UTF32_STRING_LITERAL
user-defined-string-literal := user-defined-string-literal-chunk
user-defined-string-literal := string-literal-chunk user-defined-string-literal
user-defined-string-literal := user-defined-string-literal string-literal-chunk
user-defined-floating-point-literal := NUMERIC_CONSTANT
user-defined-character-literal := CHAR_CONSTANT
user-defined-character-literal := WIDE_CHAR_CONSTANT
user-defined-character-literal := UTF8_CHAR_CONSTANT
user-defined-character-literal := UTF16_CHAR_CONSTANT
user-defined-character-literal := UTF32_CHAR_CONSTANT
boolean-literal := FALSE
boolean-literal := TRUE
pointer-literal := NULLPTR
#! Contextual keywords -- clang lexer always lexes them as identifier tokens.
#! Placeholders for literal text in the grammar that lex as other things.
contextual-override := IDENTIFIER
contextual-final := IDENTIFIER
contextual-zero := NUMERIC_CONSTANT
module-keyword := IDENTIFIER
import-keyword := IDENTIFIER
export-keyword := IDENTIFIER

View File

@ -76,6 +76,7 @@ list(APPEND CLANG_TEST_DEPS
clang-repl clang-repl
clang-diff clang-diff
clang-scan-deps clang-scan-deps
clang-pseudo
diagtool diagtool
hmaptool hmaptool
) )

View File

@ -0,0 +1,2 @@
// verify clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
// RUN: clang-pseudo -check-grammar=%cxx-bnf-file

View File

@ -0,0 +1,4 @@
cxx_bnf_file = os.path.join(config.clang_src_dir, 'lib', 'Tooling', 'Syntax',
'Pseudo', 'cxx.bnf')
config.substitutions.append(('%cxx-bnf-file',
'%s' % (cxx_bnf_file)))

View File

@ -14,6 +14,7 @@ add_clang_subdirectory(clang-offload-bundler)
add_clang_subdirectory(clang-offload-wrapper) add_clang_subdirectory(clang-offload-wrapper)
add_clang_subdirectory(clang-scan-deps) add_clang_subdirectory(clang-scan-deps)
add_clang_subdirectory(clang-repl) add_clang_subdirectory(clang-repl)
add_clang_subdirectory(clang-pseudo)
add_clang_subdirectory(c-index-test) add_clang_subdirectory(c-index-test)

View File

@ -0,0 +1,15 @@
set(LLVM_LINK_COMPONENTS support)
add_clang_tool(clang-pseudo
ClangPseudo.cpp
)
set(CLANG_PSEUDO_LIB_DEPS
clangBasic
clangToolingSyntaxPseudo
)
clang_target_link_libraries(clang-pseudo
PRIVATE
${CLANG_PSEUDO_LIB_DEPS}
)

View File

@ -0,0 +1,47 @@
//===-- ClangPseudo.cpp - Clang pseudo parser tool ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/Pseudo/Grammar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
using clang::syntax::pseudo::Grammar;
using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
static opt<std::string>
CheckGrammar("check-grammar", desc("Parse and check a BNF grammar file."),
init(""));
int main(int argc, char *argv[]) {
llvm::cl::ParseCommandLineOptions(argc, argv, "");
if (CheckGrammar.getNumOccurrences()) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
llvm::MemoryBuffer::getFile(CheckGrammar);
if (std::error_code EC = Text.getError()) {
llvm::errs() << "Error: can't read grammar file '" << CheckGrammar
<< "': " << EC.message() << "\n";
return 1;
}
std::vector<std::string> Diags;
auto RSpecs = Grammar::parseBNF(Text.get()->getBuffer(), Diags);
if (!Diags.empty()) {
llvm::errs() << llvm::join(Diags, "\n");
return 2;
}
llvm::errs() << llvm::formatv("grammar file {0} is parsed successfully\n",
CheckGrammar);
return 0;
}
return 0;
}