mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-27 23:51:56 +00:00
Lexing support for user-defined literals. Currently these lex as the same token
kinds as the underlying string literals, and we silently drop the ud-suffix; those issues will be fixed by subsequent patches.
llvm-svn: 152012
This commit is contained in:
parent
74226d3597
commit
e18f0faff2
@ -133,6 +133,9 @@ def warn_cxx98_compat_unicode_literal : Warning<
|
||||
InGroup<CXX98Compat>, DefaultIgnore;
|
||||
def err_unsupported_string_concat : Error<
|
||||
"unsupported non-standard concatenation of string literals">;
|
||||
def err_string_concat_mixed_suffix : Error<
|
||||
"differing user-defined suffixes ('%0' and '%1') in string literal "
|
||||
"concatenation">;
|
||||
def err_bad_string_encoding : Error<
|
||||
"illegal character encoding in string literal">;
|
||||
def warn_bad_string_encoding : ExtWarn<
|
||||
|
@ -530,6 +530,8 @@ private:
|
||||
// Other lexer functions.
|
||||
|
||||
void SkipBytes(unsigned Bytes, bool StartOfLine);
|
||||
|
||||
const char *LexUDSuffix(Token &Result, const char *CurPtr);
|
||||
|
||||
// Helper functions to lex the remainder of a token of the specific type.
|
||||
void LexIdentifier (Token &Result, const char *CurPtr);
|
||||
|
@ -128,6 +128,7 @@ class CharLiteralParser {
|
||||
tok::TokenKind Kind;
|
||||
bool IsMultiChar;
|
||||
bool HadError;
|
||||
SmallString<32> UDSuffixBuf;
|
||||
public:
|
||||
CharLiteralParser(const char *begin, const char *end,
|
||||
SourceLocation Loc, Preprocessor &PP,
|
||||
@ -140,6 +141,7 @@ public:
|
||||
bool isUTF32() const { return Kind == tok::utf32_char_constant; }
|
||||
bool isMultiChar() const { return IsMultiChar; }
|
||||
uint64_t getValue() const { return Value; }
|
||||
StringRef getUDSuffix() const { return UDSuffixBuf; }
|
||||
};
|
||||
|
||||
/// StringLiteralParser - This decodes string escape characters and performs
|
||||
@ -157,6 +159,7 @@ class StringLiteralParser {
|
||||
tok::TokenKind Kind;
|
||||
SmallString<512> ResultBuf;
|
||||
char *ResultPtr; // cursor
|
||||
SmallString<32> UDSuffixBuf;
|
||||
public:
|
||||
StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
|
||||
Preprocessor &PP, bool Complain = true);
|
||||
@ -196,6 +199,8 @@ public:
|
||||
bool isUTF32() const { return Kind == tok::utf32_string_literal; }
|
||||
bool isPascal() const { return Pascal; }
|
||||
|
||||
StringRef getUDSuffix() const { return UDSuffixBuf; }
|
||||
|
||||
private:
|
||||
void init(const Token *StringToks, unsigned NumStringToks);
|
||||
bool CopyStringFragment(StringRef Fragment);
|
||||
|
@ -1078,6 +1078,12 @@ static void InitCharacterInfo() {
|
||||
}
|
||||
|
||||
|
||||
/// isIdentifierHead - Return true if this is the first character of an
|
||||
/// identifier, which is [a-zA-Z_].
///
/// The test is a lookup in the CharInfo table: the character qualifies when
/// either its CHAR_LETTER or its CHAR_UNDER bit is set. Compare with
/// isIdentifierBody, whose documented set [a-zA-Z0-9_] also admits digits.
|
||||
static inline bool isIdentifierHead(unsigned char c) {
|
||||
return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
|
||||
}
|
||||
|
||||
/// isIdentifierBody - Return true if this is the body character of an
|
||||
/// identifier, which is [a-zA-Z0-9_].
|
||||
static inline bool isIdentifierBody(unsigned char c) {
|
||||
@ -1543,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
|
||||
unsigned Size;
|
||||
char C = getCharAndSize(CurPtr, Size);
|
||||
char PrevCh = 0;
|
||||
while (isNumberBody(C)) { // FIXME: UCNs?
|
||||
while (isNumberBody(C)) { // FIXME: UCNs.
|
||||
CurPtr = ConsumeChar(CurPtr, Size, Result);
|
||||
PrevCh = C;
|
||||
C = getCharAndSize(CurPtr, Size);
|
||||
@ -1567,6 +1573,23 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
|
||||
Result.setLiteralData(TokStart);
|
||||
}
|
||||
|
||||
/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
|
||||
/// in C++11.
///
/// The suffix is optional: if the character at CurPtr cannot start an
/// identifier, nothing is consumed and CurPtr is returned unchanged.
///
/// \param Result the token being formed; forwarded to ConsumeChar for every
///        character that is consumed as part of the suffix.
/// \param CurPtr the position at which a suffix may begin.
/// \returns the position just past the last suffix character consumed, or
///          CurPtr itself when no suffix is present.
|
||||
const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
|
||||
// Callers are expected to check the language mode before calling.
assert(getFeatures().CPlusPlus0x && "ud-suffix only exists in C++11");
|
||||
|
||||
// Maximally munch an identifier. FIXME: UCNs.
|
||||
unsigned Size;
|
||||
char C = getCharAndSize(CurPtr, Size);
|
||||
// Only an identifier-head character ([a-zA-Z_]) can begin a suffix.
if (isIdentifierHead(C)) {
|
||||
// The head is already validated, so consume first and then test whether
// the following character continues the identifier.
do {
|
||||
CurPtr = ConsumeChar(CurPtr, Size, Result);
|
||||
C = getCharAndSize(CurPtr, Size);
|
||||
} while (isIdentifierBody(C));
|
||||
}
|
||||
return CurPtr;
|
||||
}
|
||||
|
||||
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
|
||||
/// either " or L" or u8" or u" or U".
|
||||
void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
|
||||
@ -1606,6 +1629,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
|
||||
C = getAndAdvanceChar(CurPtr, Result);
|
||||
}
|
||||
|
||||
// If we are in C++11, lex the optional ud-suffix.
|
||||
if (getFeatures().CPlusPlus0x)
|
||||
CurPtr = LexUDSuffix(Result, CurPtr);
|
||||
|
||||
// If a nul character existed in the string, warn about it.
|
||||
if (NulCharacter && !isLexingRawMode())
|
||||
Diag(NulCharacter, diag::null_in_string);
|
||||
@ -1685,6 +1712,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
|
||||
}
|
||||
}
|
||||
|
||||
// If we are in C++11, lex the optional ud-suffix.
|
||||
if (getFeatures().CPlusPlus0x)
|
||||
CurPtr = LexUDSuffix(Result, CurPtr);
|
||||
|
||||
// Update the location of token as well as BufferPtr.
|
||||
const char *TokStart = BufferPtr;
|
||||
FormTokenWithChars(Result, CurPtr, Kind);
|
||||
@ -1768,6 +1799,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
|
||||
C = getAndAdvanceChar(CurPtr, Result);
|
||||
}
|
||||
|
||||
// If we are in C++11, lex the optional ud-suffix.
|
||||
if (getFeatures().CPlusPlus0x)
|
||||
CurPtr = LexUDSuffix(Result, CurPtr);
|
||||
|
||||
// If a nul character existed in the character, warn about it.
|
||||
if (NulCharacter && !isLexingRawMode())
|
||||
Diag(NulCharacter, diag::null_in_char);
|
||||
|
@ -731,7 +731,11 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
|
||||
}
|
||||
|
||||
|
||||
/// character-literal: [C++0x lex.ccon]
|
||||
/// user-defined-character-literal: [C++11 lex.ext]
|
||||
/// character-literal ud-suffix
|
||||
/// ud-suffix:
|
||||
/// identifier
|
||||
/// character-literal: [C++11 lex.ccon]
|
||||
/// ' c-char-sequence '
|
||||
/// u' c-char-sequence '
|
||||
/// U' c-char-sequence '
|
||||
@ -744,7 +748,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
|
||||
/// backslash \, or new-line character
|
||||
/// escape-sequence
|
||||
/// universal-character-name
|
||||
/// escape-sequence: [C++0x lex.ccon]
|
||||
/// escape-sequence:
|
||||
/// simple-escape-sequence
|
||||
/// octal-escape-sequence
|
||||
/// hexadecimal-escape-sequence
|
||||
@ -757,7 +761,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
|
||||
/// hexadecimal-escape-sequence:
|
||||
/// \x hexadecimal-digit
|
||||
/// hexadecimal-escape-sequence hexadecimal-digit
|
||||
/// universal-character-name:
|
||||
/// universal-character-name: [C++11 lex.charset]
|
||||
/// \u hex-quad
|
||||
/// \U hex-quad hex-quad
|
||||
/// hex-quad:
|
||||
@ -780,8 +784,17 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
|
||||
assert(begin[0] == '\'' && "Invalid token lexed");
|
||||
++begin;
|
||||
|
||||
// Remove an optional ud-suffix.
|
||||
if (end[-1] != '\'') {
|
||||
const char *UDSuffixEnd = end;
|
||||
do {
|
||||
--end;
|
||||
} while (end[-1] != '\'');
|
||||
UDSuffixBuf.assign(end, UDSuffixEnd);
|
||||
}
|
||||
|
||||
// Trim the ending quote.
|
||||
assert(end[-1] == '\'' && "Invalid token lexed");
|
||||
assert(end != begin && "Invalid token lexed");
|
||||
--end;
|
||||
|
||||
// FIXME: The "Value" is an uint64_t so we can handle char literals of
|
||||
@ -1071,6 +1084,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
||||
|
||||
Pascal = false;
|
||||
|
||||
SourceLocation UDSuffixTokLoc;
|
||||
|
||||
for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
|
||||
const char *ThisTokBuf = &TokenBuf[0];
|
||||
// Get the spelling of the token, which eliminates trigraphs, etc. We know
|
||||
@ -1085,7 +1100,39 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
||||
continue;
|
||||
}
|
||||
|
||||
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
|
||||
const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
|
||||
|
||||
// Remove an optional ud-suffix.
|
||||
if (ThisTokEnd[-1] != '"') {
|
||||
const char *UDSuffixEnd = ThisTokEnd;
|
||||
do {
|
||||
--ThisTokEnd;
|
||||
} while (ThisTokEnd[-1] != '"');
|
||||
|
||||
StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
|
||||
|
||||
if (UDSuffixBuf.empty()) {
|
||||
UDSuffixBuf.assign(UDSuffix);
|
||||
UDSuffixTokLoc = StringToks[i].getLocation();
|
||||
} else if (!UDSuffixBuf.equals(UDSuffix)) {
|
||||
// C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
|
||||
// result of a concatenation involving at least one user-defined-string-
|
||||
// literal, all the participating user-defined-string-literals shall
|
||||
// have the same ud-suffix.
|
||||
if (Diags) {
|
||||
SourceLocation TokLoc = StringToks[i].getLocation();
|
||||
Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
|
||||
<< UDSuffixBuf << UDSuffix
|
||||
<< SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
|
||||
<< SourceRange(TokLoc, TokLoc);
|
||||
}
|
||||
hadError = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Strip the end quote.
|
||||
--ThisTokEnd;
|
||||
|
||||
// TODO: Input character set mapping support.
|
||||
|
||||
// Skip marker for wide or unicode strings.
|
||||
|
@ -1,7 +1,7 @@
|
||||
// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s
|
||||
|
||||
int * operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}}
|
||||
long double operator "" _p31(long double);
|
||||
void operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}}
|
||||
void operator "" _p31(long double);
|
||||
long double operator "" pi(long double); // expected-warning{{user-defined literals not starting with '_' are reserved by the implementation}}
|
||||
|
||||
float hexfloat = 0x1p31; // allow hexfloats
|
||||
|
@ -7,9 +7,13 @@ template<char...> void operator "" _a();
|
||||
template<char... C> S<C...> operator "" _a();
|
||||
|
||||
template<typename T> struct U {
|
||||
friend int operator "" _a(const char *, size_t);
|
||||
// FIXME: It's not entirely clear whether this is intended to be legal.
|
||||
friend U operator "" _a(const T *, size_t); // expected-error {{parameter}}
|
||||
};
|
||||
template<char...> struct V {
|
||||
friend void operator "" _b(); // expected-error {{parameter}}
|
||||
};
|
||||
|
||||
template<char... C, int N = 0> void operator "" _b(); // expected-error {{parameter}}
|
||||
template<char... C> void operator "" _b(int N = 0); // expected-error {{parameter}}
|
||||
|
@ -9,8 +9,8 @@ void operator "" _km(long double); // ok
|
||||
string operator "" _i18n(const char*, std::size_t); // ok
|
||||
// FIXME: This should be accepted once we support UCNs
|
||||
template<char...> int operator "" \u03C0(); // ok, UCN for lowercase pi // expected-error {{expected identifier}}
|
||||
// FIXME: This should be rejected once we lex user-defined literal suffixes
|
||||
float operator ""E(const char *); // expected-warning {{hexfloat}}
|
||||
// FIXME: Accept this as an extension, with a fix-it to add the space
|
||||
float operator ""E(const char *); // expected-error {{must be '""'}} expected-error {{expected identifier}}
|
||||
float operator " " B(const char *); // expected-error {{must be '""'}} expected-warning {{hexfloat}}
|
||||
string operator "" 5X(const char *, std::size_t); // expected-error {{expected identifier}}
|
||||
double operator "" _miles(double); // expected-error {{parameter}}
|
||||
|
@ -50,7 +50,7 @@ int InitList() {
|
||||
return { 0 }; // expected-warning {{generalized initializer lists are incompatible with C++98}}
|
||||
}
|
||||
|
||||
int operator""_hello(const char *); // expected-warning {{literal operators are incompatible with C++98}}
|
||||
int operator"" _hello(const char *); // expected-warning {{literal operators are incompatible with C++98}}
|
||||
|
||||
enum EnumFixed : int { // expected-warning {{enumeration types with a fixed underlying type are incompatible with C++98}}
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user