[MC] Separate masm integer literal lexer support from inline asm

Summary:
This renames the IsParsingMSInlineAsm member variable of AsmLexer to
LexMasmIntegers and moves it up to MCAsmLexer. Lexing masm-style integer
literals is the only behavior controlled by that variable. I added a
public setter so that it can be set by other code or from the llvm-mc
command line (via -masm-integers). We may need to arrange things so that
users can get this behavior from clang, but that's future work.

I also put additional hex literal lexing functionality under this flag
to fix PR32973. It appears that this hex literal parsing wasn't intended
to be enabled in non-masm-style blocks.

Now, masm integers (0b1101 and 0ABCh) work in __asm blocks from clang,
while 0b label references work when using .intel_syntax in standalone .s
files.

However, 0b label references will *not* work from __asm blocks in clang.
They will work from GCC-style inline asm blocks, which appears to be
important for Crypto++, as mentioned in PR36144.

Essentially, we only lex masm literals for inline asm blobs that use
intel syntax. If the .intel_syntax directive is used inside a gnu-style
inline asm statement, masm literals will not be lexed, which is
compatible with gas and llvm-mc standalone .s assembly.

This fixes PR36144 and PR32973.

Reviewers: Gerolf, avt77

Subscribers: eraman, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D53535

llvm-svn: 345189
This commit is contained in:
Reid Kleckner 2018-10-24 20:23:57 +00:00
parent f7d57b84b7
commit c6d928b2f6
11 changed files with 47 additions and 26 deletions

View File

@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
StringRef CurBuf;
bool IsAtStartOfLine = true;
bool IsAtStartOfStatement = true;
bool IsParsingMSInlineAsm = false;
bool IsPeeking = false;
protected:
@ -44,7 +43,6 @@ public:
~AsmLexer() override;
void setBuffer(StringRef Buf, const char *ptr = nullptr);
void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; }
StringRef LexUntilEndOfStatement() override;

View File

@ -50,6 +50,7 @@ protected: // Can only create subclasses.
bool SkipSpace = true;
bool AllowAtInIdentifier;
bool IsAtStartOfStatement = true;
bool LexMasmIntegers = false;
AsmCommentConsumer *CommentConsumer = nullptr;
MCAsmLexer();
@ -146,6 +147,10 @@ public:
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
this->CommentConsumer = CommentConsumer;
}
/// Set whether to lex masm-style binary and hex literals. They look like
/// 0b1101 and 0ABCh respectively.
void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
};
} // end namespace llvm

View File

@ -156,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
// Enable lexing Masm binary and hex integer literals in intel inline
// assembly.
if (Dialect == InlineAsm::AD_Intel)
// We need this flag to be able to parse numbers like "0bH"
Parser->setParsingInlineAsm(true);
Parser->getLexer().setLexMasmIntegers(true);
if (MF) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));

View File

@ -243,22 +243,26 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
const char *FirstHex = nullptr;
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
bool LexHex) {
const char *FirstNonDec = nullptr;
const char *LookAhead = CurPtr;
while (true) {
if (isDigit(*LookAhead)) {
++LookAhead;
} else if (isHexDigit(*LookAhead)) {
if (!FirstHex)
FirstHex = LookAhead;
++LookAhead;
} else {
break;
if (!FirstNonDec)
FirstNonDec = LookAhead;
// Keep going if we are looking for a 'h' suffix.
if (LexHex && isHexDigit(*LookAhead))
++LookAhead;
else
break;
}
}
bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
if (isHex)
return 16;
return DefaultRadix;
@ -281,7 +285,7 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
AsmToken AsmLexer::LexDigit() {
// MASM-flavor binary integer: [01]+[bB]
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
if (LexMasmIntegers && isdigit(CurPtr[-1])) {
const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
CurPtr - 1 : nullptr;
const char *OldCurPtr = CurPtr;
@ -320,7 +324,7 @@ AsmToken AsmLexer::LexDigit() {
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doLookAhead(CurPtr, 10);
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
bool isHex = Radix == 16;
// Check for floating point literals.
if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
@ -335,8 +339,8 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, !isHex ? "invalid decimal number" :
"invalid hexdecimal number");
// Consume the [bB][hH].
if (Radix == 2 || Radix == 16)
// Consume the [hH].
if (LexMasmIntegers && Radix == 16)
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores type
@ -346,7 +350,7 @@ AsmToken AsmLexer::LexDigit() {
return intToken(Result, Value);
}
if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
++CurPtr;
// See if we actually have "0b" as part of something like "jmp 0b\n"
if (!isDigit(CurPtr[0])) {
@ -395,7 +399,7 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, "invalid hexadecimal number");
// Consume the optional [hH].
if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@ -407,7 +411,7 @@ AsmToken AsmLexer::LexDigit() {
// Either octal or hexadecimal.
APInt Value(128, 0, true);
unsigned Radix = doLookAhead(CurPtr, 8);
unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))

View File

@ -229,7 +229,9 @@ public:
void setParsingInlineAsm(bool V) override {
ParsingInlineAsm = V;
Lexer.setParsingMSInlineAsm(V);
// When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
// hex integer literals.
Lexer.setLexMasmIntegers(V);
}
bool isParsingInlineAsm() override { return ParsingInlineAsm; }

View File

@ -3283,7 +3283,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
getParser().setParsingInlineAsm(false);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
@ -3296,7 +3295,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
getParser().setParsingInlineAsm(true);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();

View File

@ -0,0 +1,8 @@
// RUN: llvm-mc -triple aarch64-elf -filetype=obj %s -o - | llvm-objdump -d -r - | FileCheck %s
.macro do_add sz
add v0.\sz, v0.\sz, v0.\sz
.endm
do_add 8h
// CHECK: add v0.8h, v0.8h, v0.8h

View File

@ -1,4 +1,4 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
// RUN: llvm-mc -masm-integers -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
// rdar://12470373
// Checks to make sure we parse the hexadecimal suffix properly.

View File

@ -1,4 +1,4 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown %s
// RUN: llvm-mc -triple x86_64-unknown-unknown %s -masm-integers=1
.intel_syntax
add rbx, 0B0h

View File

@ -5,7 +5,7 @@
.intel_syntax noprefix
mov eax, 1
mov ebx, 0ffh
mov ebx, 0xff
imul esi, edi
lea eax, [rsi + rdi]

View File

@ -164,6 +164,10 @@ MainFileName("main-file-name",
static cl::opt<bool> SaveTempLabels("save-temp-labels",
cl::desc("Don't discard temporary labels"));
static cl::opt<bool> LexMasmIntegers(
"masm-integers",
cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)"));
static cl::opt<bool> NoExecStack("no-exec-stack",
cl::desc("File doesn't need an exec stack"));
@ -293,6 +297,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget,
return SymbolResult;
Parser->setShowParsedOperands(ShowInstOperands);
Parser->setTargetParser(*TAP);
Parser->getLexer().setLexMasmIntegers(LexMasmIntegers);
int Res = Parser->Run(NoInitialTextSection);