From 84cac855385c865c8ee517c68744952fedcc2568 Mon Sep 17 00:00:00 2001 From: Nirav Dave Date: Mon, 3 Oct 2016 13:48:27 +0000 Subject: [PATCH] Prevent out of order HashDirective lexing in AsmLexer. Retrying after buildbot reset. To lex hash directives we peek ahead to find component tokens, create a unified token, and unlex the peeked tokens so the parser does not need to parse the tokens then. Make sure we do not lex another hash directive during the peek operation. This fixes PR28921. Reviewers: rnk, loladiro Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D24839 llvm-svn: 283111 --- include/llvm/MC/MCParser/AsmLexer.h | 2 +- lib/MC/MCParser/AsmLexer.cpp | 43 ++++++++++++----------------- test/MC/AsmParser/pr28921.s | 8 ++++++ 3 files changed, 26 insertions(+), 27 deletions(-) create mode 100644 test/MC/AsmParser/pr28921.s diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index 06937e25f59..029598c013d 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -32,7 +32,7 @@ class AsmLexer : public MCAsmLexer { bool IsAtStartOfLine; bool IsAtStartOfStatement; bool IsParsingMSInlineAsm; - + bool IsPeeking; void operator=(const AsmLexer&) = delete; AsmLexer(const AsmLexer&) = delete; diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 99e1df6be6e..0fa7fbdc7b6 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -11,29 +11,29 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCParser/AsmLexer.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/Support/SMLoc.h" +#include 
"llvm/Support/SaveAndRestore.h" #include #include #include #include -#include #include +#include #include using namespace llvm; -AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { - CurPtr = nullptr; - IsAtStartOfLine = true; - IsAtStartOfStatement = true; - IsParsingMSInlineAsm = false; +AsmLexer::AsmLexer(const MCAsmInfo &MAI) + : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true), + IsAtStartOfStatement(true), IsParsingMSInlineAsm(false), + IsPeeking(false) { AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -487,17 +487,15 @@ StringRef AsmLexer::LexUntilEndOfLine() { size_t AsmLexer::peekTokens(MutableArrayRef Buf, bool ShouldSkipSpace) { - const char *SavedTokStart = TokStart; - const char *SavedCurPtr = CurPtr; - bool SavedAtStartOfLine = IsAtStartOfLine; - bool SavedAtStartOfStatement = IsAtStartOfStatement; - bool SavedSkipSpace = SkipSpace; - + SaveAndRestore SavedTokenStart(TokStart); + SaveAndRestore SavedCurPtr(CurPtr); + SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine); + SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement); + SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace); + SaveAndRestore SavedIsPeeking(IsPeeking, true); std::string SavedErr = getErr(); SMLoc SavedErrLoc = getErrLoc(); - SkipSpace = ShouldSkipSpace; - size_t ReadCount; for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { AsmToken Token = LexToken(); @@ -509,13 +507,6 @@ size_t AsmLexer::peekTokens(MutableArrayRef Buf, } SetError(SavedErrLoc, SavedErr); - - SkipSpace = SavedSkipSpace; - IsAtStartOfLine = SavedAtStartOfLine; - IsAtStartOfStatement = SavedAtStartOfStatement; - CurPtr = SavedCurPtr; - TokStart = SavedTokStart; - return ReadCount; } @@ -525,7 +516,7 @@ bool AsmLexer::isAtStartOfComment(const char *Ptr) { if (CommentString.size() == 1) return CommentString[0] == Ptr[0]; - // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin + // Allow # preprocessor comments to also be counted as comments for 
"##" cases if (CommentString[1] == '#') return CommentString[0] == Ptr[0]; @@ -542,7 +533,7 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (CurChar == '#' && IsAtStartOfStatement) { + if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { // If this starts with a '#', this may be a cpp // hash directive and otherwise a line comment. AsmToken TokenBuf[2]; diff --git a/test/MC/AsmParser/pr28921.s b/test/MC/AsmParser/pr28921.s new file mode 100644 index 00000000000..2fbb555f4f3 --- /dev/null +++ b/test/MC/AsmParser/pr28921.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple i386-unknown-unknown %s + +# 1 "kernel.S" +# 1 "" 1 +# 1 "kernel.S" 2 +## +# 10 "kernel.S" +##