mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-03-01 14:58:18 +00:00
[flang] Stricter "implicit continuation" in preprocessing
The prescanner performs implicit line continuation when it looks like the parenthesized arguments of a call to a function-like macro may span multiple lines. In an attempt to work more like a Fortran-oblivious C preprocessor, the prescanner will act as if the following lines had been continuations so that the function-like macro could be invoked. This still seems like a good idea, but a recent bug report on LLVM's GitHub issue tracker shows one way in which it could trigger inadvertently and mess up a program. So this patch makes the conditions for implicit line continuation much more strict. First, the leading parenthesis has to have been preceded by an identifier that's known to be a macro name. (It doesn't have to be a function-like macro, since it's possible for a keyword-like macro to expand to the name of a function-like macro.) Second, no macro definition can ever have had unbalanced parentheses in its replacement text. Also cleans up some parenthesis recognition code to fix some issues found in testing, so that a token with leading or trailing spaces can still be recognized as a parenthesis or comma. Fixes https://github.com/llvm/llvm-project/issues/63844. Differential Revision: https://reviews.llvm.org/D155499
This commit is contained in:
parent
299ec3c22a
commit
6fac3f7b2e
@ -74,8 +74,14 @@
|
||||
operators; e.g., `#if 2 .LT. 3` should work.
|
||||
* If a function-like macro does not close its parentheses, line
|
||||
continuation should be assumed.
|
||||
This includes the case of a keyword-like macro that expands to
|
||||
the name of a function-like macro.
|
||||
* ... However, the leading parenthesis has to be on the same line as
|
||||
the name of the function-like macro, or on a continuation line thereof.
|
||||
* And no macro definition prior to that point can be allowed to have
|
||||
unbalanced parentheses in its replacement text.
|
||||
When that happens, it's possible to have false positive cases
|
||||
causing implicit line continuations that break real code.
|
||||
* If macros expand to text containing `&`, it doesn't work as a free form
|
||||
line continuation marker.
|
||||
* `#define c 1` does not allow a `c` in column 1 to be used as a label
|
||||
@ -218,7 +224,7 @@ N N N N N . pp116.F90 FLM call split between name and (, no leading &
|
||||
. . E E . E pp124.F90 KWM NOT expanded in Hollerith in FORMAT
|
||||
E . . E E . pp125.F90 #DEFINE works in free form
|
||||
. . . . . . pp126.F90 \ newline works in #define
|
||||
N . E . E E pp127.F90 FLM call with closing ')' on next line (not a continuation)
|
||||
. . E . E E pp127.F90 FLM call with closing ')' on next line (not a continuation)
|
||||
E . E . E E pp128.F90 FLM call with '(' on next line (not a continuation)
|
||||
. . N . . N pp129.F90 #define KWM !, then KWM works as comment line initiator
|
||||
E . E . . E pp130.F90 #define KWM &, use for continuation w/o pasting (ifort and nag seem to continue #define)
|
||||
|
@ -55,6 +55,8 @@ public:
|
||||
interval_.ExtendToCover(that.interval_);
|
||||
}
|
||||
|
||||
// Returns the block's first non-blank character, if it has
|
||||
// one; otherwise ' '.
|
||||
char FirstNonBlank() const {
|
||||
for (char ch : *this) {
|
||||
if (ch != ' ' && ch != '\t') {
|
||||
@ -64,6 +66,22 @@ public:
|
||||
return ' '; // non no-blank character
|
||||
}
|
||||
|
||||
// Returns the block's only non-blank character, if it has
|
||||
// exactly one non-blank character; otherwise ' '.
|
||||
char OnlyNonBlank() const {
|
||||
char result{' '};
|
||||
for (char ch : *this) {
|
||||
if (ch != ' ' && ch != '\t') {
|
||||
if (result == ' ') {
|
||||
result = ch;
|
||||
} else {
|
||||
return ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::size_t CountLeadingBlanks() const {
|
||||
std::size_t n{size()};
|
||||
std::size_t j{0};
|
||||
|
@ -147,12 +147,14 @@ TokenSequence Definition::Apply(
|
||||
CharBlock token{replacement_.TokenAt(j)};
|
||||
std::size_t bytes{token.size()};
|
||||
if (skipping) {
|
||||
if (bytes == 1) {
|
||||
if (token[0] == '(') {
|
||||
++parenthesesNesting;
|
||||
} else if (token[0] == ')') {
|
||||
skipping = --parenthesesNesting > 0;
|
||||
char ch{token.OnlyNonBlank()};
|
||||
if (ch == '(') {
|
||||
++parenthesesNesting;
|
||||
} else if (ch == ')') {
|
||||
if (parenthesesNesting > 0) {
|
||||
--parenthesesNesting;
|
||||
}
|
||||
skipping = parenthesesNesting > 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -207,18 +209,21 @@ TokenSequence Definition::Apply(
|
||||
result.Put(args[k]);
|
||||
}
|
||||
} else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
|
||||
j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
|
||||
j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
|
||||
parenthesesNesting == 0) {
|
||||
parenthesesNesting = 1;
|
||||
skipping = args.size() == argumentCount_;
|
||||
++j;
|
||||
} else {
|
||||
if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
|
||||
++parenthesesNesting;
|
||||
} else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
|
||||
if (--parenthesesNesting == 0) {
|
||||
skipping = false;
|
||||
continue;
|
||||
if (parenthesesNesting > 0) {
|
||||
char ch{token.OnlyNonBlank()};
|
||||
if (ch == '(') {
|
||||
++parenthesesNesting;
|
||||
} else if (ch == ')') {
|
||||
if (--parenthesesNesting == 0) {
|
||||
skipping = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
result.Put(replacement_, j);
|
||||
@ -361,18 +366,16 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
|
||||
std::vector<std::size_t> argStart{++k};
|
||||
for (int nesting{0}; k < tokens; ++k) {
|
||||
CharBlock token{input.TokenAt(k)};
|
||||
if (token.size() == 1) {
|
||||
char ch{token[0]};
|
||||
if (ch == '(') {
|
||||
++nesting;
|
||||
} else if (ch == ')') {
|
||||
if (nesting == 0) {
|
||||
break;
|
||||
}
|
||||
--nesting;
|
||||
} else if (ch == ',' && nesting == 0) {
|
||||
argStart.push_back(k + 1);
|
||||
char ch{token.OnlyNonBlank()};
|
||||
if (ch == '(') {
|
||||
++nesting;
|
||||
} else if (ch == ')') {
|
||||
if (nesting == 0) {
|
||||
break;
|
||||
}
|
||||
--nesting;
|
||||
} else if (ch == ',' && nesting == 0) {
|
||||
argStart.push_back(k + 1);
|
||||
}
|
||||
}
|
||||
if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) {
|
||||
@ -454,12 +457,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
|
||||
}
|
||||
nameToken = SaveTokenAsName(nameToken);
|
||||
definitions_.erase(nameToken);
|
||||
if (++j < tokens && dir.TokenAt(j).size() == 1 &&
|
||||
dir.TokenAt(j)[0] == '(') {
|
||||
if (++j < tokens && dir.TokenAt(j).OnlyNonBlank() == '(') {
|
||||
j = dir.SkipBlanks(j + 1);
|
||||
std::vector<std::string> argName;
|
||||
bool isVariadic{false};
|
||||
if (dir.TokenAt(j).ToString() != ")") {
|
||||
if (dir.TokenAt(j).OnlyNonBlank() != ')') {
|
||||
while (true) {
|
||||
std::string an{dir.TokenAt(j).ToString()};
|
||||
if (an == "...") {
|
||||
@ -478,11 +480,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
|
||||
"#define: malformed argument list"_err_en_US);
|
||||
return;
|
||||
}
|
||||
std::string punc{dir.TokenAt(j).ToString()};
|
||||
if (punc == ")") {
|
||||
char punc{dir.TokenAt(j).OnlyNonBlank()};
|
||||
if (punc == ')') {
|
||||
break;
|
||||
}
|
||||
if (isVariadic || punc != ",") {
|
||||
if (isVariadic || punc != ',') {
|
||||
prescanner.Say(dir.GetTokenProvenanceRange(j),
|
||||
"#define: malformed argument list"_err_en_US);
|
||||
return;
|
||||
@ -502,10 +504,12 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
|
||||
}
|
||||
}
|
||||
j = dir.SkipBlanks(j + 1);
|
||||
CheckForUnbalancedParentheses(dir, j, tokens - j);
|
||||
definitions_.emplace(std::make_pair(
|
||||
nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
|
||||
} else {
|
||||
j = dir.SkipBlanks(j + 1);
|
||||
CheckForUnbalancedParentheses(dir, j, tokens - j);
|
||||
definitions_.emplace(
|
||||
std::make_pair(nameToken, Definition{dir, j, tokens - j}));
|
||||
}
|
||||
@ -883,7 +887,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
|
||||
}
|
||||
switch (op) {
|
||||
case PARENS:
|
||||
if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
|
||||
if (*atToken < tokens && token.TokenAt(*atToken).OnlyNonBlank() == ')') {
|
||||
++*atToken;
|
||||
break;
|
||||
}
|
||||
@ -1085,8 +1089,8 @@ bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
|
||||
if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
|
||||
CharBlock name;
|
||||
if (j + 3 < expr1.SizeInTokens() &&
|
||||
expr1.TokenAt(j + 1).ToString() == "(" &&
|
||||
expr1.TokenAt(j + 3).ToString() == ")") {
|
||||
expr1.TokenAt(j + 1).OnlyNonBlank() == '(' &&
|
||||
expr1.TokenAt(j + 3).OnlyNonBlank() == ')') {
|
||||
name = expr1.TokenAt(j + 2);
|
||||
j += 3;
|
||||
} else if (j + 1 < expr1.SizeInTokens() &&
|
||||
@ -1176,4 +1180,24 @@ void Preprocessor::LineDirective(
|
||||
sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber);
|
||||
}
|
||||
}
|
||||
|
||||
void Preprocessor::CheckForUnbalancedParentheses(
|
||||
const TokenSequence &tokens, std::size_t j, std::size_t n) {
|
||||
if (!anyMacroWithUnbalancedParentheses_) {
|
||||
int nesting{0};
|
||||
for (; n-- > 0; ++j) {
|
||||
char ch{tokens.TokenAt(j).OnlyNonBlank()};
|
||||
if (ch == '(') {
|
||||
++nesting;
|
||||
} else if (ch == ')') {
|
||||
if (nesting-- == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nesting != 0) {
|
||||
anyMacroWithUnbalancedParentheses_ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace Fortran::parser
|
||||
|
@ -80,6 +80,10 @@ public:
|
||||
// Implements a preprocessor directive.
|
||||
void Directive(const TokenSequence &, Prescanner &);
|
||||
|
||||
bool anyMacroWithUnbalancedParentheses() const {
|
||||
return anyMacroWithUnbalancedParentheses_;
|
||||
}
|
||||
|
||||
private:
|
||||
enum class IsElseActive { No, Yes };
|
||||
enum class CanDeadElseAppear { No, Yes };
|
||||
@ -91,11 +95,14 @@ private:
|
||||
bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,
|
||||
std::size_t exprTokens, Prescanner &);
|
||||
void LineDirective(const TokenSequence &, std::size_t, Prescanner &);
|
||||
void CheckForUnbalancedParentheses(
|
||||
const TokenSequence &, std::size_t first, std::size_t tokens);
|
||||
|
||||
AllSources &allSources_;
|
||||
std::list<std::string> names_;
|
||||
std::unordered_map<CharBlock, Definition> definitions_;
|
||||
std::stack<CanDeadElseAppear> ifStack_;
|
||||
bool anyMacroWithUnbalancedParentheses_{false};
|
||||
};
|
||||
} // namespace Fortran::parser
|
||||
#endif // FORTRAN_PARSER_PREPROCESSOR_H_
|
||||
|
@ -630,10 +630,14 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
|
||||
}
|
||||
} else {
|
||||
char ch{*at_};
|
||||
if (ch == '(' || ch == '[') {
|
||||
++delimiterNesting_;
|
||||
} else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
|
||||
--delimiterNesting_;
|
||||
if (ch == '(') {
|
||||
if (parenthesisNesting_++ == 0) {
|
||||
isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
|
||||
preprocessor_.IsNameDefined(
|
||||
tokens.TokenAt(tokens.SizeInTokens() - 1));
|
||||
}
|
||||
} else if (ch == ')' && parenthesisNesting_ > 0) {
|
||||
--parenthesisNesting_;
|
||||
}
|
||||
char nch{EmitCharAndAdvance(tokens, ch)};
|
||||
preventHollerith_ = false;
|
||||
@ -1142,8 +1146,9 @@ bool Prescanner::FreeFormContinuation() {
|
||||
// Implicit line continuation allows a preprocessor macro call with
|
||||
// arguments to span multiple lines.
|
||||
bool Prescanner::IsImplicitContinuation() const {
|
||||
return !inPreprocessorDirective_ && !inCharLiteral_ &&
|
||||
delimiterNesting_ > 0 && !IsAtEnd() &&
|
||||
return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
|
||||
parenthesisNesting_ > 0 &&
|
||||
!preprocessor_.anyMacroWithUnbalancedParentheses() && !IsAtEnd() &&
|
||||
ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
|
||||
}
|
||||
|
||||
|
@ -109,8 +109,9 @@ private:
|
||||
BeginSourceLineAndAdvance();
|
||||
slashInCurrentStatement_ = false;
|
||||
preventHollerith_ = false;
|
||||
delimiterNesting_ = 0;
|
||||
parenthesisNesting_ = 0;
|
||||
continuationLines_ = 0;
|
||||
isPossibleMacroCall_ = false;
|
||||
}
|
||||
|
||||
Provenance GetProvenance(const char *sourceChar) const {
|
||||
@ -194,9 +195,10 @@ private:
|
||||
bool inFixedForm_{false};
|
||||
int fixedFormColumnLimit_{72};
|
||||
Encoding encoding_{Encoding::UTF_8};
|
||||
int delimiterNesting_{0};
|
||||
int parenthesisNesting_{0};
|
||||
int prescannerNesting_{0};
|
||||
int continuationLines_{0};
|
||||
bool isPossibleMacroCall_{false};
|
||||
|
||||
Provenance startProvenance_;
|
||||
const char *start_{nullptr}; // beginning of current source file content
|
||||
|
@ -376,11 +376,13 @@ const TokenSequence &TokenSequence::CheckBadParentheses(
|
||||
std::size_t tokens{SizeInTokens()};
|
||||
for (std::size_t j{0}; j < tokens; ++j) {
|
||||
CharBlock token{TokenAt(j)};
|
||||
char ch{token.FirstNonBlank()};
|
||||
char ch{token.OnlyNonBlank()};
|
||||
if (ch == '(') {
|
||||
++nesting;
|
||||
} else if (ch == ')') {
|
||||
--nesting;
|
||||
if (nesting-- == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nesting != 0) {
|
||||
@ -388,7 +390,7 @@ const TokenSequence &TokenSequence::CheckBadParentheses(
|
||||
std::vector<std::size_t> stack;
|
||||
for (std::size_t j{0}; j < tokens; ++j) {
|
||||
CharBlock token{TokenAt(j)};
|
||||
char ch{token.FirstNonBlank()};
|
||||
char ch{token.OnlyNonBlank()};
|
||||
if (ch == '(') {
|
||||
stack.push_back(j);
|
||||
} else if (ch == ')') {
|
||||
|
@ -50,7 +50,7 @@ subroutine check_not_assert()
|
||||
y = elem_func_r(cos(x) + u)
|
||||
|
||||
! Array constructors as elemental function arguments.
|
||||
y = atan2( (/ (real(i, 4), i = 1, 2) /),
|
||||
y = atan2( (/ (real(i, 4), i = 1, 2) /), &
|
||||
real( (/ (i, i = j, k, l) /), 4) )
|
||||
end subroutine
|
||||
|
||||
|
9
flang/test/Preprocessing/implicit-contin1.F90
Normal file
9
flang/test/Preprocessing/implicit-contin1.F90
Normal file
@ -0,0 +1,9 @@
|
||||
! RUN: %flang -E %s | FileCheck %s
|
||||
! When there's a macro definition with unbalanced parentheses,
|
||||
! don't apply implicit continuation.
|
||||
#define M )
|
||||
call foo (1 M
|
||||
end
|
||||
|
||||
!CHECK: call foo(1 )
|
||||
!CHECK: end
|
13
flang/test/Preprocessing/implicit-contin2.F90
Normal file
13
flang/test/Preprocessing/implicit-contin2.F90
Normal file
@ -0,0 +1,13 @@
|
||||
! RUN: %flang -E %s | FileCheck %s
|
||||
! Test implicit continuation for possible function-like macro calls
|
||||
#define flm(x) x
|
||||
print *, flm(1)
|
||||
continue
|
||||
print *, flm(2
|
||||
)
|
||||
end
|
||||
|
||||
!CHECK: print *, 1
|
||||
!CHECK: continue
|
||||
!CHECK: print *, 2
|
||||
!CHECK: end
|
7
flang/test/Preprocessing/implicit-contin3.F90
Normal file
7
flang/test/Preprocessing/implicit-contin3.F90
Normal file
@ -0,0 +1,7 @@
|
||||
! RUN: not %flang -E %s 2>&1 | FileCheck %s
|
||||
! Test implicit continuation for possible function-like macro calls only
|
||||
#define flm(x) x
|
||||
call notamacro(3
|
||||
)
|
||||
!CHECK: error: Unmatched '('
|
||||
!CHECK: error: Unmatched ')'
|
@ -1,5 +1,5 @@
|
||||
! RUN: %flang -E %s 2>&1 | FileCheck %s
|
||||
! CHECK: res = IFLM(666 )
|
||||
! CHECK: res = ((666)+111)
|
||||
! FLM call with closing ')' on next line (not a continuation)
|
||||
integer function IFLM(x)
|
||||
integer :: x
|
||||
|
@ -19,7 +19,7 @@ program BigArray
|
||||
( &
|
||||
!ERROR: Must be a constant value
|
||||
0_foo,ii=1,limit &
|
||||
),
|
||||
), &
|
||||
jj=kk,limit &
|
||||
), &
|
||||
kk=1,limit &
|
||||
|
Loading…
x
Reference in New Issue
Block a user