[flang] Stricter "implicit continuation" in preprocessing

The prescanner performs implicit line continuation when it looks
like the parenthesized arguments of a call to a function-like macro
may span multiple lines.  In an attempt to work more like a
Fortran-oblivious C preprocessor, the prescanner will act as if
the following lines had been continuations so that the function-like
macro could be invoked.

This still seems like a good idea, but a recent bug report on
LLVM's GitHub issue tracker shows one way in which it could trigger
inadvertently and mess up a program.  So this patch makes the
conditions for implicit line continuation much more strict.

First, the leading parenthesis has to have been preceded by an
identifier that's known to be a macro name.  (It doesn't have to
be a function-like macro, since it's possible for a keyword-like
macro to expand to the name of a function-like macro.)  Second,
no macro definition can ever have had unbalanced parentheses in
its replacement text.

Also cleans up some parenthesis recognition code to fix some
issues found in testing, so that a token with leading or trailing
spaces can still be recognized as a parenthesis or comma.

Fixes https://github.com/llvm/llvm-project/issues/63844.

Differential Revision: https://reviews.llvm.org/D155499
This commit is contained in:
Peter Klausler 2023-07-13 16:26:23 -07:00
parent 299ec3c22a
commit 6fac3f7b2e
No known key found for this signature in database
13 changed files with 140 additions and 47 deletions

View File

@ -74,8 +74,14 @@
operators; e.g., `#if 2 .LT. 3` should work.
* If a function-like macro does not close its parentheses, line
continuation should be assumed.
This includes the case of a keyword-like macro that expands to
the name of a function-like macro.
* ... However, the leading parenthesis has to be on the same line as
the name of the function-like macro, or on a continuation line thereof.
* And no macro definition prior to that point can be allowed to have
unbalanced parentheses in its replacement text.
When that happens, it's possible to have false positive cases
causing implicit line continuations that break real code.
* If macros expand to text containing `&`, it doesn't work as a free form
line continuation marker.
* `#define c 1` does not allow a `c` in column 1 to be used as a label
@ -218,7 +224,7 @@ N N N N N . pp116.F90 FLM call split between name and (, no leading &
. . E E . E pp124.F90 KWM NOT expanded in Hollerith in FORMAT
E . . E E . pp125.F90 #DEFINE works in free form
. . . . . . pp126.F90 \ newline works in #define
N . E . E E pp127.F90 FLM call with closing ')' on next line (not a continuation)
. . E . E E pp127.F90 FLM call with closing ')' on next line (not a continuation)
E . E . E E pp128.F90 FLM call with '(' on next line (not a continuation)
. . N . . N pp129.F90 #define KWM !, then KWM works as comment line initiator
E . E . . E pp130.F90 #define KWM &, use for continuation w/o pasting (ifort and nag seem to continue #define)

View File

@ -55,6 +55,8 @@ public:
interval_.ExtendToCover(that.interval_);
}
// Returns the block's first non-blank character, if it has
// one; otherwise ' '.
char FirstNonBlank() const {
for (char ch : *this) {
if (ch != ' ' && ch != '\t') {
@ -64,6 +66,22 @@ public:
return ' '; // non no-blank character
}
// Returns the block's only non-blank character, if it has
// exactly one non-blank character; otherwise ' '.
char OnlyNonBlank() const {
char result{' '};
for (char ch : *this) {
if (ch != ' ' && ch != '\t') {
if (result == ' ') {
result = ch;
} else {
return ' ';
}
}
}
return result;
}
std::size_t CountLeadingBlanks() const {
std::size_t n{size()};
std::size_t j{0};

View File

@ -147,12 +147,14 @@ TokenSequence Definition::Apply(
CharBlock token{replacement_.TokenAt(j)};
std::size_t bytes{token.size()};
if (skipping) {
if (bytes == 1) {
if (token[0] == '(') {
++parenthesesNesting;
} else if (token[0] == ')') {
skipping = --parenthesesNesting > 0;
char ch{token.OnlyNonBlank()};
if (ch == '(') {
++parenthesesNesting;
} else if (ch == ')') {
if (parenthesesNesting > 0) {
--parenthesesNesting;
}
skipping = parenthesesNesting > 0;
}
continue;
}
@ -207,18 +209,21 @@ TokenSequence Definition::Apply(
result.Put(args[k]);
}
} else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
parenthesesNesting == 0) {
parenthesesNesting = 1;
skipping = args.size() == argumentCount_;
++j;
} else {
if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
++parenthesesNesting;
} else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
if (--parenthesesNesting == 0) {
skipping = false;
continue;
if (parenthesesNesting > 0) {
char ch{token.OnlyNonBlank()};
if (ch == '(') {
++parenthesesNesting;
} else if (ch == ')') {
if (--parenthesesNesting == 0) {
skipping = false;
continue;
}
}
}
result.Put(replacement_, j);
@ -361,18 +366,16 @@ std::optional<TokenSequence> Preprocessor::MacroReplacement(
std::vector<std::size_t> argStart{++k};
for (int nesting{0}; k < tokens; ++k) {
CharBlock token{input.TokenAt(k)};
if (token.size() == 1) {
char ch{token[0]};
if (ch == '(') {
++nesting;
} else if (ch == ')') {
if (nesting == 0) {
break;
}
--nesting;
} else if (ch == ',' && nesting == 0) {
argStart.push_back(k + 1);
char ch{token.OnlyNonBlank()};
if (ch == '(') {
++nesting;
} else if (ch == ')') {
if (nesting == 0) {
break;
}
--nesting;
} else if (ch == ',' && nesting == 0) {
argStart.push_back(k + 1);
}
}
if (argStart.size() == 1 && k == argStart[0] && def->argumentCount() == 0) {
@ -454,12 +457,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
}
nameToken = SaveTokenAsName(nameToken);
definitions_.erase(nameToken);
if (++j < tokens && dir.TokenAt(j).size() == 1 &&
dir.TokenAt(j)[0] == '(') {
if (++j < tokens && dir.TokenAt(j).OnlyNonBlank() == '(') {
j = dir.SkipBlanks(j + 1);
std::vector<std::string> argName;
bool isVariadic{false};
if (dir.TokenAt(j).ToString() != ")") {
if (dir.TokenAt(j).OnlyNonBlank() != ')') {
while (true) {
std::string an{dir.TokenAt(j).ToString()};
if (an == "...") {
@ -478,11 +480,11 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
"#define: malformed argument list"_err_en_US);
return;
}
std::string punc{dir.TokenAt(j).ToString()};
if (punc == ")") {
char punc{dir.TokenAt(j).OnlyNonBlank()};
if (punc == ')') {
break;
}
if (isVariadic || punc != ",") {
if (isVariadic || punc != ',') {
prescanner.Say(dir.GetTokenProvenanceRange(j),
"#define: malformed argument list"_err_en_US);
return;
@ -502,10 +504,12 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
}
}
j = dir.SkipBlanks(j + 1);
CheckForUnbalancedParentheses(dir, j, tokens - j);
definitions_.emplace(std::make_pair(
nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
} else {
j = dir.SkipBlanks(j + 1);
CheckForUnbalancedParentheses(dir, j, tokens - j);
definitions_.emplace(
std::make_pair(nameToken, Definition{dir, j, tokens - j}));
}
@ -883,7 +887,7 @@ static std::int64_t ExpressionValue(const TokenSequence &token,
}
switch (op) {
case PARENS:
if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
if (*atToken < tokens && token.TokenAt(*atToken).OnlyNonBlank() == ')') {
++*atToken;
break;
}
@ -1085,8 +1089,8 @@ bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
CharBlock name;
if (j + 3 < expr1.SizeInTokens() &&
expr1.TokenAt(j + 1).ToString() == "(" &&
expr1.TokenAt(j + 3).ToString() == ")") {
expr1.TokenAt(j + 1).OnlyNonBlank() == '(' &&
expr1.TokenAt(j + 3).OnlyNonBlank() == ')') {
name = expr1.TokenAt(j + 2);
j += 3;
} else if (j + 1 < expr1.SizeInTokens() &&
@ -1176,4 +1180,24 @@ void Preprocessor::LineDirective(
sourceFile->LineDirective(pos->trueLineNumber + 1, *linePath, *lineNumber);
}
}
void Preprocessor::CheckForUnbalancedParentheses(
const TokenSequence &tokens, std::size_t j, std::size_t n) {
if (!anyMacroWithUnbalancedParentheses_) {
int nesting{0};
for (; n-- > 0; ++j) {
char ch{tokens.TokenAt(j).OnlyNonBlank()};
if (ch == '(') {
++nesting;
} else if (ch == ')') {
if (nesting-- == 0) {
break;
}
}
}
if (nesting != 0) {
anyMacroWithUnbalancedParentheses_ = true;
}
}
}
} // namespace Fortran::parser

View File

@ -80,6 +80,10 @@ public:
// Implements a preprocessor directive.
void Directive(const TokenSequence &, Prescanner &);
bool anyMacroWithUnbalancedParentheses() const {
return anyMacroWithUnbalancedParentheses_;
}
private:
enum class IsElseActive { No, Yes };
enum class CanDeadElseAppear { No, Yes };
@ -91,11 +95,14 @@ private:
bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first,
std::size_t exprTokens, Prescanner &);
void LineDirective(const TokenSequence &, std::size_t, Prescanner &);
void CheckForUnbalancedParentheses(
const TokenSequence &, std::size_t first, std::size_t tokens);
AllSources &allSources_;
std::list<std::string> names_;
std::unordered_map<CharBlock, Definition> definitions_;
std::stack<CanDeadElseAppear> ifStack_;
bool anyMacroWithUnbalancedParentheses_{false};
};
} // namespace Fortran::parser
#endif // FORTRAN_PARSER_PREPROCESSOR_H_

View File

@ -630,10 +630,14 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
}
} else {
char ch{*at_};
if (ch == '(' || ch == '[') {
++delimiterNesting_;
} else if ((ch == ')' || ch == ']') && delimiterNesting_ > 0) {
--delimiterNesting_;
if (ch == '(') {
if (parenthesisNesting_++ == 0) {
isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
preprocessor_.IsNameDefined(
tokens.TokenAt(tokens.SizeInTokens() - 1));
}
} else if (ch == ')' && parenthesisNesting_ > 0) {
--parenthesisNesting_;
}
char nch{EmitCharAndAdvance(tokens, ch)};
preventHollerith_ = false;
@ -1142,8 +1146,9 @@ bool Prescanner::FreeFormContinuation() {
// Implicit line continuation allows a preprocessor macro call with
// arguments to span multiple lines.
bool Prescanner::IsImplicitContinuation() const {
return !inPreprocessorDirective_ && !inCharLiteral_ &&
delimiterNesting_ > 0 && !IsAtEnd() &&
return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
parenthesisNesting_ > 0 &&
!preprocessor_.anyMacroWithUnbalancedParentheses() && !IsAtEnd() &&
ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
}

View File

@ -109,8 +109,9 @@ private:
BeginSourceLineAndAdvance();
slashInCurrentStatement_ = false;
preventHollerith_ = false;
delimiterNesting_ = 0;
parenthesisNesting_ = 0;
continuationLines_ = 0;
isPossibleMacroCall_ = false;
}
Provenance GetProvenance(const char *sourceChar) const {
@ -194,9 +195,10 @@ private:
bool inFixedForm_{false};
int fixedFormColumnLimit_{72};
Encoding encoding_{Encoding::UTF_8};
int delimiterNesting_{0};
int parenthesisNesting_{0};
int prescannerNesting_{0};
int continuationLines_{0};
bool isPossibleMacroCall_{false};
Provenance startProvenance_;
const char *start_{nullptr}; // beginning of current source file content

View File

@ -376,11 +376,13 @@ const TokenSequence &TokenSequence::CheckBadParentheses(
std::size_t tokens{SizeInTokens()};
for (std::size_t j{0}; j < tokens; ++j) {
CharBlock token{TokenAt(j)};
char ch{token.FirstNonBlank()};
char ch{token.OnlyNonBlank()};
if (ch == '(') {
++nesting;
} else if (ch == ')') {
--nesting;
if (nesting-- == 0) {
break;
}
}
}
if (nesting != 0) {
@ -388,7 +390,7 @@ const TokenSequence &TokenSequence::CheckBadParentheses(
std::vector<std::size_t> stack;
for (std::size_t j{0}; j < tokens; ++j) {
CharBlock token{TokenAt(j)};
char ch{token.FirstNonBlank()};
char ch{token.OnlyNonBlank()};
if (ch == '(') {
stack.push_back(j);
} else if (ch == ')') {

View File

@ -50,7 +50,7 @@ subroutine check_not_assert()
y = elem_func_r(cos(x) + u)
! Array constructors as elemental function arguments.
y = atan2( (/ (real(i, 4), i = 1, 2) /),
y = atan2( (/ (real(i, 4), i = 1, 2) /), &
real( (/ (i, i = j, k, l) /), 4) )
end subroutine

View File

@ -0,0 +1,9 @@
! RUN: %flang -E %s | FileCheck %s
! When there's a macro definition with unbalanced parentheses,
! don't apply implicit continuation.
#define M )
call foo (1 M
end
!CHECK: call foo(1 )
!CHECK: end

View File

@ -0,0 +1,13 @@
! RUN: %flang -E %s | FileCheck %s
! Test implicit continuation for possible function-like macro calls
#define flm(x) x
print *, flm(1)
continue
print *, flm(2
)
end
!CHECK: print *, 1
!CHECK: continue
!CHECK: print *, 2
!CHECK: end

View File

@ -0,0 +1,7 @@
! RUN: not %flang -E %s 2>&1 | FileCheck %s
! Test implicit continuation for possible function-like macro calls only
#define flm(x) x
call notamacro(3
)
!CHECK: error: Unmatched '('
!CHECK: error: Unmatched ')'

View File

@ -1,5 +1,5 @@
! RUN: %flang -E %s 2>&1 | FileCheck %s
! CHECK: res = IFLM(666 )
! CHECK: res = ((666)+111)
! FLM call with closing ')' on next line (not a continuation)
integer function IFLM(x)
integer :: x

View File

@ -19,7 +19,7 @@ program BigArray
( &
!ERROR: Must be a constant value
0_foo,ii=1,limit &
),
), &
jj=kk,limit &
), &
kk=1,limit &