[clang-format] Add simple macro replacements in formatting.

Add configuration to specify macros.
Macros will be expanded, and the code will be parsed and annotated
in the expanded state. In a second step, the formatting decisions
in the annotated expanded code will be reconstructed onto the
original unexpanded macro call.

Eventually, this will allow to remove special-case code for
various macro options we accumulated over the years in favor of
one principled mechanism.

Differential Revision: https://reviews.llvm.org/D144170
This commit is contained in:
Manuel Klimek 2023-02-01 13:20:28 +00:00
parent bf579a7049
commit 01402831aa
18 changed files with 737 additions and 102 deletions

View File

@ -2745,6 +2745,39 @@ struct FormatStyle {
/// \version 3.7
std::string MacroBlockEnd;
/// A list of macros of the form \c <definition>=<expansion> .
///
/// Code will be parsed with macros expanded, in order to determine how to
/// interpret and format the macro arguments.
///
/// For example, the code:
/// \code
/// A(a*b);
/// \endcode
/// will usually be interpreted as a call to a function A, and the
/// multiplication expression will be formatted as `a * b`.
///
/// If we specify the macro definition:
/// \code
/// Macros:
/// - A(x)=x
/// \endcode
/// the code will now be parsed as a declaration of the variable b of type a*,
/// and formatted as `a* b` (depending on pointer-binding rules).
///
/// Features and restrictions:
/// * Both function-like macros and object-like macros are supported.
/// * Macro arguments must be used exactly once in the expansion.
/// * No recursive expansion; macros referencing other macros will be
/// ignored.
/// * Overloading by arity is supported: for example, given the macro
/// definitions A=x, A()=y, A(a)=a,
/// 'A;' -> 'x;'
/// 'A();' -> 'y;'
/// 'A(z);' -> 'z;'
/// 'A(a, b) will not be expanded.
std::vector<std::string> Macros;
/// The maximum number of consecutive empty lines to keep.
/// \code
/// MaxEmptyLinesToKeep: 1 vs. MaxEmptyLinesToKeep: 0
@ -4306,7 +4339,8 @@ struct FormatStyle {
StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;
WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros &&
Macros == R.Macros;
}
std::optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;

View File

@ -18,6 +18,7 @@
#include "WhitespaceManager.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Debug.h"
@ -739,9 +740,14 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
if (Previous.is(TT_TemplateString) && Previous.opensScope())
CurrentState.NoLineBreak = true;
// Align following lines within parentheses / brackets if configured.
// Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
// with args as children of the '(' and ',' tokens. It does not make sense to
// align the commas with the opening paren.
if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
!CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
!(Current.MacroParent && Previous.MacroParent) &&
(Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) {
CurrentState.Indent = State.Column + Spaces;
CurrentState.IsAligned = true;

View File

@ -1036,6 +1036,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("UseTab", Style.UseTab);
IO.mapOptional("WhitespaceSensitiveMacros",
Style.WhitespaceSensitiveMacros);
IO.mapOptional("Macros", Style.Macros);
// If AlwaysBreakAfterDefinitionReturnType was specified but
// AlwaysBreakAfterReturnType was not, initialize the latter from the

View File

@ -377,6 +377,11 @@ public:
/// binary operator.
TokenType getType() const { return Type; }
void setType(TokenType T) {
// If this token is a macro argument while formatting an unexpanded macro
// call, we do not change its type any more - the type was deduced from
// formatting the expanded macro stream already.
if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
return;
assert((!TypeIsFinalized || T == Type) &&
"Please use overwriteFixedType to change a fixed type.");
Type = T;

View File

@ -141,24 +141,44 @@ void MacroExpander::parseDefinition(const std::string &Macro) {
if (!Tokens.empty()) {
DefinitionParser Parser(Tokens);
auto Definition = Parser.parse();
Definitions[Definition.Name] = std::move(Definition);
if (Definition.ObjectLike) {
ObjectLike[Definition.Name] = std::move(Definition);
} else {
FunctionLike[Definition.Name][Definition.Params.size()] =
std::move(Definition);
}
}
}
bool MacroExpander::defined(llvm::StringRef Name) const {
return Definitions.find(Name) != Definitions.end();
return FunctionLike.find(Name) != FunctionLike.end() ||
ObjectLike.find(Name) != ObjectLike.end();
}
bool MacroExpander::objectLike(llvm::StringRef Name) const {
return Definitions.find(Name)->second.ObjectLike;
return ObjectLike.find(Name) != ObjectLike.end();
}
llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
ArgsList Args) const {
assert(defined(ID->TokenText));
SmallVector<FormatToken *, 8> Result;
const Definition &Def = Definitions.find(ID->TokenText)->second;
bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const {
auto it = FunctionLike.find(Name);
return it != FunctionLike.end() &&
(it->second.find(Arity) != it->second.end());
}
llvm::SmallVector<FormatToken *, 8>
MacroExpander::expand(FormatToken *ID,
std::optional<ArgsList> OptionalArgs) const {
if (OptionalArgs)
assert(hasArity(ID->TokenText, OptionalArgs->size()));
else
assert(objectLike(ID->TokenText));
const Definition &Def = OptionalArgs
? FunctionLike.find(ID->TokenText)
->second.find(OptionalArgs.value().size())
->second
: ObjectLike.find(ID->TokenText)->second;
ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
SmallVector<FormatToken *, 8> Result;
// Expand each argument at most once.
llvm::StringSet<> ExpandedArgs;

View File

@ -106,17 +106,23 @@ public:
IdentifierTable &IdentTable);
~MacroExpander();
/// Returns whether a macro \p Name is defined.
/// Returns whether any macro \p Name is defined, regardless of overloads.
bool defined(llvm::StringRef Name) const;
/// Returns whether the macro has no arguments and should not consume
/// subsequent parentheses.
/// Returns whetherh there is an object-like overload, i.e. where the macro
/// has no arguments and should not consume subsequent parentheses.
bool objectLike(llvm::StringRef Name) const;
/// Returns whether macro \p Name provides an overload with the given arity.
bool hasArity(llvm::StringRef Name, unsigned Arity) const;
/// Returns the expanded stream of format tokens for \p ID, where
/// each element in \p Args is a positional argument to the macro call.
llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
ArgsList Args) const;
/// If \p Args is not set, the object-like overload is used.
/// If \p Args is set, the overload with the arity equal to \c Args.size() is
/// used.
llvm::SmallVector<FormatToken *, 8>
expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;
private:
struct Definition;
@ -129,7 +135,8 @@ private:
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
IdentifierTable &IdentTable;
SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
llvm::StringMap<Definition> Definitions;
llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
llvm::StringMap<Definition> ObjectLike;
};
/// Converts a sequence of UnwrappedLines containing expanded macros into a
@ -149,7 +156,7 @@ private:
///
/// After this point, the state of the spelled/expanded stream is "in sync"
/// (both at the start of an UnwrappedLine, with no macros open), so the
/// Unexpander can be thrown away and parsing can continue.
/// Reconstructor can be thrown away and parsing can continue.
///
/// Given a mapping from the macro name identifier token in the macro call
/// to the tokens of the macro call, for example:

View File

@ -104,12 +104,12 @@ TokenAnalyzer::process(bool SkipAnnotation) {
IdentifierTable IdentTable(getFormattingLangOpts(Style));
FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(),
Env.getFirstStartColumn(), Style, Encoding, Allocator,
IdentTable);
ArrayRef<FormatToken *> Toks(Lex.lex());
SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end());
UnwrappedLineParser Parser(Style, Lex.getKeywords(),
Env.getFirstStartColumn(), Tokens, *this);
UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(),
Env.getFirstStartColumn(), Tokens, *this,
Allocator, IdentTable);
Parser.parse();
assert(UnwrappedLines.back().empty());
unsigned Penalty = 0;

View File

@ -46,7 +46,7 @@ public:
FileID getFileID() const { return ID; }
const SourceManager &getSourceManager() const { return SM; }
SourceManager &getSourceManager() const { return SM; }
ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }

View File

@ -2614,6 +2614,13 @@ public:
// Consume operators with higher precedence.
parse(Precedence + 1);
// Do not assign fake parenthesis to tokens that are part of an
// unexpanded macro call. The line within the macro call contains
// the parenthesis and commas, and we will not find operators within
// that structure.
if (Current && Current->MacroParent)
break;
int CurrentPrecedence = getCurrentPrecedence();
if (Precedence == CurrentPrecedence && Current &&
@ -4389,8 +4396,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
return true;
}
if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) &&
// In an unexpanded macro call we only find the parentheses and commas
// in a line; the commas and closing parenthesis do not require a space.
(Left.Children.empty() || !Left.MacroParent)) {
return true;
}
if (Right.is(tok::comma))
return false;
if (Right.is(TT_ObjCBlockLParen))

View File

@ -65,20 +65,32 @@ public:
// left them in a different state.
First->Previous = nullptr;
FormatToken *Current = First;
addChildren(Line.Tokens.front(), Current);
for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
if (Node.Tok->MacroParent)
ContainsMacroCall = true;
Current->Next = Node.Tok;
Node.Tok->Previous = Current;
Current = Current->Next;
Current->Children.clear();
for (const auto &Child : Node.Children) {
Children.push_back(new AnnotatedLine(Child));
Current->Children.push_back(Children.back());
}
addChildren(Node, Current);
// FIXME: if we add children, previous will point to the token before
// the children; changing this requires significant changes across
// clang-format.
}
Last = Current;
Last->Next = nullptr;
}
void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
Current->Children.clear();
for (const auto &Child : Node.Children) {
Children.push_back(new AnnotatedLine(Child));
if (Children.back()->ContainsMacroCall)
ContainsMacroCall = true;
Current->Children.push_back(Children.back());
}
}
~AnnotatedLine() {
for (AnnotatedLine *Child : Children)
delete Child;
@ -149,6 +161,9 @@ public:
bool MightBeFunctionDecl;
bool IsMultiVariableDeclStmt;
/// \c True if this line contains a macro call for which an expansion exists.
bool ContainsMacroCall = false;
/// \c True if this line should be formatted, i.e. intersects directly or
/// indirectly with one of the input ranges.
bool Affected;

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "UnwrappedLineFormatter.h"
#include "FormatToken.h"
#include "NamespaceEndCommentsFixer.h"
#include "WhitespaceManager.h"
#include "llvm/Support/Debug.h"
@ -918,9 +919,22 @@ private:
static void markFinalized(FormatToken *Tok) {
for (; Tok; Tok = Tok->Next) {
Tok->Finalized = true;
for (AnnotatedLine *Child : Tok->Children)
markFinalized(Child->First);
if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) {
// In the first pass we format all macro arguments in the expanded token
// stream. Instead of finalizing the macro arguments, we mark that they
// will be modified as unexpanded arguments (as part of the macro call
// formatting) in the next pass.
Tok->MacroCtx->Role = MR_UnexpandedArg;
// Reset whether spaces are required before this token, as that is context
// dependent, and that context may change when formatting the macro call.
// For example, given M(x) -> 2 * x, and the macro call M(var),
// the token 'var' will have SpacesRequiredBefore = 1 after being
// formatted as part of the expanded macro, but SpacesRequiredBefore = 0
// for its position within the macro call.
Tok->SpacesRequiredBefore = 0;
} else {
Tok->Finalized = true;
}
}
}
@ -975,15 +989,15 @@ protected:
bool formatChildren(LineState &State, bool NewLine, bool DryRun,
unsigned &Penalty) {
const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block);
FormatToken &Previous = *State.NextToken->Previous;
if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) ||
Previous.Children.size() == 0) {
if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) {
// The previous token does not open a block. Nothing to do. We don't
// assert so that we can simply call this function for all tokens.
return true;
}
if (NewLine) {
if (NewLine || Previous.MacroParent) {
const ParenState &P = State.Stack.back();
int AdditionalIndent =
@ -1349,11 +1363,12 @@ unsigned UnwrappedLineFormatter::format(
NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
bool FitsIntoOneLine =
TheLine.Last->TotalLength + Indent <= ColumnLimit ||
(TheLine.Type == LT_ImportStatement &&
(!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
(Style.isCSharp() &&
TheLine.InPPDirective); // don't split #regions in C#
!TheLine.ContainsMacroCall &&
(TheLine.Last->TotalLength + Indent <= ColumnLimit ||
(TheLine.Type == LT_ImportStatement &&
(!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
(Style.isCSharp() &&
TheLine.InPPDirective)); // don't split #regions in C#
if (Style.ColumnLimit == 0) {
NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
.formatLine(TheLine, NextStartColumn + Indent,

View File

@ -14,11 +14,15 @@
#include "UnwrappedLineParser.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@ -143,11 +147,12 @@ private:
unsigned OldLineLevel;
};
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
unsigned FirstStartColumn,
ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback)
UnwrappedLineParser::UnwrappedLineParser(
SourceManager &SourceMgr, const FormatStyle &Style,
const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
IdentifierTable &IdentTable)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
CurrentLines(&Lines), Style(Style), Keywords(Keywords),
CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
@ -155,7 +160,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
? IG_Rejected
: IG_Inited),
IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
void UnwrappedLineParser::reset() {
PPBranchLevel = -1;
@ -173,6 +179,15 @@ void UnwrappedLineParser::reset() {
NestedTooDeep.clear();
PPStack.clear();
Line->FirstStartColumn = FirstStartColumn;
if (!Unexpanded.empty())
for (FormatToken *Token : AllTokens)
Token->MacroCtx.reset();
CurrentExpandedLines.clear();
ExpandedLines.clear();
Unexpanded.clear();
InExpansion = false;
Reconstruct.reset();
}
void UnwrappedLineParser::parse() {
@ -196,12 +211,36 @@ void UnwrappedLineParser::parse() {
}
// Create line with eof token.
assert(FormatTok->is(tok::eof));
pushToken(FormatTok);
addUnwrappedLine();
for (const UnwrappedLine &Line : Lines)
Callback.consumeUnwrappedLine(Line);
// In a first run, format everything with the lines containing macro calls
// replaced by the expansion.
if (!ExpandedLines.empty()) {
LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
for (const auto &Line : Lines) {
if (!Line.Tokens.empty()) {
auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
if (it != ExpandedLines.end()) {
for (const auto &Expanded : it->second) {
LLVM_DEBUG(printDebugInfo(Expanded));
Callback.consumeUnwrappedLine(Expanded);
}
continue;
}
}
LLVM_DEBUG(printDebugInfo(Line));
Callback.consumeUnwrappedLine(Line);
}
Callback.finishRun();
}
LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
for (const UnwrappedLine &Line : Lines) {
LLVM_DEBUG(printDebugInfo(Line));
Callback.consumeUnwrappedLine(Line);
}
Callback.finishRun();
Lines.clear();
while (!PPLevelBranchIndex.empty() &&
@ -724,7 +763,7 @@ FormatToken *UnwrappedLineParser::parseBlock(
parseParens();
size_t NbPreprocessorDirectives =
CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
!parsingPPDirective() ? PreprocessorDirectives.size() : 0;
addUnwrappedLine();
size_t OpeningLineIndex =
CurrentLines->empty()
@ -4152,12 +4191,25 @@ void UnwrappedLineParser::parseVerilogCaseLabel() {
Line->Level = OrigLevel;
}
bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
for (const auto &N : Line.Tokens) {
if (N.Tok->MacroCtx)
return true;
for (const UnwrappedLine &Child : N.Children)
if (containsExpansion(Child))
return true;
}
return false;
}
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
if (Line->Tokens.empty())
return;
LLVM_DEBUG({
if (CurrentLines == &Lines)
if (!parsingPPDirective()) {
llvm::dbgs() << "Adding unwrapped line:\n";
printDebugInfo(*Line);
}
});
// If this line closes a block when in Whitesmiths mode, remember that
@ -4168,7 +4220,39 @@ void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
CurrentLines->push_back(std::move(*Line));
// If the current line was expanded from a macro call, we use it to
// reconstruct an unwrapped line from the structure of the expanded unwrapped
// line and the unexpanded token stream.
if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
if (!Reconstruct)
Reconstruct.emplace(Line->Level, Unexpanded);
Reconstruct->addLine(*Line);
// While the reconstructed unexpanded lines are stored in the normal
// flow of lines, the expanded lines are stored on the side to be analyzed
// in an extra step.
CurrentExpandedLines.push_back(std::move(*Line));
if (Reconstruct->finished()) {
UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
assert(!Reconstructed.Tokens.empty() &&
"Reconstructed must at least contain the macro identifier.");
assert(!parsingPPDirective());
LLVM_DEBUG({
llvm::dbgs() << "Adding unexpanded line:\n";
printDebugInfo(Reconstructed);
});
ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
Lines.push_back(std::move(Reconstructed));
CurrentExpandedLines.clear();
Reconstruct.reset();
}
} else {
// At the top level we only get here when no unexpansion is going on, or
// when conditional formatting led to unfinished macro reconstructions.
assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
CurrentLines->push_back(std::move(*Line));
}
Line->Tokens.clear();
Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
Line->FirstStartColumn = 0;
@ -4176,7 +4260,7 @@ void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
--Line->Level;
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),
std::make_move_iterator(PreprocessorDirectives.end()));
@ -4470,6 +4554,87 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
continue;
}
if (FormatTok->is(tok::identifier) &&
Macros.defined(FormatTok->TokenText) &&
// FIXME: Allow expanding macros in preprocessor directives.
!Line->InPPDirective) {
FormatToken *ID = FormatTok;
unsigned Position = Tokens->getPosition();
// To correctly parse the code, we need to replace the tokens of the macro
// call with its expansion.
auto PreCall = std::move(Line);
Line.reset(new UnwrappedLine);
bool OldInExpansion = InExpansion;
InExpansion = true;
// We parse the macro call into a new line.
auto Args = parseMacroCall();
InExpansion = OldInExpansion;
assert(Line->Tokens.front().Tok == ID);
// And remember the unexpanded macro call tokens.
auto UnexpandedLine = std::move(Line);
// Reset to the old line.
Line = std::move(PreCall);
LLVM_DEBUG({
llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
if (Args) {
llvm::dbgs() << "(";
for (const auto &Arg : Args.value())
for (const auto &T : Arg)
llvm::dbgs() << T->TokenText << " ";
llvm::dbgs() << ")";
}
llvm::dbgs() << "\n";
});
if (Macros.objectLike(ID->TokenText) && Args &&
!Macros.hasArity(ID->TokenText, Args->size())) {
// The macro is either
// - object-like, but we got argumnets, or
// - overloaded to be both object-like and function-like, but none of
// the function-like arities match the number of arguments.
// Thus, expand as object-like macro.
LLVM_DEBUG(llvm::dbgs()
<< "Macro \"" << ID->TokenText
<< "\" not overloaded for arity " << Args->size()
<< "or not function-like, using object-like overload.");
Args.reset();
UnexpandedLine->Tokens.resize(1);
Tokens->setPosition(Position);
nextToken();
assert(!Args && Macros.objectLike(ID->TokenText));
}
if ((!Args && Macros.objectLike(ID->TokenText)) ||
(Args && Macros.hasArity(ID->TokenText, Args->size()))) {
// Next, we insert the expanded tokens in the token stream at the
// current position, and continue parsing.
Unexpanded[ID] = std::move(UnexpandedLine);
SmallVector<FormatToken *, 8> Expansion =
Macros.expand(ID, std::move(Args));
if (!Expansion.empty())
FormatTok = Tokens->insertTokens(Expansion);
LLVM_DEBUG({
llvm::dbgs() << "Expanded: ";
for (const auto &T : Expansion)
llvm::dbgs() << T->TokenText << " ";
llvm::dbgs() << "\n";
});
} else {
LLVM_DEBUG({
llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
<< "\", because it was used ";
if (Args)
llvm::dbgs() << "with " << Args->size();
else
llvm::dbgs() << "without";
llvm::dbgs() << " arguments, which doesn't match any definition.\n";
});
Tokens->setPosition(Position);
FormatTok = ID;
}
}
if (!FormatTok->is(tok::comment)) {
distributeComments(Comments, FormatTok);
Comments.clear();
@ -4483,6 +4648,71 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
Comments.clear();
}
namespace {
template <typename Iterator>
void pushTokens(Iterator Begin, Iterator End,
llvm::SmallVectorImpl<FormatToken *> &Into) {
for (auto I = Begin; I != End; ++I) {
Into.push_back(I->Tok);
for (const auto &Child : I->Children)
pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
}
}
} // namespace
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
UnwrappedLineParser::parseMacroCall() {
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
assert(Line->Tokens.empty());
nextToken();
if (!FormatTok->is(tok::l_paren))
return Args;
unsigned Position = Tokens->getPosition();
FormatToken *Tok = FormatTok;
nextToken();
Args.emplace();
auto ArgStart = std::prev(Line->Tokens.end());
int Parens = 0;
do {
switch (FormatTok->Tok.getKind()) {
case tok::l_paren:
++Parens;
nextToken();
break;
case tok::r_paren: {
if (Parens > 0) {
--Parens;
nextToken();
break;
}
Args->push_back({});
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
nextToken();
return Args;
}
case tok::comma: {
if (Parens > 0) {
nextToken();
break;
}
Args->push_back({});
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
nextToken();
ArgStart = std::prev(Line->Tokens.end());
break;
}
default:
nextToken();
break;
}
} while (!eof());
Line->Tokens.resize(1);
Tokens->setPosition(Position);
FormatTok = Tok;
return {};
}
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
Line->Tokens.push_back(UnwrappedLineNode(Tok));
if (MustBreakBeforeNextToken) {

View File

@ -15,10 +15,14 @@
#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
#include "Encoding.h"
#include "FormatToken.h"
#include "Macros.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Regex.h"
#include <list>
#include <stack>
@ -76,6 +80,19 @@ struct UnwrappedLine {
unsigned FirstStartColumn = 0;
};
/// Interface for users of the UnwrappedLineParser to receive the parsed lines.
/// Parsing a single snippet of code can lead to multiple runs, where each
/// run is a coherent view of the file.
///
/// For example, different runs are generated:
/// - for different combinations of #if blocks
/// - when macros are involved, for the expanded code and the as-written code
///
/// Some tokens will only be visible in a subset of the runs.
/// For each run, \c UnwrappedLineParser will call \c consumeUnwrappedLine
/// for each parsed unwrapped line, and then \c finishRun to indicate
/// that the set of unwrapped lines before is one coherent view of the
/// code snippet to be formatted.
class UnwrappedLineConsumer {
public:
virtual ~UnwrappedLineConsumer() {}
@ -87,10 +104,12 @@ class FormatTokenSource;
class UnwrappedLineParser {
public:
UnwrappedLineParser(const FormatStyle &Style,
UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style,
const AdditionalKeywords &Keywords,
unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
UnwrappedLineConsumer &Callback);
UnwrappedLineConsumer &Callback,
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
IdentifierTable &IdentTable);
void parse();
@ -193,6 +212,8 @@ private:
unsigned parseVerilogHierarchyHeader();
void parseVerilogTable();
void parseVerilogCaseLabel();
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
parseMacroCall();
// Used by addUnwrappedLine to denote whether to keep or remove a level
// when resetting the line state.
@ -236,16 +257,49 @@ private:
bool isOnNewLine(const FormatToken &FormatTok);
// Returns whether there is a macro expansion in the line, i.e. a token that
// was expanded from a macro call.
bool containsExpansion(const UnwrappedLine &Line) const;
// Compute hash of the current preprocessor branch.
// This is used to identify the different branches, and thus track if block
// open and close in the same branch.
size_t computePPHash() const;
bool parsingPPDirective() const { return CurrentLines != &Lines; }
// FIXME: We are constantly running into bugs where Line.Level is incorrectly
// subtracted from beyond 0. Introduce a method to subtract from Line.Level
// and use that everywhere in the Parser.
std::unique_ptr<UnwrappedLine> Line;
// Lines that are created by macro expansion.
// When formatting code containing macro calls, we first format the expanded
// lines to set the token types correctly. Afterwards, we format the
// reconstructed macro calls, re-using the token types determined in the first
// step.
// ExpandedLines will be reset every time we create a new LineAndExpansion
// instance once a line containing macro calls has been parsed.
SmallVector<UnwrappedLine, 8> CurrentExpandedLines;
// Maps from the first token of a top-level UnwrappedLine that contains
// a macro call to the replacement UnwrappedLines expanded from the macro
// call.
llvm::DenseMap<FormatToken *, SmallVector<UnwrappedLine, 8>> ExpandedLines;
// Map from the macro identifier to a line containing the full unexpanded
// macro call.
llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
// For recursive macro expansions, trigger reconstruction only on the
// outermost expansion.
bool InExpansion = false;
// Set while we reconstruct a macro call.
// For reconstruction, we feed the expanded lines into the reconstructor
// until it is finished.
std::optional<MacroCallReconstructor> Reconstruct;
// Comments are sorted into unwrapped lines by whether they are in the same
// line as the previous token, or not. If not, they belong to the next token.
// Since the next token might already be in a new unwrapped line, we need to
@ -345,13 +399,17 @@ private:
// does not start at the beginning of the file.
unsigned FirstStartColumn;
MacroExpander Macros;
friend class ScopedLineState;
friend class CompoundStatementIndenter;
};
struct UnwrappedLineNode {
UnwrappedLineNode() : Tok(nullptr) {}
UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
UnwrappedLineNode(FormatToken *Tok,
llvm::ArrayRef<UnwrappedLine> Children = {})
: Tok(Tok), Children(Children.begin(), Children.end()) {}
FormatToken *Tok;
SmallVector<UnwrappedLine, 0> Children;

View File

@ -49,7 +49,7 @@ void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
unsigned Spaces,
unsigned StartOfTokenColumn,
bool IsAligned, bool InPPDirective) {
if (Tok.Finalized)
if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
return;
Tok.setDecision((Newlines > 0) ? FD_Break : FD_Continue);
Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
@ -60,7 +60,7 @@ void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
bool InPPDirective) {
if (Tok.Finalized)
if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
return;
Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
Tok.WhitespaceRange, /*Spaces=*/0,
@ -84,7 +84,7 @@ void WhitespaceManager::replaceWhitespaceInToken(
const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
unsigned Newlines, int Spaces) {
if (Tok.Finalized)
if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
return;
SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
Changes.push_back(

View File

@ -22576,6 +22576,244 @@ TEST_F(FormatTest, MergeLessLessAtEnd) {
"aaaallvm::outs()\n <<");
}
TEST_F(FormatTest, UnexpandConfiguredMacros) {
FormatStyle Style = getLLVMStyle();
Style.Macros.push_back("CLASS=class C {");
Style.Macros.push_back("SEMI=;");
Style.Macros.push_back("STMT=f();");
Style.Macros.push_back("ID(x)=x");
Style.Macros.push_back("ID3(x, y, z)=x y z");
Style.Macros.push_back("CALL(x)=f([] { x })");
Style.Macros.push_back("ASSIGN_OR_RETURN(a, b)=a = (b)");
Style.Macros.push_back("ASSIGN_OR_RETURN(a, b, c)=a = (b); if (x) return c");
Style.Macros.push_back("MOCK_METHOD(r, n, a, s)=r n a s");
verifyFormat("ID(nested(a(b, c), d))", Style);
verifyFormat("CLASS\n"
" a *b;\n"
"};",
Style);
verifyFormat("SEMI\n"
"SEMI\n"
"SEMI",
Style);
verifyFormat("STMT\n"
"STMT\n"
"STMT",
Style);
verifyFormat("void f() { ID(a *b); }", Style);
verifyFormat(R"(ID(
{ ID(a *b); });
)",
Style);
verifyIncompleteFormat(R"(ID3({, ID(a *b),
;
});
)",
Style);
verifyFormat("ID(CALL(CALL(return a * b;)));", Style);
verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n"
" MySomewhatLongFunction(SomethingElse()));\n",
Style);
verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n"
" MySomewhatLongFunction(SomethingElse()), "
"ReturnMe());\n",
Style);
verifyFormat(R"(
#define MACRO(a, b) ID(a + b)
)",
Style);
EXPECT_EQ(R"(
int a;
int b;
int c;
int d;
int e;
int f;
ID(
namespace foo {
int a;
}
) // namespace k
)",
format(R"(
int a;
int b;
int c;
int d;
int e;
int f;
ID(namespace foo { int a; }) // namespace k
)",
Style));
verifyFormat(R"(ID(
//
({ ; }))
)",
Style);
Style.ColumnLimit = 35;
// FIXME: Arbitrary formatting of macros where the end of the logical
// line is in the middle of a macro call are not working yet.
verifyFormat(R"(ID(
void f();
void)
ID(g) ID(()) ID(
;
void g();)
)",
Style);
Style.ColumnLimit = 10;
verifyFormat("STMT\n"
"STMT\n"
"STMT",
Style);
EXPECT_EQ(R"(
ID(CALL(CALL(
a *b)));
)",
format(R"(
ID(CALL(CALL(a * b)));
)",
Style));
// FIXME: If we want to support unbalanced braces or parens from macro
// expansions we need to re-think how we propagate errors in
// TokenAnnotator::parseLine; for investigation, switching the inner loop of
// TokenAnnotator::parseLine to return LT_Other instead of LT_Invalid in case
// of !consumeToken() changes the formatting of the test below and makes it
// believe it has a fully correct formatting.
EXPECT_EQ(R"(
ID3(
{
CLASS
a *b;
};
},
ID(x *y);
,
STMT
STMT
STMT)
void f();
)",
format(R"(
ID3({CLASS a*b; };}, ID(x*y);, STMT STMT STMT)
void f();
)",
Style));
verifyFormat("ID(a(\n"
"#ifdef A\n"
" b, c\n"
"#else\n"
" d(e)\n"
"#endif\n"
" ))",
Style);
Style.ColumnLimit = 80;
verifyFormat(R"(ASSIGN_OR_RETURN(
// Comment
a b, c);
)",
Style);
Style.ColumnLimit = 30;
verifyFormat(R"(ASSIGN_OR_RETURN(
// Comment
//
a b,
xxxxxxxxxxxx(
yyyyyyyyyyyyyyyyy,
zzzzzzzzzzzzzzzzzz),
f([]() {
a();
b();
}));
)",
Style);
verifyFormat(R"(int a = []() {
ID(
x;
y;
z;)
;
}();
)",
Style);
EXPECT_EQ(
R"(ASSIGN_OR_RETURN((
====
#))
})",
format(R"(ASSIGN_OR_RETURN((
====
#))
})",
Style, SC_ExpectIncomplete));
EXPECT_EQ(R"(ASSIGN_OR_RETURN(
}
(
====
#),
a))",
format(R"(ASSIGN_OR_RETURN(
}
(
====
#),
a))",
Style, SC_ExpectIncomplete));
EXPECT_EQ(R"(ASSIGN_OR_RETURN(a
//
====
#
<))",
format(R"(ASSIGN_OR_RETURN(a
//
====
#
<))",
Style));
verifyFormat("class C {\n"
" MOCK_METHOD(R, f,\n"
" (a *b, c *d),\n"
" (override));\n"
"};",
Style);
}
TEST_F(FormatTest, KeepParensWhenExpandingObjectLikeMacros) {
FormatStyle Style = getLLVMStyle();
Style.Macros.push_back("FN=class C { int f");
verifyFormat("void f() {\n"
" FN(a *b);\n"
" };\n"
"}",
Style);
}
TEST_F(FormatTest, DoesNotExpandFunctionLikeMacrosWithoutParens) {
FormatStyle Style = getLLVMStyle();
Style.Macros.push_back("CLASS()=class C {");
verifyFormat("CLASS void f();\n"
"}\n"
";",
Style);
}
TEST_F(FormatTest, ContinueFormattingAfterUnclosedParensAfterObjectLikeMacro) {
FormatStyle Style = getLLVMStyle();
Style.Macros.push_back("O=class {");
verifyIncompleteFormat("O(auto x = [](){\n"
" f();}",
Style);
}
TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) {
std::string code = "#if A\n"
"#if B\n"

View File

@ -33,13 +33,31 @@ public:
TokenList
expand(llvm::StringRef Name,
const SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> &Args) {
return expandInternal(Name, Args);
}
TokenList expand(llvm::StringRef Name) { return expandInternal(Name, {}); }
TokenList expand(llvm::StringRef Name, const std::vector<std::string> &Args) {
return expandInternal(Name, lexArgs(Args));
}
const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
const TokenList &getTokens() const { return Tokens; }
private:
TokenList expandInternal(
llvm::StringRef Name,
const std::optional<SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
&Args) {
auto *ID = Lex.id(Name);
auto UnexpandedLine = std::make_unique<UnwrappedLine>();
UnexpandedLine->Tokens.push_back(ID);
if (!Args.empty()) {
if (Args && !Args->empty()) {
UnexpandedLine->Tokens.push_back(Lex.id("("));
for (auto I = Args.begin(), E = Args.end(); I != E; ++I) {
if (I != Args.begin())
for (auto I = Args->begin(), E = Args->end(); I != E; ++I) {
if (I != Args->begin())
UnexpandedLine->Tokens.push_back(Lex.id(","));
UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(),
I->end());
@ -57,16 +75,6 @@ public:
return UnexpandedTokens;
}
TokenList expand(llvm::StringRef Name,
const std::vector<std::string> &Args = {}) {
return expand(Name, lexArgs(Args));
}
const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
const TokenList &getTokens() const { return Tokens; }
private:
llvm::SmallVector<TokenList, 1>
lexArgs(const std::vector<std::string> &Args) {
llvm::SmallVector<TokenList, 1> Result;
@ -563,34 +571,6 @@ TEST_F(MacroCallReconstructorTest, ParentOutsideMacroCall) {
EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
}
TEST_F(MacroCallReconstructorTest, UnusedMacroArguments) {
auto Macros = createExpander({"X=x"});
Expansion Exp(Lex, *Macros);
TokenList Call = Exp.expand("X", {"a", "b", "c"});
MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
Unexp.addLine(line(Exp.getTokens()));
EXPECT_TRUE(Unexp.finished());
Matcher U(Call, Lex);
EXPECT_THAT(std::move(Unexp).takeResult(),
matchesLine(line(U.consume("X(a, b, c)"))));
}
TEST_F(MacroCallReconstructorTest, UnusedEmptyMacroArgument) {
auto Macros = createExpander({"X=x"});
Expansion Exp(Lex, *Macros);
TokenList Call = Exp.expand("X", {std::string("")});
MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
Matcher E(Exp.getTokens(), Lex);
auto Semi = tokens(";");
Unexp.addLine(line({E.consume("x"), Semi}));
EXPECT_TRUE(Unexp.finished());
Matcher U(Call, Lex);
EXPECT_THAT(std::move(Unexp).takeResult(),
matchesLine(line({U.consume("X()"), Semi})));
}
TEST_F(MacroCallReconstructorTest, ChildrenSplitAcrossArguments) {
auto Macros = createExpander({"CALL(a, b)=f([]() a b)"});
Expansion Exp(Lex, *Macros);
@ -655,7 +635,7 @@ TEST_F(MacroCallReconstructorTest, ChildrenAfterMacroCall) {
}
TEST_F(MacroCallReconstructorTest, InvalidCodeSplittingBracesAcrossArgs) {
auto Macros = createExpander({"M(a, b)=(a) (b)"});
auto Macros = createExpander({"M(a, b, c)=(a) (b) c"});
Expansion Exp(Lex, *Macros);
TokenList Call = Exp.expand("M", {std::string("{"), "x", ""});

View File

@ -19,9 +19,16 @@ public:
Lex.Allocator, Lex.IdentTable);
}
std::string expand(MacroExpander &Macros, llvm::StringRef Name) {
EXPECT_TRUE(Macros.defined(Name))
<< "Macro not defined: \"" << Name << "\"";
return text(Macros.expand(Lex.id(Name), {}));
}
std::string expand(MacroExpander &Macros, llvm::StringRef Name,
const std::vector<std::string> &Args = {}) {
EXPECT_TRUE(Macros.defined(Name));
const std::vector<std::string> &Args) {
EXPECT_TRUE(Macros.defined(Name))
<< "Macro not defined: \"" << Name << "\"";
return text(Macros.expand(Lex.id(Name), lexArgs(Args)));
}
@ -95,7 +102,7 @@ TEST_F(MacroExpanderTest, ExpandsWithoutArguments) {
EXPECT_EQ("", expand(*Macros, "A"));
EXPECT_EQ("b", expand(*Macros, "B"));
EXPECT_EQ("c+c", expand(*Macros, "C"));
EXPECT_EQ("", expand(*Macros, "D"));
EXPECT_EQ("", expand(*Macros, "D", {}));
}
TEST_F(MacroExpanderTest, ExpandsWithArguments) {
@ -105,7 +112,6 @@ TEST_F(MacroExpanderTest, ExpandsWithArguments) {
});
EXPECT_EQ("", expand(*Macros, "A", {"a"}));
EXPECT_EQ("b1+b2+b3", expand(*Macros, "B", {"b1", "b2 + b3"}));
EXPECT_EQ("x+", expand(*Macros, "B", {"x"}));
}
TEST_F(MacroExpanderTest, AttributizesTokens) {
@ -200,6 +206,14 @@ TEST_F(MacroExpanderTest, UnderstandsCppTokens) {
EXPECT_ATTRIBUTES(Result, Attributes);
}
TEST_F(MacroExpanderTest, Overloads) {
auto Macros = create({"A=x", "A()=y", "A(a)=a", "A(a, b)=a b"});
EXPECT_EQ("x", expand(*Macros, "A"));
EXPECT_EQ("y", expand(*Macros, "A", {}));
EXPECT_EQ("z", expand(*Macros, "A", {"z"}));
EXPECT_EQ("xy", expand(*Macros, "A", {"x", "y"}));
}
} // namespace
} // namespace format
} // namespace clang

View File

@ -72,7 +72,8 @@ public:
TokenList annotate(llvm::StringRef Code) {
FormatTokenLexer Lex = getNewLexer(Code);
auto Tokens = Lex.lex();
UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this);
UnwrappedLineParser Parser(SourceMgr.get(), Style, Lex.getKeywords(), 0,
Tokens, *this, Allocator, IdentTable);
Parser.parse();
TokenAnnotator Annotator(Style, Lex.getKeywords());
for (auto &Line : UnwrappedLines) {