Reid Kleckner e08464fb45 Avoid including FileManager.h from SourceManager.h
Most clients of SourceManager.h need to do things like turning source
locations into file & line number pairs, but this doesn't require
bringing in FileManager.h and LLVM's FS headers.

The main code change here is to sink SM::createFileID into the cpp file.
I reason that this is not performance critical because it doesn't happen
on the diagnostic path, it happens along the paths of macro expansion
(could be hot) and new includes (less hot).

Saves some includes:
    309 -    /usr/local/google/home/rnk/llvm-project/clang/include/clang/Basic/FileManager.h
    272 -    /usr/local/google/home/rnk/llvm-project/clang/include/clang/Basic/FileSystemOptions.h
    271 -    /usr/local/google/home/rnk/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h
    267 -    /usr/local/google/home/rnk/llvm-project/llvm/include/llvm/Support/FileSystem.h
    266 -    /usr/local/google/home/rnk/llvm-project/llvm/include/llvm/Support/Chrono.h

Differential Revision: https://reviews.llvm.org/D75406
2020-03-11 13:53:12 -07:00

379 lines
16 KiB
C++

//===--- Format.cpp -----------------------------------------*- C++-*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Format.h"
#include "Logger.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/FileManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Unicode.h"
namespace clang {
namespace clangd {
namespace {
/// Append closing brackets )]} to \p Code to make it well-formed.
/// Clang-format conservatively refuses to format files with unmatched brackets
/// as it isn't sure where the errors are and so can't correct.
/// When editing, it's reasonable to assume code before the cursor is complete.
void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
SourceManagerForFile FileSM("dummy.cpp", Code);
auto &SM = FileSM.get();
FileID FID = SM.getMainFileID();
Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
Token Tok;
std::vector<char> Brackets;
while (!Lex.LexFromRawLexer(Tok)) {
switch(Tok.getKind()) {
case tok::l_paren:
Brackets.push_back(')');
break;
case tok::l_brace:
Brackets.push_back('}');
break;
case tok::l_square:
Brackets.push_back(']');
break;
case tok::r_paren:
if (!Brackets.empty() && Brackets.back() == ')')
Brackets.pop_back();
break;
case tok::r_brace:
if (!Brackets.empty() && Brackets.back() == '}')
Brackets.pop_back();
break;
case tok::r_square:
if (!Brackets.empty() && Brackets.back() == ']')
Brackets.pop_back();
break;
default:
continue;
}
}
// Attempt to end any open comments first.
Code.append("\n// */\n");
Code.append(Brackets.rbegin(), Brackets.rend());
}
static StringRef commentMarker(llvm::StringRef Line) {
for (StringRef Marker : {"///", "//"}){
auto I = Line.rfind(Marker);
if (I != StringRef::npos)
return Line.substr(I, Marker.size());
}
return "";
}
llvm::StringRef firstLine(llvm::StringRef Code) {
return Code.take_until([](char C) { return C == '\n'; });
}
llvm::StringRef lastLine(llvm::StringRef Code) {
llvm::StringRef Rest = Code;
while (!Rest.empty() && Rest.back() != '\n')
Rest = Rest.drop_back();
return Code.substr(Rest.size());
}
// Filename is needed for tooling::Replacement and some overloads of reformat().
// Its value should not affect the outcome. We use the default from reformat().
llvm::StringRef Filename = "<stdin>";
// tooling::Replacement from overlapping StringRefs: From must be part of Code.
tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
llvm::StringRef To) {
assert(From.begin() >= Code.begin() && From.end() <= Code.end());
// The filename is required but ignored.
return tooling::Replacement(Filename, From.data() - Code.data(),
From.size(), To);
}
// High-level representation of incremental formatting changes.
// The changes are made in two steps.
// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
// comment markers when splitting a line comment with a newline).
// 2) a selective clang-format run:
// - the "source code" passed to clang format is the code up to the cursor,
// a placeholder for the cursor, and some closing brackets
// - the formatting is restricted to the cursor and (possibly) other ranges
// (e.g. the old line when inserting a newline).
// - changes before the cursor are applied, those after are discarded.
struct IncrementalChanges {
// Changes that should be applied before running clang-format.
tooling::Replacements Changes;
// Ranges of the original source code that should be clang-formatted.
// The CursorProxyText will also be formatted.
std::vector<tooling::Range> FormatRanges;
// The source code that should stand in for the cursor when clang-formatting.
// e.g. after inserting a newline, a line-comment at the cursor is used to
// ensure that the newline is preserved.
std::string CursorPlaceholder;
};
// After a newline:
// - we continue any line-comment that was split
// - we format the old line in addition to the cursor
// - we represent the cursor with a line comment to preserve the newline
IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
unsigned Cursor) {
IncrementalChanges Result;
// Before newline, code looked like:
// leading^trailing
// After newline, code looks like:
// leading
// indentation^trailing
// Where indentation was added by the editor.
StringRef Trailing = firstLine(Code.substr(Cursor));
StringRef Indentation = lastLine(Code.take_front(Cursor));
if (Indentation.data() == Code.data()) {
vlog("Typed a newline, but we're still on the first line!");
return Result;
}
StringRef Leading =
lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
// Strip leading whitespace on trailing line.
StringRef TrailingTrim = Trailing.ltrim();
if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
cantFail(Result.Changes.add(
replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
// If we split a comment, replace indentation with a comment marker.
// If the editor made the new line a comment, also respect that.
StringRef CommentMarker = commentMarker(Leading);
bool NewLineIsComment = !commentMarker(Indentation).empty();
if (!CommentMarker.empty() &&
(NewLineIsComment || !commentMarker(NextLine).empty() ||
(!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
using llvm::sys::unicode::columnWidthUTF8;
// We indent the new comment to match the previous one.
StringRef PreComment =
Leading.take_front(CommentMarker.data() - Leading.data());
std::string IndentAndComment =
(std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
.str();
cantFail(
Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
} else {
// Remove any indentation and let clang-format re-add it.
// This prevents the cursor marker dragging e.g. an aligned comment with it.
cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
}
// If we put a the newline inside a {} pair, put } on its own line...
if (CommentMarker.empty() && Leading.endswith("{") &&
Trailing.startswith("}")) {
cantFail(
Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
// ...and format it.
Result.FormatRanges.push_back(
tooling::Range(Trailing.data() - Code.data() + 1, 1));
}
// Format the whole leading line.
Result.FormatRanges.push_back(
tooling::Range(Leading.data() - Code.data(), Leading.size()));
// We use a comment to represent the cursor, to preserve the newline.
// A trailing identifier improves parsing of e.g. for without braces.
// Exception: if the previous line has a trailing comment, we can't use one
// as the cursor (they will be aligned). But in this case we don't need to.
Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
return Result;
}
IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
llvm::StringRef InsertedText) {
IncrementalChanges Result;
if (InsertedText == "\n")
return getIncrementalChangesAfterNewline(Code, Cursor);
Result.CursorPlaceholder = " /**/";
return Result;
}
// Returns equivalent replacements that preserve the correspondence between
// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
// replacement will be split.
std::vector<tooling::Replacement>
split(const tooling::Replacements &Replacements, unsigned OldCursor,
unsigned NewCursor) {
std::vector<tooling::Replacement> Result;
int LengthChange = 0;
for (const tooling::Replacement &R : Replacements) {
if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
Result.push_back(R);
LengthChange += R.getReplacementText().size() - R.getLength();
} else if (R.getOffset() < OldCursor) { // overlaps cursor
int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
assert(ReplacementSplit >= 0 &&
ReplacementSplit <= int(R.getReplacementText().size()) &&
"NewCursor incompatible with OldCursor!");
Result.push_back(tooling::Replacement(
R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
R.getReplacementText().take_front(ReplacementSplit)));
Result.push_back(tooling::Replacement(
R.getFilePath(), OldCursor,
R.getLength() - (OldCursor - R.getOffset()),
R.getReplacementText().drop_front(ReplacementSplit)));
} else if (R.getOffset() >= OldCursor) { // after cursor
Result.push_back(R);
}
}
return Result;
}
} // namespace
// We're simulating the following sequence of changes:
// - apply the pre-formatting edits (see getIncrementalChanges)
// - insert a placeholder for the cursor
// - format some of the resulting code
// - remove the cursor placeholder again
// The replacements we return are produced by composing these.
//
// The text we actually pass to clang-format is slightly different from this,
// e.g. we have to close brackets. We ensure these differences are *after*
// all the regions we want to format, and discard changes in them.
std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
llvm::StringRef InsertedText, format::FormatStyle Style) {
IncrementalChanges Incremental =
getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
// Never *remove* lines in response to pressing enter! This annoys users.
if (InsertedText == "\n") {
Style.MaxEmptyLinesToKeep = 1000;
Style.KeepEmptyLinesAtTheStartOfBlocks = true;
}
// Compute the code we want to format:
// 1) Start with code after the pre-formatting edits.
std::string CodeToFormat = cantFail(
tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
// 2) Truncate code after the last interesting range.
unsigned FormatLimit = Cursor;
for (tooling::Range &R : Incremental.FormatRanges)
FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
CodeToFormat.resize(FormatLimit);
// 3) Insert a placeholder for the cursor.
CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
// 4) Append brackets after FormatLimit so the code is well-formed.
closeBrackets(CodeToFormat, Style);
// Determine the ranges to format:
std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
// Ranges after the cursor need to be adjusted for the placeholder.
for (auto &R : RangesToFormat) {
if (R.getOffset() > Cursor)
R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
R.getLength());
}
// We also format the cursor.
RangesToFormat.push_back(
tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
// Also update FormatLimit for the placeholder, we'll use this later.
FormatLimit += Incremental.CursorPlaceholder.size();
// Run clang-format, and truncate changes at FormatLimit.
tooling::Replacements FormattingChanges;
format::FormattingAttemptStatus Status;
for (const tooling::Replacement &R : format::reformat(
Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
cantFail(FormattingChanges.add(R));
else if(R.getOffset() < FormatLimit) { // Overlaps limit.
if (R.getReplacementText().empty()) // Deletions are easy to handle.
cantFail(FormattingChanges.add(tooling::Replacement(Filename,
R.getOffset(), FormatLimit - R.getOffset(), "")));
else
// Hopefully won't happen in practice?
elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
Cursor, CodeToFormat);
}
}
if (!Status.FormatComplete)
vlog("Incremental format incomplete at line {0}", Status.Line);
// Now we are ready to compose the changes relative to OriginalCode.
// edits -> insert placeholder -> format -> remove placeholder.
// We must express insert/remove as Replacements.
tooling::Replacements InsertCursorPlaceholder(
tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
unsigned FormattedCursorStart =
FormattingChanges.getShiftedCodePosition(Cursor),
FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
Cursor + Incremental.CursorPlaceholder.size());
tooling::Replacements RemoveCursorPlaceholder(
tooling::Replacement(Filename, FormattedCursorStart,
FormattedCursorEnd - FormattedCursorStart, ""));
// We can't simply merge() and return: tooling::Replacements will combine
// adjacent edits left and right of the cursor. This gives the right source
// code, but loses information about where the cursor is!
// Fortunately, none of the individual passes lose information, so:
// - we use merge() to compute the final Replacements
// - we chain getShiftedCodePosition() to compute final cursor position
// - we split the final Replacements at the cursor position, so that
// each Replacement lies either before or after the cursor.
tooling::Replacements Final;
unsigned FinalCursor = OriginalCursor;
#ifndef NDEBUG
std::string FinalCode = std::string(OriginalCode);
dlog("Initial code: {0}", FinalCode);
#endif
for (auto Pass :
std::vector<std::pair<const char *, const tooling::Replacements *>>{
{"Pre-formatting changes", &Incremental.Changes},
{"Insert placeholder", &InsertCursorPlaceholder},
{"clang-format", &FormattingChanges},
{"Remove placeholder", &RemoveCursorPlaceholder}}) {
Final = Final.merge(*Pass.second);
FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
#ifndef NDEBUG
FinalCode =
cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
dlog("After {0}:\n{1}^{2}", Pass.first,
StringRef(FinalCode).take_front(FinalCursor),
StringRef(FinalCode).drop_front(FinalCursor));
#endif
}
return split(Final, OriginalCursor, FinalCursor);
}
unsigned
transformCursorPosition(unsigned Offset,
const std::vector<tooling::Replacement> &Replacements) {
unsigned OriginalOffset = Offset;
for (const auto &R : Replacements) {
if (R.getOffset() + R.getLength() <= OriginalOffset) {
// Replacement is before cursor.
Offset += R.getReplacementText().size();
Offset -= R.getLength();
} else if (R.getOffset() < OriginalOffset) {
// Replacement overlaps cursor.
// Preserve position within replacement text, as far as possible.
unsigned PositionWithinReplacement = Offset - R.getOffset();
if (PositionWithinReplacement > R.getReplacementText().size()) {
Offset += R.getReplacementText().size();
Offset -= PositionWithinReplacement;
}
} else {
// Replacement after cursor.
break; // Replacements are sorted, the rest are also after the cursor.
}
}
return Offset;
}
} // namespace clangd
} // namespace clang