//===--- Format.cpp -----------------------------------------*- C++-*------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Format.h" #include "support/Logger.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/Support/Unicode.h" namespace clang { namespace clangd { namespace { /// Append closing brackets )]} to \p Code to make it well-formed. /// Clang-format conservatively refuses to format files with unmatched brackets /// as it isn't sure where the errors are and so can't correct. /// When editing, it's reasonable to assume code before the cursor is complete. void closeBrackets(std::string &Code, const format::FormatStyle &Style) { SourceManagerForFile FileSM("dummy.cpp", Code); auto &SM = FileSM.get(); FileID FID = SM.getMainFileID(); Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style)); Token Tok; std::vector Brackets; while (!Lex.LexFromRawLexer(Tok)) { switch(Tok.getKind()) { case tok::l_paren: Brackets.push_back(')'); break; case tok::l_brace: Brackets.push_back('}'); break; case tok::l_square: Brackets.push_back(']'); break; case tok::r_paren: if (!Brackets.empty() && Brackets.back() == ')') Brackets.pop_back(); break; case tok::r_brace: if (!Brackets.empty() && Brackets.back() == '}') Brackets.pop_back(); break; case tok::r_square: if (!Brackets.empty() && Brackets.back() == ']') Brackets.pop_back(); break; default: continue; } } // Attempt to end any open comments first. Code.append("\n// */\n"); Code.append(Brackets.rbegin(), Brackets.rend()); } static StringRef commentMarker(llvm::StringRef Line) { for (StringRef Marker : {"///", "//"}){ auto I = Line.rfind(Marker); if (I != StringRef::npos) return Line.substr(I, Marker.size()); } return ""; } llvm::StringRef firstLine(llvm::StringRef Code) { return Code.take_until([](char C) { return C == '\n'; }); } llvm::StringRef lastLine(llvm::StringRef Code) { llvm::StringRef Rest = Code; while (!Rest.empty() && Rest.back() != '\n') Rest = Rest.drop_back(); return Code.substr(Rest.size()); } // Filename is needed for tooling::Replacement and some overloads of reformat(). // Its value should not affect the outcome. We use the default from reformat(). llvm::StringRef Filename = ""; // tooling::Replacement from overlapping StringRefs: From must be part of Code. tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From, llvm::StringRef To) { assert(From.begin() >= Code.begin() && From.end() <= Code.end()); // The filename is required but ignored. return tooling::Replacement(Filename, From.data() - Code.data(), From.size(), To); } // High-level representation of incremental formatting changes. // The changes are made in two steps. // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding // comment markers when splitting a line comment with a newline). // 2) a selective clang-format run: // - the "source code" passed to clang format is the code up to the cursor, // a placeholder for the cursor, and some closing brackets // - the formatting is restricted to the cursor and (possibly) other ranges // (e.g. the old line when inserting a newline). // - changes before the cursor are applied, those after are discarded. struct IncrementalChanges { // Changes that should be applied before running clang-format. tooling::Replacements Changes; // Ranges of the original source code that should be clang-formatted. // The CursorProxyText will also be formatted. std::vector FormatRanges; // The source code that should stand in for the cursor when clang-formatting. // e.g. after inserting a newline, a line-comment at the cursor is used to // ensure that the newline is preserved. std::string CursorPlaceholder; }; // After a newline: // - we continue any line-comment that was split // - we format the old line in addition to the cursor // - we represent the cursor with a line comment to preserve the newline IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code, unsigned Cursor) { IncrementalChanges Result; // Before newline, code looked like: // leading^trailing // After newline, code looks like: // leading // indentation^trailing // Where indentation was added by the editor. StringRef Trailing = firstLine(Code.substr(Cursor)); StringRef Indentation = lastLine(Code.take_front(Cursor)); if (Indentation.data() == Code.data()) { vlog("Typed a newline, but we're still on the first line!"); return Result; } StringRef Leading = lastLine(Code.take_front(Indentation.data() - Code.data() - 1)); StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1)); // Strip leading whitespace on trailing line. StringRef TrailingTrim = Trailing.ltrim(); if (unsigned TrailWS = Trailing.size() - TrailingTrim.size()) cantFail(Result.Changes.add( replacement(Code, StringRef(Trailing.begin(), TrailWS), ""))); // If we split a comment, replace indentation with a comment marker. // If the editor made the new line a comment, also respect that. StringRef CommentMarker = commentMarker(Leading); bool NewLineIsComment = !commentMarker(Indentation).empty(); if (!CommentMarker.empty() && (NewLineIsComment || !commentMarker(NextLine).empty() || (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) { using llvm::sys::unicode::columnWidthUTF8; // We indent the new comment to match the previous one. StringRef PreComment = Leading.take_front(CommentMarker.data() - Leading.data()); std::string IndentAndComment = (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ") .str(); cantFail( Result.Changes.add(replacement(Code, Indentation, IndentAndComment))); } else { // Remove any indentation and let clang-format re-add it. // This prevents the cursor marker dragging e.g. an aligned comment with it. cantFail(Result.Changes.add(replacement(Code, Indentation, ""))); } // If we put a the newline inside a {} pair, put } on its own line... if (CommentMarker.empty() && Leading.endswith("{") && Trailing.startswith("}")) { cantFail( Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}"))); // ...and format it. Result.FormatRanges.push_back( tooling::Range(Trailing.data() - Code.data() + 1, 1)); } // Format the whole leading line. Result.FormatRanges.push_back( tooling::Range(Leading.data() - Code.data(), Leading.size())); // We use a comment to represent the cursor, to preserve the newline. // A trailing identifier improves parsing of e.g. for without braces. // Exception: if the previous line has a trailing comment, we can't use one // as the cursor (they will be aligned). But in this case we don't need to. Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident"; return Result; } IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor, llvm::StringRef InsertedText) { IncrementalChanges Result; if (InsertedText == "\n") return getIncrementalChangesAfterNewline(Code, Cursor); Result.CursorPlaceholder = " /**/"; return Result; } // Returns equivalent replacements that preserve the correspondence between // OldCursor and NewCursor. If OldCursor lies in a replaced region, that // replacement will be split. std::vector split(const tooling::Replacements &Replacements, unsigned OldCursor, unsigned NewCursor) { std::vector Result; int LengthChange = 0; for (const tooling::Replacement &R : Replacements) { if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor Result.push_back(R); LengthChange += R.getReplacementText().size() - R.getLength(); } else if (R.getOffset() < OldCursor) { // overlaps cursor int ReplacementSplit = NewCursor - LengthChange - R.getOffset(); assert(ReplacementSplit >= 0 && ReplacementSplit <= int(R.getReplacementText().size()) && "NewCursor incompatible with OldCursor!"); Result.push_back(tooling::Replacement( R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(), R.getReplacementText().take_front(ReplacementSplit))); Result.push_back(tooling::Replacement( R.getFilePath(), OldCursor, R.getLength() - (OldCursor - R.getOffset()), R.getReplacementText().drop_front(ReplacementSplit))); } else if (R.getOffset() >= OldCursor) { // after cursor Result.push_back(R); } } return Result; } } // namespace // We're simulating the following sequence of changes: // - apply the pre-formatting edits (see getIncrementalChanges) // - insert a placeholder for the cursor // - format some of the resulting code // - remove the cursor placeholder again // The replacements we return are produced by composing these. // // The text we actually pass to clang-format is slightly different from this, // e.g. we have to close brackets. We ensure these differences are *after* // all the regions we want to format, and discard changes in them. std::vector formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor, llvm::StringRef InsertedText, format::FormatStyle Style) { IncrementalChanges Incremental = getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText); // Never *remove* lines in response to pressing enter! This annoys users. if (InsertedText == "\n") { Style.MaxEmptyLinesToKeep = 1000; Style.KeepEmptyLinesAtTheStartOfBlocks = true; } // Compute the code we want to format: // 1) Start with code after the pre-formatting edits. std::string CodeToFormat = cantFail( tooling::applyAllReplacements(OriginalCode, Incremental.Changes)); unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor); // 2) Truncate code after the last interesting range. unsigned FormatLimit = Cursor; for (tooling::Range &R : Incremental.FormatRanges) FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength()); CodeToFormat.resize(FormatLimit); // 3) Insert a placeholder for the cursor. CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder); // 4) Append brackets after FormatLimit so the code is well-formed. closeBrackets(CodeToFormat, Style); // Determine the ranges to format: std::vector RangesToFormat = Incremental.FormatRanges; // Ranges after the cursor need to be adjusted for the placeholder. for (auto &R : RangesToFormat) { if (R.getOffset() > Cursor) R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(), R.getLength()); } // We also format the cursor. RangesToFormat.push_back( tooling::Range(Cursor, Incremental.CursorPlaceholder.size())); // Also update FormatLimit for the placeholder, we'll use this later. FormatLimit += Incremental.CursorPlaceholder.size(); // Run clang-format, and truncate changes at FormatLimit. tooling::Replacements FormattingChanges; format::FormattingAttemptStatus Status; for (const tooling::Replacement &R : format::reformat( Style, CodeToFormat, RangesToFormat, Filename, &Status)) { if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit. cantFail(FormattingChanges.add(R)); else if(R.getOffset() < FormatLimit) { // Overlaps limit. if (R.getReplacementText().empty()) // Deletions are easy to handle. cantFail(FormattingChanges.add(tooling::Replacement(Filename, R.getOffset(), FormatLimit - R.getOffset(), ""))); else // Hopefully won't happen in practice? elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}", Cursor, CodeToFormat); } } if (!Status.FormatComplete) vlog("Incremental format incomplete at line {0}", Status.Line); // Now we are ready to compose the changes relative to OriginalCode. // edits -> insert placeholder -> format -> remove placeholder. // We must express insert/remove as Replacements. tooling::Replacements InsertCursorPlaceholder( tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder)); unsigned FormattedCursorStart = FormattingChanges.getShiftedCodePosition(Cursor), FormattedCursorEnd = FormattingChanges.getShiftedCodePosition( Cursor + Incremental.CursorPlaceholder.size()); tooling::Replacements RemoveCursorPlaceholder( tooling::Replacement(Filename, FormattedCursorStart, FormattedCursorEnd - FormattedCursorStart, "")); // We can't simply merge() and return: tooling::Replacements will combine // adjacent edits left and right of the cursor. This gives the right source // code, but loses information about where the cursor is! // Fortunately, none of the individual passes lose information, so: // - we use merge() to compute the final Replacements // - we chain getShiftedCodePosition() to compute final cursor position // - we split the final Replacements at the cursor position, so that // each Replacement lies either before or after the cursor. tooling::Replacements Final; unsigned FinalCursor = OriginalCursor; #ifndef NDEBUG std::string FinalCode = std::string(OriginalCode); dlog("Initial code: {0}", FinalCode); #endif for (auto Pass : std::vector>{ {"Pre-formatting changes", &Incremental.Changes}, {"Insert placeholder", &InsertCursorPlaceholder}, {"clang-format", &FormattingChanges}, {"Remove placeholder", &RemoveCursorPlaceholder}}) { Final = Final.merge(*Pass.second); FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor); #ifndef NDEBUG FinalCode = cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second)); dlog("After {0}:\n{1}^{2}", Pass.first, StringRef(FinalCode).take_front(FinalCursor), StringRef(FinalCode).drop_front(FinalCursor)); #endif } return split(Final, OriginalCursor, FinalCursor); } unsigned transformCursorPosition(unsigned Offset, const std::vector &Replacements) { unsigned OriginalOffset = Offset; for (const auto &R : Replacements) { if (R.getOffset() + R.getLength() <= OriginalOffset) { // Replacement is before cursor. Offset += R.getReplacementText().size(); Offset -= R.getLength(); } else if (R.getOffset() < OriginalOffset) { // Replacement overlaps cursor. // Preserve position within replacement text, as far as possible. unsigned PositionWithinReplacement = Offset - R.getOffset(); if (PositionWithinReplacement > R.getReplacementText().size()) { Offset += R.getReplacementText().size(); Offset -= PositionWithinReplacement; } } else { // Replacement after cursor. break; // Replacements are sorted, the rest are also after the cursor. } } return Offset; } } // namespace clangd } // namespace clang