From: Krasimir Georgiev Date: Mon, 30 Oct 2017 14:01:50 +0000 (+0000) Subject: [clang-format] Format raw string literals X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bd3b05d2cc22423f7eb44ff6bf948b44cc605309;p=clang [clang-format] Format raw string literals Summary: This patch adds raw string literal formatting. Reviewers: djasper, klimek Reviewed By: klimek Subscribers: klimek, mgorny Differential Revision: https://reviews.llvm.org/D35943 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@316903 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h index 302ced3f78..a0e42f4e30 100644 --- a/include/clang/Format/Format.h +++ b/include/clang/Format/Format.h @@ -1327,6 +1327,41 @@ struct FormatStyle { /// \brief Pointer and reference alignment style. PointerAlignmentStyle PointerAlignment; + /// See documentation of ``RawStringFormats``. + struct RawStringFormat { + /// \brief The delimiter that this raw string format matches. + std::string Delimiter; + /// \brief The language of this raw string. + LanguageKind Language; + /// \brief The name of the style on which this raw string format is based. + /// If not specified, the raw string format is based on the style that this + /// format is based on. + std::string BasedOnStyle; + bool operator==(const RawStringFormat &Other) const { + return Delimiter == Other.Delimiter && Language == Other.Language && + BasedOnStyle == Other.BasedOnStyle; + } + }; + + /// \brief Raw string delimiters denoting that the raw string contents are + /// code in a particular language and can be reformatted. + /// + /// A raw string with a matching delimiter will be reformatted assuming the + /// specified language based on a predefined style given by 'BasedOnStyle'. + /// If 'BasedOnStyle' is not found, the formatting is based on llvm style. 
+ /// + /// To configure this in the .clang-format file, use: + /// \code{.yaml} + /// RawStringFormats: + /// - Delimiter: 'pb' + /// Language: TextProto + /// BasedOnStyle: llvm + /// - Delimiter: 'proto' + /// Language: TextProto + /// BasedOnStyle: google + /// \endcode + std::vector RawStringFormats; + /// \brief If ``true``, clang-format will attempt to re-flow comments. /// \code /// false: @@ -1592,6 +1627,7 @@ struct FormatStyle { PenaltyExcessCharacter == R.PenaltyExcessCharacter && PenaltyReturnTypeOnItsOwnLine == R.PenaltyReturnTypeOnItsOwnLine && PointerAlignment == R.PointerAlignment && + RawStringFormats == R.RawStringFormats && SpaceAfterCStyleCast == R.SpaceAfterCStyleCast && SpaceAfterTemplateKeyword == R.SpaceAfterTemplateKeyword && SpaceBeforeAssignmentOperators == R.SpaceBeforeAssignmentOperators && diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index b57b8de2e7..4f624dd2fb 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -14,6 +14,7 @@ #include "ContinuationIndenter.h" #include "BreakableToken.h" +#include "FormatInternal.h" #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" @@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok, (LessTok.Previous && LessTok.Previous->is(tok::equal)))); } +// Returns the delimiter of a raw string literal, or None if TokenText is not +// the text of a raw string literal. The delimiter could be the empty string. +// For example, the delimiter of R"deli(cont)deli" is deli. +static llvm::Optional getRawStringDelimiter(StringRef TokenText) { + if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. 
+ || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) + return None; + + // A raw string starts with 'R"(' and delimiter is ascii and has + // size at most 16 by the standard, so the first '(' must be among the first + // 19 bytes. + size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); + if (LParenPos == StringRef::npos) + return None; + StringRef Delimiter = TokenText.substr(2, LParenPos - 2); + + // Check that the string ends in ')Delimiter"'. + size_t RParenPos = TokenText.size() - Delimiter.size() - 2; + if (TokenText[RParenPos] != ')') + return None; + if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) + return None; + return Delimiter; +} + +RawStringFormatStyleManager::RawStringFormatStyleManager( + const FormatStyle &CodeStyle) { + for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { + FormatStyle Style; + if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, + RawStringFormat.Language, &Style)) { + Style = getLLVMStyle(); + Style.Language = RawStringFormat.Language; + } + Style.ColumnLimit = CodeStyle.ColumnLimit; + DelimiterStyle.insert({RawStringFormat.Delimiter, Style}); + } +} + +llvm::Optional +RawStringFormatStyleManager::get(StringRef Delimiter) const { + auto It = DelimiterStyle.find(Delimiter); + if (It == DelimiterStyle.end()) + return None; + return It->second; +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -85,14 +133,18 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), - CommentPragmasRegex(Style.CommentPragmas) {} + CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {} LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, + unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun) { 
LineState State; State.FirstIndent = FirstIndent; - State.Column = FirstIndent; + if (FirstStartColumn && Line->First->NewlinesBefore == 0) + State.Column = FirstStartColumn; + else + State.Column = FirstIndent; // With preprocessor directive indentation, the line starts on column 0 // since it's indented after the hash, but FirstIndent is set to the // preprocessor indent. @@ -1216,6 +1268,89 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { State.Stack.back().BreakBeforeParameter = true; } +static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, + unsigned TabWidth, + encoding::Encoding Encoding) { + size_t LastNewlinePos = Text.find_last_of("\n"); + if (LastNewlinePos == StringRef::npos) { + return StartColumn + + encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding); + } else { + return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos), + /*StartColumn=*/0, TabWidth, Encoding); + } +} + +unsigned ContinuationIndenter::reformatRawStringLiteral( + const FormatToken &Current, unsigned StartColumn, LineState &State, + StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) { + // The text of a raw string is between the leading 'R"delimiter(' and the + // trailing ')delimiter"'. + unsigned PrefixSize = 3 + Delimiter.size(); + unsigned SuffixSize = 2 + Delimiter.size(); + + // The first start column is the column the raw text starts. + unsigned FirstStartColumn = StartColumn + PrefixSize; + + // The next start column is the intended indentation a line break inside + // the raw string at level 0. It is determined by the following rules: + // - if the content starts on newline, it is one level more than the current + // indent, and + // - if the content does not start on a newline, it is the first start + // column. + // These rules have the advantage that the formatted content both does not + // violate the rectangle rule and visually flows within the surrounding + // source. 
+ bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n'; + unsigned NextStartColumn = ContentStartsOnNewline + ? State.Stack.back().Indent + Style.IndentWidth + : FirstStartColumn; + + // The last start column is the column the raw string suffix starts if it is + // put on a newline. + // The last start column is the intended indentation of the raw string postfix + // if it is put on a newline. It is determined by the following rules: + // - if the raw string prefix starts on a newline, it is the column where + // that raw string prefix starts, and + // - if the raw string prefix does not start on a newline, it is the current + // indent. + unsigned LastStartColumn = Current.NewlinesBefore + ? FirstStartColumn - PrefixSize + : State.Stack.back().Indent; + + std::string RawText = + Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); + + std::pair Fixes = internal::reformat( + RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, + FirstStartColumn, NextStartColumn, LastStartColumn, "", + /*FormattingAttemptStatus=*/nullptr); + + auto NewCode = applyAllReplacements(RawText, Fixes.first); + tooling::Replacements NoFixes; + if (!NewCode) { + State.Column += Current.ColumnWidth; + return 0; + } + if (!DryRun) { + SourceLocation OriginLoc = + Current.Tok.getLocation().getLocWithOffset(PrefixSize); + for (const tooling::Replacement &Fix : Fixes.first) { + auto Err = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), + Fix.getLength(), Fix.getReplacementText())); + if (Err) { + llvm::errs() << "Failed to reformat raw string: " + << llvm::toString(std::move(Err)) << "\n"; + } + } + } + unsigned RawLastLineEndColumn = getLastLineEndColumn( + *NewCode, FirstStartColumn, Style.TabWidth, Encoding); + State.Column = RawLastLineEndColumn + SuffixSize; + return Fixes.second; +} + unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, LineState &State) { if 
(!Current.IsMultiline) @@ -1238,9 +1373,18 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { - // Don't break multi-line tokens other than block comments. Instead, just - // update the state. - if (Current.isNot(TT_BlockComment) && Current.IsMultiline) + // Compute the raw string style to use in case this is a raw string literal + // that can be reformatted. + llvm::Optional Delimiter = None; + llvm::Optional RawStringStyle = None; + if (Current.isStringLiteral()) + Delimiter = getRawStringDelimiter(Current.TokenText); + if (Delimiter) + RawStringStyle = RawStringFormats.get(*Delimiter); + + // Don't break multi-line tokens other than block comments and raw string + // literals. Instead, just update the state. + if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline) return addMultilineToken(Current, State); // Don't break implicit string literals or import statements. 
@@ -1275,6 +1419,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, if (Current.IsUnterminatedLiteral) return 0; + if (RawStringStyle) { + RawStringStyle->ColumnLimit = ColumnLimit; + return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter, + *RawStringStyle, DryRun); + } StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index 631129c1ea..3bc9929b3d 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -20,6 +20,8 @@ #include "FormatToken.h" #include "clang/Format/Format.h" #include "llvm/Support/Regex.h" +#include +#include namespace clang { class SourceManager; @@ -30,8 +32,17 @@ class AnnotatedLine; struct FormatToken; struct LineState; struct ParenState; +struct RawStringFormatStyleManager; class WhitespaceManager; +struct RawStringFormatStyleManager { + llvm::StringMap DelimiterStyle; + + RawStringFormatStyleManager(const FormatStyle &CodeStyle); + + llvm::Optional get(StringRef Delimiter) const; +}; + class ContinuationIndenter { public: /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in @@ -44,9 +55,11 @@ public: bool BinPackInconclusiveFunctions); /// \brief Get the initial state, i.e. the state after placing \p Line's - /// first token at \p FirstIndent. - LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, - bool DryRun); + /// first token at \p FirstIndent. When reformatting a fragment of code, as in + /// the case of formatting inside raw string literals, \p FirstStartColumn is + /// the column at which the state of the parent formatter is. + LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, + const AnnotatedLine *Line, bool DryRun); // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a // better home. 
@@ -88,15 +101,24 @@ private: /// \brief Update 'State' with the next token opening a nested block. void moveStateToNewBlock(LineState &State); + /// \brief Reformats a raw string literal. + /// + /// \returns An extra penalty induced by reformatting the token. + unsigned reformatRawStringLiteral(const FormatToken &Current, + unsigned StartColumn, LineState &State, + StringRef Delimiter, + const FormatStyle &RawStringStyle, + bool DryRun); + /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// /// \returns An extra penalty if a token was broken, otherwise 0. /// - /// The returned penalty will cover the cost of the additional line breaks and - /// column limit violation in all lines except for the last one. The penalty - /// for the column limit violation in the last line (and in single line - /// tokens) is handled in \c addNextStateToQueue. + /// The returned penalty will cover the cost of the additional line breaks + /// and column limit violation in all lines except for the last one. The + /// penalty for the column limit violation in the last line (and in single + /// line tokens) is handled in \c addNextStateToQueue. 
unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun); @@ -143,6 +165,7 @@ private: encoding::Encoding Encoding; bool BinPackInconclusiveFunctions; llvm::Regex CommentPragmasRegex; + const RawStringFormatStyleManager RawStringFormats; }; struct ParenState { diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index cb0a010512..ecc2bb7b84 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -16,6 +16,7 @@ #include "clang/Format/Format.h" #include "AffectedRangeManager.h" #include "ContinuationIndenter.h" +#include "FormatInternal.h" #include "FormatTokenLexer.h" #include "NamespaceEndCommentsFixer.h" #include "SortJavaScriptImports.h" @@ -44,7 +45,8 @@ using clang::format::FormatStyle; -LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory); +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat); namespace llvm { namespace yaml { @@ -389,6 +391,7 @@ template <> struct MappingTraits { IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", Style.PenaltyReturnTypeOnItsOwnLine); IO.mapOptional("PointerAlignment", Style.PointerAlignment); + IO.mapOptional("RawStringFormats", Style.RawStringFormats); IO.mapOptional("ReflowComments", Style.ReflowComments); IO.mapOptional("SortIncludes", Style.SortIncludes); IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations); @@ -441,6 +444,14 @@ template <> struct MappingTraits { } }; +template <> struct MappingTraits { + static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) { + IO.mapOptional("Delimiter", Format.Delimiter); + IO.mapOptional("Language", Format.Language); + IO.mapOptional("BasedOnStyle", Format.BasedOnStyle); + } +}; + // Allows to read vector while keeping default values. // IO.getContext() should contain a pointer to the FormatStyle structure, that // will be used to get default values for missing keys. 
@@ -620,6 +631,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; + LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto, "google"}}; LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; @@ -895,7 +907,7 @@ public: JavaScriptRequoter(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} - tooling::Replacements + std::pair analyze(TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -903,7 +915,7 @@ public: AnnotatedLines.end()); tooling::Replacements Result; requoteJSStringLiteral(AnnotatedLines, Result); - return Result; + return {Result, 0}; } private: @@ -984,7 +996,7 @@ public: FormattingAttemptStatus *Status) : TokenAnalyzer(Env, Style), Status(Status) {} - tooling::Replacements + std::pair analyze(TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -1003,13 +1015,20 @@ public: ContinuationIndenter Indenter(Style, Tokens.getKeywords(), Env.getSourceManager(), Whitespaces, Encoding, BinPackInconclusiveFunctions); - UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), - Env.getSourceManager(), Status) - .format(AnnotatedLines); + unsigned Penalty = + UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, + Tokens.getKeywords(), Env.getSourceManager(), + Status) + .format(AnnotatedLines, /*DryRun=*/false, + /*AdditionalIndent=*/0, + /*FixBadIndentation=*/false, + /*FirstStartColumn=*/Env.getFirstStartColumn(), + /*NextStartColumn=*/Env.getNextStartColumn(), + /*LastStartColumn=*/Env.getLastStartColumn()); for (const auto &R : Whitespaces.generateReplacements()) if (Result.add(R)) - return Result; - return Result; + return {Result, 0}; + return {Result, Penalty}; } private: @@ -1097,7 +1116,7 @@ public: 
DeletedTokens(FormatTokenLess(Env.getSourceManager())) {} // FIXME: eliminate unused parameters. - tooling::Replacements + std::pair analyze(TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -1125,7 +1144,7 @@ public: } } - return generateFixes(); + return {generateFixes(), 0}; } private: @@ -1906,19 +1925,22 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, return processReplacements(Cleanup, Code, NewReplaces, Style); } -tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, - ArrayRef Ranges, - StringRef FileName, - FormattingAttemptStatus *Status) { +namespace internal { +std::pair +reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, + FormattingAttemptStatus *Status) { FormatStyle Expanded = expandPresets(Style); if (Expanded.DisableFormat) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; if (isLikelyXml(Code)) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code)) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; - typedef std::function + typedef std::function( + const Environment &)> AnalyzerPass; SmallVector Passes; @@ -1944,26 +1966,42 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, return Formatter(Env, Expanded, Status).process(); }); - std::unique_ptr Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + std::unique_ptr Env = Environment::CreateVirtualEnvironment( + Code, FileName, Ranges, FirstStartColumn, NextStartColumn, + LastStartColumn); llvm::Optional CurrentCode = None; tooling::Replacements Fixes; + unsigned Penalty = 0; for (size_t I = 0, E = Passes.size(); I < E; ++I) { - tooling::Replacements PassFixes = Passes[I](*Env); + std::pair PassFixes = 
Passes[I](*Env); auto NewCode = applyAllReplacements( - CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes); + CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first); if (NewCode) { - Fixes = Fixes.merge(PassFixes); + Fixes = Fixes.merge(PassFixes.first); + Penalty += PassFixes.second; if (I + 1 < E) { CurrentCode = std::move(*NewCode); Env = Environment::CreateVirtualEnvironment( *CurrentCode, FileName, - tooling::calculateRangesAfterReplacements(Fixes, Ranges)); + tooling::calculateRangesAfterReplacements(Fixes, Ranges), + FirstStartColumn, NextStartColumn, LastStartColumn); } } } - return Fixes; + return {Fixes, Penalty}; +} +} // namespace internal + +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, + StringRef FileName, + FormattingAttemptStatus *Status) { + return internal::reformat(Style, Code, Ranges, + /*FirstStartColumn=*/0, + /*NextStartColumn=*/0, + /*LastStartColumn=*/0, FileName, Status) + .first; } tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, @@ -1975,7 +2013,7 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, std::unique_ptr Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); Cleaner Clean(*Env, Style); - return Clean.process(); + return Clean.process().first; } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, @@ -1995,7 +2033,7 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, std::unique_ptr Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); NamespaceEndCommentsFixer Fix(*Env, Style); - return Fix.process(); + return Fix.process().first; } tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, @@ -2005,7 +2043,7 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, std::unique_ptr Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); UsingDeclarationsSorter Sorter(*Env, Style); - return Sorter.process(); + 
return Sorter.process().first; } LangOptions getFormattingLangOpts(const FormatStyle &Style) { diff --git a/lib/Format/FormatInternal.h b/lib/Format/FormatInternal.h new file mode 100644 index 0000000000..1373e9d2ee --- /dev/null +++ b/lib/Format/FormatInternal.h @@ -0,0 +1,79 @@ +//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares Format APIs to be used internally by the +/// formatting library implementation. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H +#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H + +namespace clang { +namespace format { +namespace internal { + +/// \brief Reformats the given \p Ranges in the code fragment \p Code. +/// +/// A fragment of code could conceptually be surrounded by other code that might +/// constrain how that fragment is laid out. +/// For example, consider the fragment of code between 'R"(' and ')"', +/// exclusive, in the following code: +/// +/// void outer(int x) { +/// string inner = R"(name: data +/// ^ FirstStartColumn +/// value: { +/// x: 1 +/// ^ NextStartColumn +/// } +/// )"; +/// ^ LastStartColumn +/// } +/// +/// The outer code can influence the inner fragment as follows: +/// * \p FirstStartColumn specifies the column at which \p Code starts. +/// * \p NextStartColumn specifies the additional indent dictated by the +/// surrounding code. It is applied to the rest of the lines of \p Code. +/// * \p LastStartColumn specifies the column at which the last line of +/// \p Code should end, in case the last line is an empty line. 
+/// +/// In the case where the last line of the fragment contains content, +/// the fragment ends at the end of that content and \p LastStartColumn is +/// not taken into account, for example in: +/// +/// void block() { +/// string inner = R"(name: value)"; +/// } +/// +/// Each range is extended on either end to its next bigger logic unit, i.e. +/// everything that might influence its formatting or might be influenced by its +/// formatting. +/// +/// Returns a pair P, where: +/// * P.first are the ``Replacements`` necessary to make all \p Ranges comply +/// with \p Style. +/// * P.second is the penalty induced by formatting the fragment \p Code. +/// If the formatting of the fragment doesn't have a notion of penalty, +/// returns 0. +/// +/// If ``Status`` is non-null, its value will be populated with the status of +/// this formatting attempt. See \c FormattingAttemptStatus. +std::pair +reformat(const FormatStyle &Style, StringRef Code, + ArrayRef Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, + FormattingAttemptStatus *Status); + +} // namespace internal +} // namespace format +} // namespace clang + +#endif diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 8fb3d84ea5..d37fcec6c5 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -24,10 +24,10 @@ namespace clang { namespace format { FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, - const FormatStyle &Style, + unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), 
MacroBlockBeginRegex(Style.MacroBlockBegin), diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h index bf10f09cd1..6605a1c35a 100644 --- a/lib/Format/FormatTokenLexer.h +++ b/lib/Format/FormatTokenLexer.h @@ -36,7 +36,7 @@ enum LexerState { class FormatTokenLexer { public: - FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, + FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding); ArrayRef lex(); diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp index c660843dca..df99bb2e13 100644 --- a/lib/Format/NamespaceEndCommentsFixer.cpp +++ b/lib/Format/NamespaceEndCommentsFixer.cpp @@ -137,7 +137,7 @@ NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} -tooling::Replacements NamespaceEndCommentsFixer::analyze( +std::pair NamespaceEndCommentsFixer::analyze( TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); @@ -206,7 +206,7 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze( } StartLineIndex = SIZE_MAX; } - return Fixes; + return {Fixes, 0}; } } // namespace format diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h index 7790668a2e..4779f0d27c 100644 --- a/lib/Format/NamespaceEndCommentsFixer.h +++ b/lib/Format/NamespaceEndCommentsFixer.h @@ -25,7 +25,7 @@ class NamespaceEndCommentsFixer : public TokenAnalyzer { public: NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style); - tooling::Replacements + std::pair analyze(TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) override; diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp index c4db9a6c2f..d0b979e100 100644 --- a/lib/Format/SortJavaScriptImports.cpp 
+++ b/lib/Format/SortJavaScriptImports.cpp @@ -123,7 +123,7 @@ public: : TokenAnalyzer(Env, Style), FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} - tooling::Replacements + std::pair analyze(TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -138,7 +138,7 @@ public: parseModuleReferences(Keywords, AnnotatedLines); if (References.empty()) - return Result; + return {Result, 0}; SmallVector Indices; for (unsigned i = 0, e = References.size(); i != e; ++i) @@ -168,7 +168,7 @@ public: } if (ReferencesInOrder && SymbolsInOrder) - return Result; + return {Result, 0}; SourceRange InsertionPoint = References[0].Range; InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); @@ -202,7 +202,7 @@ public: assert(false); } - return Result; + return {Result, 0}; } private: @@ -449,7 +449,7 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, std::unique_ptr Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); JavaScriptImportSorter Sorter(*Env, Style); - return Sorter.process(); + return Sorter.process().first; } } // end namespace format diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp index 7a8b70c4ba..d1dfb1fea3 100644 --- a/lib/Format/TokenAnalyzer.cpp +++ b/lib/Format/TokenAnalyzer.cpp @@ -38,7 +38,10 @@ namespace format { // Code. std::unique_ptr Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef Ranges) { + ArrayRef Ranges, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) { // This is referenced by `FileMgr` and will be released by `FileMgr` when it // is deleted. 
IntrusiveRefCntPtr InMemoryFileSystem( @@ -70,9 +73,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return llvm::make_unique(ID, std::move(FileMgr), - std::move(VirtualSM), - std::move(Diagnostics), CharRanges); + return llvm::make_unique( + ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics), + CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn); } TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) @@ -89,14 +92,16 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) << "\n"); } -tooling::Replacements TokenAnalyzer::process() { +std::pair TokenAnalyzer::process() { tooling::Replacements Result; - FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style, - Encoding); + FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), + Env.getFirstStartColumn(), Style, Encoding); - UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this); + UnwrappedLineParser Parser(Style, Tokens.getKeywords(), + Env.getFirstStartColumn(), Tokens.lex(), *this); Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); + unsigned Penalty = 0; for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); SmallVector AnnotatedLines; @@ -107,13 +112,13 @@ tooling::Replacements TokenAnalyzer::process() { Annotator.annotate(*AnnotatedLines.back()); } - tooling::Replacements RunResult = + std::pair RunResult = analyze(Annotator, AnnotatedLines, Tokens); DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; - for (tooling::Replacements::const_iterator I = RunResult.begin(), - E = RunResult.end(); + for (tooling::Replacements::const_iterator I = RunResult.first.begin(), + E = RunResult.first.end(); I != E; ++I) { llvm::dbgs() << 
I->toString() << "\n"; } @@ -121,17 +126,19 @@ tooling::Replacements TokenAnalyzer::process() { for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { delete AnnotatedLines[i]; } - for (const auto &R : RunResult) { + + Penalty += RunResult.second; + for (const auto &R : RunResult.first) { auto Err = Result.add(R); // FIXME: better error handling here. For now, simply return an empty // Replacements to indicate failure. if (Err) { llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - return tooling::Replacements(); + return {tooling::Replacements(), 0}; } } } - return Result; + return {Result, Penalty}; } void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) { diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h index 78a3d1bc8d..96ea00b25b 100644 --- a/lib/Format/TokenAnalyzer.h +++ b/lib/Format/TokenAnalyzer.h @@ -37,21 +37,37 @@ namespace format { class Environment { public: Environment(SourceManager &SM, FileID ID, ArrayRef Ranges) - : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {} + : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM), + FirstStartColumn(0), + NextStartColumn(0), + LastStartColumn(0) {} Environment(FileID ID, std::unique_ptr FileMgr, std::unique_ptr VirtualSM, std::unique_ptr Diagnostics, - const std::vector &CharRanges) + const std::vector &CharRanges, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()), - SM(*VirtualSM), FileMgr(std::move(FileMgr)), + SM(*VirtualSM), + FirstStartColumn(FirstStartColumn), + NextStartColumn(NextStartColumn), + LastStartColumn(LastStartColumn), + FileMgr(std::move(FileMgr)), VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {} - // This sets up an virtual file system with file \p FileName containing \p - // Code. + // This sets up an virtual file system with file \p FileName containing the + // fragment \p Code. 
Assumes that \p Code starts at \p FirstStartColumn, + // that the next lines of \p Code should start at \p NextStartColumn, and + // that \p Code should end at \p LastStartColumn if it ends in newline. + // See also the documentation of clang::format::internal::reformat. static std::unique_ptr CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef Ranges); + ArrayRef Ranges, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); FileID getFileID() const { return ID; } @@ -59,10 +75,25 @@ public: const SourceManager &getSourceManager() const { return SM; } + // Returns the column at which the fragment of code managed by this + // environment starts. + unsigned getFirstStartColumn() const { return FirstStartColumn; } + + // Returns the column at which subsequent lines of the fragment of code + // managed by this environment should start. + unsigned getNextStartColumn() const { return NextStartColumn; } + + // Returns the column at which the fragment of code managed by this + // environment should end if it ends in a newline. 
+ unsigned getLastStartColumn() const { return LastStartColumn; } + private: FileID ID; SmallVector CharRanges; SourceManager &SM; + unsigned FirstStartColumn; + unsigned NextStartColumn; + unsigned LastStartColumn; // The order of these fields are important - they should be in the same order // as they are created in `CreateVirtualEnvironment` so that they can be @@ -76,10 +107,10 @@ class TokenAnalyzer : public UnwrappedLineConsumer { public: TokenAnalyzer(const Environment &Env, const FormatStyle &Style); - tooling::Replacements process(); + std::pair process(); protected: - virtual tooling::Replacements + virtual std::pair analyze(TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) = 0; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index a49cbaab5f..1b22e26600 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -1891,7 +1891,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { } Line.First->TotalLength = - Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; + Line.First->IsMultiline ? Style.ColumnLimit + : Line.FirstStartColumn + Line.First->ColumnWidth; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current) { diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index 805509533b..04a18d45b8 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -43,7 +43,8 @@ public: InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), - LeadingEmptyLinesAffected(false), ChildrenAffected(false) { + LeadingEmptyLinesAffected(false), ChildrenAffected(false), + FirstStartColumn(Line.FirstStartColumn) { assert(!Line.Tokens.empty()); // Calculate Next and Previous for all tokens. 
Note that we must overwrite @@ -127,6 +128,8 @@ public: /// \c True if one of this line's children intersects with an input range. bool ChildrenAffected; + unsigned FirstStartColumn; + private: // Disallow copying. AnnotatedLine(const AnnotatedLine &) = delete; diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index d25f0a1c29..a82cd5abe2 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -659,7 +659,9 @@ public: /// \brief Formats an \c AnnotatedLine and returns the penalty. /// /// If \p DryRun is \c false, directly applies the changes. - virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + virtual unsigned formatLine(const AnnotatedLine &Line, + unsigned FirstIndent, + unsigned FirstStartColumn, bool DryRun) = 0; protected: @@ -730,7 +732,8 @@ protected: *Child->First, /*Newlines=*/0, /*Spaces=*/1, /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); } - Penalty += formatLine(*Child, State.Column + 1, DryRun); + Penalty += + formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun); State.Column += 1 + Child->Last->TotalLength; return true; @@ -756,10 +759,10 @@ public: /// \brief Formats the line, simply keeping all of the input's line breaking /// decisions. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { assert(!DryRun); - LineState State = - Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false); + LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn, + &Line, /*DryRun=*/false); while (State.NextToken) { bool Newline = Indenter->mustBreak(State) || @@ -782,9 +785,10 @@ public: /// \brief Puts all tokens into a single line. 
unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { unsigned Penalty = 0; - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); while (State.NextToken) { formatChildren(State, /*Newline=*/false, DryRun, Penalty); Indenter->addTokenToState( @@ -806,8 +810,9 @@ public: /// \brief Formats the line by finding the best line breaks with line lengths /// below the column limit. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + unsigned FirstStartColumn, bool DryRun) override { + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); // If the ObjC method declaration does not fit on a line, we should format // it with one arg per line. @@ -974,7 +979,10 @@ private: unsigned UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, bool DryRun, int AdditionalIndent, - bool FixBadIndentation) { + bool FixBadIndentation, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) { LineJoiner Joiner(Style, Keywords, Lines); // Try to look up already computed penalty in DryRun-mode. @@ -994,9 +1002,10 @@ UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, // The minimum level of consecutive lines that have been formatted. 
unsigned RangeMinLevel = UINT_MAX; + bool FirstLine = true; for (const AnnotatedLine *Line = Joiner.getNextMergedLine(DryRun, IndentTracker); - Line; Line = NextLine) { + Line; Line = NextLine, FirstLine = false) { const AnnotatedLine &TheLine = *Line; unsigned Indent = IndentTracker.getIndent(); @@ -1020,8 +1029,12 @@ UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, } if (ShouldFormat && TheLine.Type != LT_Invalid) { - if (!DryRun) - formatFirstToken(TheLine, PreviousLine, Indent); + if (!DryRun) { + bool LastLine = Line->First->is(tok::eof); + formatFirstToken(TheLine, PreviousLine, + Indent, + LastLine ? LastStartColumn : NextStartColumn + Indent); + } NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); @@ -1030,16 +1043,18 @@ UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, (TheLine.Type == LT_ImportStatement && (Style.Language != FormatStyle::LK_JavaScript || !Style.JavaScriptWrapImports)); - if (Style.ColumnLimit == 0) NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else if (FitsIntoOneLine) Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); } else { // If no token in the current line is affected, we still need to format @@ -1062,6 +1077,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, // Format the first token. 
if (ReformatLeadingWhitespace) formatFirstToken(TheLine, PreviousLine, + TheLine.First->OriginalColumn, TheLine.First->OriginalColumn); else Whitespaces->addUntouchableToken(*TheLine.First, @@ -1084,12 +1100,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl &Lines, void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, - unsigned Indent) { + unsigned Indent, + unsigned NewlineIndent) { FormatToken &RootToken = *Line.First; if (RootToken.is(tok::eof)) { unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0, - /*StartOfTokenColumn=*/0); + unsigned TokenIndent = Newlines ? NewlineIndent : 0; + Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent, + TokenIndent); return; } unsigned Newlines = @@ -1104,10 +1122,6 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) Newlines = 0; - // Preprocessor directives get indented after the hash, if indented. - if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement) - Indent = 0; - // Remove empty lines after "{". if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && PreviousLine->Last->is(tok::l_brace) && @@ -1125,6 +1139,13 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) Newlines = std::min(1u, Newlines); + if (Newlines) + Indent = NewlineIndent; + + // Preprocessor directives get indented after the hash, if indented. 
+ if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement) + Indent = 0; + Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, Line.InPPDirective && !RootToken.HasUnescapedNewline); diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h index 9d7a910b98..6432ca83a4 100644 --- a/lib/Format/UnwrappedLineFormatter.h +++ b/lib/Format/UnwrappedLineFormatter.h @@ -40,13 +40,17 @@ public: /// \brief Format the current block and return the penalty. unsigned format(const SmallVectorImpl &Lines, bool DryRun = false, int AdditionalIndent = 0, - bool FixBadIndentation = false); + bool FixBadIndentation = false, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); private: /// \brief Add a new line and the required indent before the first Token /// of the \c UnwrappedLine if there was no structural parsing error. void formatFirstToken(const AnnotatedLine &Line, - const AnnotatedLine *PreviousLine, unsigned Indent); + const AnnotatedLine *PreviousLine, unsigned Indent, + unsigned NewlineIndent); /// \brief Returns the column limit for a line, taking into account whether we /// need an escaped newline due to a continued preprocessor directive. 
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index e210beb20e..9243cd99cb 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -225,6 +225,7 @@ private: UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), @@ -232,7 +233,7 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), IfNdefCondition(nullptr), FoundIncludeGuardStart(false), - IncludeGuardRejected(false) {} + IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -247,10 +248,12 @@ void UnwrappedLineParser::reset() { CurrentLines = &Lines; DeclarationScopeStack.clear(); PPStack.clear(); + Line->FirstStartColumn = FirstStartColumn; } void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); + Line->FirstStartColumn = FirstStartColumn; do { DEBUG(llvm::dbgs() << "----\n"); reset(); @@ -2193,7 +2196,8 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix = "") { - llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" + llvm::dbgs() << Prefix << "Line(" << Line.Level + << ", FSC=" << Line.FirstStartColumn << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; for (std::list::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2226,6 +2230,7 @@ void UnwrappedLineParser::addUnwrappedLine() { CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; + Line->FirstStartColumn = 0; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 4437ccf28d..1d8ccabbd0 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -56,6 +56,8 @@ struct UnwrappedLine { size_t MatchingOpeningBlockLineIndex; static const size_t kInvalidIndex = -1; + + unsigned FirstStartColumn = 0; }; class UnwrappedLineConsumer { @@ -71,6 +73,7 @@ class UnwrappedLineParser { public: UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef Tokens, UnwrappedLineConsumer &Callback); @@ -249,6 +252,10 @@ private: FormatToken *IfNdefCondition; bool FoundIncludeGuardStart; bool IncludeGuardRejected; + // Contains the first start column where the source begins. This is zero for + // normal source code and may be nonzero when formatting a code fragment that + // does not start at the beginning of the file. 
+ unsigned FirstStartColumn; friend class ScopedLineState; friend class CompoundStatementIndenter; diff --git a/lib/Format/UsingDeclarationsSorter.cpp b/lib/Format/UsingDeclarationsSorter.cpp index 4d60d8fd9a..d6753b545e 100644 --- a/lib/Format/UsingDeclarationsSorter.cpp +++ b/lib/Format/UsingDeclarationsSorter.cpp @@ -124,7 +124,7 @@ UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} -tooling::Replacements UsingDeclarationsSorter::analyze( +std::pair UsingDeclarationsSorter::analyze( TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); @@ -149,7 +149,7 @@ tooling::Replacements UsingDeclarationsSorter::analyze( UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label)); } endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - return Fixes; + return {Fixes, 0}; } } // namespace format diff --git a/lib/Format/UsingDeclarationsSorter.h b/lib/Format/UsingDeclarationsSorter.h index f7d5f97e3a..6f137712d8 100644 --- a/lib/Format/UsingDeclarationsSorter.h +++ b/lib/Format/UsingDeclarationsSorter.h @@ -25,7 +25,7 @@ class UsingDeclarationsSorter : public TokenAnalyzer { public: UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style); - tooling::Replacements + std::pair analyze(TokenAnnotator &Annotator, SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) override; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 0f2732c9fc..a5477a9963 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -67,6 +67,11 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, /*IsInsideToken=*/false)); } +llvm::Error +WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) { + return Replaces.add(Replacement); +} + void WhitespaceManager::replaceWhitespaceInToken( const 
FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 98a2071cf4..af20dc5616 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -57,6 +57,8 @@ public: /// was not called. void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); + llvm::Error addReplacement(const tooling::Replacement &Replacement); + /// \brief Inserts or replaces whitespace in the middle of a token. /// /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix diff --git a/unittests/Format/CMakeLists.txt b/unittests/Format/CMakeLists.txt index fa7e32c33d..992db0e508 100644 --- a/unittests/Format/CMakeLists.txt +++ b/unittests/Format/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_unittest(FormatTests FormatTestJava.cpp FormatTestObjC.cpp FormatTestProto.cpp + FormatTestRawStrings.cpp FormatTestSelective.cpp FormatTestTextProto.cpp NamespaceEndCommentsFixerTest.cpp diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index dadc0254c6..3651fa539d 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -10254,6 +10254,20 @@ TEST_F(FormatTest, ParsesConfiguration) { " Priority: 1", IncludeCategories, ExpectedCategories); CHECK_PARSE("IncludeIsMainRegex: 'abc$'", IncludeIsMainRegex, "abc$"); + + Style.RawStringFormats.clear(); + std::vector ExpectedRawStringFormats = { + {"pb", FormatStyle::LK_TextProto, "llvm"}, + {"cpp", FormatStyle::LK_Cpp, "google"}}; + + CHECK_PARSE("RawStringFormats:\n" + " - Delimiter: 'pb'\n" + " Language: TextProto\n" + " BasedOnStyle: llvm\n" + " - Delimiter: 'cpp'\n" + " Language: Cpp\n" + " BasedOnStyle: google", + RawStringFormats, ExpectedRawStringFormats); } TEST_F(FormatTest, ParsesConfigurationWithLanguages) { diff --git a/unittests/Format/FormatTestRawStrings.cpp 
b/unittests/Format/FormatTestRawStrings.cpp new file mode 100644 index 0000000000..6e7b706587 --- /dev/null +++ b/unittests/Format/FormatTestRawStrings.cpp @@ -0,0 +1,733 @@ +//===- unittest/Format/FormatTestRawStrings.cpp - Formatting unit tests ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Format/Format.h" + +#include "../Tooling/ReplacementTest.h" +#include "FormatTestUtils.h" + +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryBuffer.h" +#include "gtest/gtest.h" + +#define DEBUG_TYPE "format-test" + +using clang::tooling::ReplacementTest; +using clang::tooling::toReplacements; + +namespace clang { +namespace format { +namespace { + +class FormatTestRawStrings : public ::testing::Test { +protected: + enum StatusCheck { SC_ExpectComplete, SC_ExpectIncomplete, SC_DoNotCheck }; + + std::string format(llvm::StringRef Code, + const FormatStyle &Style = getLLVMStyle(), + StatusCheck CheckComplete = SC_ExpectComplete) { + DEBUG(llvm::errs() << "---\n"); + DEBUG(llvm::errs() << Code << "\n\n"); + std::vector Ranges(1, tooling::Range(0, Code.size())); + FormattingAttemptStatus Status; + tooling::Replacements Replaces = + reformat(Style, Code, Ranges, "", &Status); + if (CheckComplete != SC_DoNotCheck) { + bool ExpectedCompleteFormat = CheckComplete == SC_ExpectComplete; + EXPECT_EQ(ExpectedCompleteFormat, Status.FormatComplete) + << Code << "\n\n"; + } + ReplacementCount = Replaces.size(); + auto Result = applyAllReplacements(Code, Replaces); + EXPECT_TRUE(static_cast(Result)); + DEBUG(llvm::errs() << "\n" << *Result << "\n\n"); + return *Result; + } + + FormatStyle getStyleWithColumns(FormatStyle Style, unsigned ColumnLimit) { + Style.ColumnLimit = ColumnLimit; + return Style; + } + 
+ FormatStyle getLLVMStyleWithColumns(unsigned ColumnLimit) { + return getStyleWithColumns(getLLVMStyle(), ColumnLimit); + } + + int ReplacementCount; + + FormatStyle getRawStringPbStyleWithColumns(unsigned ColumnLimit) { + FormatStyle Style = getLLVMStyle(); + Style.ColumnLimit = ColumnLimit; + Style.RawStringFormats = {{/*Delimiter=*/"pb", + /*Kind=*/FormatStyle::LK_TextProto, + /*BasedOnStyle=*/"google"}}; + return Style; + } + + FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) { + FormatStyle Style = getLLVMStyle(); + Style.RawStringFormats = {{/*Delimiter=*/"cpp", + /*Kind=*/FormatStyle::LK_Cpp, BasedOnStyle}}; + return Style; + } + + FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) { + FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp); + Style.RawStringFormats = {{/*Delimiter=*/"cpp", + /*Kind=*/FormatStyle::LK_Cpp, BasedOnStyle}}; + return Style; + } + + // Gcc 4.8 doesn't support raw string literals in macros, which breaks some + // build bots. We use this function instead. + void expect_eq(const std::string Expected, const std::string Actual) { + EXPECT_EQ(Expected, Actual); + } +}; + +TEST_F(FormatTestRawStrings, ReformatsAccordingToBaseStyle) { + // llvm style puts '*' on the right. + // google style puts '*' on the left. + + // Use the llvm style if the raw string style has no BasedOnStyle. + expect_eq(R"test(int *i = R"cpp(int *p = nullptr;)cpp")test", + format(R"test(int * i = R"cpp(int * p = nullptr;)cpp")test", + getRawStringLLVMCppStyleBasedOn(""))); + + // Use the google style if the raw string style has BasedOnStyle=google. + expect_eq(R"test(int *i = R"cpp(int* p = nullptr;)cpp")test", + format(R"test(int * i = R"cpp(int * p = nullptr;)cpp")test", + getRawStringLLVMCppStyleBasedOn("google"))); + + // Use the llvm style if the raw string style has no BasedOnStyle=llvm. 
+ expect_eq(R"test(int* i = R"cpp(int *p = nullptr;)cpp")test", + format(R"test(int * i = R"cpp(int * p = nullptr;)cpp")test", + getRawStringGoogleCppStyleBasedOn("llvm"))); +} + +TEST_F(FormatTestRawStrings, MatchesDelimitersCaseSensitively) { + // Don't touch the 'PB' raw string, format the 'pb' raw string. + expect_eq(R"test( +s = R"PB(item:1)PB"; +t = R"pb(item: 1)pb";)test", + format(R"test( +s = R"PB(item:1)PB"; +t = R"pb(item:1)pb";)test", + getRawStringPbStyleWithColumns(40))); + + FormatStyle MixedStyle = getLLVMStyle(); + MixedStyle.RawStringFormats = { + {/*Delimiter=*/"cpp", /*Kind=*/FormatStyle::LK_Cpp, + /*BasedOnStyle=*/"llvm"}, + {/*Delimiter=*/"CPP", /*Kind=*/FormatStyle::LK_Cpp, + /*BasedOnStyle=*/"google"}}; + + // Format the 'cpp' raw string with '*' on the right. + // Format the 'CPP' raw string with '*' on the left. + // Do not format the 'Cpp' raw string. + // Do not format non-raw strings. + expect_eq(R"test( +a = R"cpp(int *i = 0;)cpp"; +b = R"CPP(int* j = 0;)CPP"; +c = R"Cpp(int * k = 0;)Cpp"; +d = R"cpp(int * k = 0;)Cpp";)test", + format(R"test( +a = R"cpp(int * i = 0;)cpp"; +b = R"CPP(int * j = 0;)CPP"; +c = R"Cpp(int * k = 0;)Cpp"; +d = R"cpp(int * k = 0;)Cpp";)test", + MixedStyle)); +} + +TEST_F(FormatTestRawStrings, ReformatsShortRawStringsOnSingleLine) { + expect_eq( + R"test(P p = TP(R"pb()pb");)test", + format( + R"test(P p = TP(R"pb( )pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq( + R"test(P p = TP(R"pb(item_1: 1)pb");)test", + format( + R"test(P p = TP(R"pb(item_1:1)pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq( + R"test(P p = TP(R"pb(item_1: 1)pb");)test", + format( + R"test(P p = TP(R"pb( item_1 : 1 )pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq( + R"test(P p = TP(R"pb(item_1: 1 item_2: 2)pb");)test", + format( + R"test(P p = TP(R"pb(item_1:1 item_2:2)pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq( + R"test(P p = TP(R"pb(item_1 <1> item_2: 
{2})pb");)test", + format( + R"test(P p = TP(R"pb(item_1<1> item_2:{2})pb");)test", + getRawStringPbStyleWithColumns(40))); + + // Merge two short lines into one. + expect_eq(R"test( +std::string s = R"pb( + item_1: 1 item_2: 2 +)pb"; +)test", + format(R"test( +std::string s = R"pb( + item_1:1 + item_2:2 +)pb"; +)test", + getRawStringPbStyleWithColumns(40))); +} + +TEST_F(FormatTestRawStrings, BreaksRawStringsExceedingColumnLimit) { + expect_eq(R"test( +P p = TPPPPPPPPPPPPPPP( + R"pb(item_1: 1, item_2: 2)pb");)test", + format(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, item_2: 2)pb");)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +P p = + TPPPPPPPPPPPPPPP( + R"pb(item_1: 1, + item_2: 2, + item_3: 3)pb");)test", + format(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1: 1, item_2: 2, item_3: 3)pb");)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +P p = TP(R"pb(item_1 <1> + item_2: <2> + item_3 {})pb");)test", + format(R"test( +P p = TP(R"pb(item_1<1> item_2:<2> item_3{ })pb");)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq( + R"test( +P p = TP(R"pb(item_1: 1, + item_2: 2, + item_3: 3, + item_4: 4)pb");)test", + format( + R"test( +P p = TP(R"pb(item_1: 1, item_2: 2, item_3: 3, item_4: 4)pb");)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +P p = TPPPPPPPPPPPPPPP( + R"pb(item_1 <1>, + item_2: {2}, + item_3: <3>, + item_4: {4})pb");)test", + format(R"test( +P p = TPPPPPPPPPPPPPPP(R"pb(item_1<1>, item_2: {2}, item_3: <3>, item_4:{4})pb");)test", + getRawStringPbStyleWithColumns(40))); + + // Breaks before a short raw string exceeding the column limit. 
+ expect_eq(R"test( +FFFFFFFFFFFFFFFFFFFFFFFFFFF( + R"pb(key: 1)pb"); +P p = TPPPPPPPPPPPPPPPPPPPP( + R"pb(key: 2)pb"); +auto TPPPPPPPPPPPPPPPPPPPP = + R"pb(key: 3)pb"; +P p = TPPPPPPPPPPPPPPPPPPPP( + R"pb(i: 1, j: 2)pb"); + +int f(string s) { + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF( + R"pb(key: 1)pb"); + P p = TPPPPPPPPPPPPPPPPPPPP( + R"pb(key: 2)pb"); + auto TPPPPPPPPPPPPPPPPPPPP = + R"pb(key: 3)pb"; + if (s.empty()) + P p = TPPPPPPPPPPPPPPPPPPPP( + R"pb(i: 1, j: 2)pb"); +} +)test", + format(R"test( +FFFFFFFFFFFFFFFFFFFFFFFFFFF(R"pb(key:1)pb"); +P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(key:2)pb"); +auto TPPPPPPPPPPPPPPPPPPPP = R"pb(key:3)pb"; +P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(i: 1, j:2)pb"); + +int f(string s) { + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF(R"pb(key:1)pb"); + P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(key:2)pb"); + auto TPPPPPPPPPPPPPPPPPPPP = R"pb(key:3)pb"; + if (s.empty()) + P p = TPPPPPPPPPPPPPPPPPPPP(R"pb(i: 1, j:2)pb"); +} +)test", + getRawStringPbStyleWithColumns(40))); +} + +TEST_F(FormatTestRawStrings, FormatsRawStringArguments) { + expect_eq(R"test( +P p = TP(R"pb(key {1})pb", param_2);)test", + format(R"test( +P p = TP(R"pb(key{1})pb",param_2);)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +PPPPPPPPPPPPP(R"pb(keykeyk)pb", + param_2);)test", + format(R"test( +PPPPPPPPPPPPP(R"pb(keykeyk)pb", param_2);)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +P p = + TP(R"pb(item: {i: 1, s: 's'} + item: {i: 2, s: 't'})pb");)test", + format(R"test( +P p = TP(R"pb(item: {i: 1, s: 's'} item: {i: 2, s: 't'})pb");)test", + getRawStringPbStyleWithColumns(40))); + expect_eq(R"test( +FFFFFFFFFFFFFFFFFFF( + R"pb(key: "value")pb", + R"pb(key2: "value")pb");)test", + format(R"test( +FFFFFFFFFFFFFFFFFFF(R"pb(key: "value")pb", R"pb(key2: "value")pb");)test", + getRawStringPbStyleWithColumns(40))); + + // Formats the first out of two arguments. 
+ expect_eq(R"test( +FFFFFFFF(R"pb(key: 1)pb", argument2); +struct S { + const s = + f(R"pb(key: 1)pb", argument2); + void f() { + if (gol) + return g(R"pb(key: 1)pb", + 132789237); + return g(R"pb(key: 1)pb", "172893"); + } +};)test", + format(R"test( +FFFFFFFF(R"pb(key:1)pb", argument2); +struct S { +const s = f(R"pb(key:1)pb", argument2); +void f() { + if (gol) + return g(R"pb(key:1)pb", 132789237); + return g(R"pb(key:1)pb", "172893"); +} +};)test", + getRawStringPbStyleWithColumns(40))); + + // Formats the second out of two arguments. + expect_eq(R"test( +FFFFFFFF(argument1, R"pb(key: 2)pb"); +struct S { + const s = + f(argument1, R"pb(key: 2)pb"); + void f() { + if (gol) + return g(12784137, + R"pb(key: 2)pb"); + return g(17283122, R"pb(key: 2)pb"); + } +};)test", + format(R"test( +FFFFFFFF(argument1, R"pb(key:2)pb"); +struct S { +const s = f(argument1, R"pb(key:2)pb"); +void f() { + if (gol) + return g(12784137, R"pb(key:2)pb"); + return g(17283122, R"pb(key:2)pb"); +} +};)test", + getRawStringPbStyleWithColumns(40))); + + // Formats two short raw string arguments. + expect_eq(R"test( +FFFFF(R"pb(key: 1)pb", R"pb(key: 2)pb");)test", + format(R"test( +FFFFF(R"pb(key:1)pb", R"pb(key:2)pb");)test", + getRawStringPbStyleWithColumns(40))); + // TODO(krasimir): The original source code fits on one line, so the + // non-optimizing formatter is chosen. But after the formatting in protos is + // made, the code doesn't fit on one line anymore and further formatting + // splits it. + // + // Should we disable raw string formatting for the non-optimizing formatter? + expect_eq(R"test( +FFFFFFF(R"pb(key: 1)pb", R"pb(key: 2)pb");)test", + format(R"test( +FFFFFFF(R"pb(key:1)pb", R"pb(key:2)pb");)test", + getRawStringPbStyleWithColumns(40))); + + // Formats two short raw string arguments, puts second on newline. 
+ expect_eq(R"test( +FFFFFFFF(R"pb(key: 1)pb", + R"pb(key: 2)pb");)test", + format(R"test( +FFFFFFFF(R"pb(key:1)pb", R"pb(key:2)pb");)test", + getRawStringPbStyleWithColumns(40))); + + // Formats both arguments. + expect_eq(R"test( +FFFFFFFF(R"pb(key: 1)pb", + R"pb(key: 2)pb"); +struct S { + const s = f(R"pb(key: 1)pb", + R"pb(key: 2)pb"); + void f() { + if (gol) + return g(R"pb(key: 1)pb", + R"pb(key: 2)pb"); + return g(R"pb(k1)pb", R"pb(k2)pb"); + } +};)test", + format(R"test( +FFFFFFFF(R"pb(key:1)pb", R"pb(key:2)pb"); +struct S { +const s = f(R"pb(key:1)pb", R"pb(key:2)pb"); +void f() { + if (gol) + return g(R"pb(key:1)pb", R"pb(key:2)pb"); + return g(R"pb( k1 )pb", R"pb( k2 )pb"); +} +};)test", + getRawStringPbStyleWithColumns(40))); +} + +TEST_F(FormatTestRawStrings, RawStringStartingWithNewlines) { + expect_eq(R"test( +std::string s = R"pb( + item_1: 1 +)pb"; +)test", + format(R"test( +std::string s = R"pb( + item_1:1 +)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +std::string s = R"pb( + + item_1: 1 +)pb"; +)test", + format(R"test( +std::string s = R"pb( + + item_1:1 +)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +std::string s = R"pb( + item_1: 1 +)pb"; +)test", + format(R"test( +std::string s = R"pb( + item_1:1 + +)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +std::string s = R"pb( + item_1: 1, + item_2: 2 +)pb"; +)test", + format(R"test( +std::string s = R"pb( + item_1:1, item_2:2 +)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +std::string s = R"pb( + book { + title: "Alice's Adventures" + author: "Lewis Caroll" + } + book { + title: "Peter Pan" + author: "J. M. Barrie" + } +)pb"; +)test", + format(R"test( +std::string s = R"pb( + book { title: "Alice's Adventures" author: "Lewis Caroll" } + book { title: "Peter Pan" author: "J. M. 
Barrie" } +)pb"; +)test", + getRawStringPbStyleWithColumns(40))); +} + +TEST_F(FormatTestRawStrings, BreaksBeforeRawStrings) { + expect_eq(R"test( +ASSERT_TRUE( + ParseFromString(R"pb(item_1: 1)pb"), + ptr);)test", + format(R"test( +ASSERT_TRUE(ParseFromString(R"pb(item_1: 1)pb"), ptr);)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +ASSERT_TRUE(toolong::ParseFromString( + R"pb(item_1: 1)pb"), + ptr);)test", + format(R"test( +ASSERT_TRUE(toolong::ParseFromString(R"pb(item_1: 1)pb"), ptr);)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +ASSERT_TRUE(ParseFromString( + R"pb(item_1: 1, + item_2: 2)pb"), + ptr);)test", + format(R"test( +ASSERT_TRUE(ParseFromString(R"pb(item_1: 1, item_2: 2)pb"), ptr);)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +ASSERT_TRUE( + ParseFromString( + R"pb(item_1: 1 item_2: 2)pb"), + ptr);)test", + format(R"test( +ASSERT_TRUE(ParseFromString(R"pb(item_1: 1 item_2: 2)pb"), ptr);)test", + getRawStringPbStyleWithColumns(40))); + +} + +TEST_F(FormatTestRawStrings, RawStringsInOperands) { + // Formats the raw string first operand of a binary operator expression. 
+ /* The statement stays on one line; only a space is added after the colon inside the raw string. */ expect_eq(R"test(auto S = R"pb(item_1: 1)pb" + rest;)test", + format(R"test(auto S = R"pb(item_1:1)pb" + rest;)test", + getRawStringPbStyleWithColumns(40))); + + /* Both fields stay on one line and the second operand wraps to the next line. */ expect_eq(R"test( +auto S = R"pb(item_1: 1, item_2: 2)pb" + + rest;)test", + format(R"test( +auto S = R"pb(item_1:1,item_2:2)pb"+rest;)test", + getRawStringPbStyleWithColumns(40))); + + /* Space-separated fields: the break comes after the equals sign instead. */ expect_eq(R"test( +auto S = + R"pb(item_1: 1 item_2: 2)pb" + rest;)test", + format(R"test( +auto S = R"pb(item_1:1 item_2:2)pb"+rest;)test", + getRawStringPbStyleWithColumns(40))); + + /* Three comma-separated fields are split one per line; the second operand follows the closing delimiter. */ expect_eq(R"test( +auto S = R"pb(item_1: 1, + item_2: 2, + item_3: 3)pb" + rest;)test", + format(R"test( +auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+rest;)test", + getRawStringPbStyleWithColumns(40))); + + /* Same fields with a longer second operand, which wraps to its own line. */ expect_eq(R"test( +auto S = R"pb(item_1: 1, + item_2: 2, + item_3: 3)pb" + + longlongrest;)test", + format(R"test( +auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+longlongrest;)test", + getRawStringPbStyleWithColumns(40))); + + // Formats the raw string second operand of a binary operator expression.
+ expect_eq(R"test(auto S = first + R"pb(item_1: 1)pb";)test", + format(R"test(auto S = first + R"pb(item_1:1)pb";)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +auto S = first + R"pb(item_1: 1, + item_2: 2)pb";)test", + format(R"test( +auto S = first+R"pb(item_1:1,item_2:2)pb";)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +auto S = first + R"pb(item_1: 1 + item_2: 2)pb";)test", + format(R"test( +auto S = first+R"pb(item_1:1 item_2:2)pb";)test", + getRawStringPbStyleWithColumns(40))); + + /* NOTE(review): this case and the next one have the raw string as the first operand and appear to repeat the last two cases of the first-operand group earlier in this test, so they add no second-operand coverage; confirm whether that is intended. */ expect_eq(R"test( +auto S = R"pb(item_1: 1, + item_2: 2, + item_3: 3)pb" + rest;)test", + format(R"test( +auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+rest;)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +auto S = R"pb(item_1: 1, + item_2: 2, + item_3: 3)pb" + + longlongrest;)test", + format(R"test( +auto S = R"pb(item_1:1,item_2:2,item_3:3)pb"+longlongrest;)test", + getRawStringPbStyleWithColumns(40))); + + // Formats the raw string operands in expressions. + expect_eq(R"test( +auto S = R"pb(item_1: 1)pb" + + R"pb(item_2: 2)pb"; +)test", + format(R"test( +auto S=R"pb(item_1:1)pb"+R"pb(item_2:2)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +auto S = R"pb(item_1: 1)pb" + + R"pb(item_2: 2)pb" + + R"pb(item_3: 3)pb"; +)test", + format(R"test( +auto S=R"pb(item_1:1)pb"+R"pb(item_2:2)pb"+R"pb(item_3:3)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + /* The remaining cases put raw strings in both branches of a conditional expression. */ expect_eq(R"test( +auto S = (count < 3) + ? R"pb(item_1: 1)pb" + : R"pb(item_2: 2)pb"; +)test", + format(R"test( +auto S=(count<3)?R"pb(item_1:1)pb":R"pb(item_2:2)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +auto S = + (count < 3) + ? R"pb(item_1: 1, item_2: 2)pb" + : R"pb(item_3: 3)pb"; +)test", + format(R"test( +auto S=(count<3)?R"pb(item_1:1,item_2:2)pb":R"pb(item_3:3)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + + expect_eq(R"test( +auto S = + (count < 3) + ?
R"pb(item_1: 1)pb" + : R"pb(item_2: 2, item_3: 3)pb"; +)test", + format(R"test( +auto S=(count<3)?R"pb(item_1:1)pb":R"pb(item_2:2,item_3:3)pb"; +)test", + getRawStringPbStyleWithColumns(40))); + +} + +TEST_F(FormatTestRawStrings, PrefixAndSuffixAlignment) { /* Covers placement of the pb raw string prefix and suffix relative to the surrounding code. These cases pass 20 rather than 40 to getRawStringPbStyleWithColumns (helper defined outside this chunk). */ + // Keep the suffix at the end of line if not on newline. + expect_eq(R"test( +int s() { + auto S = PTP( + R"pb( + item_1: 1, + item_2: 2)pb"); +})test", + format(R"test( +int s() { + auto S = PTP( + R"pb( + item_1: 1, + item_2: 2)pb"); +})test", + getRawStringPbStyleWithColumns(20))); + + // Align the suffix with the surrounding FirstIndent if the prefix is not on + // a line of its own. + expect_eq(R"test( +int s() { + auto S = PTP( + R"pb( + item_1: 1, + item_2: 2 + )pb"); +})test", + format(R"test( +int s() { + auto S = PTP(R"pb( + item_1: 1, + item_2: 2 + )pb"); +})test", + getRawStringPbStyleWithColumns(20))); + + // Align the prefix with the suffix if both the prefix and suffix are on a + // line of their own. + expect_eq(R"test( +int s() { + auto S = PTP( + R"pb( + item_1: 1, + item_2: 2, + )pb"); +})test", + format(R"test( +int s() { + auto S = PTP( + R"pb( + item_1: 1, + item_2: 2, + )pb"); +})test", + getRawStringPbStyleWithColumns(20))); +} + +TEST_F(FormatTestRawStrings, EstimatesPenalty) { + // The penalty for characters exceeding the column limit in the raw string + // forces 'hh' to be put on a newline. + expect_eq(R"test( +ff(gggggg, + hh(R"pb(key { + i1: k1 + i2: k2 + })pb")); +)test", + format(R"test( +ff(gggggg, hh(R"pb(key { + i1: k1 + i2: k2 + })pb")); +)test", + getRawStringPbStyleWithColumns(20))); +} + +TEST_F(FormatTestRawStrings, DontFormatNonRawStrings) { /* The literal opens with the pb raw string prefix but is closed by a paren and a plain quote, without the pb delimiter, so it is not a well-formed pb raw string. The first argument equals the input text, i.e. key:value is expected to stay untouched. */ + expect_eq(R"test(a = R"pb(key:value)";)test", + format(R"test(a = R"pb(key:value)";)test", + getRawStringPbStyleWithColumns(20))); +} + +} // end namespace +} // end namespace format +} // end namespace clang