BreakableToken.cpp
ContinuationIndenter.cpp
Format.cpp
+ FormatToken.cpp
TokenAnnotator.cpp
UnwrappedLineParser.cpp
WhitespaceManager.cpp
}
unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
- bool DryRun) {
+ bool DryRun,
+ unsigned ExtraSpaces) {
const FormatToken &Current = *State.NextToken;
const FormatToken &Previous = *State.NextToken->Previous;
// Extra penalty that needs to be added because of the way certain line
// breaks are chosen.
- unsigned ExtraPenalty = 0;
+ unsigned Penalty = 0;
if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
// FIXME: Is this correct?
unsigned ContinuationIndent =
std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
if (Newline) {
+ // The first line break on any ParenLevel causes an extra penalty in order
+ // prefer similar line breaks.
+ if (!State.Stack.back().ContainsLineBreak)
+ Penalty += 15;
+ State.Stack.back().ContainsLineBreak = true;
+
+ Penalty += State.NextToken->SplitPenalty;
+
// Breaking before the first "<<" is generally not desirable if the LHS is
// short.
if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0 &&
State.Column <= Style.ColumnLimit / 2)
- ExtraPenalty += Style.PenaltyBreakFirstLessLess;
+ Penalty += Style.PenaltyBreakFirstLessLess;
- State.Stack.back().ContainsLineBreak = true;
if (Current.is(tok::r_brace)) {
if (Current.BlockKind == BK_BracedInit)
State.Column = State.Stack[State.Stack.size() - 2].LastSpace;
State.Stack.back().LastSpace = State.Stack.back().VariablePos;
}
- unsigned Spaces = State.NextToken->SpacesRequiredBefore;
+ unsigned Spaces = State.NextToken->SpacesRequiredBefore + ExtraSpaces;
if (!DryRun)
Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column + Spaces);
}
}
- return moveStateToNextToken(State, DryRun, Newline) + ExtraPenalty;
+ return moveStateToNextToken(State, DryRun, Newline) + Penalty;
}
unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.NextToken = State.NextToken->Next;
- if (!Newline && Style.AlwaysBreakBeforeMultilineStrings &&
- Current.is(tok::string_literal) && Current.CanBreakBefore)
- return 0;
+ unsigned Penalty = 0;
+ if (Newline || !Style.AlwaysBreakBeforeMultilineStrings ||
+ Current.isNot(tok::string_literal) || !Current.CanBreakBefore)
+ Penalty += breakProtrudingToken(Current, State, DryRun);
+
+ // If the previous has a special role, let it consume tokens as appropriate.
+ // It is necessary to start at the previous token for the only implemented
+ // role (comma separated list). That way, the decision whether or not to break
+ // after the "{" is already done and both options are tried and evaluated.
+ // FIXME: This is ugly, find a better way.
+ if (Previous && Previous->Role)
+ Penalty += Previous->Role->format(State, this, DryRun);
- return breakProtrudingToken(Current, State, DryRun);
+ return Penalty;
}
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
///
/// If \p DryRun is \c false, also creates and stores the required
/// \c Replacement.
- unsigned addTokenToState(LineState &State, bool Newline, bool DryRun);
+ unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
+ unsigned ExtraSpaces = 0);
/// \brief Get the column limit for this line. This is the style's column
/// limit, potentially reduced for preprocessor definitions.
}
for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
I != E; ++I) {
+ unsigned Penalty = Indenter->addTokenToState(State, (*I)->NewLine, false);
DEBUG({
if ((*I)->NewLine) {
- llvm::dbgs() << "Penalty for splitting before "
+ llvm::dbgs() << "Penalty for placing "
<< (*I)->Previous->State.NextToken->Tok.getName() << ": "
- << (*I)->Previous->State.NextToken->SplitPenalty << "\n";
+ << Penalty << "\n";
}
});
- Indenter->addTokenToState(State, (*I)->NewLine, false);
}
}
return;
if (!NewLine && Indenter->mustBreak(PreviousNode->State))
return;
- if (NewLine) {
- if (!PreviousNode->State.Stack.back().ContainsLineBreak)
- Penalty += 15;
- Penalty += PreviousNode->State.NextToken->SplitPenalty;
- }
StateNode *Node = new (Allocator.Allocate())
StateNode(PreviousNode->State, NewLine, PreviousNode);
--- /dev/null
+//===--- FormatToken.cpp - Format C++ code --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements specific functions of \c FormatTokens and their
+/// roles.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatToken.h"
+#include "ContinuationIndenter.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+namespace clang {
+namespace format {
+
+TokenRole::~TokenRole() {}
+
+void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
+
+unsigned CommaSeparatedList::format(LineState &State,
+ ContinuationIndenter *Indenter,
+ bool DryRun) {
+ if (!State.NextToken->Previous || !State.NextToken->Previous->Previous ||
+ Commas.size() <= 2)
+ return 0;
+
+ // Ensure that we start on the opening brace.
+ const FormatToken *LBrace = State.NextToken->Previous->Previous;
+ if (LBrace->isNot(tok::l_brace) ||
+ LBrace->Next->Type == TT_DesignatedInitializerPeriod)
+ return 0;
+
+ // Find the best ColumnFormat, i.e. the best number of columns to use.
+ unsigned RemainingCharacters = Style.ColumnLimit - State.Stack.back().Indent;
+ const ColumnFormat *Format = getColumnFormat(RemainingCharacters);
+ if (!Format)
+ return 0;
+
+ // Format the entire list.
+ unsigned Penalty = 0;
+ unsigned Column = 0;
+ unsigned Item = 0;
+ while (State.NextToken != LBrace->MatchingParen) {
+ bool NewLine = false;
+ unsigned ExtraSpaces = 0;
+
+ // If the previous token was one of our commas, we are now on the next item.
+ if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
+ if (!State.NextToken->isTrailingComment()) {
+ ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
+ ++Column;
+ }
+ ++Item;
+ }
+
+ if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
+ Column = 0;
+ NewLine = true;
+ }
+
+ // Place token using the continuation indenter and store the penalty.
+ Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
+ }
+ return Penalty;
+}
+
+// Returns the lengths in code points between Begin and End (both included),
+// assuming that the entire sequence is put on a single line.
+static unsigned CodePointsBetween(const FormatToken *Begin,
+ const FormatToken *End) {
+ return End->TotalLength - Begin->TotalLength + Begin->CodePointCount;
+}
+
+void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
+ if (!Token->MatchingParen)
+ return;
+
+ FormatToken *ItemBegin = Token->Next;
+ SmallVector<bool, 8> MustBreakBeforeItem;
+
+ // The lengths of an item if it is put at the end of the line. This includes
+ // trailing comments which are otherwise ignored for column alignment.
+ SmallVector<unsigned, 8> EndOfLineItemLength;
+
+ for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
+ // Skip comments on their own line.
+ while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment())
+ ItemBegin = ItemBegin->Next;
+
+ MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
+ const FormatToken *ItemEnd = NULL;
+ if (i == Commas.size()) {
+ ItemEnd = Token->MatchingParen;
+ const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
+ ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
+ if (Style.Cpp11BracedListStyle) {
+ // In Cpp11 braced list style, the } and possibly other subsequent
+ // tokens will need to stay on a line with the last element.
+ while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
+ ItemEnd = ItemEnd->Next;
+ } else {
+ // In other braced lists styles, the "}" can be wrapped to the new line.
+ ItemEnd = Token->MatchingParen->Previous;
+ }
+ } else {
+ ItemEnd = Commas[i];
+ // The comma is counted as part of the item when calculating the length.
+ ItemLengths.push_back(ItemEnd->TotalLength - ItemBegin->TotalLength +
+ ItemBegin->CodePointCount);
+ // Consume trailing comments so the are included in EndOfLineItemLength.
+ if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
+ ItemEnd->Next->isTrailingComment())
+ ItemEnd = ItemEnd->Next;
+ }
+ EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
+ ItemBegin = ItemEnd->Next;
+ }
+
+ // We can never place more than ColumnLimit / 3 items in a row (because of the
+ // spaces and the comma).
+ for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) {
+ ColumnFormat Format;
+ Format.Columns = Columns;
+ Format.ColumnSizes.resize(Columns);
+ Format.LineCount = 0;
+ bool HasRowWithSufficientColumns = false;
+ unsigned Column = 0;
+ for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
+ if (MustBreakBeforeItem[i] || Column == Columns) {
+ ++Format.LineCount;
+ Column = 0;
+ }
+ if (Column == Columns - 1)
+ HasRowWithSufficientColumns = true;
+ unsigned length =
+ (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
+ Format.ColumnSizes[Column] =
+ std::max(Format.ColumnSizes[Column], length);
+ ++Column;
+ }
+ // If all rows are terminated early (e.g. by trailing comments), we don't
+ // need to look further.
+ if (!HasRowWithSufficientColumns)
+ break;
+ Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
+ for (unsigned i = 0; i < Columns; ++i) {
+ Format.TotalWidth += Format.ColumnSizes[i];
+ }
+
+ // Ignore layouts that are bound to violate the column limit.
+ if (Format.TotalWidth > Style.ColumnLimit)
+ continue;
+
+ Formats.push_back(Format);
+ }
+}
+
+const CommaSeparatedList::ColumnFormat *
+CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
+ const ColumnFormat *BestFormat = NULL;
+ for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
+ I = Formats.rbegin(),
+ E = Formats.rend();
+ I != E; ++I) {
+ if (I->TotalWidth <= RemainingCharacters) {
+ if (BestFormat && I->LineCount > BestFormat->LineCount)
+ break;
+ BestFormat = &*I;
+ }
+ }
+ return BestFormat;
+}
+
+} // namespace format
+} // namespace clang
#define LLVM_CLANG_FORMAT_FORMAT_TOKEN_H
#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/OwningPtr.h"
namespace clang {
namespace format {
PPK_Inconclusive
};
+class TokenRole;
+
/// \brief A wrapper around a \c Token storing information about the
/// whitespace characters preceeding it.
struct FormatToken {
TokenType Type;
+ /// \brief The number of spaces that should be inserted before this token.
unsigned SpacesRequiredBefore;
+
+ /// \brief \c true if it is allowed to break before this token.
bool CanBreakBefore;
bool ClosesTemplateDeclaration;
/// the number of commas.
unsigned ParameterCount;
+ /// \brief A token can have a special role that can carry extra information
+ /// about the token's formatting.
+ llvm::OwningPtr<TokenRole> Role;
+
/// \brief If this is an opening parenthesis, how are the parameters packed?
ParameterPackingKind PackingKind;
void operator=(const FormatToken &) LLVM_DELETED_FUNCTION;
};
+class ContinuationIndenter;
+struct LineState;
+
+class TokenRole {
+public:
+ TokenRole(const FormatStyle &Style) : Style(Style) {}
+ virtual ~TokenRole();
+
+ /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
+ /// this function precomputes required information for formatting.
+ virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+ /// \brief Apply the special formatting that the given role demands.
+ ///
+ /// Continues formatting from \p State leaving indentation to \p Indenter and
+ /// returns the total penalty that this formatting incurs.
+ virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun) {
+ return 0;
+ }
+
+ /// \brief Notifies the \c Role that a comma was found.
+ virtual void CommaFound(const FormatToken *Token) {}
+
+protected:
+ const FormatStyle &Style;
+};
+
+class CommaSeparatedList : public TokenRole {
+public:
+ CommaSeparatedList(const FormatStyle &Style) : TokenRole(Style) {}
+
+ virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+ virtual unsigned format(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun);
+
+ /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
+ virtual void CommaFound(const FormatToken *Token) { Commas.push_back(Token); }
+
+private:
+ /// \brief A struct that holds information on how to format a given list with
+ /// a specific number of columns.
+ struct ColumnFormat {
+ /// \brief The number of columns to use.
+ unsigned Columns;
+
+ /// \brief The total width in characters.
+ unsigned TotalWidth;
+
+ /// \brief The number of lines required for this format.
+ unsigned LineCount;
+
+ /// \brief The size of each column in characters.
+ SmallVector<unsigned, 8> ColumnSizes;
+ };
+
+ /// \brief Calculate which \c ColumnFormat fits best into
+ /// \p RemainingCharacters.
+ const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
+
+ /// \brief The ordered \c FormatTokens making up the commas of this list.
+ SmallVector<const FormatToken *, 8> Commas;
+
+ /// \brief The length of each of the list's items in characters including the
+ /// trailing comma.
+ SmallVector<unsigned, 8> ItemLengths;
+
+ /// \brief Precomputed formats that can be used for this list.
+ SmallVector<ColumnFormat, 4> Formats;
+};
+
} // namespace format
} // namespace clang
/// into template parameter lists.
class AnnotatingParser {
public:
- AnnotatingParser(AnnotatedLine &Line, IdentifierInfo &Ident_in)
- : Line(Line), CurrentToken(Line.First), KeywordVirtualFound(false),
- NameFound(false), AutoFound(false), Ident_in(Ident_in) {
+ AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
+ IdentifierInfo &Ident_in)
+ : Style(Style), Line(Line), CurrentToken(Line.First),
+ KeywordVirtualFound(false), NameFound(false), AutoFound(false),
+ Ident_in(Ident_in) {
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
}
void updateParameterCount(FormatToken *Left, FormatToken *Current) {
if (Current->is(tok::comma)) {
++Left->ParameterCount;
+ if (!Left->Role)
+ Left->Role.reset(new CommaSeparatedList(Style));
+ Left->Role->CommaFound(Current);
} else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
Left->ParameterCount = 1;
}
SmallVector<Context, 8> Contexts;
+ const FormatStyle &Style;
AnnotatedLine &Line;
FormatToken *CurrentToken;
bool KeywordVirtualFound;
} // end anonymous namespace
void TokenAnnotator::annotate(AnnotatedLine &Line) {
- AnnotatingParser Parser(Line, Ident_in);
+ AnnotatingParser Parser(Style, Line, Ident_in);
Line.Type = Parser.parseLine();
if (Line.Type == LT_Invalid)
return;
}
calculateUnbreakableTailLengths(Line);
+ for (Current = Line.First; Current != NULL; Current = Current->Next) {
+ if (Current->Role)
+ Current->Role->precomputeFormattingInfos(Current);
+ }
+
DEBUG({
printDebugInfo(Line);
});
" 100000000, \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"\n"
"};");
- verifyFormat(
- "static SomeClass = { a, b, c, d, e, f, g, h, i, j,\n"
- " looooooooooooooooooooooooooooooooooongname,\n"
- " looooooooooooooooooooooooooooooong };");
- // Allow bin-packing in static initializers as this would often lead to
- // terrible results, e.g.:
- verifyGoogleFormat(
- "static SomeClass = {a, b, c, d, e, f, g, h, i, j,\n"
- " looooooooooooooooooooooooooooooooooongname,\n"
- " looooooooooooooooooooooooooooooong};");
// Here, everything other than the "}" would fit on a line.
verifyFormat("static int LooooooooooooooooooooooooongVariable[1] = {\n"
" 100000000000000000000000\n"
"struct {\n"
" unsigned bit;\n"
" const char *const name;\n"
- "} kBitsToOs[] = { { kOsMac, \"Mac\" }, { kOsWin, \"Windows\" },\n"
+ "} kBitsToOs[] = { { kOsMac, \"Mac\" }, { kOsWin, \"Windows\" },\n"
" { kOsLinux, \"Linux\" }, { kOsCrOS, \"Chrome OS\" } };");
}
NoSpaces);
}
+TEST_F(FormatTest, FormatsBracedListsinColumnLayout) {
+ verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777 };");
+ verifyFormat("vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " // line comment\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555,\n"
+ " // line comment\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777 };");
+ verifyFormat(
+ "vector<int> x = { 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, 7777777,\n"
+ " 1, 22, 333, 4444, 55555, 666666, // comment\n"
+ " 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
+ " 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
+ " 7777777, 1, 22, 333, 4444, 55555, 666666,\n"
+ " 7777777 };");
+ verifyFormat("static const uint16_t CallerSavedRegs64Bittttt[] = {\n"
+ " X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,\n"
+ " X86::R8, X86::R9, X86::R10, X86::R11, 0\n"
+ "};");
+ verifyFormat("vector<int> x = { 1, 1, 1, 1,\n"
+ " 1, 1, 1, 1 };",
+ getLLVMStyleWithColumns(39));
+ verifyFormat("vector<int> x = { 1, 1, 1, 1,\n"
+ " 1, 1, 1, 1 };",
+ getLLVMStyleWithColumns(38));
+ verifyFormat("vector<int> aaaaaaaaaaaaaaaaaaaaaa = {\n"
+ " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1\n"
+ "};",
+ getLLVMStyleWithColumns(40));
+}
+
TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) {
verifyFormat("void f() { return 42; }");
verifyFormat("void f() {\n"