1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements an indenter that manages the indentation of
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
17 #define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
20 #include "clang/Format/Format.h"
21 #include "llvm/Support/Regex.h"
32 class WhitespaceManager;
34 class ContinuationIndenter {
36 /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
37 /// column \p FirstIndent.
38 ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr,
39 WhitespaceManager &Whitespaces,
40 encoding::Encoding Encoding,
41 bool BinPackInconclusiveFunctions);
43 /// \brief Get the initial state, i.e. the state after placing \p Line's
44 /// first token at \p FirstIndent.
45 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
48 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
50 /// \brief Returns \c true, if a line break after \p State is allowed.
51 bool canBreak(const LineState &State);
53 /// \brief Returns \c true, if a line break after \p State is mandatory.
54 bool mustBreak(const LineState &State);
56 /// \brief Appends the next token to \p State and updates information
57 /// necessary for indentation.
59 /// Puts the token on the current line if \p Newline is \c false and adds a
60 /// line break and necessary indentation otherwise.
62 /// If \p DryRun is \c false, also creates and stores the required
64 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
65 unsigned ExtraSpaces = 0);
67 /// \brief Get the column limit for this line. This is the style's column
68 /// limit, potentially reduced for preprocessor definitions.
69 unsigned getColumnLimit(const LineState &State) const;
72 /// \brief Mark the next token as consumed in \p State and modify its stacks
74 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
76 /// \brief Update 'State' according to the next token's fake left parentheses.
77 void moveStatePastFakeLParens(LineState &State, bool Newline);
78 /// \brief Update 'State' according to the next token's fake r_parens.
79 void moveStatePastFakeRParens(LineState &State);
81 /// \brief Update 'State' according to the next token being one of "(<{[".
82 void moveStatePastScopeOpener(LineState &State, bool Newline);
83 /// \brief Update 'State' according to the next token being one of ")>}]".
84 void moveStatePastScopeCloser(LineState &State);
85 /// \brief Update 'State' with the next token opening a nested block.
86 void moveStateToNewBlock(LineState &State);
88 /// \brief If the current token sticks out over the end of the line, break
91 /// \returns An extra penalty if a token was broken, otherwise 0.
93 /// The returned penalty will cover the cost of the additional line breaks and
94 /// column limit violation in all lines except for the last one. The penalty
95 /// for the column limit violation in the last line (and in single line
96 /// tokens) is handled in \c addNextStateToQueue.
97 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
100 /// \brief Appends the next token to \p State and updates information
101 /// necessary for indentation.
103 /// Puts the token on the current line.
105 /// If \p DryRun is \c false, also creates and stores the required
107 void addTokenOnCurrentLine(LineState &State, bool DryRun,
108 unsigned ExtraSpaces);
110 /// \brief Appends the next token to \p State and updates information
111 /// necessary for indentation.
113 /// Adds a line break and necessary indentation.
115 /// If \p DryRun is \c false, also creates and stores the required
117 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
119 /// \brief Calculate the new column for a line wrap before the next token.
120 unsigned getNewLineColumn(const LineState &State);
122 /// \brief Adds a multiline token to the \p State.
124 /// \returns Extra penalty for the first line of the literal: last line is
125 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
126 /// matter, as we don't change them.
127 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
129 /// \brief Returns \c true if the next token starts a multiline string
132 /// This includes implicitly concatenated strings, strings that will be broken
133 /// by clang-format and string literals with escaped newlines.
134 bool nextIsMultilineString(const LineState &State);
137 SourceManager &SourceMgr;
138 WhitespaceManager &Whitespaces;
139 encoding::Encoding Encoding;
140 bool BinPackInconclusiveFunctions;
141 llvm::Regex CommentPragmasRegex;
145 ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
146 bool AvoidBinPacking, bool NoLineBreak)
147 : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
148 FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
149 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
150 NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
151 StartOfFunctionCall(0), StartOfArraySubscripts(0),
152 NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
153 ContainsLineBreak(false), ContainsUnwrappedBuilder(0),
154 AlignColons(true), ObjCSelectorNameFound(false),
155 HasMultipleNestedBlocks(false), JSFunctionInlined(false) {}
157 /// \brief The position to which a specific parenthesis level needs to be
161 /// \brief The number of indentation levels of the block.
162 unsigned IndentLevel;
164 /// \brief The position of the last space on each level.
166 /// Used e.g. to break like:
167 /// functionCall(Parameter, otherCall(
168 /// OtherParameter));
171 /// \brief The position the first "<<" operator encountered on each level.
173 /// Used to align "<<" operators. 0 if no such operator has been encountered
175 unsigned FirstLessLess;
177 /// \brief Whether a newline needs to be inserted before the block's closing
180 /// We only want to insert a newline before the closing brace if there also
181 /// was a newline after the beginning left brace.
182 bool BreakBeforeClosingBrace;
184 /// \brief The column of a \c ? in a conditional expression.
185 unsigned QuestionColumn;
187 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
188 /// lines, in this context.
189 bool AvoidBinPacking;
191 /// \brief Break after the next comma (or all the commas in this context if
192 /// \c AvoidBinPacking is \c true).
193 bool BreakBeforeParameter;
195 /// \brief Line breaking in this context would break a formatting rule.
198 /// \brief True if the last binary operator on this level was wrapped to the
200 bool LastOperatorWrapped;
202 /// \brief The position of the colon in an ObjC method declaration/call.
205 /// \brief The start of the most recent function in a builder-type call.
206 unsigned StartOfFunctionCall;
208 /// \brief Contains the start of array subscript expressions, so that they
210 unsigned StartOfArraySubscripts;
212 /// \brief If a nested name specifier was broken over multiple lines, this
213 /// contains the start column of the second line. Otherwise 0.
214 unsigned NestedNameSpecifierContinuation;
216 /// \brief If a call expression was broken over multiple lines, this
217 /// contains the start column of the second line. Otherwise 0.
218 unsigned CallContinuation;
220 /// \brief The column of the first variable name in a variable declaration.
222 /// Used to align further variables if necessary.
223 unsigned VariablePos;
225 /// \brief \c true if this \c ParenState already contains a line-break.
227 /// The first line break in a certain \c ParenState causes extra penalty so
228 /// that clang-format prefers similar breaks, i.e. breaks in the same
230 bool ContainsLineBreak;
232 /// \brief \c true if this \c ParenState contains multiple segments of a
233 /// builder-type call on one line.
234 bool ContainsUnwrappedBuilder;
236 /// \brief \c true if the colons of the current ObjC method expression should
239 /// Not considered for memoization as it will always have the same value at
243 /// \brief \c true if at least one selector name was found in the current
244 /// ObjC method expression.
246 /// Not considered for memoization as it will always have the same value at
248 bool ObjCSelectorNameFound;
250 /// \brief \c true if there are multiple nested blocks inside these parens.
252 /// Not considered for memoization as it will always have the same value at
254 bool HasMultipleNestedBlocks;
256 /// \brief The previous JavaScript 'function' keyword is not wrapped to a new
258 bool JSFunctionInlined;
260 bool operator<(const ParenState &Other) const {
261 if (Indent != Other.Indent)
262 return Indent < Other.Indent;
263 if (LastSpace != Other.LastSpace)
264 return LastSpace < Other.LastSpace;
265 if (FirstLessLess != Other.FirstLessLess)
266 return FirstLessLess < Other.FirstLessLess;
267 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
268 return BreakBeforeClosingBrace;
269 if (QuestionColumn != Other.QuestionColumn)
270 return QuestionColumn < Other.QuestionColumn;
271 if (AvoidBinPacking != Other.AvoidBinPacking)
272 return AvoidBinPacking;
273 if (BreakBeforeParameter != Other.BreakBeforeParameter)
274 return BreakBeforeParameter;
275 if (NoLineBreak != Other.NoLineBreak)
277 if (LastOperatorWrapped != Other.LastOperatorWrapped)
278 return LastOperatorWrapped;
279 if (ColonPos != Other.ColonPos)
280 return ColonPos < Other.ColonPos;
281 if (StartOfFunctionCall != Other.StartOfFunctionCall)
282 return StartOfFunctionCall < Other.StartOfFunctionCall;
283 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
284 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
285 if (CallContinuation != Other.CallContinuation)
286 return CallContinuation < Other.CallContinuation;
287 if (VariablePos != Other.VariablePos)
288 return VariablePos < Other.VariablePos;
289 if (ContainsLineBreak != Other.ContainsLineBreak)
290 return ContainsLineBreak < Other.ContainsLineBreak;
291 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
292 return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
293 if (JSFunctionInlined != Other.JSFunctionInlined)
294 return JSFunctionInlined < Other.JSFunctionInlined;
299 /// \brief The current state when indenting an unwrapped line.
301 /// As the indenting tries different combinations this is copied by value.
303 /// \brief The number of used columns in the current line.
306 /// \brief The token that needs to be next formatted.
307 FormatToken *NextToken;
309 /// \brief \c true if this line contains a continued for-loop section.
310 bool LineContainsContinuedForLoopSection;
312 /// \brief The \c NestingLevel at the start of this line.
313 unsigned StartOfLineLevel;
315 /// \brief The lowest \c NestingLevel on the current line.
316 unsigned LowestLevelOnLine;
318 /// \brief The start column of the string literal, if we're in a string
319 /// literal sequence, 0 otherwise.
320 unsigned StartOfStringLiteral;
322 /// \brief A stack keeping track of properties applying to parenthesis
324 std::vector<ParenState> Stack;
326 /// \brief Ignore the stack of \c ParenStates for state comparison.
328 /// In long and deeply nested unwrapped lines, the current algorithm can
329 /// be insufficient for finding the best formatting with a reasonable amount
330 /// of time and memory. Setting this flag will effectively lead to the
331 /// algorithm not analyzing some combinations. However, these combinations
332 /// rarely contain the optimal solution: In short, accepting a higher
333 /// penalty early would need to lead to different values in the \c
334 /// ParenState stack (in an otherwise identical state) and these different
335 /// values would need to lead to a significant amount of avoided penalty
338 /// FIXME: Come up with a better algorithm instead.
339 bool IgnoreStackForComparison;
341 /// \brief The indent of the first token.
342 unsigned FirstIndent;
344 /// \brief The line that is being formatted.
346 /// Does not need to be considered for memoization because it doesn't change.
347 const AnnotatedLine *Line;
349 /// \brief Comparison operator to be able to used \c LineState in \c map.
350 bool operator<(const LineState &Other) const {
351 if (NextToken != Other.NextToken)
352 return NextToken < Other.NextToken;
353 if (Column != Other.Column)
354 return Column < Other.Column;
355 if (LineContainsContinuedForLoopSection !=
356 Other.LineContainsContinuedForLoopSection)
357 return LineContainsContinuedForLoopSection;
358 if (StartOfLineLevel != Other.StartOfLineLevel)
359 return StartOfLineLevel < Other.StartOfLineLevel;
360 if (LowestLevelOnLine != Other.LowestLevelOnLine)
361 return LowestLevelOnLine < Other.LowestLevelOnLine;
362 if (StartOfStringLiteral != Other.StartOfStringLiteral)
363 return StartOfStringLiteral < Other.StartOfStringLiteral;
364 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
366 return Stack < Other.Stack;
370 } // end namespace format
371 } // end namespace clang
373 #endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H