1 //===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements WhitespaceManager class.
13 //===----------------------------------------------------------------------===//
15 #include "WhitespaceManager.h"
16 #include "llvm/ADT/STLExtras.h"
21 bool WhitespaceManager::Change::IsBeforeInFile::
22 operator()(const Change &C1, const Change &C2) const {
23 return SourceMgr.isBeforeInTranslationUnit(
24 C1.OriginalWhitespaceRange.getBegin(),
25 C2.OriginalWhitespaceRange.getBegin());
28 WhitespaceManager::Change::Change(
29 bool CreateReplacement, SourceRange OriginalWhitespaceRange,
30 unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
31 unsigned NewlinesBefore, StringRef PreviousLinePostfix,
32 StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective,
33 bool IsStartOfDeclName)
34 : CreateReplacement(CreateReplacement),
35 OriginalWhitespaceRange(OriginalWhitespaceRange),
36 StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
37 PreviousLinePostfix(PreviousLinePostfix),
38 CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
39 ContinuesPPDirective(ContinuesPPDirective),
40 IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel),
41 Spaces(Spaces), IsTrailingComment(false), TokenLength(0),
42 PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
43 StartOfBlockComment(nullptr), IndentationOffset(0) {}
/// NOTE(review): only the opening line of this definition is visible in this
/// excerpt. Presumably it returns the manager to an empty state -- confirm
/// against the full body before relying on that.
45 void WhitespaceManager::reset() {
/// Records the whitespace in front of \p Tok as a replaceable Change.
///
/// Sets Tok.Decision to FD_Break when \p Newlines is non-zero and to
/// FD_Continue otherwise, then builds a Change with CreateReplacement = true
/// that covers Tok.WhitespaceRange and carries the requested indent level,
/// spaces, start column and newline count. Both affix strings are empty.
///
/// NOTE(review): the tail of the parameter list and the statement that stores
/// the Change are not visible in this excerpt.
50 void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
51 unsigned IndentLevel, unsigned Spaces,
52 unsigned StartOfTokenColumn,
56 Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
58 Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel,
59 Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(),
// ContinuesPPDirective: set only when InPPDirective holds and Tok.IsFirst is
// false.
60 InPPDirective && !Tok.IsFirst,
// IsStartOfDeclName: consumed by alignConsecutiveDeclarations().
61 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName)));
/// Appends a Change for \p Tok that will not produce a replacement.
///
/// The Change is created with CreateReplacement = false, indent level 0 and
/// 0 spaces, and keeps the token's own OriginalColumn and NewlinesBefore.
///
/// NOTE(review): the rest of the parameter list is not visible in this
/// excerpt; InPPDirective is presumably declared there.
64 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
68 Changes.push_back(Change(
69 /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0,
70 /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
71 Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst,
72 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName)));
/// Appends a Change that replaces characters inside \p Tok.
///
/// The replaced range starts \p Offset characters after the token's first
/// non-whitespace location and spans \p ReplaceChars characters. The Change is
/// created with CreateReplacement = true and uses std::max(0, Spaces) as its
/// start-of-token column.
///
/// \param PreviousPostfix Passed as the Change's PreviousLinePostfix.
/// \param CurrentPrefix NOTE(review): the argument line that forwards this
/// value is not visible in this excerpt.
/// \param Spaces Signed; stored as-is in the Change.
75 void WhitespaceManager::replaceWhitespaceInToken(
76 const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
77 StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
78 unsigned Newlines, unsigned IndentLevel, int Spaces) {
81 SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
82 Changes.push_back(Change(
83 true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)),
84 IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix,
86 // If we don't add a newline this change doesn't start a comment. Thus,
87 // when we align line comments, we don't need to treat this change as one.
88 // FIXME: We still need to take this change in account to properly
89 // calculate the new length of the comment and to calculate the changes
90 // for which to do the alignment when aligning comments.
// Kind is tok::comment only for a line comment that gets a newline; every
// other in-token change is recorded as tok::unknown.
91 Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown,
92 InPPDirective && !Tok.IsFirst,
93 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName)));
/// Runs the alignment passes over the recorded changes.
///
/// The changes are first sorted into source order with Change::IsBeforeInFile.
/// Line-break information is then computed, followed by the alignment passes
/// in the order shown below.
///
/// \returns A const reference to a tooling::Replacements object.
/// NOTE(review): the lines before the sort and after the last alignment pass
/// (including the return statement) are not visible in this excerpt.
96 const tooling::Replacements &WhitespaceManager::generateReplacements() {
100 std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
// Must run before the alignment passes: it fills in TokenLength,
// PreviousEndOfTokenColumn and IsTrailingComment, which they read.
101 calculateLineBreakInformation();
102 alignConsecutiveDeclarations();
103 alignConsecutiveAssignments();
104 alignTrailingComments();
105 alignEscapedNewlines();
/// Fills in the derived fields of every Change.
///
/// First pass: for each adjacent pair, computes the previous change's
/// TokenLength and IsTrailingComment and the current change's
/// PreviousEndOfTokenColumn. Second pass: links tok::unknown changes to the
/// most recent tok::comment change via StartOfBlockComment and records their
/// column offset relative to it.
///
/// Changes[0] and Changes.back() are accessed unconditionally, so the list
/// must not be empty when this runs.
/// NOTE(review): several closing braces and an else branch are not visible in
/// this excerpt.
111 void WhitespaceManager::calculateLineBreakInformation() {
112 Changes[0].PreviousEndOfTokenColumn = 0;
113 for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
114 unsigned OriginalWhitespaceStart =
115 SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
116 unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
117 Changes[i - 1].OriginalWhitespaceRange.getEnd());
// Length of the previous token: the file-offset distance from the end of the
// previous whitespace to the start of this one, plus the postfix appended to
// the previous line and the prefix prepended before the previous token.
118 Changes[i - 1].TokenLength = OriginalWhitespaceStart -
119 PreviousOriginalWhitespaceEnd +
120 Changes[i].PreviousLinePostfix.size() +
121 Changes[i - 1].CurrentLinePrefix.size();
123 Changes[i].PreviousEndOfTokenColumn =
124 Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
// A comment is trailing when the next change starts a new line or is eof.
126 Changes[i - 1].IsTrailingComment =
127 (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) &&
128 Changes[i - 1].Kind == tok::comment;
130 // FIXME: The last token is currently not always an eof token; in those
131 // cases, setting TokenLength of the last token to 0 is wrong.
132 Changes.back().TokenLength = 0;
133 Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
135 const WhitespaceManager::Change *LastBlockComment = nullptr;
136 for (auto &Change : Changes) {
137 Change.StartOfBlockComment = nullptr;
138 Change.IndentationOffset = 0;
139 if (Change.Kind == tok::comment) {
140 LastBlockComment = &Change;
141 } else if (Change.Kind == tok::unknown) {
// The assignment inside the condition is intentional: the offset is only
// computed when a preceding comment change exists.
142 if ((Change.StartOfBlockComment = LastBlockComment))
143 Change.IndentationOffset =
144 Change.StartOfTokenColumn -
145 Change.StartOfBlockComment->StartOfTokenColumn;
// NOTE(review): presumably the final else branch (any other token kind ends
// the current comment) -- the branch header is not visible here.
147 LastBlockComment = nullptr;
152 // Align a single sequence of tokens, see AlignTokens below.
//
// Walks Changes[Start, End). On each line, the first change accepted by
// Matches is moved to Column by adding the difference to its Spaces; the
// resulting Shift is then applied to StartOfTokenColumn of the changes and to
// PreviousEndOfTokenColumn of their successors.
//
// NOTE(review): the return-type line, the declaration of Shift and some
// braces are not visible in this excerpt; Shift is presumably reset at each
// line start -- confirm.
153 template <typename F>
155 AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
156 SmallVector<WhitespaceManager::Change, 16> &Changes) {
157 bool FoundMatchOnLine = false;
159 for (unsigned i = Start; i != End; ++i) {
// A change with newlines before it starts a new line: look for a match again.
160 if (Changes[i].NewlinesBefore > 0) {
161 FoundMatchOnLine = false;
165 // If this is the first matching token to be aligned, remember by how many
166 // spaces it has to be shifted, so the rest of the changes on the line are
167 // shifted by the same amount
168 if (!FoundMatchOnLine && Matches(Changes[i])) {
169 FoundMatchOnLine = true;
170 Shift = Column - Changes[i].StartOfTokenColumn;
171 Changes[i].Spaces += Shift;
175 Changes[i].StartOfTokenColumn += Shift;
// The last change has no successor to update.
176 if (i + 1 != Changes.size())
177 Changes[i + 1].PreviousEndOfTokenColumn += Shift;
181 // Walk through all of the changes and find sequences of matching tokens to
182 // align. To do so, keep track of the lines and whether or not a matching token
183 // was found on a line. If a matching token is found, extend the current
184 // sequence. If the current line cannot be part of a sequence, e.g. because
185 // there is an empty line before it or it contains only non-matching tokens,
186 // finalize the previous sequence.
//
// Style supplies ColumnLimit, Matches is the predicate that selects tokens to
// align, and Changes is modified in place through AlignTokenSequence.
//
// NOTE(review): many short lines (continue statements, counter updates,
// closing braces) are not visible in this excerpt, so the comments added
// below describe only what the visible statements do.
187 template <typename F>
188 static void AlignTokens(const FormatStyle &Style, F &&Matches,
189 SmallVector<WhitespaceManager::Change, 16> &Changes) {
// [MinColumn, MaxColumn] is the window of columns at which every token of the
// current sequence can be placed.
190 unsigned MinColumn = 0;
191 unsigned MaxColumn = UINT_MAX;
193 // Line number of the start and the end of the current token sequence.
194 unsigned StartOfSequence = 0;
195 unsigned EndOfSequence = 0;
197 // Keep track of the nesting level of matching tokens, i.e. the number of
198 // surrounding (), [], or {}. We will only align a sequence of matching
199 // token that share the same scope depth.
201 // FIXME: This could use FormatToken::NestingLevel information, but there is
202 // an outstanding issue wrt the brace scopes.
203 unsigned NestingLevelOfLastMatch = 0;
204 unsigned NestingLevel = 0;
206 // Keep track of the number of commas before the matching tokens, we will only
207 // align a sequence of matching tokens if they are preceded by the same number
209 unsigned CommasBeforeLastMatch = 0;
210 unsigned CommasBeforeMatch = 0;
212 // Whether a matching token has been found on the current line.
213 bool FoundMatchOnLine = false;
215 // Aligns a sequence of matching tokens, on the MinColumn column.
217 // Sequences start from the first matching token to align, and end at the
218 // first token of the first line that doesn't need to be aligned.
220 // We need to adjust the StartOfTokenColumn of each Change that is on a line
221 // containing any matching token to be aligned and located after such token.
222 auto AlignCurrentSequence = [&] {
// StartOfSequence == 0 doubles as "no sequence open", so nothing is aligned
// in that case.
223 if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
224 AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
227 MaxColumn = UINT_MAX;
232 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
// Start of a new line: the comma count is tracked per line.
233 if (Changes[i].NewlinesBefore != 0) {
234 CommasBeforeMatch = 0;
236 // If there is a blank line, or if the last line didn't contain any
237 // matching token, the sequence ends here.
238 if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
239 AlignCurrentSequence();
241 FoundMatchOnLine = false;
// Classify the token by kind: comma, closing bracket or opening bracket.
244 if (Changes[i].Kind == tok::comma) {
246 } else if (Changes[i].Kind == tok::r_brace ||
247 Changes[i].Kind == tok::r_paren ||
248 Changes[i].Kind == tok::r_square) {
250 } else if (Changes[i].Kind == tok::l_brace ||
251 Changes[i].Kind == tok::l_paren ||
252 Changes[i].Kind == tok::l_square) {
253 // We want sequences to skip over child scopes if possible, but not the
255 NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel);
259 if (!Matches(Changes[i]))
262 // If there is more than one matching token per line, or if the number of
263 // preceding commas, or the scope depth, do not match anymore, end the
265 if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch ||
266 NestingLevel != NestingLevelOfLastMatch)
267 AlignCurrentSequence();
269 CommasBeforeLastMatch = CommasBeforeMatch;
270 NestingLevelOfLastMatch = NestingLevel;
271 FoundMatchOnLine = true;
273 if (StartOfSequence == 0)
// ChangeMinColumn is where the token currently starts. LineLengthAfter sums
// Spaces and TokenLength of the changes from i up to the next line start,
// excluding the token's own leading spaces, and ChangeMaxColumn is
// ColumnLimit minus that sum.
276 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
277 int LineLengthAfter = -Changes[i].Spaces;
278 for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
279 LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
280 unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
282 // If we are restricted by the maximum column width, end the sequence.
283 if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
284 CommasBeforeLastMatch != CommasBeforeMatch) {
285 AlignCurrentSequence();
// Narrow the window so it still fits every token seen in this sequence.
289 MinColumn = std::max(MinColumn, ChangeMinColumn);
290 MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
// Flush whatever sequence is still open at the end of the change list.
293 EndOfSequence = Changes.size();
294 AlignCurrentSequence();
/// Aligns tok::equal tokens, gated on Style.AlignConsecutiveAssignments.
///
/// The predicate below accepts a change only when its Kind is tok::equal; the
/// two preceding checks test whether it is first or last on its line.
/// NOTE(review): the statements under the three if conditions and the line
/// that invokes the alignment helper are not visible in this excerpt.
297 void WhitespaceManager::alignConsecutiveAssignments() {
298 if (!Style.AlignConsecutiveAssignments)
302 [&](const Change &C) {
303 // Do not align on equal signs that are first on a line.
304 if (C.NewlinesBefore > 0)
307 // Do not align on equal signs that are last on a line.
// (&C + 1) reads the next element; this relies on C being an element of
// Changes, and the back() check keeps it from reading past the end.
308 if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
311 return C.Kind == tok::equal;
/// Aligns changes flagged IsStartOfDeclName, gated on
/// Style.AlignConsecutiveDeclarations.
///
/// NOTE(review): the statement under the if and the remaining arguments of
/// the AlignTokens call are not visible in this excerpt.
316 void WhitespaceManager::alignConsecutiveDeclarations() {
317 if (!Style.AlignConsecutiveDeclarations)
320 // FIXME: Currently we don't handle properly the PointerAlignment: Right
321 // The * and & are not aligned and are left dangling. Something has to be done
322 // about it, but it raises the question of alignment of code like:
323 // const char* const* v1;
325 // SomeVeryLongType const& v3;
327 AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; },
/// Groups trailing comments into sequences and aligns each sequence.
///
/// Scans all changes, maintaining a [MinColumn, MaxColumn] window for the
/// current sequence. A sequence is flushed through
/// alignTrailingComments(Start, End, Column) whenever the next comment cannot
/// join it, and once more after the loop for the final sequence.
///
/// NOTE(review): continue/break statements, the StartOfSequence updates and
/// the left-hand side of the BreakBeforeNext assignment are not visible in
/// this excerpt.
331 void WhitespaceManager::alignTrailingComments() {
332 unsigned MinColumn = 0;
333 unsigned MaxColumn = UINT_MAX;
334 unsigned StartOfSequence = 0;
335 bool BreakBeforeNext = false;
336 unsigned Newlines = 0;
337 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
338 if (Changes[i].StartOfBlockComment)
340 Newlines += Changes[i].NewlinesBefore;
341 if (!Changes[i].IsTrailingComment)
// The comment can sit anywhere from its current column up to the column at
// which it would still end within ColumnLimit.
344 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
345 unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
347 // If we don't create a replacement for this change, we have to consider
348 // it to be immovable.
349 if (!Changes[i].CreateReplacement)
350 ChangeMaxColumn = ChangeMinColumn;
// NOTE(review): the 2 columns presumably make room for an escaped newline
// when the next change continues a preprocessor directive -- confirm.
352 if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
353 ChangeMaxColumn -= 2;
354 // If this comment follows an } in column 0, it probably documents the
355 // closing of a namespace and we don't want to align it.
356 bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
357 Changes[i - 1].Kind == tok::r_brace &&
358 Changes[i - 1].StartOfTokenColumn == 0;
359 bool WasAlignedWithStartOfNextLine = false;
360 if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
361 unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
362 Changes[i].OriginalWhitespaceRange.getEnd());
363 for (unsigned j = i + 1; j != e; ++j) {
364 if (Changes[j].Kind != tok::comment) { // Skip over comments.
365 unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
366 Changes[j].OriginalWhitespaceRange.getEnd());
367 // The start of the next token was previously aligned with the
368 // start of this comment.
369 WasAlignedWithStartOfNextLine =
370 CommentColumn == NextColumn ||
371 CommentColumn == NextColumn + Style.IndentWidth;
// Alignment disabled or namespace-closing comment: flush the open sequence
// and pin this comment to its current column (Min == Max).
376 if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
377 alignTrailingComments(StartOfSequence, i, MinColumn);
378 MinColumn = ChangeMinColumn;
379 MaxColumn = ChangeMinColumn;
381 } else if (BreakBeforeNext || Newlines > 1 ||
382 (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
383 // Break the comment sequence if the previous line did not end
384 // in a trailing comment.
385 (Changes[i].NewlinesBefore == 1 && i > 0 &&
386 !Changes[i - 1].IsTrailingComment) ||
387 WasAlignedWithStartOfNextLine) {
388 alignTrailingComments(StartOfSequence, i, MinColumn);
389 MinColumn = ChangeMinColumn;
390 MaxColumn = ChangeMaxColumn;
// Otherwise the comment joins the current sequence; narrow the window.
393 MinColumn = std::max(MinColumn, ChangeMinColumn);
394 MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
397 (i == 0) || (Changes[i].NewlinesBefore > 1) ||
398 // Never start a sequence with a comment at the beginning of
400 (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
// Flush the sequence that is still open after the last change.
403 alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
/// Moves the trailing comments in Changes[Start, End) to \c Column.
///
/// A trailing comment is shifted by Column - StartOfTokenColumn. A change that
/// belongs to a block comment is shifted so that it sits IndentationOffset
/// columns from the current column of the comment's first change.
///
/// NOTE(review): the end of the parameter list (which declares Column), the
/// declaration of Shift and the guard in front of the Changes[i + 1] access
/// are not visible in this excerpt.
406 void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
408 for (unsigned i = Start; i != End; ++i) {
410 if (Changes[i].IsTrailingComment) {
411 Shift = Column - Changes[i].StartOfTokenColumn;
413 if (Changes[i].StartOfBlockComment) {
414 Shift = Changes[i].IndentationOffset +
415 Changes[i].StartOfBlockComment->StartOfTokenColumn -
416 Changes[i].StartOfTokenColumn;
419 Changes[i].Spaces += Shift;
// Keep the successor's view of where the previous token ends in sync.
421 Changes[i + 1].PreviousEndOfTokenColumn += Shift;
422 Changes[i].StartOfTokenColumn += Shift;
/// Finds the column for the escaped newlines of each run of changes and
/// applies it through alignEscapedNewlines(Start, End, Column).
///
/// MaxEndOfLine starts at 0 when Style.AlignEscapedNewlinesLeft is set and at
/// Style.ColumnLimit otherwise, and is raised to PreviousEndOfTokenColumn + 2
/// for every line break that continues a preprocessor directive.
///
/// NOTE(review): the else branch header and the update of StartOfMacro are
/// not visible in this excerpt.
426 void WhitespaceManager::alignEscapedNewlines() {
427 unsigned MaxEndOfLine =
428 Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
429 unsigned StartOfMacro = 0;
430 for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
431 Change &C = Changes[i];
432 if (C.NewlinesBefore > 0) {
433 if (C.ContinuesPPDirective) {
434 MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
// Flush the collected run and restart with the style's initial column.
436 alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
437 MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
// Flush the run that is still open at the end of the change list.
442 alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
/// Sets EscapedNewlineColumn on the line-breaking changes in
/// Changes[Start, End).
///
/// Every change in the range that has newlines before it is asserted to
/// continue a preprocessor directive.
///
/// NOTE(review): the end of the parameter list (which declares Column) and
/// the else keyword between the two assignments are not visible in this
/// excerpt.
445 void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
447 for (unsigned i = Start; i < End; ++i) {
448 Change &C = Changes[i];
449 if (C.NewlinesBefore > 0) {
450 assert(C.ContinuesPPDirective);
// When the previous token ends beyond the column, 0 is stored instead.
451 if (C.PreviousEndOfTokenColumn + 1 > Column)
452 C.EscapedNewlineColumn = 0;
454 C.EscapedNewlineColumn = Column;
/// Builds the replacement text for every Change with CreateReplacement set and
/// hands it to storeReplacement().
///
/// The text is assembled in this order: PreviousLinePostfix, newlines,
/// indentation, CurrentLinePrefix.
///
/// NOTE(review): the guard around the assert (it indexes Changes[i - 1]) and
/// the else between the two appendNewlineText calls are not visible in this
/// excerpt.
459 void WhitespaceManager::generateChanges() {
460 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
461 const Change &C = Changes[i];
463 assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
464 C.OriginalWhitespaceRange.getBegin() &&
465 "Generating two replacements for the same location");
467 if (C.CreateReplacement) {
468 std::string ReplacementText = C.PreviousLinePostfix;
// Inside a preprocessor directive the overload that escapes newlines is used.
469 if (C.ContinuesPPDirective)
470 appendNewlineText(ReplacementText, C.NewlinesBefore,
471 C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
473 appendNewlineText(ReplacementText, C.NewlinesBefore);
// Spaces is signed and is clamped to 0 here; the last argument is the column
// at which the emitted whitespace starts.
474 appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces),
475 C.StartOfTokenColumn - std::max(0, C.Spaces));
476 ReplacementText.append(C.CurrentLinePrefix);
477 storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
/// Inserts a replacement of \p Range into Replaces.
///
/// The existing text of the range is compared with the new text first.
///
/// NOTE(review): the end of the parameter list (which declares Text) and the
/// statement under the comparison are not visible in this excerpt.
482 void WhitespaceManager::storeReplacement(SourceRange Range,
// Length of the range in characters, from its begin and end file offsets.
484 unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
485 SourceMgr.getFileOffset(Range.getBegin());
486 // Don't create a replacement, if it does not change anything.
487 if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
488 WhitespaceLength) == Text)
490 Replaces.insert(tooling::Replacement(
491 SourceMgr, CharSourceRange::getCharRange(Range), Text));
/// Appends Newlines line terminators to \p Text: "\r\n" when UseCRLF is set,
/// "\n" otherwise.
///
/// NOTE(review): the end of the parameter list (which declares Newlines) is
/// not visible in this excerpt.
494 void WhitespaceManager::appendNewlineText(std::string &Text,
496 for (unsigned i = 0; i < Newlines; ++i)
497 Text.append(UseCRLF ? "\r\n" : "\n");
/// Appends \p Newlines escaped line breaks to \p Text.
///
/// Each break consists of padding spaces, a backslash and a line terminator
/// ("\r\n" when UseCRLF is set, "\n" otherwise). The padding width is
/// EscapedNewlineColumn - Offset - 1.
///
/// NOTE(review): the declaration of Offset, whose initializer is the std::min
/// expression below, and any later update to Offset are not visible in this
/// excerpt.
500 void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
501 unsigned PreviousEndOfTokenColumn,
502 unsigned EscapedNewlineColumn) {
// std::min<int> compares as int, so EscapedNewlineColumn - 1 becomes -1
// rather than a large unsigned value when EscapedNewlineColumn is 0.
505 std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
506 for (unsigned i = 0; i < Newlines; ++i) {
507 Text.append(EscapedNewlineColumn - Offset - 1, ' ');
508 Text.append(UseCRLF ? "\\\r\n" : "\\\n");
/// Appends whitespace worth \p Spaces columns to \p Text, choosing between
/// tabs and spaces according to Style.UseTab.
///
/// \param IndentLevel Used only for UT_ForIndentation, where it is multiplied
/// by Style.IndentWidth.
/// \param WhitespaceStartColumn Column at which the whitespace begins; used to
/// find the distance to the next tab stop.
///
/// NOTE(review): the break statements and a few lines inside the UT_Always
/// branch are not visible in this excerpt.
514 void WhitespaceManager::appendIndentText(std::string &Text,
515 unsigned IndentLevel, unsigned Spaces,
516 unsigned WhitespaceStartColumn) {
517 switch (Style.UseTab) {
518 case FormatStyle::UT_Never:
519 Text.append(Spaces, ' ');
521 case FormatStyle::UT_Always: {
// Number of columns from the start column up to the next tab stop.
522 unsigned FirstTabWidth =
523 Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
524 // Indent with tabs only when there's at least one full tab.
525 if (FirstTabWidth + Style.TabWidth <= Spaces) {
526 Spaces -= FirstTabWidth;
// Whole tab widths become tabs; the remainder is emitted as spaces.
529 Text.append(Spaces / Style.TabWidth, '\t');
530 Text.append(Spaces % Style.TabWidth, ' ');
533 case FormatStyle::UT_ForIndentation:
// Tabs are only emitted for whitespace that starts in column 0.
534 if (WhitespaceStartColumn == 0) {
535 unsigned Indentation = IndentLevel * Style.IndentWidth;
536 // This happens, e.g. when a line in a block comment is indented less than
538 if (Indentation > Spaces)
539 Indentation = Spaces;
540 unsigned Tabs = Indentation / Style.TabWidth;
541 Text.append(Tabs, '\t');
542 Spaces -= Tabs * Style.TabWidth;
// Whatever is left after the tabs is emitted as spaces.
544 Text.append(Spaces, ' ');
549 } // namespace format