///
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "format-token-breaker"
+
#include "BreakableToken.h"
+#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
#include <algorithm>
namespace clang {
namespace format {
+namespace {
+
+// FIXME: Move helper string functions to where it makes sense.
+
+unsigned getOctalLength(StringRef Text) {
+ unsigned I = 1;
+ while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+ ++I;
+ }
+ return I;
+}
+
+unsigned getHexLength(StringRef Text) {
+ unsigned I = 2; // Point after '\x'.
+ while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+ (Text[I] >= 'a' && Text[I] <= 'f') ||
+ (Text[I] >= 'A' && Text[I] <= 'F'))) {
+ ++I;
+ }
+ return I;
+}
+
+unsigned getEscapeSequenceLength(StringRef Text) {
+ assert(Text[0] == '\\');
+ if (Text.size() < 2)
+ return 1;
-BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
- unsigned TailOffset,
- unsigned ColumnLimit) const {
- StringRef Text = getLine(LineIndex).substr(TailOffset);
- unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+ switch (Text[1]) {
+ case 'u':
+ return 6;
+ case 'U':
+ return 10;
+ case 'x':
+ return getHexLength(Text);
+ default:
+ if (Text[1] >= '0' && Text[1] <= '7')
+ return getOctalLength(Text);
+ return 2;
+ }
+}
+
+StringRef::size_type getStartOfCharacter(StringRef Text,
+ StringRef::size_type Offset) {
+ StringRef::size_type NextEscape = Text.find('\\');
+ while (NextEscape != StringRef::npos && NextEscape < Offset) {
+ StringRef::size_type SequenceLength =
+ getEscapeSequenceLength(Text.substr(NextEscape));
+ if (Offset < NextEscape + SequenceLength)
+ return NextEscape;
+ NextEscape = Text.find('\\', NextEscape + SequenceLength);
+ }
+ return Offset;
+}
+
+BreakableToken::Split getCommentSplit(StringRef Text,
+ unsigned ContentStartColumn,
+ unsigned ColumnLimit) {
if (ColumnLimit <= ContentStartColumn + 1)
- return Split(StringRef::npos, 0);
+ return BreakableToken::Split(StringRef::npos, 0);
unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
if (SpaceOffset == StringRef::npos ||
- Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
+ // Don't break at leading whitespace.
+ Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos)
SpaceOffset = Text.find(' ', MaxSplit);
- }
if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
return BreakableToken::Split(StringRef::npos, 0);
}
-void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
- Split Split, bool InPPDirective,
- WhitespaceManager &Whitespaces) {
- StringRef Text = getLine(LineIndex).substr(TailOffset);
- StringRef AdditionalPrefix = Decoration;
- if (Text.size() == Split.first + Split.second) {
- // For all but the last line handle trailing space in trimLine.
- if (LineIndex < Lines.size() - 1)
- return;
- // For the last line we need to break before "*/", but not to add "* ".
- AdditionalPrefix = "";
- }
+BreakableToken::Split getStringSplit(StringRef Text,
+ unsigned ContentStartColumn,
+ unsigned ColumnLimit) {
- unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
- unsigned CharsToRemove = Split.second;
- Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
- InPPDirective, IndentAtLineBreak);
+ if (ColumnLimit <= ContentStartColumn)
+ return BreakableToken::Split(StringRef::npos, 0);
+ unsigned MaxSplit = ColumnLimit - ContentStartColumn;
+ // FIXME: Reduce unit test case.
+ if (Text.empty())
+ return BreakableToken::Split(StringRef::npos, 0);
+ MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
+ StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+ if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+ return BreakableToken::Split(SpaceOffset + 1, 0);
+ StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+ if (SlashOffset != StringRef::npos && SlashOffset != 0)
+ return BreakableToken::Split(SlashOffset + 1, 0);
+ StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+ if (SplitPoint == StringRef::npos || SplitPoint == 0)
+ return BreakableToken::Split(StringRef::npos, 0);
+ return BreakableToken::Split(SplitPoint, 0);
}
-BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token,
- unsigned StartColumn)
- : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) {
- assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+} // namespace
+
+unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+
+unsigned
+BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
+ unsigned TailOffset) const {
+ return StartColumn + Prefix.size() + Postfix.size() + Line.size() -
+ TailOffset;
+}
+
+void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ bool InPPDirective,
+ WhitespaceManager &Whitespaces) {
+ Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
+ Split.second, Postfix, Prefix, InPPDirective,
+ StartColumn);
+}
+
+BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
+ unsigned StartColumn,
+ StringRef Prefix,
+ StringRef Postfix)
+ : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
+ Postfix(Postfix) {
+ assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
+ Line = Tok.TokenText.substr(
+ Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
+}
+
+BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
+ unsigned StartColumn)
+ : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
- OriginalStartColumn =
- SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1;
+BreakableToken::Split
+BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
+}
+static StringRef getLineCommentPrefix(StringRef Comment) {
+ const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+ for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
+ if (Comment.startswith(KnownPrefixes[i]))
+ return KnownPrefixes[i];
+ return "";
+}
+
+BreakableLineComment::BreakableLineComment(const FormatToken &Token,
+ unsigned StartColumn)
+ : BreakableSingleLineToken(Token, StartColumn,
+ getLineCommentPrefix(Token.TokenText), "") {}
+
+BreakableToken::Split
+BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+ ColumnLimit);
+}
+
+BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
+ const FormatToken &Token,
+ unsigned StartColumn,
+ unsigned OriginalStartColumn,
+ bool FirstInLine)
+ : BreakableToken(Token) {
+ StringRef TokenText(Token.TokenText);
+ assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
+ int IndentDelta = StartColumn - OriginalStartColumn;
bool NeedsStar = true;
- CommonPrefixLength = UINT_MAX;
- if (Lines.size() == 1) {
- if (Token.Parent == 0) {
- // Standalone block comments will be aligned and prefixed with *s.
- CommonPrefixLength = OriginalStartColumn + 1;
- } else {
- // Trailing comments can start on arbitrary column, and available
- // horizontal space can be too small to align consecutive lines with
- // the first one. We could, probably, align them to current
- // indentation level, but now we just wrap them without indentation
- // and stars.
- CommonPrefixLength = 0;
- NeedsStar = false;
+ LeadingWhitespace.resize(Lines.size());
+ StartOfLineColumn.resize(Lines.size());
+ if (Lines.size() == 1 && !FirstInLine) {
+ // Comments for which FirstInLine is false can start on arbitrary column,
+ // and available horizontal space can be too small to align consecutive
+ // lines with the first one.
+ // FIXME: We could, probably, align them to current indentation level, but
+ // now we just wrap them without stars.
+ NeedsStar = false;
+ }
+ StartOfLineColumn[0] = StartColumn + 2;
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ adjustWhitespace(Style, i, IndentDelta);
+ if (Lines[i].empty())
+ // If the last line is empty, the closing "*/" will have a star.
+ NeedsStar = NeedsStar && i + 1 == Lines.size();
+ else
+ NeedsStar = NeedsStar && Lines[i][0] == '*';
+ }
+ Decoration = NeedsStar ? "* " : "";
+ IndentAtLineBreak = StartOfLineColumn[0] + 1;
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ if (Lines[i].empty()) {
+ if (!NeedsStar && i + 1 != Lines.size())
+ // For all but the last line (which always ends in */), set the
+ // start column to 0 if they're empty, so we do not insert
+ // trailing whitespace anywhere.
+ StartOfLineColumn[i] = 0;
+ continue;
}
- } else {
- for (size_t i = 1; i < Lines.size(); ++i) {
- size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
- if (FirstNonWhitespace != StringRef::npos) {
- NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
- CommonPrefixLength =
- std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
- }
+ if (NeedsStar) {
+ // The first line already excludes the star.
+ // For all other lines, adjust the line to exclude the star and
+ // (optionally) the first whitespace.
+ int Offset = Lines[i].startswith("* ") ? 2 : 1;
+ StartOfLineColumn[i] += Offset;
+ Lines[i] = Lines[i].substr(Offset);
+ LeadingWhitespace[i] += Offset;
}
+ IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
}
- if (CommonPrefixLength == UINT_MAX)
- CommonPrefixLength = 0;
+ DEBUG({
+ for (size_t i = 0; i < Lines.size(); ++i) {
+ llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
+ << "\n";
+ }
+ });
+}
- Decoration = NeedsStar ? "* " : "";
+void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
+ unsigned LineIndex,
+ int IndentDelta) {
+ // Calculate the end of the non-whitespace text in the previous line.
+ size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
+ if (EndOfPreviousLine == StringRef::npos)
+ EndOfPreviousLine = 0;
+ else
+ ++EndOfPreviousLine;
+ // Calculate the start of the non-whitespace text in the current line.
+ size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
+ if (StartOfLine == StringRef::npos)
+ StartOfLine = Lines[LineIndex].size();
+ // FIXME: Tabs are not always 8 characters. Make configurable in the style.
+ unsigned Column = 0;
+ StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine);
+ for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) {
+ if (Lines[LineIndex][i] == '\t')
+ Column += 8 - (Column % 8);
+ else
+ ++Column;
+ }
- IndentAtLineBreak =
- std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+ // Adjust Lines to only contain relevant text.
+ Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
+ Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
+ // Adjust LeadingWhitespace to account all whitespace between the lines
+ // to the current line.
+ LeadingWhitespace[LineIndex] =
+ Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+ // Adjust the start column uniformly accross all lines.
+ StartOfLineColumn[LineIndex] = std::max<int>(0, Column + IndentDelta);
}
-void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
- SourceLocation TokenLoc = Tok.getStartOfNonWhitespace();
- int IndentDelta = (StartColumn - 2) - OriginalStartColumn;
- if (IndentDelta > 0) {
- std::string WhiteSpace(IndentDelta, ' ');
- for (size_t i = 1; i < Lines.size(); ++i) {
- Whitespaces.addReplacement(
- TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
- WhiteSpace);
- }
- } else if (IndentDelta < 0) {
- std::string WhiteSpace(-IndentDelta, ' ');
- // Check that the line is indented enough.
- for (size_t i = 1; i < Lines.size(); ++i) {
- if (!Lines[i].startswith(WhiteSpace))
- return;
- }
- for (size_t i = 1; i < Lines.size(); ++i) {
- Whitespaces.addReplacement(
- TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
- -IndentDelta, "");
- }
- }
+unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
- for (unsigned i = 1; i < Lines.size(); ++i)
- Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size());
+unsigned
+BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
+ unsigned TailOffset) const {
+ return getContentStartColumn(LineIndex, TailOffset) +
+ (Lines[LineIndex].size() - TailOffset) +
+ // The last line gets a "*/" postfix.
+ (LineIndex + 1 == Lines.size() ? 2 : 0);
}
-void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
- unsigned InPPDirective,
- WhitespaceManager &Whitespaces) {
- if (LineIndex == Lines.size() - 1)
- return;
+BreakableToken::Split
+BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const {
+ return getCommentSplit(Lines[LineIndex].substr(TailOffset),
+ getContentStartColumn(LineIndex, TailOffset),
+ ColumnLimit);
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+ Split Split, bool InPPDirective,
+ WhitespaceManager &Whitespaces) {
StringRef Text = Lines[LineIndex].substr(TailOffset);
+ StringRef Prefix = Decoration;
+ if (LineIndex + 1 == Lines.size() &&
+ Text.size() == Split.first + Split.second) {
+ // For the last line we need to break before "*/", but not to add "* ".
+ Prefix = "";
+ }
- // FIXME: The algorithm for trimming a line should naturally yield a
- // non-change if there is nothing to trim; removing this line breaks the
- // algorithm; investigate the root cause, and make sure to either document
- // why exactly this is needed for remove it.
- if (!Text.endswith(" ") && !InPPDirective)
+ unsigned BreakOffsetInToken =
+ Text.data() - Tok.TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
+ InPPDirective, IndentAtLineBreak - Decoration.size());
+}
+
+void
+BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
+ unsigned InPPDirective,
+ WhitespaceManager &Whitespaces) {
+ if (LineIndex == 0)
return;
+ StringRef Prefix = Decoration;
+ if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty())
+ Prefix = "";
- StringRef TrimmedLine = Text.rtrim();
- unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
- unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
- // FIXME: It seems like we're misusing the call to breakToken to remove
- // whitespace instead of breaking a token. We should make this an explicit
- // call option to the WhitespaceManager, or handle trimming and alignment
- // of comments completely within in the WhitespaceManger. Passing '0' here
- // and relying on this not breaking assumptions of the WhitespaceManager seems
- // like a bad idea.
- Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
- 0);
-}
-
-BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token,
- unsigned StartColumn)
- : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) {
- assert(TokenText.startswith("//"));
- Decoration = getLineCommentPrefix(TokenText);
- Lines.push_back(TokenText.substr(Decoration.size()));
- IndentAtLineBreak = StartColumn;
- this->StartColumn += Decoration.size(); // Start column of the contents.
+ unsigned WhitespaceOffsetInToken =
+ Lines[LineIndex].data() - Tok.TokenText.data() -
+ LeadingWhitespace[LineIndex];
+ Whitespaces.breakToken(
+ Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
+ InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
}
-StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) {
- const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
- for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
- if (Comment.startswith(KnownPrefixes[i]))
- return KnownPrefixes[i];
- return "";
+unsigned
+BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const {
+ // If we break, we always break at the predefined indent.
+ if (TailOffset != 0)
+ return IndentAtLineBreak;
+ return StartOfLineColumn[LineIndex];
}
} // namespace format
namespace clang {
namespace format {
+struct FormatStyle;
+
+/// \brief Base class for strategies on how to break tokens.
+///
+/// FIXME: The interface seems set in stone, so we might want to just pull the
+/// strategy into the class, instead of controlling it from the outside.
class BreakableToken {
public:
- BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok,
- unsigned StartColumn)
- : Tok(Tok), StartColumn(StartColumn),
- TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()),
- Tok.TokenLength) {}
+ // Contains starting character index and length of split.
+ typedef std::pair<StringRef::size_type, unsigned> Split;
+
virtual ~BreakableToken() {}
+
+ /// \brief Returns the number of lines in this token in the original code.
virtual unsigned getLineCount() const = 0;
- virtual unsigned getLineSize(unsigned Index) const = 0;
+
+ /// \brief Returns the rest of the length of the line at \p LineIndex,
+ /// when broken at \p TailOffset.
+ ///
+ /// Note that previous breaks are not taken into account. \p TailOffset
+ /// is always specified from the start of the (original) line.
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
unsigned TailOffset) const = 0;
- // Contains starting character index and length of split.
- typedef std::pair<StringRef::size_type, unsigned> Split;
+ /// \brief Returns a range (offset, length) at which to break the line at
+ /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
+ /// violate \p ColumnLimit.
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const = 0;
+
+ /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
bool InPPDirective,
WhitespaceManager &Whitespaces) = 0;
- virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
- unsigned InPPDirective,
- WhitespaceManager &Whitespaces) {}
+
+ /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+ virtual void replaceWhitespaceBefore(unsigned LineIndex,
+ unsigned InPPDirective,
+ WhitespaceManager &Whitespaces) {}
+
protected:
+ BreakableToken(const FormatToken &Tok) : Tok(Tok) {}
+
const FormatToken &Tok;
- unsigned StartColumn;
- StringRef TokenText;
};
-class BreakableStringLiteral : public BreakableToken {
+/// \brief Base class for single line tokens that can be broken.
+///
+/// \c getSplit() needs to be implemented by child classes.
+class BreakableSingleLineToken : public BreakableToken {
public:
- BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok,
- unsigned StartColumn)
- : BreakableToken(SourceMgr, Tok, StartColumn) {
- assert(TokenText.startswith("\"") && TokenText.endswith("\""));
- }
-
- virtual unsigned getLineCount() const { return 1; }
-
- virtual unsigned getLineSize(unsigned Index) const {
- return Tok.TokenLength - 2; // Should be in sync with getLine
- }
-
+ virtual unsigned getLineCount() const;
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
- unsigned TailOffset) const {
- return getDecorationLength() + getLine().size() - TailOffset;
- }
-
- virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
- unsigned ColumnLimit) const {
- StringRef Text = getLine().substr(TailOffset);
- if (ColumnLimit <= getDecorationLength())
- return Split(StringRef::npos, 0);
- unsigned MaxSplit = ColumnLimit - getDecorationLength();
- // FIXME: Reduce unit test case.
- if (Text.empty())
- return Split(StringRef::npos, 0);
- MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
- StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
- if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
- return Split(SpaceOffset + 1, 0);
- StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
- if (SlashOffset != StringRef::npos && SlashOffset != 0)
- return Split(SlashOffset + 1, 0);
- StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
- if (SplitPoint != StringRef::npos && SplitPoint > 1)
- // Do not split at 0.
- return Split(SplitPoint, 0);
- return Split(StringRef::npos, 0);
- }
-
+ unsigned TailOffset) const;
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- bool InPPDirective, WhitespaceManager &Whitespaces) {
- Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second,
- "\"", "\"", InPPDirective, StartColumn);
- }
+ bool InPPDirective, WhitespaceManager &Whitespaces);
-private:
- StringRef getLine() const {
- // Get string without quotes.
- // FIXME: Handle string prefixes.
- return TokenText.substr(1, TokenText.size() - 2);
- }
-
- unsigned getDecorationLength() const { return StartColumn + 2; }
-
- static StringRef::size_type getStartOfCharacter(StringRef Text,
- StringRef::size_type Offset) {
- StringRef::size_type NextEscape = Text.find('\\');
- while (NextEscape != StringRef::npos && NextEscape < Offset) {
- StringRef::size_type SequenceLength =
- getEscapeSequenceLength(Text.substr(NextEscape));
- if (Offset < NextEscape + SequenceLength)
- return NextEscape;
- NextEscape = Text.find('\\', NextEscape + SequenceLength);
- }
- return Offset;
- }
-
- static unsigned getEscapeSequenceLength(StringRef Text) {
- assert(Text[0] == '\\');
- if (Text.size() < 2)
- return 1;
-
- switch (Text[1]) {
- case 'u':
- return 6;
- case 'U':
- return 10;
- case 'x':
- return getHexLength(Text);
- default:
- if (Text[1] >= '0' && Text[1] <= '7')
- return getOctalLength(Text);
- return 2;
- }
- }
-
- static unsigned getHexLength(StringRef Text) {
- unsigned I = 2; // Point after '\x'.
- while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
- (Text[I] >= 'a' && Text[I] <= 'f') ||
- (Text[I] >= 'A' && Text[I] <= 'F'))) {
- ++I;
- }
- return I;
- }
-
- static unsigned getOctalLength(StringRef Text) {
- unsigned I = 1;
- while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
- ++I;
- }
- return I;
- }
+protected:
+ BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix);
+ // The column in which the token starts.
+ unsigned StartColumn;
+ // The prefix a line needs after a break in the token.
+ StringRef Prefix;
+ // The postfix a line needs before introducing a break.
+ StringRef Postfix;
+ // The token text excluding the prefix and postfix.
+ StringRef Line;
};
-class BreakableComment : public BreakableToken {
+class BreakableStringLiteral : public BreakableSingleLineToken {
public:
- virtual unsigned getLineSize(unsigned Index) const {
- return getLine(Index).size();
- }
-
- virtual unsigned getLineCount() const { return Lines.size(); }
-
- virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
- unsigned TailOffset) const {
- return getContentStartColumn(LineIndex, TailOffset) +
- getLine(LineIndex).size() - TailOffset;
- }
+ /// \brief Creates a breakable token for a single line string literal.
+ ///
+ /// \p StartColumn specifies the column in which the token will start
+ /// after formatting.
+ BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn);
virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
unsigned ColumnLimit) const;
- virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
- bool InPPDirective, WhitespaceManager &Whitespaces);
-
-protected:
- BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok,
- unsigned StartColumn)
- : BreakableToken(SourceMgr, Tok, StartColumn) {}
-
- // Get comment lines without /* */, common prefix and trailing whitespace.
- // Last line is not trimmed, as it is terminated by */, so its trailing
- // whitespace is not really trailing.
- StringRef getLine(unsigned Index) const {
- return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
- }
-
- unsigned getContentStartColumn(unsigned LineIndex,
- unsigned TailOffset) const {
- return (TailOffset == 0 && LineIndex == 0)
- ? StartColumn
- : IndentAtLineBreak + Decoration.size();
- }
-
- unsigned IndentAtLineBreak;
- StringRef Decoration;
- SmallVector<StringRef, 16> Lines;
};
-class BreakableBlockComment : public BreakableComment {
+class BreakableLineComment : public BreakableSingleLineToken {
public:
- BreakableBlockComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token, unsigned StartColumn);
+ /// \brief Creates a breakable token for a line comment.
+ ///
+ /// \p StartColumn specifies the column in which the comment will start
+ /// after formatting.
+ BreakableLineComment(const FormatToken &Token, unsigned StartColumn);
- void alignLines(WhitespaceManager &Whitespaces);
+ virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const;
+};
+class BreakableBlockComment : public BreakableToken {
+public:
+ /// \brief Creates a breakable token for a block comment.
+ ///
+ /// \p StartColumn specifies the column in which the comment will start
+ /// after formatting, while \p OriginalStartColumn specifies in which
+ /// column the comment started before formatting.
+ /// If the comment starts a line after formatting, set \p FirstInLine to true.
+ BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
+ unsigned StartColumn, unsigned OriginaStartColumn,
+ bool FirstInLine);
+
+ virtual unsigned getLineCount() const;
virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
- unsigned TailOffset) const {
- return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) +
- (LineIndex + 1 < Lines.size() ? 0 : 2);
- }
-
- virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
- unsigned InPPDirective, WhitespaceManager &Whitespaces);
+ unsigned TailOffset) const;
+ virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit) const;
+ virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ bool InPPDirective, WhitespaceManager &Whitespaces);
+ virtual void replaceWhitespaceBefore(unsigned LineIndex,
+ unsigned InPPDirective,
+ WhitespaceManager &Whitespaces);
private:
- unsigned OriginalStartColumn;
- unsigned CommonPrefixLength;
-};
+ // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
+ // so that all whitespace between the lines is accounted to Lines[LineIndex]
+ // as leading whitespace:
+ // - Lines[LineIndex] points to the text after that whitespace
+ // - Lines[LineIndex-1] shrinks by its trailing whitespace
+ // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
+ // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+ //
+ // Sets StartOfLineColumn to the intended column in which the text at
+ // Lines[LineIndex] starts (note that the decoration, if present, is not
+ // considered part of the text).
+ void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
+ int IndentDelta);
+
+ // Returns the column at which the text in line LineIndex starts, when broken
+ // at TailOffset. Note that the decoration (if present) is not considered part
+ // of the text.
+ unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
+
+ // Contains the text of the lines of the block comment, excluding the leading
+ // /* in the first line and trailing */ in the last line, and excluding all
+ // trailing whitespace between the lines. Note that the decoration (if
+ // present) is also not considered part of the text.
+ SmallVector<StringRef, 16> Lines;
-class BreakableLineComment : public BreakableComment {
-public:
- BreakableLineComment(const SourceManager &SourceMgr,
- const AnnotatedToken &Token, unsigned StartColumn);
+ // LeadingWhitespace[i] is the number of characters regarded as whitespace in
+ // front of Lines[i]. Note that this can include "* " sequences, which we
+ // regard as whitespace when all lines have a "*" prefix.
+ SmallVector<unsigned, 16> LeadingWhitespace;
+
+ // StartOfLineColumn[i] is the target column at which Line[i] should be.
+ // Note that this excludes a leading "* " or "*" in case all lines have
+ // a "*" prefix.
+ SmallVector<unsigned, 16> StartOfLineColumn;
+
+ // The column at which the text of a broken line should start.
+ // Note that an optional decoration would go before that column.
+ // IndentAtLineBreak is a uniform position for all lines in a block comment,
+ // regardless of their relative position.
+ // FIXME: Revisit the decision to do this; the main reason was to support
+ // patterns like
+ // /**************//**
+ // * Comment
+ // We could also support such patterns by special casing the first line
+ // instead.
+ unsigned IndentAtLineBreak;
-private:
- static StringRef getLineCommentPrefix(StringRef Comment);
+ // Either "* " if all lines begin with a "*", or empty.
+ StringRef Decoration;
};
} // namespace format
unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
llvm::OwningPtr<BreakableToken> Token;
unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+ unsigned OriginalStartColumn = SourceMgr.getSpellingColumnNumber(
+ Current.FormatTok.getStartOfNonWhitespace()) - 1;
+
if (Current.is(tok::string_literal) &&
Current.Type != TT_ImplicitStringLiteral) {
// Only break up default narrow strings.
if (!LiteralData || *LiteralData != '"')
return 0;
- Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok,
- StartColumn));
+ Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn));
} else if (Current.Type == TT_BlockComment) {
BreakableBlockComment *BBC =
- new BreakableBlockComment(SourceMgr, Current, StartColumn);
- if (!DryRun)
- BBC->alignLines(Whitespaces);
+ new BreakableBlockComment(Style, Current.FormatTok, StartColumn,
+ OriginalStartColumn, !Current.Parent);
Token.reset(BBC);
} else if (Current.Type == TT_LineComment &&
(Current.Parent == NULL ||
Current.Parent->Type != TT_ImplicitStringLiteral)) {
- Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
+ Token.reset(new BreakableLineComment(Current.FormatTok, StartColumn));
} else {
return 0;
}
bool BreakInserted = false;
unsigned Penalty = 0;
unsigned PositionAfterLastLineInToken = 0;
- for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
- ++LineIndex) {
+ for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
+ LineIndex != EndIndex; ++LineIndex) {
+ if (!DryRun) {
+ Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective,
+ Whitespaces);
+ }
unsigned TailOffset = 0;
unsigned RemainingTokenLength =
Token->getLineLengthAfterSplit(LineIndex, TailOffset);
assert(Split.first != 0);
unsigned NewRemainingTokenLength = Token->getLineLengthAfterSplit(
LineIndex, TailOffset + Split.first + Split.second);
- if (NewRemainingTokenLength >= RemainingTokenLength)
- break;
+ assert(NewRemainingTokenLength < RemainingTokenLength);
if (!DryRun) {
Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
Whitespaces);
BreakInserted = true;
}
PositionAfterLastLineInToken = RemainingTokenLength;
- if (!DryRun) {
- Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
- }
}
if (BreakInserted) {
class LexerBasedFormatTokenSource : public FormatTokenSource {
public:
LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
- : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
- IdentTable(Lex.getLangOpts()) {
+ : GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
+ SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()) {
Lex.SetKeepWhitespaceMode(true);
}
FormatTok = FormatToken();
Lex.LexFromRawLexer(FormatTok.Tok);
StringRef Text = rawTokenText(FormatTok.Tok);
- SourceLocation WhitespaceStart = FormatTok.Tok.getLocation();
+ SourceLocation WhitespaceStart =
+ FormatTok.Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
FormatTok.IsFirst = true;
// Consume and record whitespace until we find a significant token.
- unsigned WhitespaceLength = 0;
+ unsigned WhitespaceLength = TrailingWhitespace;
while (FormatTok.Tok.is(tok::unknown)) {
unsigned Newlines = Text.count('\n');
if (Newlines > 0)
// Now FormatTok is the next non-whitespace token.
FormatTok.TokenLength = Text.size();
+ TrailingWhitespace = 0;
if (FormatTok.Tok.is(tok::comment)) {
- FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size();
- FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength;
+ TrailingWhitespace = Text.size() - Text.rtrim().size();
+ FormatTok.TokenLength -= TrailingWhitespace;
}
// In case the token starts with escaped newlines, we want to
FormatTok.WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+ FormatTok.TokenText = StringRef(
+ SourceMgr.getCharacterData(FormatTok.getStartOfNonWhitespace()),
+ FormatTok.TokenLength);
return FormatTok;
}
private:
FormatToken FormatTok;
bool GreaterStashed;
+ unsigned TrailingWhitespace;
Lexer &Lex;
SourceManager &SourceMgr;
IdentifierTable IdentTable;
/// whitespace characters preceeding it.
struct FormatToken {
FormatToken()
- : NewlinesBefore(0), HasUnescapedNewline(false),
- LastNewlineOffset(0), TokenLength(0), IsFirst(false),
- MustBreakBefore(false), TrailingWhiteSpaceLength(0) {}
+ : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
+ TokenLength(0), IsFirst(false), MustBreakBefore(false) {}
/// \brief The \c Token.
Token Tok;
/// before the token.
bool MustBreakBefore;
- /// \brief Number of characters of trailing whitespace.
- unsigned TrailingWhiteSpaceLength;
-
/// \brief Returns actual token start location without leading escaped
/// newlines and whitespace.
///
SourceLocation getStartOfNonWhitespace() const {
return WhitespaceRange.getEnd();
}
+
+ /// \brief The raw text of the token.
+ ///
+ /// Contains the raw token text without leading whitespace and without leading
+ /// escaped newlines.
+ StringRef TokenText;
};
/// \brief An unwrapped line is a sequence of \c Token, that we would like to
true, Tok.FormatTok.WhitespaceRange,
Spaces, StartOfTokenColumn, Newlines, "", "", Tok.FormatTok.Tok.getKind(),
InPPDirective && !Tok.FormatTok.IsFirst));
-
- // Align line comments if they are trailing or if they continue other
- // trailing comments.
- // FIXME: Pull this out and generalize so it works the same way in broken
- // comments and unbroken comments with trailing whitespace.
- if (Tok.isTrailingComment()) {
- SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace()
- .getLocWithOffset(Tok.FormatTok.TokenLength);
- // Remove the comment's trailing whitespace.
- if (Tok.FormatTok.TrailingWhiteSpaceLength != 0)
- Replaces.insert(tooling::Replacement(
- SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, ""));
- }
}
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
tok::unknown, InPPDirective && !Tok.IsFirst));
}
-void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc,
- unsigned ReplaceChars, StringRef Text) {
- Replaces.insert(
- tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text));
-}
-
const tooling::Replacements &WhitespaceManager::generateReplacements() {
if (Changes.empty())
return Replaces;
/// \brief Returns all the \c Replacements created during formatting.
const tooling::Replacements &generateReplacements();
- /// \brief Replaces \p ReplaceChars after \p SourceLoc with \p Text.
- ///
- /// FIXME: This is currently used to align comments outside of the \c
- /// WhitespaceManager. Once this has been moved inside, get rid of this
- /// method.
- void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars,
- StringRef Text);
-
private:
/// \brief Represents a change before a token, a break inside a token,
/// or the layout of an unchanged token (or whitespace within).
" 1.1.1. to keep the formatting.\n"
" */"));
EXPECT_EQ("/*\n"
- " Don't try to outdent if there's not enough inentation.\n"
- " */",
+ "Don't try to outdent if there's not enough inentation.\n"
+ "*/",
format(" /*\n"
" Don't try to outdent if there's not enough inentation.\n"
" */"));
format("// A comment before a macro definition\n"
"#define a b",
getLLVMStyleWithColumns(20)));
+
+ EXPECT_EQ("/* A comment before\n"
+ " * a macro\n"
+ " * definition */\n"
+ "#define a b",
+ format("/* A comment before a macro definition */\n"
+ "#define a b",
+ getLLVMStyleWithColumns(20)));
+
+ EXPECT_EQ("/* some comment\n"
+ " * a comment\n"
+ "* that we break\n"
+ " * another comment\n"
+ "* we have to break\n"
+ "* a left comment\n"
+ " */",
+ format(" /* some comment\n"
+ " * a comment that we break\n"
+ " * another comment we have to break\n"
+ "* a left comment\n"
+ " */",
+ getLLVMStyleWithColumns(20)));
+
+ EXPECT_EQ("/*\n"
+ "\n"
+ "\n"
+ " */\n",
+ format(" /* \n"
+ " \n"
+ " \n"
+ " */\n"));
+}
+
+TEST_F(FormatTest, MultiLineCommentsInDefines) {
+ // FIXME: The line breaks are still suboptimal (current guess
+ // is that this is due to the token length being misused), but
+ // the comment handling is correct.
+ EXPECT_EQ("#define A( \\\n"
+ " x) /* \\\n"
+ "a comment \\\n"
+ "inside */ \\\n"
+ " f();",
+ format("#define A(x) /* \\\n"
+ " a comment \\\n"
+ " inside */ \\\n"
+ " f();",
+ getLLVMStyleWithColumns(17)));
+ EXPECT_EQ("#define A(x) /* \\\n"
+ " a \\\n"
+ " comment \\\n"
+ " inside \\\n"
+ " */ \\\n"
+ " f();",
+ format("#define A( \\\n"
+ " x) /* \\\n"
+ " a comment \\\n"
+ " inside */ \\\n"
+ " f();",
+ getLLVMStyleWithColumns(17)));
}
TEST_F(FormatTest, ParsesCommentsAdjacentToPPDirectives) {
" */", getLLVMStyleWithColumns(20)));
EXPECT_EQ("{\n"
" if (something) /* This is a\n"
- "long comment */\n"
+ " long\n"
+ " comment */\n"
" ;\n"
"}",
format("{\n"
"\t\t parameter2); \\\n"
"\t}",
Tab);
+ EXPECT_EQ("/*\n"
+ "\t a\t\tcomment\n"
+ "\t in multiple lines\n"
+ " */",
+ format(" /*\t \t \n"
+ " \t \t a\t\tcomment\t \t\n"
+ " \t \t in multiple lines\t\n"
+ " \t */",
+ Tab));
+ Tab.UseTab = false;
+ // FIXME: Change this test to a different tab size than
+ // 8 once configurable.
+ EXPECT_EQ("/*\n"
+ " a\t\tcomment\n"
+ " in multiple lines\n"
+ " */",
+ format(" /*\t \t \n"
+ " \t \t a\t\tcomment\t \t\n"
+ " \t \t in multiple lines\t\n"
+ " \t */",
+ Tab));
+
+ // FIXME: This is broken, as the spelling column number we
+ // get from the SourceManager counts tab as '1'.
+ // EXPECT_EQ("/* some\n"
+ // " comment */",
+ // format(" \t \t /* some\n"
+ // " \t \t comment */",
+ // Tab));
}
TEST_F(FormatTest, LinuxBraceBreaking) {