From 6f6154c5f5976e3e57f34f6a755bdfa95b7ff745 Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Tue, 10 Sep 2013 12:29:48 +0000 Subject: [PATCH] Correctly calculate OriginalColumn after multi-line tokens. Summary: This also unifies the handling of escaped newlines for all tokens. Reviewers: djasper Reviewed By: djasper CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1638 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@190405 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/ContinuationIndenter.cpp | 26 ++++++--------------- lib/Format/ContinuationIndenter.h | 5 ++-- lib/Format/Format.cpp | 27 +++++++++++----------- unittests/Format/FormatTest.cpp | 36 ++++++++++++++++++++++++++--- 4 files changed, 55 insertions(+), 39 deletions(-) diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 0b380041df..f1924c3776 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -611,9 +611,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, return Penalty; } -unsigned -ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current, - LineState &State) { +unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, + LineState &State) { // Break before further function parameters on all levels. for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; @@ -631,6 +630,11 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current, unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { + // Don't break multi-line tokens other than block comments. Instead, just + // update the state. + if (Current.Type != TT_BlockComment && Current.IsMultiline) + return addMultilineToken(Current, State); + if (!Current.isOneOf(tok::string_literal, tok::comment)) return 0; @@ -639,12 +643,6 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, if (Current.is(tok::string_literal) && Current.Type != TT_ImplicitStringLiteral) { - // Don't break string literals with (in case of non-raw strings, escaped) - // newlines. As clang-format must not change the string's content, it is - // unlikely that we'll end up with a better format. - if (Current.IsMultiline) - return addMultilineStringLiteral(Current, State); - // Only break up default narrow strings. if (!Current.TokenText.startswith("\"")) return 0; @@ -662,16 +660,6 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, } else if (Current.Type == TT_LineComment && (Current.Previous == NULL || Current.Previous->Type != TT_ImplicitStringLiteral)) { - // Don't break line comments with escaped newlines. These look like - // separate line comments, but in fact contain a single line comment with - // multiple lines including leading whitespace and the '//' markers. - // - // FIXME: If we want to handle them correctly, we'll need to adjust - // leading whitespace in consecutive lines when changing indentation of - // the first line similar to what we do with block comments. - if (Current.IsMultiline) - return 0; - Token.reset(new BreakableLineComment( Current, StartColumn, State.Line->InPPDirective, Encoding, Style)); } else { diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index ef698a7a52..608a827194 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -84,13 +84,12 @@ private: unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun); - /// \brief Adds a multiline string literal to the \p State. + /// \brief Adds a multiline token to the \p State. /// /// \returns Extra penalty for the first line of the literal: last line is /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't /// matter, as we don't change them. - unsigned addMultilineStringLiteral(const FormatToken &Current, - LineState &State); + unsigned addMultilineToken(const FormatToken &Current, LineState &State); /// \brief Returns \c true if the next token starts a multiline string /// literal. diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 3a0802a460..07c6cf974d 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -656,8 +656,6 @@ private: // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. - // FIXME: What do we want to do with other escaped spaces, and escaped - // spaces or newlines in the middle of tokens? // FIXME: Add a more explicit test. while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && FormatTok->TokenText[1] == '\n') { @@ -692,26 +690,27 @@ private: StringRef Text = FormatTok->TokenText; size_t FirstNewlinePos = Text.find('\n'); - if (FirstNewlinePos != StringRef::npos) { + if (FirstNewlinePos == StringRef::npos) { + // FIXME: ColumnWidth actually depends on the start column, we need to + // take this into account when the token is moved. + FormatTok->ColumnWidth = + encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); + Column += FormatTok->ColumnWidth; + } else { FormatTok->IsMultiline = true; + // FIXME: ColumnWidth actually depends on the start column, we need to + // take this into account when the token is moved. + FormatTok->ColumnWidth = encoding::columnWidthWithTabs( + Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); + // The last line of the token always starts in column 0. // Thus, the length can be precomputed even in the presence of tabs. FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding); - Text = Text.substr(0, FirstNewlinePos); + Column = FormatTok->LastLineColumnWidth; } - // FIXME: ColumnWidth actually depends on the start column, we need to - // take this into account when the token is moved. - FormatTok->ColumnWidth = - encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); - - // FIXME: For multi-line tokens this should be LastLineColumnWidth. - // For line comments this should probably be zero. But before changing, - // we need good tests for this. - Column += FormatTok->ColumnWidth; - return FormatTok; } diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index a27d193cb1..c7991b4711 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -5736,9 +5736,6 @@ TEST_F(FormatTest, ConfigurableUseOfTab) { "};", Tab); - // FIXME: To correctly count mixed whitespace we need to - // also correctly count mixed whitespace in front of the comment. - Tab.TabWidth = 8; Tab.IndentWidth = 8; EXPECT_EQ("/*\n" @@ -5795,6 +5792,39 @@ TEST_F(FormatTest, ConfigurableUseOfTab) { "}")); } +TEST_F(FormatTest, CalculatesOriginalColumn) { + EXPECT_EQ("\"qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n" + "q\"; /* some\n" + " comment */", + format(" \"qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n" + "q\"; /* some\n" + " comment */", + getLLVMStyle())); + EXPECT_EQ("// qqqqqqqqqqqqqqqqqqqqqqqqqq\n" + "/* some\n" + " comment */", + format("// qqqqqqqqqqqqqqqqqqqqqqqqqq\n" + " /* some\n" + " comment */", + getLLVMStyle())); + EXPECT_EQ("// qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n" + "qqq\n" + "/* some\n" + " comment */", + format("// qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n" + "qqq\n" + " /* some\n" + " comment */", + getLLVMStyle())); + EXPECT_EQ("inttt qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\\\n" + "wwww; /* some\n" + " comment */", + format(" inttt qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\\\n" + "wwww; /* some\n" + " comment */", + getLLVMStyle())); +} + TEST_F(FormatTest, ConfigurableSpaceAfterControlStatementKeyword) { FormatStyle NoSpace = getLLVMStyle(); NoSpace.SpaceAfterControlStatementKeyword = false; -- 2.40.0