From: Daniel Jasper Date: Fri, 11 Oct 2013 19:45:02 +0000 (+0000) Subject: clang-format: Don't remove 'unknown' tokens. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1d82b1a33bcfe85f4834fb6920517ed07e9355d3;p=clang clang-format: Don't remove 'unknown' tokens. In certain macros or incorrect string literals, the token stream can contain 'unknown' tokens, e.g. a single backslash or a set of empty ticks. clang-format simply treated them as whitespace and removed them prior to this patch. This fixes llvm.org/PR17215 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@192490 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 129c119532..7fc66167f0 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -691,18 +691,32 @@ private: FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; Column = 0; break; + case '\r': + case '\f': + case '\v': + Column = 0; + break; case ' ': ++Column; break; case '\t': Column += Style.TabWidth - Column % Style.TabWidth; break; + case '\\': + ++Column; + if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' && + FormatTok->TokenText[i + 1] != '\n')) + FormatTok->Type = TT_ImplicitStringLiteral; + break; default: + FormatTok->Type = TT_ImplicitStringLiteral; ++Column; break; } } + if (FormatTok->Type == TT_ImplicitStringLiteral) + break; WhitespaceLength += FormatTok->Tok.getLength(); readRawToken(*FormatTok); diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 87549895b1..a564d52cde 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -525,7 +525,8 @@ private: // Reset token type in case we have already looked at it and then recovered // from an error (e.g. failure to find the matching >). - if (CurrentToken != NULL && CurrentToken->Type != TT_LambdaLSquare) + if (CurrentToken != NULL && CurrentToken->Type != TT_LambdaLSquare && + CurrentToken->Type != TT_ImplicitStringLiteral) CurrentToken->Type = TT_Unknown; } @@ -1292,6 +1293,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Tok) { if (Tok.Tok.getIdentifierInfo() && Tok.Previous->Tok.getIdentifierInfo()) return true; // Never ever merge two identifiers. + if (Tok.Previous->Type == TT_ImplicitStringLiteral) + return Tok.WhitespaceRange.getBegin() != Tok.WhitespaceRange.getEnd(); if (Line.Type == LT_ObjCMethodDecl) { if (Tok.Previous->Type == TT_ObjCMethodSpecifier) return true; diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp index c77616c96b..59596bc543 100644 --- a/unittests/Format/FormatTest.cpp +++ b/unittests/Format/FormatTest.cpp @@ -1890,6 +1890,16 @@ TEST_F(FormatTest, EndOfFileEndsPPDirective) { EXPECT_EQ("#define A B", format("# \\\n define \\\n A \\\n B")); } +TEST_F(FormatTest, DoesntRemoveUnknownTokens) { + verifyFormat("#define A \\x20"); + verifyFormat("#define A \\ x20"); + EXPECT_EQ("#define A \\ x20", format("#define A \\ x20")); + verifyFormat("#define A ''"); + verifyFormat("#define A ''qqq"); + verifyFormat("#define A `qqq"); + verifyFormat("f(\"aaaa, bbbb, \"\\\"ccccc\\\"\");"); +} + TEST_F(FormatTest, IndentsPPDirectiveInReducedSpace) { verifyFormat("#define A(BB)", getLLVMStyleWithColumns(13)); verifyFormat("#define A( \\\n BB)", getLLVMStyleWithColumns(12));