From 33a00075e1c1ec80650d190bd61f4bc9339de192 Mon Sep 17 00:00:00 2001 From: Daniel Jasper Date: Thu, 8 May 2014 07:01:45 +0000 Subject: [PATCH] clang-format: [JS] Initial support for regex literals. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@208281 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Format/Format.cpp | 43 +++++++++++++++++-- lib/Format/FormatToken.h | 1 + lib/Format/TokenAnnotator.cpp | 7 ++-- unittests/Format/FormatTestJS.cpp | 70 +++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 7 deletions(-) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index efb790b359..c57993386f 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -1215,10 +1215,13 @@ private: return; if (Style.Language == FormatStyle::LK_JavaScript) { - static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal }; - static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal }; - static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater, - tok::greaterequal }; + if (tryMergeJSRegexLiteral()) + return; + + static tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; + static tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, tok::equal}; + static tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, + tok::greaterequal}; // FIXME: We probably need to change token type to mimic operator with the // correct priority. if (tryMergeTokens(JSIdentity)) @@ -1252,6 +1255,38 @@ private: return true; } + // Try to determine whether the current token ends a JavaScript regex literal. + // We heuristically assume that this is a regex literal if we find two + // unescaped slashes on a line and the token before the first slash is one of + // "(;,{}![:?" or a binary operator, as those cannot be followed by a + // division. + bool tryMergeJSRegexLiteral() { + if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) || + Tokens[Tokens.size() - 2]->is(tok::unknown)) + return false; + unsigned TokenCount = 0; + unsigned LastColumn = Tokens.back()->OriginalColumn; + for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { + ++TokenCount; + if (I[0]->is(tok::slash) && I + 1 != E && + (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, + tok::r_brace, tok::exclaim, tok::l_square, + tok::colon, tok::comma, tok::question) || + I[1]->isBinaryOperator())) { + Tokens.resize(Tokens.size() - TokenCount); + Tokens.back()->Tok.setKind(tok::unknown); + Tokens.back()->Type = TT_RegexLiteral; + Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn; + return true; + } + + // There can't be a newline inside a regex literal. + if (I[0]->NewlinesBefore > 0) + return false; + } + return false; + } + bool tryMerge_TMacro() { if (Tokens.size() < 4) return false; diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index 749dfcb3e4..056d4bdd1b 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -60,6 +60,7 @@ enum TokenType { TT_PointerOrReference, TT_PureVirtualSpecifier, TT_RangeBasedForLoopColon, + TT_RegexLiteral, TT_StartOfName, TT_TemplateCloser, TT_TemplateOpener, diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 21cec905a8..2f8457c61b 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -609,6 +609,7 @@ private: if (CurrentToken->Type != TT_LambdaLSquare && CurrentToken->Type != TT_FunctionLBrace && CurrentToken->Type != TT_ImplicitStringLiteral && + CurrentToken->Type != TT_RegexLiteral && CurrentToken->Type != TT_TrailingReturnArrow) CurrentToken->Type = TT_Unknown; if (CurrentToken->Role) @@ -622,10 +623,8 @@ private: determineTokenType(*CurrentToken); CurrentToken->BindingStrength = Contexts.back().BindingStrength; CurrentToken->NestingLevel = Contexts.size() - 1; - } - - if (CurrentToken != NULL) CurrentToken = CurrentToken->Next; + } resetTokenMetadata(CurrentToken); } @@ -1555,6 +1554,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; if (Tok.Type == TT_TrailingUnaryOperator) return false; + if (Tok.Previous->Type == TT_RegexLiteral) + return false; return spaceRequiredBetween(Line, *Tok.Previous, Tok); } diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp index a94ed4707d..52c85d3a9a 100644 --- a/unittests/Format/FormatTestJS.cpp +++ b/unittests/Format/FormatTestJS.cpp @@ -111,5 +111,75 @@ TEST_F(FormatTestJS, ClosureStyleComments) { verifyFormat("var x = /** @type {foo} */ (bar);"); } +TEST_F(FormatTestJS, RegexLiteralClassification) { + // Regex literals. + verifyFormat("var regex = /abc/;"); + verifyFormat("f(/abc/);"); + verifyFormat("f(abc, /abc/);"); + verifyFormat("some_map[/abc/];"); + verifyFormat("var x = a ? /abc/ : /abc/;"); + verifyFormat("for (var i = 0; /abc/.test(s[i]); i++) {\n}"); + verifyFormat("var x = !/abc/.test(y);"); + verifyFormat("var x = a && /abc/.test(y);"); + verifyFormat("var x = a || /abc/.test(y);"); + verifyFormat("var x = a + /abc/.search(y);"); + + // Not regex literals. + verifyFormat("var a = a / 2 + b / 3;"); +} + +TEST_F(FormatTestJS, RegexLiteralSpecialCharacters) { + verifyFormat("var regex = /a*/;"); + verifyFormat("var regex = /a+/;"); + verifyFormat("var regex = /a?/;"); + verifyFormat("var regex = /.a./;"); + verifyFormat("var regex = /a\\*/;"); + verifyFormat("var regex = /^a$/;"); + verifyFormat("var regex = /\\/a/;"); + verifyFormat("var regex = /(?:x)/;"); + verifyFormat("var regex = /x(?=y)/;"); + verifyFormat("var regex = /x(?!y)/;"); + verifyFormat("var regex = /x|y/;"); + verifyFormat("var regex = /a{2}/;"); + verifyFormat("var regex = /a{1,3}/;"); + verifyFormat("var regex = /[abc]/;"); + verifyFormat("var regex = /[^abc]/;"); + verifyFormat("var regex = /[\\b]/;"); + verifyFormat("var regex = /\\b/;"); + verifyFormat("var regex = /\\B/;"); + verifyFormat("var regex = /\\d/;"); + verifyFormat("var regex = /\\D/;"); + verifyFormat("var regex = /\\f/;"); + verifyFormat("var regex = /\\n/;"); + verifyFormat("var regex = /\\r/;"); + verifyFormat("var regex = /\\s/;"); + verifyFormat("var regex = /\\S/;"); + verifyFormat("var regex = /\\t/;"); + verifyFormat("var regex = /\\v/;"); + verifyFormat("var regex = /\\w/;"); + verifyFormat("var regex = /\\W/;"); + verifyFormat("var regex = /a(a)\\1/;"); + verifyFormat("var regex = /\\0/;"); +} + +TEST_F(FormatTestJS, RegexLiteralModifiers) { + verifyFormat("var regex = /abc/g;"); + verifyFormat("var regex = /abc/i;"); + verifyFormat("var regex = /abc/m;"); + verifyFormat("var regex = /abc/y;"); +} + +TEST_F(FormatTestJS, RegexLiteralLength) { + verifyFormat("var regex = /aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/;", + getGoogleJSStyleWithColumns(60)); + verifyFormat("var regex =\n" + " /aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/;", + getGoogleJSStyleWithColumns(60)); +} + +TEST_F(FormatTestJS, RegexLiteralExamples) { + verifyFormat("var regex = search.match(/(?:\?|&)times=([^?&]+)/i);"); +} + } // end namespace tooling } // end namespace clang -- 2.40.0