clang-format: [JS] nested and tagged template strings.

author Martin Probst <martin@probst.io>

Thu, 25 Aug 2016 10:13:21 +0000 (10:13 +0000)

committer Martin Probst <martin@probst.io>

Thu, 25 Aug 2016 10:13:21 +0000 (10:13 +0000)
author Martin Probst <martin@probst.io>
Thu, 25 Aug 2016 10:13:21 +0000 (10:13 +0000)
committer Martin Probst <martin@probst.io>
Thu, 25 Aug 2016 10:13:21 +0000 (10:13 +0000)
diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp

index 9778f84732d64a751c50d5a1657dae1da89140a7..8c795fb4d6d8f96c9048c07cc90382d01956a6d3 100644 (file)
--- a/lib/Format/FormatTokenLexer.cpp
+++ b/lib/Format/FormatTokenLexer.cpp
@@ -26,12 +26,11 @@ namespace format {
  FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
                                     const FormatStyle &Style,
                                     encoding::Encoding Encoding)
-    : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
-      LessStashed(false), Column(0), TrailingWhitespace(0),
-      SourceMgr(SourceMgr), ID(ID), Style(Style),
-      IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
-      Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
-      MacroBlockBeginRegex(Style.MacroBlockBegin),
+    : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
+      Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+      Style(Style), IdentTable(getFormattingLangOpts(Style)),
+      Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
+      FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
        MacroBlockEndRegex(Style.MacroBlockEnd) {
    Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
                        getFormattingLangOpts(Style)));
@@ -49,7 +48,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
      Tokens.push_back(getNextToken());
      if (Style.Language == FormatStyle::LK_JavaScript) {
        tryParseJSRegexLiteral();
-      tryParseTemplateString();
+      handleTemplateStrings();
      }
      tryMergePreviousTokens();
      if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
@@ -228,17 +227,42 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
    resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
  }
  
-void FormatTokenLexer::tryParseTemplateString() {
+void FormatTokenLexer::handleTemplateStrings() {
    FormatToken *BacktickToken = Tokens.back();
-  if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+
+  if (BacktickToken->is(tok::l_brace)) {
+    StateStack.push(LexerState::NORMAL);
      return;
+  }
+  if (BacktickToken->is(tok::r_brace)) {
+    StateStack.pop();
+    if (StateStack.top() != LexerState::TEMPLATE_STRING)
+      return;
+    // If back in TEMPLATE_STRING, fall through and keep lexing the template.
+  } else if (BacktickToken->is(tok::unknown) &&
+             BacktickToken->TokenText == "`") {
+    StateStack.push(LexerState::TEMPLATE_STRING);
+  } else {
+    return; // Not actually a template
+  }
  
    // 'Manually' lex ahead in the current file buffer.
    const char *Offset = Lex->getBufferLocation();
    const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
-  for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
-    if (*Offset == '\\')
+  for (; Offset != Lex->getBuffer().end(); ++Offset) {
+    if (Offset[0] == '`') {
+      StateStack.pop();
+      break;
+    }
+    if (Offset[0] == '\\') {
        ++Offset; // Skip the escaped character.
+    } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
+               Offset[1] == '{') {
+      // '${' introduces an expression interpolation in the template string.
+      StateStack.push(LexerState::NORMAL);
+      ++Offset;
+      break;
+    }
    }
  
    StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
@@ -262,7 +286,10 @@ void FormatTokenLexer::tryParseTemplateString() {
          Style.TabWidth, Encoding);
    }
  
-  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+  SourceLocation loc = Offset < Lex->getBuffer().end()
+                           ? Lex->getSourceLocation(Offset + 1)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(loc));
  }
  
  bool FormatTokenLexer::tryMerge_TMacro() {
@@ -384,12 +411,8 @@ FormatToken *FormatTokenLexer::getStashedToken() {
  }
  
  FormatToken *FormatTokenLexer::getNextToken() {
-  if (GreaterStashed) {
-    GreaterStashed = false;
-    return getStashedToken();
-  }
-  if (LessStashed) {
-    LessStashed = false;
+  if (StateStack.top() == LexerState::TOKEN_STASHED) {
+    StateStack.pop();
      return getStashedToken();
    }
  
@@ -500,11 +523,11 @@ FormatToken *FormatTokenLexer::getNextToken() {
    } else if (FormatTok->Tok.is(tok::greatergreater)) {
      FormatTok->Tok.setKind(tok::greater);
      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
-    GreaterStashed = true;
+    StateStack.push(LexerState::TOKEN_STASHED);
    } else if (FormatTok->Tok.is(tok::lessless)) {
      FormatTok->Tok.setKind(tok::less);
      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
-    LessStashed = true;
+    StateStack.push(LexerState::TOKEN_STASHED);
    }
  
    // Now FormatTok is the next non-whitespace token.
diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h

index fa8c8882574f7be9652314cee16e49d59a173d81..c47b0e725d3662e7941a7ed86bc40665be93aa47 100644 (file)
--- a/lib/Format/FormatTokenLexer.h
+++ b/lib/Format/FormatTokenLexer.h
@@ -23,9 +23,17 @@
  #include "clang/Format/Format.h"
  #include "llvm/Support/Regex.h"
  
+#include <stack>
+
  namespace clang {
  namespace format {
  
+enum LexerState {
+  NORMAL,
+  TEMPLATE_STRING,
+  TOKEN_STASHED,
+};
+
  class FormatTokenLexer {
  public:
    FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
@@ -53,7 +61,16 @@ private:
    // its text if successful.
    void tryParseJSRegexLiteral();
  
-  void tryParseTemplateString();
+  // Handles JavaScript template strings.
+  //
+  // JavaScript template strings use backticks ('`') as delimiters, and allow
+  // embedding expressions nested in ${expr-here}. Template strings can be
+  // nested recursively, i.e. expressions can contain template strings in turn.
+  //
+  // The code below parses starting from a backtick, up to a closing backtick or
+  // an opening ${. It also maintains a stack of lexing contexts to handle
+  // nested template parts by balancing curly braces.
+  void handleTemplateStrings();
  
    bool tryMerge_TMacro();
  
@@ -65,7 +82,7 @@ private:
  
    FormatToken *FormatTok;
    bool IsFirstToken;
-  bool GreaterStashed, LessStashed;
+  std::stack<LexerState> StateStack;
    unsigned Column;
    unsigned TrailingWhitespace;
    std::unique_ptr<Lexer> Lex;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp

index 081ea98d9ccd8e5424bdc9cc3f5dfb98b4bd3739..7f201119a79c6e9483b296975f700221f762675d 100644 (file)
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -858,7 +858,7 @@ private:
      if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
                                 TT_FunctionLBrace, TT_ImplicitStringLiteral,
                                 TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
-                               TT_RegexLiteral))
+                               TT_RegexLiteral, TT_TemplateString))
        CurrentToken->Type = TT_Unknown;
      CurrentToken->Role.reset();
      CurrentToken->MatchingParen = nullptr;
@@ -1816,6 +1816,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
        return 100;
      if (Left.is(TT_JsTypeColon))
        return 35;
+    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+      return 100;
    }
  
    if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
@@ -2114,6 +2117,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
    } else if (Style.Language == FormatStyle::LK_JavaScript) {
      if (Left.is(TT_JsFatArrow))
        return true;
+    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+      return false;
+    if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
+      return false;
      if (Right.is(tok::star) &&
          Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
        return false;
diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp

index f6e8ff2f385e3217d87cc06b73cb8142d01bd188..60063934cd9acf4f1e4c9e8844cb0cb8a67bc5a5 100644 (file)
--- a/unittests/Format/FormatTestJS.cpp
+++ b/unittests/Format/FormatTestJS.cpp
@@ -1122,7 +1122,7 @@ TEST_F(FormatTestJS, ImportWrapping) {
  TEST_F(FormatTestJS, TemplateStrings) {
    // Keeps any whitespace/indentation within the template string.
    verifyFormat("var x = `hello\n"
-            "     ${  name    }\n"
+            "     ${name}\n"
              "  !`;",
              "var x    =    `hello\n"
                     "     ${  name    }\n"
@@ -1206,6 +1206,18 @@ TEST_F(FormatTestJS, TemplateStrings) {
                 "var y;",
                 "var x = ` \\` a`;\n"
                 "var y;");
+  // Escaped dollar.
+  verifyFormat("var x = ` \\${foo}`;\n");
+}
+
+TEST_F(FormatTestJS, NestedTemplateStrings) {
+  verifyFormat(
+      "var x = `<ul>${xs.map(x => `<li>${x}</li>`).join('\\n')}</ul>`;");
+  verifyFormat("var x = `he${({text: 'll'}.text)}o`;");
+}
+
+TEST_F(FormatTestJS, TaggedTemplateStrings) {
+  verifyFormat("var x = html`<ul>`;");
  }
  
  TEST_F(FormatTestJS, CastSyntax) {
author	Martin Probst <martin@probst.io>
	Thu, 25 Aug 2016 10:13:21 +0000 (10:13 +0000)
committer	Martin Probst <martin@probst.io>
	Thu, 25 Aug 2016 10:13:21 +0000 (10:13 +0000)
lib/Format/FormatTokenLexer.cpp		patch \| blob \| history
lib/Format/FormatTokenLexer.h		patch \| blob \| history
lib/Format/TokenAnnotator.cpp		patch \| blob \| history
unittests/Format/FormatTestJS.cpp		patch \| blob \| history