clang-format: [JS] Support regex literals with trailing escaped slash.

author Daniel Jasper <djasper@google.com>

Tue, 9 Sep 2014 14:37:39 +0000 (14:37 +0000)

committer Daniel Jasper <djasper@google.com>

Tue, 9 Sep 2014 14:37:39 +0000 (14:37 +0000)
author Daniel Jasper <djasper@google.com>
Tue, 9 Sep 2014 14:37:39 +0000 (14:37 +0000)
committer Daniel Jasper <djasper@google.com>
Tue, 9 Sep 2014 14:37:39 +0000 (14:37 +0000)
diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h

index 8665b791998d80ab9997759cdc556700ffe8f2ad..a6d17542e4cef4a961b8733ba1d2e8dfd2fec08c 100644 (file)
--- a/include/clang/Format/Format.h
+++ b/include/clang/Format/Format.h
@@ -487,14 +487,21 @@ std::string configurationAsText(const FormatStyle &Style);
  /// \brief Reformats the given \p Ranges in the token stream coming out of
  /// \c Lex.
  ///
+/// DEPRECATED: Do not use.
+tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
+                               SourceManager &SourceMgr,
+                               std::vector<CharSourceRange> Ranges);
+
+/// \brief Reformats the given \p Ranges in the file \p ID.
+///
  /// Each range is extended on either end to its next bigger logic unit, i.e.
  /// everything that might influence its formatting or might be influenced by its
  /// formatting.
  ///
  /// Returns the \c Replacements necessary to make all \p Ranges comply with
  /// \p Style.
-tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
-                               SourceManager &SourceMgr,
+tooling::Replacements reformat(const FormatStyle &Style,
+                               SourceManager &SourceMgr, FileID ID,
                                 std::vector<CharSourceRange> Ranges);
  
  /// \brief Reformats the given \p Ranges in \p Code.
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp

index c7bcc670ac7b1d83045d35a8f170e65c38b2a2e3..7e243b6e0ed605d39700120462aa8456633075cf 100644 (file)
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -1273,13 +1273,16 @@ private:
  
  class FormatTokenLexer {
  public:
-  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
+  FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
                     encoding::Encoding Encoding)
        : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
-        Column(0), TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr),
-        Style(Style), IdentTable(getFormattingLangOpts(Style)),
-        Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false) {
-    Lex.SetKeepWhitespaceMode(true);
+        Column(0), TrailingWhitespace(0),
+        SourceMgr(SourceMgr), ID(ID), Style(Style),
+        IdentTable(getFormattingLangOpts(Style)), Encoding(Encoding),
+        FirstInLineIndex(0), FormattingDisabled(false) {
+    Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
+                        getFormattingLangOpts(Style)));
+    Lex->SetKeepWhitespaceMode(true);
  
      for (const std::string &ForEachMacro : Style.ForEachMacros)
        ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
@@ -1308,10 +1311,10 @@ private:
        return;
  
      if (Style.Language == FormatStyle::LK_JavaScript) {
-      if (tryMergeEscapeSequence())
-        return;
        if (tryMergeJSRegexLiteral())
          return;
+      if (tryMergeEscapeSequence())
+        return;
  
        static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
        static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
@@ -1376,9 +1379,18 @@ private:
    // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
    // a division.
    bool tryMergeJSRegexLiteral() {
-    if (Tokens.size() < 2 || Tokens.back()->isNot(tok::slash) ||
-        (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
-         Tokens[Tokens.size() - 2]->TokenText == "\\"))
+    if (Tokens.size() < 2)
+      return false;
+    // If a regex literal ends in "\//", this gets represented by an unknown
+    // token "\" and a comment.
+    bool MightEndWithEscapedSlash =
+        Tokens.back()->is(tok::comment) &&
+        Tokens.back()->TokenText.startswith("//") &&
+        Tokens[Tokens.size() - 2]->TokenText == "\\";
+    if (!MightEndWithEscapedSlash &&
+        (Tokens.back()->isNot(tok::slash) ||
+         (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
+          Tokens[Tokens.size() - 2]->TokenText == "\\")))
        return false;
      unsigned TokenCount = 0;
      unsigned LastColumn = Tokens.back()->OriginalColumn;
@@ -1389,6 +1401,17 @@ private:
                           tok::exclaim, tok::l_square, tok::colon, tok::comma,
                           tok::question, tok::kw_return) ||
             I[1]->isBinaryOperator())) {
+        if (MightEndWithEscapedSlash) {
+          StringRef Buffer = SourceMgr.getBufferData(ID);
+          // This regex literal ends in '\//'. Skip past the '//' of the last
+          // token and re-start lexing from there.
+          int offset =
+              SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 2;
+          Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
+                              getFormattingLangOpts(Style), Buffer.begin(),
+                              Buffer.begin() + offset, Buffer.end()));
+          Lex->SetKeepWhitespaceMode(true);
+        }
          Tokens.resize(Tokens.size() - TokenCount);
          Tokens.back()->Tok.setKind(tok::unknown);
          Tokens.back()->Type = TT_RegexLiteral;
@@ -1641,8 +1664,9 @@ private:
    bool GreaterStashed;
    unsigned Column;
    unsigned TrailingWhitespace;
-  Lexer &Lex;
+  std::unique_ptr<Lexer> Lex;
    SourceManager &SourceMgr;
+  FileID ID;
    FormatStyle &Style;
    IdentifierTable IdentTable;
    encoding::Encoding Encoding;
@@ -1655,7 +1679,7 @@ private:
    bool FormattingDisabled;
  
    void readRawToken(FormatToken &Tok) {
-    Lex.LexFromRawLexer(Tok.Tok);
+    Lex->LexFromRawLexer(Tok.Tok);
      Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                                Tok.Tok.getLength());
      // For formatting, treat unterminated string literals like normal string
@@ -1692,12 +1716,13 @@ static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
  
  class Formatter : public UnwrappedLineConsumer {
  public:
-  Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
+  Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
              const std::vector<CharSourceRange> &Ranges)
-      : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
-        Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
+      : Style(Style), ID(ID), SourceMgr(SourceMgr),
+        Whitespaces(SourceMgr, Style,
+                    inputUsesCRLF(SourceMgr.getBufferData(ID))),
          Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
-        Encoding(encoding::detectEncoding(Lex.getBuffer())) {
+        Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
      DEBUG(llvm::dbgs() << "File encoding: "
                         << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
                                                                 : "unknown")
@@ -1708,7 +1733,7 @@ public:
  
    tooling::Replacements format() {
      tooling::Replacements Result;
-    FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
+    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
  
      UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
      bool StructuralError = Parser.parse();
@@ -1962,7 +1987,7 @@ private:
    }
  
    FormatStyle Style;
-  Lexer &Lex;
+  FileID ID;
    SourceManager &SourceMgr;
    WhitespaceManager Whitespaces;
    SmallVector<CharSourceRange, 8> Ranges;
@@ -1977,18 +2002,27 @@ private:
  tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
                                 SourceManager &SourceMgr,
                                 std::vector<CharSourceRange> Ranges) {
-  if (Style.DisableFormat) {
-    tooling::Replacements EmptyResult;
-    return EmptyResult;
-  }
+  if (Style.DisableFormat)
+    return tooling::Replacements();
+  return reformat(Style, SourceMgr,
+                  SourceMgr.getFileID(Lex.getSourceLocation()), Ranges);
+}
  
-  Formatter formatter(Style, Lex, SourceMgr, Ranges);
+tooling::Replacements reformat(const FormatStyle &Style,
+                               SourceManager &SourceMgr, FileID ID,
+                               std::vector<CharSourceRange> Ranges) {
+  if (Style.DisableFormat)
+    return tooling::Replacements();
+  Formatter formatter(Style, SourceMgr, ID, Ranges);
    return formatter.format();
  }
  
  tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
                                 std::vector<tooling::Range> Ranges,
                                 StringRef FileName) {
+  if (Style.DisableFormat)
+    return tooling::Replacements();
+
    FileManager Files((FileSystemOptions()));
    DiagnosticsEngine Diagnostics(
        IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
@@ -2001,8 +2035,6 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
    SourceMgr.overrideFileContents(Entry, std::move(Buf));
    FileID ID =
        SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
-  Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
-            getFormattingLangOpts(Style));
    SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
    std::vector<CharSourceRange> CharRanges;
    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
@@ -2010,7 +2042,7 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
      SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
      CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
    }
-  return reformat(Style, Lex, SourceMgr, CharRanges);
+  return reformat(Style, SourceMgr, ID, CharRanges);
  }
  
  LangOptions getFormattingLangOpts(const FormatStyle &Style) {
diff --git a/lib/Index/CommentToXML.cpp b/lib/Index/CommentToXML.cpp

index a67c806550d83415c6ed7db68e283491dbebf051..d1100c44e1699147338583aa63cabe3542fd09b0 100644 (file)
--- a/lib/Index/CommentToXML.cpp
+++ b/lib/Index/CommentToXML.cpp
@@ -15,7 +15,6 @@
  #include "clang/AST/CommentVisitor.h"
  #include "clang/Format/Format.h"
  #include "clang/Index/USRGeneration.h"
-#include "clang/Lex/Lexer.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/ADT/TinyPtrVector.h"
  #include "llvm/Support/raw_ostream.h"
@@ -611,12 +610,8 @@ void CommentASTToXMLConverter::formatTextOfDeclaration(
  
    std::vector<CharSourceRange> Ranges(
        1, CharSourceRange::getCharRange(Start, Start.getLocWithOffset(Length)));
-  ASTContext &Context = DI->CurrentDecl->getASTContext();
-  const LangOptions &LangOpts = Context.getLangOpts();
-  Lexer Lex(ID, FormatRewriterContext.Sources.getBuffer(ID),
-            FormatRewriterContext.Sources, LangOpts);
    tooling::Replacements Replace = reformat(
-      format::getLLVMStyle(), Lex, FormatRewriterContext.Sources, Ranges);
+      format::getLLVMStyle(), FormatRewriterContext.Sources, ID, Ranges);
    applyAllReplacements(Replace, FormatRewriterContext.Rewrite);
    Declaration = FormatRewriterContext.getRewrittenText(ID);
  }
diff --git a/tools/clang-format/ClangFormat.cpp b/tools/clang-format/ClangFormat.cpp

index 7dda9c6e1387de9ca4c08cac25df0b1c5445452d..614d9cd4bdd44edf72d36861d6e560273af8c8bc 100644 (file)
--- a/tools/clang-format/ClangFormat.cpp
+++ b/tools/clang-format/ClangFormat.cpp
@@ -19,7 +19,6 @@
  #include "clang/Basic/SourceManager.h"
  #include "clang/Basic/Version.h"
  #include "clang/Format/Format.h"
-#include "clang/Lex/Lexer.h"
  #include "clang/Rewrite/Core/Rewriter.h"
  #include "llvm/ADT/StringMap.h"
  #include "llvm/Support/Debug.h"
@@ -225,9 +224,7 @@ static bool format(StringRef FileName) {
  
    FormatStyle FormatStyle = getStyle(
        Style, (FileName == "-") ? AssumeFilename : FileName, FallbackStyle);
-  Lexer Lex(ID, Sources.getBuffer(ID), Sources,
-            getFormattingLangOpts(FormatStyle));
-  tooling::Replacements Replaces = reformat(FormatStyle, Lex, Sources, Ranges);
+  tooling::Replacements Replaces = reformat(FormatStyle, Sources, ID, Ranges);
    if (OutputXML) {
      llvm::outs()
          << "<?xml version='1.0'?>\n<replacements xml:space='preserve'>\n";
diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp

index b161699d94351572f15196eeadb1e094c7f14a4e..bf763388ffeb4fb18bd5172ff20111f2fe423b5b 100644 (file)
--- a/unittests/Format/FormatTestJS.cpp
+++ b/unittests/Format/FormatTestJS.cpp
@@ -330,6 +330,8 @@ TEST_F(FormatTestJS, RegexLiteralSpecialCharacters) {
    verifyFormat("var regex = /\\\\/g;");
    verifyFormat("var regex = /\\a\\\\/g;");
    verifyFormat("var regex = /\a\\//g;");
+  verifyFormat("var regex = /a\\//;\n"
+               "var x = 0;");
  }
  
  TEST_F(FormatTestJS, RegexLiteralModifiers) {
author	Daniel Jasper <djasper@google.com>
	Tue, 9 Sep 2014 14:37:39 +0000 (14:37 +0000)
committer	Daniel Jasper <djasper@google.com>
	Tue, 9 Sep 2014 14:37:39 +0000 (14:37 +0000)
include/clang/Format/Format.h		patch | blob | history
lib/Format/Format.cpp		patch | blob | history
lib/Index/CommentToXML.cpp		patch | blob | history
tools/clang-format/ClangFormat.cpp		patch | blob | history
unittests/Format/FormatTestJS.cpp		patch | blob | history