FormatStyle getStyle(StringRef StyleName, StringRef FileName,
StringRef FallbackStyle, vfs::FileSystem *FS = nullptr);
+/// \brief Returns a string representation of ``Language``.
+inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
+ switch (Language) {
+ case FormatStyle::LK_Cpp:
+ return "C++";
+ case FormatStyle::LK_Java:
+ return "Java";
+ case FormatStyle::LK_JavaScript:
+ return "JavaScript";
+ case FormatStyle::LK_Proto:
+ return "Proto";
+ default:
+ return "Unknown";
+ }
+}
+
} // end namespace format
} // end namespace clang
ContinuationIndenter.cpp
Format.cpp
FormatToken.cpp
+ FormatTokenLexer.cpp
+ SortJavaScriptImports.cpp
+ TokenAnalyzer.cpp
TokenAnnotator.cpp
UnwrappedLineFormatter.cpp
UnwrappedLineParser.cpp
#include "clang/Format/Format.h"
#include "AffectedRangeManager.h"
#include "ContinuationIndenter.h"
+#include "FormatTokenLexer.h"
+#include "SortJavaScriptImports.h"
+#include "TokenAnalyzer.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineFormatter.h"
#include "UnwrappedLineParser.h"
namespace {
-class FormatTokenLexer {
-public:
- FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
- const FormatStyle &Style, encoding::Encoding Encoding)
- : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
- LessStashed(false), Column(0), TrailingWhitespace(0),
- SourceMgr(SourceMgr), ID(ID), Style(Style),
- IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
- Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
- MacroBlockBeginRegex(Style.MacroBlockBegin),
- MacroBlockEndRegex(Style.MacroBlockEnd) {
- Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
- getFormattingLangOpts(Style)));
- Lex->SetKeepWhitespaceMode(true);
-
- for (const std::string &ForEachMacro : Style.ForEachMacros)
- ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
- std::sort(ForEachMacros.begin(), ForEachMacros.end());
- }
-
- ArrayRef<FormatToken *> lex() {
- assert(Tokens.empty());
- assert(FirstInLineIndex == 0);
- do {
- Tokens.push_back(getNextToken());
- if (Style.Language == FormatStyle::LK_JavaScript) {
- tryParseJSRegexLiteral();
- tryParseTemplateString();
- }
- tryMergePreviousTokens();
- if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
- FirstInLineIndex = Tokens.size() - 1;
- } while (Tokens.back()->Tok.isNot(tok::eof));
- return Tokens;
- }
-
- const AdditionalKeywords &getKeywords() { return Keywords; }
-
-private:
- void tryMergePreviousTokens() {
- if (tryMerge_TMacro())
- return;
- if (tryMergeConflictMarkers())
- return;
- if (tryMergeLessLess())
- return;
-
- if (Style.Language == FormatStyle::LK_JavaScript) {
- static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
- static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
- tok::equal};
- static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
- tok::greaterequal};
- static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
- // FIXME: Investigate what token type gives the correct operator priority.
- if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
- return;
- if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
- return;
- if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
- return;
- if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
- return;
- }
- }
-
- bool tryMergeLessLess() {
- // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
- if (Tokens.size() < 3)
- return false;
-
- bool FourthTokenIsLess = false;
- if (Tokens.size() > 3)
- FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
-
- auto First = Tokens.end() - 3;
- if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
- First[0]->isNot(tok::less) || FourthTokenIsLess)
- return false;
-
- // Only merge if there currently is no whitespace between the two "<".
- if (First[1]->WhitespaceRange.getBegin() !=
- First[1]->WhitespaceRange.getEnd())
- return false;
-
- First[0]->Tok.setKind(tok::lessless);
- First[0]->TokenText = "<<";
- First[0]->ColumnWidth += 1;
- Tokens.erase(Tokens.end() - 2);
- return true;
- }
-
- bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
- if (Tokens.size() < Kinds.size())
- return false;
-
- SmallVectorImpl<FormatToken *>::const_iterator First =
- Tokens.end() - Kinds.size();
- if (!First[0]->is(Kinds[0]))
- return false;
- unsigned AddLength = 0;
- for (unsigned i = 1; i < Kinds.size(); ++i) {
- if (!First[i]->is(Kinds[i]) ||
- First[i]->WhitespaceRange.getBegin() !=
- First[i]->WhitespaceRange.getEnd())
- return false;
- AddLength += First[i]->TokenText.size();
- }
- Tokens.resize(Tokens.size() - Kinds.size() + 1);
- First[0]->TokenText = StringRef(First[0]->TokenText.data(),
- First[0]->TokenText.size() + AddLength);
- First[0]->ColumnWidth += AddLength;
- First[0]->Type = NewType;
- return true;
- }
-
- // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
- bool precedesOperand(FormatToken *Tok) {
- // NB: This is not entirely correct, as an r_paren can introduce an operand
- // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
- // corner case to not matter in practice, though.
- return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
- tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
- tok::colon, tok::question, tok::tilde) ||
- Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
- tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
- tok::kw_typeof, Keywords.kw_instanceof,
- Keywords.kw_in) ||
- Tok->isBinaryOperator();
- }
-
- bool canPrecedeRegexLiteral(FormatToken *Prev) {
- if (!Prev)
- return true;
-
- // Regex literals can only follow after prefix unary operators, not after
- // postfix unary operators. If the '++' is followed by a non-operand
- // introducing token, the slash here is the operand and not the start of a
- // regex.
- if (Prev->isOneOf(tok::plusplus, tok::minusminus))
- return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
-
- // The previous token must introduce an operand location where regex
- // literals can occur.
- if (!precedesOperand(Prev))
- return false;
-
- return true;
- }
-
- // Tries to parse a JavaScript Regex literal starting at the current token,
- // if that begins with a slash and is in a location where JavaScript allows
- // regex literals. Changes the current token to a regex literal and updates
- // its text if successful.
- void tryParseJSRegexLiteral() {
- FormatToken *RegexToken = Tokens.back();
- if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
- return;
-
- FormatToken *Prev = nullptr;
- for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
- // NB: Because previous pointers are not initialized yet, this cannot use
- // Token.getPreviousNonComment.
- if ((*I)->isNot(tok::comment)) {
- Prev = *I;
- break;
- }
- }
-
- if (!canPrecedeRegexLiteral(Prev))
- return;
-
- // 'Manually' lex ahead in the current file buffer.
- const char *Offset = Lex->getBufferLocation();
- const char *RegexBegin = Offset - RegexToken->TokenText.size();
- StringRef Buffer = Lex->getBuffer();
- bool InCharacterClass = false;
- bool HaveClosingSlash = false;
- for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
- // Regular expressions are terminated with a '/', which can only be
- // escaped using '\' or a character class between '[' and ']'.
- // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
- switch (*Offset) {
- case '\\':
- // Skip the escaped character.
- ++Offset;
- break;
- case '[':
- InCharacterClass = true;
- break;
- case ']':
- InCharacterClass = false;
- break;
- case '/':
- if (!InCharacterClass)
- HaveClosingSlash = true;
- break;
- }
- }
-
- RegexToken->Type = TT_RegexLiteral;
- // Treat regex literals like other string_literals.
- RegexToken->Tok.setKind(tok::string_literal);
- RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
- RegexToken->ColumnWidth = RegexToken->TokenText.size();
-
- resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
- }
-
- void tryParseTemplateString() {
- FormatToken *BacktickToken = Tokens.back();
- if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
- return;
-
- // 'Manually' lex ahead in the current file buffer.
- const char *Offset = Lex->getBufferLocation();
- const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
- for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
- if (*Offset == '\\')
- ++Offset; // Skip the escaped character.
- }
-
- StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
- BacktickToken->Type = TT_TemplateString;
- BacktickToken->Tok.setKind(tok::string_literal);
- BacktickToken->TokenText = LiteralText;
-
- // Adjust width for potentially multiline string literals.
- size_t FirstBreak = LiteralText.find('\n');
- StringRef FirstLineText = FirstBreak == StringRef::npos
- ? LiteralText
- : LiteralText.substr(0, FirstBreak);
- BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
- FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
- size_t LastBreak = LiteralText.rfind('\n');
- if (LastBreak != StringRef::npos) {
- BacktickToken->IsMultiline = true;
- unsigned StartColumn = 0; // The template tail spans the entire line.
- BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
- LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
- Style.TabWidth, Encoding);
- }
-
- resetLexer(
- SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
- }
-
- bool tryMerge_TMacro() {
- if (Tokens.size() < 4)
- return false;
- FormatToken *Last = Tokens.back();
- if (!Last->is(tok::r_paren))
- return false;
-
- FormatToken *String = Tokens[Tokens.size() - 2];
- if (!String->is(tok::string_literal) || String->IsMultiline)
- return false;
-
- if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
- return false;
-
- FormatToken *Macro = Tokens[Tokens.size() - 4];
- if (Macro->TokenText != "_T")
- return false;
-
- const char *Start = Macro->TokenText.data();
- const char *End = Last->TokenText.data() + Last->TokenText.size();
- String->TokenText = StringRef(Start, End - Start);
- String->IsFirst = Macro->IsFirst;
- String->LastNewlineOffset = Macro->LastNewlineOffset;
- String->WhitespaceRange = Macro->WhitespaceRange;
- String->OriginalColumn = Macro->OriginalColumn;
- String->ColumnWidth = encoding::columnWidthWithTabs(
- String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
- String->NewlinesBefore = Macro->NewlinesBefore;
- String->HasUnescapedNewline = Macro->HasUnescapedNewline;
-
- Tokens.pop_back();
- Tokens.pop_back();
- Tokens.pop_back();
- Tokens.back() = String;
- return true;
- }
-
- bool tryMergeConflictMarkers() {
- if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
- return false;
-
- // Conflict lines look like:
- // <marker> <text from the vcs>
- // For example:
- // >>>>>>> /file/in/file/system at revision 1234
- //
- // We merge all tokens in a line that starts with a conflict marker
- // into a single token with a special token type that the unwrapped line
- // parser will use to correctly rebuild the underlying code.
-
- FileID ID;
- // Get the position of the first token in the line.
- unsigned FirstInLineOffset;
- std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
- Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
- StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
- // Calculate the offset of the start of the current line.
- auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
- if (LineOffset == StringRef::npos) {
- LineOffset = 0;
- } else {
- ++LineOffset;
- }
-
- auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
- StringRef LineStart;
- if (FirstSpace == StringRef::npos) {
- LineStart = Buffer.substr(LineOffset);
- } else {
- LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
- }
-
- TokenType Type = TT_Unknown;
- if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
- Type = TT_ConflictStart;
- } else if (LineStart == "|||||||" || LineStart == "=======" ||
- LineStart == "====") {
- Type = TT_ConflictAlternative;
- } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
- Type = TT_ConflictEnd;
- }
-
- if (Type != TT_Unknown) {
- FormatToken *Next = Tokens.back();
-
- Tokens.resize(FirstInLineIndex + 1);
- // We do not need to build a complete token here, as we will skip it
- // during parsing anyway (as we must not touch whitespace around conflict
- // markers).
- Tokens.back()->Type = Type;
- Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
-
- Tokens.push_back(Next);
- return true;
- }
-
- return false;
- }
-
- FormatToken *getStashedToken() {
- // Create a synthesized second '>' or '<' token.
- Token Tok = FormatTok->Tok;
- StringRef TokenText = FormatTok->TokenText;
-
- unsigned OriginalColumn = FormatTok->OriginalColumn;
- FormatTok = new (Allocator.Allocate()) FormatToken;
- FormatTok->Tok = Tok;
- SourceLocation TokLocation =
- FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
- FormatTok->Tok.setLocation(TokLocation);
- FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
- FormatTok->TokenText = TokenText;
- FormatTok->ColumnWidth = 1;
- FormatTok->OriginalColumn = OriginalColumn + 1;
-
- return FormatTok;
- }
-
- FormatToken *getNextToken() {
- if (GreaterStashed) {
- GreaterStashed = false;
- return getStashedToken();
- }
- if (LessStashed) {
- LessStashed = false;
- return getStashedToken();
- }
-
- FormatTok = new (Allocator.Allocate()) FormatToken;
- readRawToken(*FormatTok);
- SourceLocation WhitespaceStart =
- FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
- FormatTok->IsFirst = IsFirstToken;
- IsFirstToken = false;
-
- // Consume and record whitespace until we find a significant token.
- unsigned WhitespaceLength = TrailingWhitespace;
- while (FormatTok->Tok.is(tok::unknown)) {
- StringRef Text = FormatTok->TokenText;
- auto EscapesNewline = [&](int pos) {
- // A '\r' here is just part of '\r\n'. Skip it.
- if (pos >= 0 && Text[pos] == '\r')
- --pos;
- // See whether there is an odd number of '\' before this.
- unsigned count = 0;
- for (; pos >= 0; --pos, ++count)
- if (Text[pos] != '\\')
- break;
- return count & 1;
- };
- // FIXME: This miscounts tok:unknown tokens that are not just
- // whitespace, e.g. a '`' character.
- for (int i = 0, e = Text.size(); i != e; ++i) {
- switch (Text[i]) {
- case '\n':
- ++FormatTok->NewlinesBefore;
- FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
- FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
- Column = 0;
- break;
- case '\r':
- FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
- Column = 0;
- break;
- case '\f':
- case '\v':
- Column = 0;
- break;
- case ' ':
- ++Column;
- break;
- case '\t':
- Column += Style.TabWidth - Column % Style.TabWidth;
- break;
- case '\\':
- if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
- FormatTok->Type = TT_ImplicitStringLiteral;
- break;
- default:
- FormatTok->Type = TT_ImplicitStringLiteral;
- break;
- }
- if (FormatTok->Type == TT_ImplicitStringLiteral)
- break;
- }
-
- if (FormatTok->is(TT_ImplicitStringLiteral))
- break;
- WhitespaceLength += FormatTok->Tok.getLength();
-
- readRawToken(*FormatTok);
- }
-
- // In case the token starts with escaped newlines, we want to
- // take them into account as whitespace - this pattern is quite frequent
- // in macro definitions.
- // FIXME: Add a more explicit test.
- while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
- FormatTok->TokenText[1] == '\n') {
- ++FormatTok->NewlinesBefore;
- WhitespaceLength += 2;
- FormatTok->LastNewlineOffset = 2;
- Column = 0;
- FormatTok->TokenText = FormatTok->TokenText.substr(2);
- }
-
- FormatTok->WhitespaceRange = SourceRange(
- WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
-
- FormatTok->OriginalColumn = Column;
-
- TrailingWhitespace = 0;
- if (FormatTok->Tok.is(tok::comment)) {
- // FIXME: Add the trimmed whitespace to Column.
- StringRef UntrimmedText = FormatTok->TokenText;
- FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
- TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
- } else if (FormatTok->Tok.is(tok::raw_identifier)) {
- IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
- FormatTok->Tok.setIdentifierInfo(&Info);
- FormatTok->Tok.setKind(Info.getTokenID());
- if (Style.Language == FormatStyle::LK_Java &&
- FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
- tok::kw_operator)) {
- FormatTok->Tok.setKind(tok::identifier);
- FormatTok->Tok.setIdentifierInfo(nullptr);
- } else if (Style.Language == FormatStyle::LK_JavaScript &&
- FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
- tok::kw_operator)) {
- FormatTok->Tok.setKind(tok::identifier);
- FormatTok->Tok.setIdentifierInfo(nullptr);
- }
- } else if (FormatTok->Tok.is(tok::greatergreater)) {
- FormatTok->Tok.setKind(tok::greater);
- FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- GreaterStashed = true;
- } else if (FormatTok->Tok.is(tok::lessless)) {
- FormatTok->Tok.setKind(tok::less);
- FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- LessStashed = true;
- }
-
- // Now FormatTok is the next non-whitespace token.
-
- StringRef Text = FormatTok->TokenText;
- size_t FirstNewlinePos = Text.find('\n');
- if (FirstNewlinePos == StringRef::npos) {
- // FIXME: ColumnWidth actually depends on the start column, we need to
- // take this into account when the token is moved.
- FormatTok->ColumnWidth =
- encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
- Column += FormatTok->ColumnWidth;
- } else {
- FormatTok->IsMultiline = true;
- // FIXME: ColumnWidth actually depends on the start column, we need to
- // take this into account when the token is moved.
- FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
- Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
-
- // The last line of the token always starts in column 0.
- // Thus, the length can be precomputed even in the presence of tabs.
- FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
- Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
- Encoding);
- Column = FormatTok->LastLineColumnWidth;
- }
-
- if (Style.Language == FormatStyle::LK_Cpp) {
- if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
- Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
- tok::pp_define) &&
- std::find(ForEachMacros.begin(), ForEachMacros.end(),
- FormatTok->Tok.getIdentifierInfo()) !=
- ForEachMacros.end()) {
- FormatTok->Type = TT_ForEachMacro;
- } else if (FormatTok->is(tok::identifier)) {
- if (MacroBlockBeginRegex.match(Text)) {
- FormatTok->Type = TT_MacroBlockBegin;
- } else if (MacroBlockEndRegex.match(Text)) {
- FormatTok->Type = TT_MacroBlockEnd;
- }
- }
- }
-
- return FormatTok;
- }
-
- FormatToken *FormatTok;
- bool IsFirstToken;
- bool GreaterStashed, LessStashed;
- unsigned Column;
- unsigned TrailingWhitespace;
- std::unique_ptr<Lexer> Lex;
- const SourceManager &SourceMgr;
- FileID ID;
- const FormatStyle &Style;
- IdentifierTable IdentTable;
- AdditionalKeywords Keywords;
- encoding::Encoding Encoding;
- llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
- // Index (in 'Tokens') of the last token that starts a new line.
- unsigned FirstInLineIndex;
- SmallVector<FormatToken *, 16> Tokens;
- SmallVector<IdentifierInfo *, 8> ForEachMacros;
-
- bool FormattingDisabled;
-
- llvm::Regex MacroBlockBeginRegex;
- llvm::Regex MacroBlockEndRegex;
-
- void readRawToken(FormatToken &Tok) {
- Lex->LexFromRawLexer(Tok.Tok);
- Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
- Tok.Tok.getLength());
- // For formatting, treat unterminated string literals like normal string
- // literals.
- if (Tok.is(tok::unknown)) {
- if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
- Tok.Tok.setKind(tok::string_literal);
- Tok.IsUnterminatedLiteral = true;
- } else if (Style.Language == FormatStyle::LK_JavaScript &&
- Tok.TokenText == "''") {
- Tok.Tok.setKind(tok::string_literal);
- }
- }
-
- if (Style.Language == FormatStyle::LK_JavaScript &&
- Tok.is(tok::char_constant)) {
- Tok.Tok.setKind(tok::string_literal);
- }
-
- if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
- Tok.TokenText == "/* clang-format on */")) {
- FormattingDisabled = false;
- }
-
- Tok.Finalized = FormattingDisabled;
-
- if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
- Tok.TokenText == "/* clang-format off */")) {
- FormattingDisabled = true;
- }
- }
-
- void resetLexer(unsigned Offset) {
- StringRef Buffer = SourceMgr.getBufferData(ID);
- Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
- getFormattingLangOpts(Style), Buffer.begin(),
- Buffer.begin() + Offset, Buffer.end()));
- Lex->SetKeepWhitespaceMode(true);
- TrailingWhitespace = 0;
- }
-};
-
-static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
- switch (Language) {
- case FormatStyle::LK_Cpp:
- return "C++";
- case FormatStyle::LK_Java:
- return "Java";
- case FormatStyle::LK_JavaScript:
- return "JavaScript";
- case FormatStyle::LK_Proto:
- return "Proto";
- default:
- return "Unknown";
- }
-}
-
-class Environment {
-public:
- Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
- : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
-
- Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
- std::unique_ptr<SourceManager> VirtualSM,
- std::unique_ptr<DiagnosticsEngine> Diagnostics,
- const std::vector<CharSourceRange> &CharRanges)
- : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
- SM(*VirtualSM), FileMgr(std::move(FileMgr)),
- VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
-
- // This sets up an virtual file system with file \p FileName containing \p
- // Code.
- static std::unique_ptr<Environment>
- CreateVirtualEnvironment(StringRef Code, StringRef FileName,
- ArrayRef<tooling::Range> Ranges) {
- // This is referenced by `FileMgr` and will be released by `FileMgr` when it
- // is deleted.
- IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
- new vfs::InMemoryFileSystem);
- // This is passed to `SM` as reference, so the pointer has to be referenced
- // in `Environment` so that `FileMgr` can out-live this function scope.
- std::unique_ptr<FileManager> FileMgr(
- new FileManager(FileSystemOptions(), InMemoryFileSystem));
- // This is passed to `SM` as reference, so the pointer has to be referenced
- // by `Environment` due to the same reason above.
- std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
- IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
- new DiagnosticOptions));
- // This will be stored as reference, so the pointer has to be stored in
- // due to the same reason above.
- std::unique_ptr<SourceManager> VirtualSM(
- new SourceManager(*Diagnostics, *FileMgr));
- InMemoryFileSystem->addFile(
- FileName, 0, llvm::MemoryBuffer::getMemBuffer(
- Code, FileName, /*RequiresNullTerminator=*/false));
- FileID ID = VirtualSM->createFileID(
- FileMgr->getFile(FileName), SourceLocation(), clang::SrcMgr::C_User);
- assert(ID.isValid());
- SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
- std::vector<CharSourceRange> CharRanges;
- for (const tooling::Range &Range : Ranges) {
- SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
- SourceLocation End = Start.getLocWithOffset(Range.getLength());
- CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
- }
- return llvm::make_unique<Environment>(ID, std::move(FileMgr),
- std::move(VirtualSM),
- std::move(Diagnostics), CharRanges);
- }
-
- FileID getFileID() const { return ID; }
-
- StringRef getFileName() const { return FileName; }
-
- ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
-
- const SourceManager &getSourceManager() const { return SM; }
-
-private:
- FileID ID;
- StringRef FileName;
- SmallVector<CharSourceRange, 8> CharRanges;
- SourceManager &SM;
-
- // The order of these fields are important - they should be in the same order
- // as they are created in `CreateVirtualEnvironment` so that they can be
- // deleted in the reverse order as they are created.
- std::unique_ptr<FileManager> FileMgr;
- std::unique_ptr<SourceManager> VirtualSM;
- std::unique_ptr<DiagnosticsEngine> Diagnostics;
-};
-
-class TokenAnalyzer : public UnwrappedLineConsumer {
-public:
- TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
- : Style(Style), Env(Env),
- AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
- UnwrappedLines(1),
- Encoding(encoding::detectEncoding(
- Env.getSourceManager().getBufferData(Env.getFileID()))) {
- DEBUG(llvm::dbgs() << "File encoding: "
- << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
- : "unknown")
- << "\n");
- DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
- << "\n");
- }
-
- tooling::Replacements process() {
- tooling::Replacements Result;
- FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
- Encoding);
-
- UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
- *this);
- Parser.parse();
- assert(UnwrappedLines.rbegin()->empty());
- for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
- ++Run) {
- DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
- SmallVector<AnnotatedLine *, 16> AnnotatedLines;
-
- TokenAnnotator Annotator(Style, Tokens.getKeywords());
- for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
- AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
- Annotator.annotate(*AnnotatedLines.back());
- }
-
- tooling::Replacements RunResult =
- analyze(Annotator, AnnotatedLines, Tokens, Result);
-
- DEBUG({
- llvm::dbgs() << "Replacements for run " << Run << ":\n";
- for (tooling::Replacements::iterator I = RunResult.begin(),
- E = RunResult.end();
- I != E; ++I) {
- llvm::dbgs() << I->toString() << "\n";
- }
- });
- for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
- delete AnnotatedLines[i];
- }
- Result.insert(RunResult.begin(), RunResult.end());
- }
- return Result;
- }
-
-protected:
- virtual tooling::Replacements
- analyze(TokenAnnotator &Annotator,
- SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
- FormatTokenLexer &Tokens, tooling::Replacements &Result) = 0;
-
- void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
- assert(!UnwrappedLines.empty());
- UnwrappedLines.back().push_back(TheLine);
- }
-
- void finishRun() override {
- UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
- }
-
- FormatStyle Style;
- // Stores Style, FileID and SourceManager etc.
- const Environment &Env;
- // AffectedRangeMgr stores ranges to be fixed.
- AffectedRangeManager AffectedRangeMgr;
- SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
- encoding::Encoding Encoding;
-};
-
class Formatter : public TokenAnalyzer {
public:
Formatter(const Environment &Env, const FormatStyle &Style,
// Sorts a block of includes given by 'Includes' alphabetically adding the
// necessary replacement to 'Replaces'. 'Includes' must be in strict source
// order.
-static void sortIncludes(const FormatStyle &Style,
+static void sortCppIncludes(const FormatStyle &Style,
const SmallVectorImpl<IncludeDirective> &Includes,
ArrayRef<tooling::Range> Ranges, StringRef FileName,
tooling::Replacements &Replaces, unsigned *Cursor) {
result.size(), result));
}
-tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
- ArrayRef<tooling::Range> Ranges,
- StringRef FileName, unsigned *Cursor) {
- tooling::Replacements Replaces;
- if (!Style.SortIncludes)
- return Replaces;
-
+tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName,
+ tooling::Replacements &Replaces,
+ unsigned *Cursor) {
unsigned Prev = 0;
unsigned SearchFrom = 0;
llvm::Regex IncludeRegex(
}
IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
} else if (!IncludesInBlock.empty()) {
- sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
- Cursor);
+ sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
+ Cursor);
IncludesInBlock.clear();
FirstIncludeBlock = false;
}
SearchFrom = Pos + 1;
}
if (!IncludesInBlock.empty())
- sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
+ sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
+ return Replaces;
+}
+
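+// Sorts the #include directives of C/C++ code (via sortCppIncludes) or the
+// import/export statements of JavaScript code (via sortJavaScriptImports),
+// provided Style.SortIncludes is enabled.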
+tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName, unsigned *Cursor) {
+ tooling::Replacements Replaces;
+ if (!Style.SortIncludes)
+ return Replaces;
+ if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript)
+ return sortJavaScriptImports(Style, Code, Ranges, FileName);
+ sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor);
return Replaces;
}
kw_NS_ENUM = &IdentTable.get("NS_ENUM");
kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
+ kw_as = &IdentTable.get("as");
kw_async = &IdentTable.get("async");
kw_await = &IdentTable.get("await");
kw_finally = &IdentTable.get("finally");
IdentifierInfo *kw___except;
// JavaScript keywords.
+ IdentifierInfo *kw_as;
IdentifierInfo *kw_async;
IdentifierInfo *kw_await;
IdentifierInfo *kw_finally;
--- /dev/null
+//===--- FormatTokenLexer.cpp - Lex FormatTokens -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements FormatTokenLexer, which tokenizes a source file
+/// into a FormatToken stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatTokenLexer.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+
+namespace clang {
+namespace format {
+
+FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+ const FormatStyle &Style,
+ encoding::Encoding Encoding)
+ : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
+ LessStashed(false), Column(0), TrailingWhitespace(0),
+ SourceMgr(SourceMgr), ID(ID), Style(Style),
+ IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
+ Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
+ MacroBlockBeginRegex(Style.MacroBlockBegin),
+ MacroBlockEndRegex(Style.MacroBlockEnd) {
+ Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
+ getFormattingLangOpts(Style)));
+ Lex->SetKeepWhitespaceMode(true);
+
+ for (const std::string &ForEachMacro : Style.ForEachMacros)
+ ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
+ std::sort(ForEachMacros.begin(), ForEachMacros.end());
+}
+
+ArrayRef<FormatToken *> FormatTokenLexer::lex() {
+ assert(Tokens.empty());
+ assert(FirstInLineIndex == 0);
+ do {
+ Tokens.push_back(getNextToken());
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ tryParseJSRegexLiteral();
+ tryParseTemplateString();
+ }
+ tryMergePreviousTokens();
+ if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
+ FirstInLineIndex = Tokens.size() - 1;
+ } while (Tokens.back()->Tok.isNot(tok::eof));
+ return Tokens;
+}
+
+void FormatTokenLexer::tryMergePreviousTokens() {
+ if (tryMerge_TMacro())
+ return;
+ if (tryMergeConflictMarkers())
+ return;
+ if (tryMergeLessLess())
+ return;
+
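+ // The raw lexer splits JavaScript-only operators into several tokens, e.g.
+ // "===" into tok::equalequal followed by tok::equal. Re-join such sequences
+ // here so that "===", "!==", ">>>=" and "=>" are handled as single tokens.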
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
+ static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
+ tok::equal};
+ static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
+ tok::greaterequal};
+ static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
+ // FIXME: Investigate what token type gives the correct operator priority.
+ if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
+ return;
+ }
+}
+
+bool FormatTokenLexer::tryMergeLessLess() {
+ // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
+ if (Tokens.size() < 3)
+ return false;
+
+ bool FourthTokenIsLess = false;
+ if (Tokens.size() > 3)
+ FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
+
+ auto First = Tokens.end() - 3;
+ if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
+ First[0]->isNot(tok::less) || FourthTokenIsLess)
+ return false;
+
+ // Only merge if there currently is no whitespace between the two "<".
+ if (First[1]->WhitespaceRange.getBegin() !=
+ First[1]->WhitespaceRange.getEnd())
+ return false;
+
+ First[0]->Tok.setKind(tok::lessless);
+ First[0]->TokenText = "<<";
+ First[0]->ColumnWidth += 1;
+ Tokens.erase(Tokens.end() - 2);
+ return true;
+}
+
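+// Merges the trailing Kinds.size() tokens into the first of them if they
+// match Kinds exactly and have no whitespace between them; the surviving
+// token gets the concatenated text and the type NewType.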
+bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
+ TokenType NewType) {
+ if (Tokens.size() < Kinds.size())
+ return false;
+
+ SmallVectorImpl<FormatToken *>::const_iterator First =
+ Tokens.end() - Kinds.size();
+ if (!First[0]->is(Kinds[0]))
+ return false;
+ unsigned AddLength = 0;
+ for (unsigned i = 1; i < Kinds.size(); ++i) {
+ if (!First[i]->is(Kinds[i]) ||
+ First[i]->WhitespaceRange.getBegin() !=
+ First[i]->WhitespaceRange.getEnd())
+ return false;
+ AddLength += First[i]->TokenText.size();
+ }
+ Tokens.resize(Tokens.size() - Kinds.size() + 1);
+ First[0]->TokenText = StringRef(First[0]->TokenText.data(),
+ First[0]->TokenText.size() + AddLength);
+ First[0]->ColumnWidth += AddLength;
+ First[0]->Type = NewType;
+ return true;
+}
+
+// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
+ // NB: This is not entirely correct, as an r_paren can introduce an operand
+ // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
+ // corner case to not matter in practice, though.
+ return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
+ tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
+ tok::colon, tok::question, tok::tilde) ||
+ Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
+ tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
+ tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
+ Tok->isBinaryOperator();
+}
+
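+// Returns whether a regex literal may start right after \p Prev, i.e. whether
+// \p Prev (the last non-comment token, if any) leaves us at a position where
+// an operand is expected.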
+bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
+ if (!Prev)
+ return true;
+
+ // Regex literals can only follow after prefix unary operators, not after
+ // postfix unary operators. If the '++' is followed by a non-operand
+ // introducing token, the slash here is the operand and not the start of a
+ // regex.
+ if (Prev->isOneOf(tok::plusplus, tok::minusminus))
+ return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
+
+ // The previous token must introduce an operand location where regex
+ // literals can occur.
+ if (!precedesOperand(Prev))
+ return false;
+
+ return true;
+}
+
+// Tries to parse a JavaScript Regex literal starting at the current token,
+// if that begins with a slash and is in a location where JavaScript allows
+// regex literals. Changes the current token to a regex literal and updates
+// its text if successful.
+void FormatTokenLexer::tryParseJSRegexLiteral() {
+ FormatToken *RegexToken = Tokens.back();
+ if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
+ return;
+
+ FormatToken *Prev = nullptr;
+ for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
+ // NB: Because previous pointers are not initialized yet, this cannot use
+ // Token.getPreviousNonComment.
+ if ((*I)->isNot(tok::comment)) {
+ Prev = *I;
+ break;
+ }
+ }
+
+ if (!canPrecedeRegexLiteral(Prev))
+ return;
+
+ // 'Manually' lex ahead in the current file buffer.
+ const char *Offset = Lex->getBufferLocation();
+ const char *RegexBegin = Offset - RegexToken->TokenText.size();
+ StringRef Buffer = Lex->getBuffer();
+ bool InCharacterClass = false;
+ bool HaveClosingSlash = false;
+ for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
+ // Regular expressions are terminated with a '/', which can only be
+ // escaped using '\' or a character class between '[' and ']'.
+ // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
+ switch (*Offset) {
+ case '\\':
+ // Skip the escaped character.
+ ++Offset;
+ break;
+ case '[':
+ InCharacterClass = true;
+ break;
+ case ']':
+ InCharacterClass = false;
+ break;
+ case '/':
+ if (!InCharacterClass)
+ HaveClosingSlash = true;
+ break;
+ }
+ }
+
+ RegexToken->Type = TT_RegexLiteral;
+ // Treat regex literals like other string_literals.
+ RegexToken->Tok.setKind(tok::string_literal);
+ RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
+ RegexToken->ColumnWidth = RegexToken->TokenText.size();
+
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+}
+
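+// Tries to lex a JavaScript template string, i.e. a backtick-delimited
+// literal such as `Hello ${name}`, starting at the current '`' token. On
+// success the token becomes a string_literal of type TT_TemplateString
+// spanning the whole literal, and its (possibly multiline) width is adjusted.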
+void FormatTokenLexer::tryParseTemplateString() {
+ FormatToken *BacktickToken = Tokens.back();
+ if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+ return;
+
+ // 'Manually' lex ahead in the current file buffer.
+ const char *Offset = Lex->getBufferLocation();
+ const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
+ for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
+ if (*Offset == '\\')
+ ++Offset; // Skip the escaped character.
+ }
+
+ StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
+ BacktickToken->Type = TT_TemplateString;
+ BacktickToken->Tok.setKind(tok::string_literal);
+ BacktickToken->TokenText = LiteralText;
+
+ // Adjust width for potentially multiline string literals.
+ size_t FirstBreak = LiteralText.find('\n');
+ StringRef FirstLineText = FirstBreak == StringRef::npos
+ ? LiteralText
+ : LiteralText.substr(0, FirstBreak);
+ BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
+ FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
+ size_t LastBreak = LiteralText.rfind('\n');
+ if (LastBreak != StringRef::npos) {
+ BacktickToken->IsMultiline = true;
+ unsigned StartColumn = 0; // The template tail spans the entire line.
+ BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
+ Style.TabWidth, Encoding);
+ }
+
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+}
+
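+// Merges the four tokens of a _T("...") call (e.g. the Windows tchar.h _T()
+// macro) into a single string literal token covering the entire call, so it
+// is treated like an ordinary string.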
+bool FormatTokenLexer::tryMerge_TMacro() {
+ if (Tokens.size() < 4)
+ return false;
+ FormatToken *Last = Tokens.back();
+ if (!Last->is(tok::r_paren))
+ return false;
+
+ FormatToken *String = Tokens[Tokens.size() - 2];
+ if (!String->is(tok::string_literal) || String->IsMultiline)
+ return false;
+
+ if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
+ return false;
+
+ FormatToken *Macro = Tokens[Tokens.size() - 4];
+ if (Macro->TokenText != "_T")
+ return false;
+
+ const char *Start = Macro->TokenText.data();
+ const char *End = Last->TokenText.data() + Last->TokenText.size();
+ String->TokenText = StringRef(Start, End - Start);
+ String->IsFirst = Macro->IsFirst;
+ String->LastNewlineOffset = Macro->LastNewlineOffset;
+ String->WhitespaceRange = Macro->WhitespaceRange;
+ String->OriginalColumn = Macro->OriginalColumn;
+ String->ColumnWidth = encoding::columnWidthWithTabs(
+ String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
+ String->NewlinesBefore = Macro->NewlinesBefore;
+ String->HasUnescapedNewline = Macro->HasUnescapedNewline;
+
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.back() = String;
+ return true;
+}
+
+bool FormatTokenLexer::tryMergeConflictMarkers() {
+ if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
+ return false;
+
+ // Conflict lines look like:
+ // <marker> <text from the vcs>
+ // For example:
+ // >>>>>>> /file/in/file/system at revision 1234
+ //
+ // We merge all tokens in a line that starts with a conflict marker
+ // into a single token with a special token type that the unwrapped line
+ // parser will use to correctly rebuild the underlying code.
+
+ FileID ID;
+ // Get the position of the first token in the line.
+ unsigned FirstInLineOffset;
+ std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
+ Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
+ StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
+ // Calculate the offset of the start of the current line.
+ auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
+ if (LineOffset == StringRef::npos) {
+ LineOffset = 0;
+ } else {
+ ++LineOffset;
+ }
+
+ auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
+ StringRef LineStart;
+ if (FirstSpace == StringRef::npos) {
+ LineStart = Buffer.substr(LineOffset);
+ } else {
+ LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
+ }
+
+ TokenType Type = TT_Unknown;
+ if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
+ Type = TT_ConflictStart;
+ } else if (LineStart == "|||||||" || LineStart == "=======" ||
+ LineStart == "====") {
+ Type = TT_ConflictAlternative;
+ } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
+ Type = TT_ConflictEnd;
+ }
+
+ if (Type != TT_Unknown) {
+ FormatToken *Next = Tokens.back();
+
+ Tokens.resize(FirstInLineIndex + 1);
+ // We do not need to build a complete token here, as we will skip it
+ // during parsing anyway (as we must not touch whitespace around conflict
+ // markers).
+ Tokens.back()->Type = Type;
+ Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
+
+ Tokens.push_back(Next);
+ return true;
+ }
+
+ return false;
+}
+
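+// Returns the second half of a '>>' or '<<' that getNextToken() split
+// earlier: a synthesized '>' or '<' token located one character (and one
+// column) after the first half.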
+FormatToken *FormatTokenLexer::getStashedToken() {
+ // Create a synthesized second '>' or '<' token.
+ Token Tok = FormatTok->Tok;
+ StringRef TokenText = FormatTok->TokenText;
+
+ unsigned OriginalColumn = FormatTok->OriginalColumn;
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ FormatTok->Tok = Tok;
+ SourceLocation TokLocation =
+ FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
+ FormatTok->Tok.setLocation(TokLocation);
+ FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
+ FormatTok->TokenText = TokenText;
+ FormatTok->ColumnWidth = 1;
+ FormatTok->OriginalColumn = OriginalColumn + 1;
+
+ return FormatTok;
+}
+
+FormatToken *FormatTokenLexer::getNextToken() {
+ if (GreaterStashed) {
+ GreaterStashed = false;
+ return getStashedToken();
+ }
+ if (LessStashed) {
+ LessStashed = false;
+ return getStashedToken();
+ }
+
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ readRawToken(*FormatTok);
+ SourceLocation WhitespaceStart =
+ FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
+ FormatTok->IsFirst = IsFirstToken;
+ IsFirstToken = false;
+
+ // Consume and record whitespace until we find a significant token.
+ unsigned WhitespaceLength = TrailingWhitespace;
+ while (FormatTok->Tok.is(tok::unknown)) {
+ StringRef Text = FormatTok->TokenText;
+ auto EscapesNewline = [&](int pos) {
+ // A '\r' here is just part of '\r\n'. Skip it.
+ if (pos >= 0 && Text[pos] == '\r')
+ --pos;
+ // See whether there is an odd number of '\' before this.
+ unsigned count = 0;
+ for (; pos >= 0; --pos, ++count)
+ if (Text[pos] != '\\')
+ break;
+ return count & 1;
+ };
+ // FIXME: This miscounts tok::unknown tokens that are not just
+ // whitespace, e.g. a '`' character.
+ for (int i = 0, e = Text.size(); i != e; ++i) {
+ switch (Text[i]) {
+ case '\n':
+ ++FormatTok->NewlinesBefore;
+ FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case '\r':
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case '\f':
+ case '\v':
+ Column = 0;
+ break;
+ case ' ':
+ ++Column;
+ break;
+ case '\t':
+ Column += Style.TabWidth - Column % Style.TabWidth;
+ break;
+ case '\\':
+ if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ break;
+ default:
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ break;
+ }
+ if (FormatTok->Type == TT_ImplicitStringLiteral)
+ break;
+ }
+
+ if (FormatTok->is(TT_ImplicitStringLiteral))
+ break;
+ WhitespaceLength += FormatTok->Tok.getLength();
+
+ readRawToken(*FormatTok);
+ }
+
+ // In case the token starts with escaped newlines, we want to
+ // take them into account as whitespace - this pattern is quite frequent
+ // in macro definitions.
+ // FIXME: Add a more explicit test.
+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
+ FormatTok->TokenText[1] == '\n') {
+ ++FormatTok->NewlinesBefore;
+ WhitespaceLength += 2;
+ FormatTok->LastNewlineOffset = 2;
+ Column = 0;
+ FormatTok->TokenText = FormatTok->TokenText.substr(2);
+ }
+
+ FormatTok->WhitespaceRange = SourceRange(
+ WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+
+ FormatTok->OriginalColumn = Column;
+
+ TrailingWhitespace = 0;
+ if (FormatTok->Tok.is(tok::comment)) {
+ // FIXME: Add the trimmed whitespace to Column.
+ StringRef UntrimmedText = FormatTok->TokenText;
+ FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
+ TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
+ } else if (FormatTok->Tok.is(tok::raw_identifier)) {
+ IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
+ FormatTok->Tok.setIdentifierInfo(&Info);
+ FormatTok->Tok.setKind(Info.getTokenID());
+ if (Style.Language == FormatStyle::LK_Java &&
+ FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
+ tok::kw_operator)) {
+ FormatTok->Tok.setKind(tok::identifier);
+ FormatTok->Tok.setIdentifierInfo(nullptr);
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
+ tok::kw_operator)) {
+ FormatTok->Tok.setKind(tok::identifier);
+ FormatTok->Tok.setIdentifierInfo(nullptr);
+ }
+ } else if (FormatTok->Tok.is(tok::greatergreater)) {
+ FormatTok->Tok.setKind(tok::greater);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ GreaterStashed = true;
+ } else if (FormatTok->Tok.is(tok::lessless)) {
+ FormatTok->Tok.setKind(tok::less);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ LessStashed = true;
+ }
+
+ // Now FormatTok is the next non-whitespace token.
+
+ StringRef Text = FormatTok->TokenText;
+ size_t FirstNewlinePos = Text.find('\n');
+ if (FirstNewlinePos == StringRef::npos) {
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth =
+ encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
+ Column += FormatTok->ColumnWidth;
+ } else {
+ FormatTok->IsMultiline = true;
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
+
+ // The last line of the token always starts in column 0.
+ // Thus, the length can be precomputed even in the presence of tabs.
+ FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);
+ Column = FormatTok->LastLineColumnWidth;
+ }
+
+ if (Style.Language == FormatStyle::LK_Cpp) {
+ if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
+ Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
+ tok::pp_define) &&
+ std::find(ForEachMacros.begin(), ForEachMacros.end(),
+ FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
+ FormatTok->Type = TT_ForEachMacro;
+ } else if (FormatTok->is(tok::identifier)) {
+ if (MacroBlockBeginRegex.match(Text)) {
+ FormatTok->Type = TT_MacroBlockBegin;
+ } else if (MacroBlockEndRegex.match(Text)) {
+ FormatTok->Type = TT_MacroBlockEnd;
+ }
+ }
+ }
+
+ return FormatTok;
+}
+
+void FormatTokenLexer::readRawToken(FormatToken &Tok) {
+ Lex->LexFromRawLexer(Tok.Tok);
+ Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
+ Tok.Tok.getLength());
+ // For formatting, treat unterminated string literals like normal string
+ // literals.
+ if (Tok.is(tok::unknown)) {
+ if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
+ Tok.Tok.setKind(tok::string_literal);
+ Tok.IsUnterminatedLiteral = true;
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.TokenText == "''") {
+ Tok.Tok.setKind(tok::string_literal);
+ }
+ }
+
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.is(tok::char_constant)) {
+ Tok.Tok.setKind(tok::string_literal);
+ }
+
+ if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
+ Tok.TokenText == "/* clang-format on */")) {
+ FormattingDisabled = false;
+ }
+
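+ // Note the ordering: the "clang-format on" marker above re-enables
+ // formatting before Finalized is assigned, while the "off" marker below
+ // disables it only for the tokens that follow it.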
+ Tok.Finalized = FormattingDisabled;
+
+ if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
+ Tok.TokenText == "/* clang-format off */")) {
+ FormattingDisabled = true;
+ }
+}
+
+void FormatTokenLexer::resetLexer(unsigned Offset) {
+ StringRef Buffer = SourceMgr.getBufferData(ID);
+ Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
+ getFormattingLangOpts(Style), Buffer.begin(),
+ Buffer.begin() + Offset, Buffer.end()));
+ Lex->SetKeepWhitespaceMode(true);
+ TrailingWhitespace = 0;
+}
+
+} // namespace format
+} // namespace clang
--- /dev/null
+//===--- FormatTokenLexer.h - Format C++ code --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains FormatTokenLexer, which tokenizes a source file
+/// into a token stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+
+namespace clang {
+namespace format {
+
+class FormatTokenLexer {
+public:
+ FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+ const FormatStyle &Style, encoding::Encoding Encoding);
+
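+ // Lexes the entire source buffer into a sequence of FormatTokens, the last
+ // of which is the eof token. Must only be called once.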
+ ArrayRef<FormatToken *> lex();
+
+ const AdditionalKeywords &getKeywords() { return Keywords; }
+
+private:
+ void tryMergePreviousTokens();
+
+ bool tryMergeLessLess();
+
+ bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
+
+ // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+ bool precedesOperand(FormatToken *Tok);
+
+ bool canPrecedeRegexLiteral(FormatToken *Prev);
+
+ // Tries to parse a JavaScript Regex literal starting at the current token,
+ // if that begins with a slash and is in a location where JavaScript allows
+ // regex literals. Changes the current token to a regex literal and updates
+ // its text if successful.
+ void tryParseJSRegexLiteral();
+
+ void tryParseTemplateString();
+
+ bool tryMerge_TMacro();
+
+ bool tryMergeConflictMarkers();
+
+ FormatToken *getStashedToken();
+
+ FormatToken *getNextToken();
+
+ FormatToken *FormatTok;
+ bool IsFirstToken;
+ bool GreaterStashed, LessStashed;
+ unsigned Column;
+ unsigned TrailingWhitespace;
+ std::unique_ptr<Lexer> Lex;
+ const SourceManager &SourceMgr;
+ FileID ID;
+ const FormatStyle &Style;
+ IdentifierTable IdentTable;
+ AdditionalKeywords Keywords;
+ encoding::Encoding Encoding;
+ llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+ // Index (in 'Tokens') of the last token that starts a new line.
+ unsigned FirstInLineIndex;
+ SmallVector<FormatToken *, 16> Tokens;
+ SmallVector<IdentifierInfo *, 8> ForEachMacros;
+
+ bool FormattingDisabled;
+
+ llvm::Regex MacroBlockBeginRegex;
+ llvm::Regex MacroBlockEndRegex;
+
+ void readRawToken(FormatToken &Tok);
+
+ void resetLexer(unsigned Offset);
+};
+
+} // namespace format
+} // namespace clang
+
+#endif
--- /dev/null
+//===--- SortJavaScriptImports.cpp - Sort ES6 Imports ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a sort operation for JavaScript ES6 imports.
+///
+//===----------------------------------------------------------------------===//
+
+#include "SortJavaScriptImports.h"
+#include "SortJavaScriptImports.h"
+#include "TokenAnalyzer.h"
+#include "TokenAnnotator.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include <string>
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+class FormatTokenLexer;
+
+using clang::format::FormatStyle;
+
+// An imported symbol in a JavaScript ES6 import/export, possibly aliased.
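+// For example, `import {X as Y} from 'z';` imports the symbol X under the
+// local alias Y.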
+struct JsImportedSymbol {
+ StringRef Symbol;
+ StringRef Alias;
+};
+
+// An ES6 module reference.
+//
+// ES6 implements a module system, where individual modules (~= source files)
+// can reference other modules, either importing symbols from them, or exporting
+// symbols from them:
+// import {foo} from 'foo';
+// export {foo};
+// export {bar} from 'bar';
+//
+// `export`s with URLs are syntactic sugar for an import of the symbol from the
+// URL, followed by an export of the symbol, allowing this code to treat both
+// statements more or less identically, with the exception being that `export`s
+// are sorted last.
+//
+// imports and exports support individual symbols, but also a wildcard syntax:
+// import * as prefix from 'foo';
+// export * from 'bar';
+//
+// This struct represents both exports and imports to build up the information
+// required for sorting module references.
+struct JsModuleReference {
+ bool IsExport = false;
+ // Module references are sorted into these categories, in order.
+ enum ReferenceCategory {
+ SIDE_EFFECT, // "import 'something';"
+ ABSOLUTE, // from 'something'
+ RELATIVE_PARENT, // from '../*'
+ RELATIVE, // from './*'
+ };
+ ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
+ // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
+ StringRef URL;
+ // Prefix from "import * as prefix". Empty for symbol imports and `export *`.
+ // Implies an empty names list.
+ StringRef Prefix;
+ // Symbols from `import {SymbolA, SymbolB, ...} from ...;`.
+ SmallVector<JsImportedSymbol, 1> Symbols;
+ // Textual position of the import/export, including preceding and trailing
+ // comments.
+ SourceRange Range;
+};
+
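+// Establishes the sort order of module references: imports before exports,
+// then by category (side effect, absolute, parent-relative, relative), then
+// by URL, with empty URLs (plain `export {...};`) last and `import * as
+// prefix` imports before named-symbol imports from the same URL.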
+bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
+ if (LHS.IsExport != RHS.IsExport)
+ return LHS.IsExport < RHS.IsExport;
+ if (LHS.Category != RHS.Category)
+ return LHS.Category < RHS.Category;
+ if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT)
+ // Side effect imports might be ordering sensitive. Consider them equal so
+ // that they maintain their relative order in the stable sort below.
+ // This retains transitivity because LHS.Category == RHS.Category here.
+ return false;
+ // Empty URLs sort *last* (for export {...};).
+ if (LHS.URL.empty() != RHS.URL.empty())
+ return LHS.URL.empty() < RHS.URL.empty();
+ if (LHS.URL != RHS.URL)
+ return LHS.URL < RHS.URL;
+ // '*' imports (with prefix) sort before {a, b, ...} imports.
+ if (LHS.Prefix.empty() != RHS.Prefix.empty())
+ return LHS.Prefix.empty() < RHS.Prefix.empty();
+ if (LHS.Prefix != RHS.Prefix)
+ return LHS.Prefix > RHS.Prefix;
+ return false;
+}
+
+// JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is
+// implemented as a TokenAnalyzer because ES6 imports have substantial syntactic
+// structure, making it messy to sort them using regular expressions.
+class JavaScriptImportSorter : public TokenAnalyzer {
+public:
+ JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style),
+ FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+
+ const AdditionalKeywords &Keywords = Tokens.getKeywords();
+ SmallVector<JsModuleReference, 16> References;
+ AnnotatedLine *FirstNonImportLine;
+ std::tie(References, FirstNonImportLine) =
+ parseModuleReferences(Keywords, AnnotatedLines);
+
+ if (References.empty())
+ return Result;
+
+ SmallVector<unsigned, 16> Indices;
+ for (unsigned i = 0, e = References.size(); i != e; ++i)
+ Indices.push_back(i);
+ std::stable_sort(Indices.begin(), Indices.end(),
+ [&](unsigned LHSI, unsigned RHSI) {
+ return References[LHSI] < References[RHSI];
+ });
+ // FIXME: Pull this into a common function.
+ bool OutOfOrder = false;
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ if (i != Indices[i]) {
+ OutOfOrder = true;
+ break;
+ }
+ }
+ if (!OutOfOrder)
+ return Result;
+
+ // Replace all existing import/export statements.
+ std::string ReferencesText;
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ JsModuleReference Reference = References[Indices[i]];
+ StringRef ReferenceStmt = getSourceText(Reference.Range);
+ ReferencesText += ReferenceStmt;
+ if (i + 1 < e) {
+ // Insert breaks between imports and exports.
+ ReferencesText += "\n";
+ // Separate import groups with two line breaks, but keep all exports in a
+ // single group.
+ if (!Reference.IsExport &&
+ (Reference.IsExport != References[Indices[i + 1]].IsExport ||
+ Reference.Category != References[Indices[i + 1]].Category))
+ ReferencesText += "\n";
+ }
+ }
+ // Separate references from the main code body of the file.
+ if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2)
+ ReferencesText += "\n";
+
+ SourceRange InsertionPoint = References[0].Range;
+ InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
+ DEBUG(llvm::dbgs() << "Replacing imports:\n"
+ << getSourceText(InsertionPoint) << "\nwith:\n"
+ << ReferencesText << "\n");
+ Result.insert(tooling::Replacement(
+ Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
+ ReferencesText));
+
+ return Result;
+ }
+
+private:
+ FormatToken *Current;
+ FormatToken *LineEnd;
+
+ FormatToken invalidToken;
+
+ StringRef FileContents;
+
+ void skipComments() { Current = skipComments(Current); }
+
+ FormatToken *skipComments(FormatToken *Tok) {
+ while (Tok && Tok->is(tok::comment))
+ Tok = Tok->Next;
+ return Tok;
+ }
+
+ void nextToken() {
+ Current = Current->Next;
+ skipComments();
+ if (!Current || Current == LineEnd->Next) {
+ // Set the current token to an invalid token, so that further parsing on
+ // this line fails.
+ invalidToken.Tok.setKind(tok::unknown);
+ Current = &invalidToken;
+ }
+ }
+
+ StringRef getSourceText(SourceRange Range) {
+ const SourceManager &SM = Env.getSourceManager();
+ return FileContents.substr(SM.getFileOffset(Range.getBegin()),
+ SM.getFileOffset(Range.getEnd()) -
+ SM.getFileOffset(Range.getBegin()));
+ }
+
+ // Parses module references in the given lines. Returns the module references
+ // and a pointer to the first "main code" line, if that line is adjacent to
+ // the affected lines of module references; nullptr otherwise.
+ std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *>
+ parseModuleReferences(const AdditionalKeywords &Keywords,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ SmallVector<JsModuleReference, 16> References;
+ SourceLocation Start;
+ bool FoundLines = false;
+ AnnotatedLine *FirstNonImportLine = nullptr;
+ for (auto Line : AnnotatedLines) {
+ if (!Line->Affected) {
+ // Only sort the first contiguous block of affected lines.
+ if (FoundLines)
+ break;
+ else
+ continue;
+ }
+ Current = Line->First;
+ LineEnd = Line->Last;
+ skipComments();
+ if (Start.isInvalid() || References.empty())
+ // After the first file level comment, consider line comments to be part
+ // of the import that immediately follows them by using the previously
+ // set Start.
+ Start = Line->First->Tok.getLocation();
+ if (!Current)
+ continue; // Only comments on this line.
+ FoundLines = true;
+ JsModuleReference Reference;
+ Reference.Range.setBegin(Start);
+ if (!parseModuleReference(Keywords, Reference)) {
+ FirstNonImportLine = Line;
+ break;
+ }
+ Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
+ DEBUG({
+ llvm::dbgs() << "JsModuleReference: {"
+ << "is_export: " << Reference.IsExport
+ << ", cat: " << Reference.Category
+ << ", url: " << Reference.URL
+ << ", prefix: " << Reference.Prefix;
+ for (size_t i = 0; i < Reference.Symbols.size(); ++i)
+ llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as "
+ << Reference.Symbols[i].Alias;
+ llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
+ llvm::dbgs() << "}\n";
+ });
+ References.push_back(Reference);
+ Start = SourceLocation();
+ }
+ return std::make_pair(References, FirstNonImportLine);
+ }
+
+ // Parses a JavaScript/ECMAScript 6 module reference.
+ // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules
+ // for grammar EBNF (production ModuleItem).
+ bool parseModuleReference(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
+ return false;
+ Reference.IsExport = Current->is(tok::kw_export);
+
+ nextToken();
+ if (Current->isStringLiteral() && !Reference.IsExport) {
+ // "import 'side-effect';"
+ Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT;
+ Reference.URL =
+ Current->TokenText.substr(1, Current->TokenText.size() - 2);
+ return true;
+ }
+
+ if (!parseModuleBindings(Keywords, Reference))
+ return false;
+ nextToken();
+
+ if (Current->is(Keywords.kw_from)) {
+ // Imports have a 'from' clause; exports might not.
+ nextToken();
+ if (!Current->isStringLiteral())
+ return false;
+ // URL = TokenText without the quotes.
+ Reference.URL =
+ Current->TokenText.substr(1, Current->TokenText.size() - 2);
+ if (Reference.URL.startswith(".."))
+ Reference.Category =
+ JsModuleReference::ReferenceCategory::RELATIVE_PARENT;
+ else if (Reference.URL.startswith("."))
+ Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
+ else
+ Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
+ } else {
+ // References without a 'from' clause have no URL; give them the last
+ // category so that the empty URL sorts them after everything else.
+ Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
+ }
+ return true;
+ }
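+ // For illustration, the statement shapes accepted above are:
+ //   import 'url';                    // side-effect import, no bindings
+ //   import <bindings> from 'url';    // star or named bindings
+ //   export <bindings> [from 'url'];  // the 'from' clause is optional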
+
+ bool parseModuleBindings(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (parseStarBinding(Keywords, Reference))
+ return true;
+ return parseNamedBindings(Keywords, Reference);
+ }
+
+ bool parseStarBinding(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ // * as prefix from '...';
+ if (Current->isNot(tok::star))
+ return false;
+ nextToken();
+ if (Current->isNot(Keywords.kw_as))
+ return false;
+ nextToken();
+ if (Current->isNot(tok::identifier))
+ return false;
+ Reference.Prefix = Current->TokenText;
+ return true;
+ }
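+ // For example, `import * as foo from 'a';` sets Reference.Prefix = "foo";
+ // the 'from' clause itself is consumed by parseModuleReference above.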
+
+ bool parseNamedBindings(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (Current->isNot(tok::l_brace))
+ return false;
+
+ // {sym as alias, sym2 as ...} from '...';
+ nextToken();
+ while (true) {
+ if (Current->isNot(tok::identifier))
+ return false;
+
+ JsImportedSymbol Symbol;
+ Symbol.Symbol = Current->TokenText;
+ nextToken();
+
+ if (Current->is(Keywords.kw_as)) {
+ nextToken();
+ if (Current->isNot(tok::identifier))
+ return false;
+ Symbol.Alias = Current->TokenText;
+ nextToken();
+ }
+ Reference.Symbols.push_back(Symbol);
+
+ if (Current->is(tok::r_brace))
+ return true;
+ if (Current->isNot(tok::comma))
+ return false;
+ nextToken();
+ }
+ }
+};
+
+tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ // FIXME: Cursor support.
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ JavaScriptImportSorter Sorter(*Env, Style);
+ return Sorter.process();
+}
+
+} // end namespace format
+} // end namespace clang
--- /dev/null
+//===--- SortJavaScriptImports.h - Sort ES6 Imports -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a sorter for JavaScript ES6 imports.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
+#define LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
+
+#include "clang/Basic/LLVM.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+// Sort JavaScript ES6 imports/exports in ``Code``. The generated replacements
+// only monotonically increase the length of the given code.
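+//
+// A minimal usage sketch (illustrative; ``Code`` holds the file contents and
+// the single range covers the whole file):
+//   FormatStyle Style = getGoogleStyle(FormatStyle::LK_JavaScript);
+//   std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+//   tooling::Replacements Replaces =
+//       sortJavaScriptImports(Style, Code, Ranges, "input.js");
+//   std::string Sorted = tooling::applyAllReplacements(Code, Replaces);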
+tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName);
+
+} // end namespace format
+} // end namespace clang
+
+#endif
--- /dev/null
+//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an abstract TokenAnalyzer and associated helper
+/// classes. TokenAnalyzer can be extended to generate replacements based on
+/// an annotated and pre-processed token stream.
+///
+//===----------------------------------------------------------------------===//
+
+#include "TokenAnalyzer.h"
+#include "AffectedRangeManager.h"
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "FormatTokenLexer.h"
+#include "TokenAnnotator.h"
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+// This sets up a virtual file system with a file \p FileName containing
+// \p Code.
+std::unique_ptr<Environment>
+Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges) {
+ // This is referenced by `FileMgr` and will be released when `FileMgr` itself
+ // is deleted.
+ IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+ new vfs::InMemoryFileSystem);
+ // `FileMgr` is passed to `SM` by reference, so it has to be stored in
+ // `Environment` to outlive this function's scope.
+ std::unique_ptr<FileManager> FileMgr(
+ new FileManager(FileSystemOptions(), InMemoryFileSystem));
+ // `Diagnostics` is passed to `SM` by reference as well, so it also has to be
+ // stored in `Environment` for the same reason.
+ std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
+ IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
+ new DiagnosticOptions));
+ // The `SourceManager` is stored in `Environment` as a reference, so the
+ // owning pointer has to be stored there as well, for the same reason.
+ std::unique_ptr<SourceManager> VirtualSM(
+ new SourceManager(*Diagnostics, *FileMgr));
+ InMemoryFileSystem->addFile(
+ FileName, 0, llvm::MemoryBuffer::getMemBuffer(
+ Code, FileName, /*RequiresNullTerminator=*/false));
+ FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),
+ SourceLocation(), clang::SrcMgr::C_User);
+ assert(ID.isValid());
+ SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
+ std::vector<CharSourceRange> CharRanges;
+ for (const tooling::Range &Range : Ranges) {
+ SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
+ SourceLocation End = Start.getLocWithOffset(Range.getLength());
+ CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+ }
+ return llvm::make_unique<Environment>(ID, std::move(FileMgr),
+ std::move(VirtualSM),
+ std::move(Diagnostics), CharRanges);
+}
+
+TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
+ : Style(Style), Env(Env),
+ AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
+ UnwrappedLines(1),
+ Encoding(encoding::detectEncoding(
+ Env.getSourceManager().getBufferData(Env.getFileID()))) {
+ DEBUG(
+ llvm::dbgs() << "File encoding: "
+ << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
+ << "\n");
+ DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
+ << "\n");
+}
+
+tooling::Replacements TokenAnalyzer::process() {
+ tooling::Replacements Result;
+ FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
+ Encoding);
+
+ UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this);
+ Parser.parse();
+ assert(UnwrappedLines.rbegin()->empty());
+ for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
+ DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
+ SmallVector<AnnotatedLine *, 16> AnnotatedLines;
+
+ TokenAnnotator Annotator(Style, Tokens.getKeywords());
+ for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
+ AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
+ Annotator.annotate(*AnnotatedLines.back());
+ }
+
+ tooling::Replacements RunResult =
+ analyze(Annotator, AnnotatedLines, Tokens, Result);
+
+ DEBUG({
+ llvm::dbgs() << "Replacements for run " << Run << ":\n";
+ for (tooling::Replacements::iterator I = RunResult.begin(),
+ E = RunResult.end();
+ I != E; ++I) {
+ llvm::dbgs() << I->toString() << "\n";
+ }
+ });
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ delete AnnotatedLines[i];
+ }
+ Result.insert(RunResult.begin(), RunResult.end());
+ }
+ return Result;
+}
+
+void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
+ assert(!UnwrappedLines.empty());
+ UnwrappedLines.back().push_back(TheLine);
+}
+
+void TokenAnalyzer::finishRun() {
+ UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
+}
+
+} // end namespace format
+} // end namespace clang
--- /dev/null
+//===--- TokenAnalyzer.h - Analyze Token Streams ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares an abstract TokenAnalyzer, and associated helper
+/// classes. TokenAnalyzer can be extended to generate replacements based on
+/// an annotated and pre-processed token stream.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
+#define LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
+
+#include "AffectedRangeManager.h"
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "FormatTokenLexer.h"
+#include "TokenAnnotator.h"
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+class Environment {
+public:
+ Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
+ : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
+
+ Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
+ std::unique_ptr<SourceManager> VirtualSM,
+ std::unique_ptr<DiagnosticsEngine> Diagnostics,
+ const std::vector<CharSourceRange> &CharRanges)
+ : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
+ SM(*VirtualSM), FileMgr(std::move(FileMgr)),
+ VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
+
+ // This sets up a virtual file system with a file \p FileName containing
+ // \p Code.
+ static std::unique_ptr<Environment>
+ CreateVirtualEnvironment(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges);
+
+ FileID getFileID() const { return ID; }
+
+ StringRef getFileName() const { return FileName; }
+
+ ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
+
+ const SourceManager &getSourceManager() const { return SM; }
+
+private:
+ FileID ID;
+ StringRef FileName;
+ SmallVector<CharSourceRange, 8> CharRanges;
+ SourceManager &SM;
+
+ // The order of these fields is important - they should be declared in the
+ // same order as they are created in `CreateVirtualEnvironment` so that they
+ // are destroyed in the reverse order of their creation.
+ std::unique_ptr<FileManager> FileMgr;
+ std::unique_ptr<SourceManager> VirtualSM;
+ std::unique_ptr<DiagnosticsEngine> Diagnostics;
+};
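+// For illustration, an Environment over in-memory code is typically built via
+// the factory above:
+//   std::unique_ptr<Environment> Env =
+//       Environment::CreateVirtualEnvironment(Code, FileName, Ranges);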
+
+class TokenAnalyzer : public UnwrappedLineConsumer {
+public:
+ TokenAnalyzer(const Environment &Env, const FormatStyle &Style);
+
+ tooling::Replacements process();
+
+protected:
+ virtual tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens, tooling::Replacements &Result) = 0;
+
+ void consumeUnwrappedLine(const UnwrappedLine &TheLine) override;
+
+ void finishRun() override;
+
+ FormatStyle Style;
+ // Stores the FileID, SourceManager and the character ranges to be processed.
+ const Environment &Env;
+ // AffectedRangeMgr stores ranges to be fixed.
+ AffectedRangeManager AffectedRangeMgr;
+ SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+ encoding::Encoding Encoding;
+};
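+// A minimal sketch of a TokenAnalyzer subclass (illustrative only; the class
+// below is not part of the library):
+//
+//   class NoOpAnalyzer : public TokenAnalyzer {
+//   public:
+//     NoOpAnalyzer(const Environment &Env, const FormatStyle &Style)
+//         : TokenAnalyzer(Env, Style) {}
+//
+//   protected:
+//     tooling::Replacements
+//     analyze(TokenAnnotator &Annotator,
+//             SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+//             FormatTokenLexer &Tokens,
+//             tooling::Replacements &Result) override {
+//       // Inspect the annotated lines and add replacements to Result here.
+//       return Result;
+//     }
+//   };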
+
+} // end namespace format
+} // end namespace clang
+
+#endif
FormatTestJS.cpp
FormatTestProto.cpp
FormatTestSelective.cpp
+ SortImportsTestJS.cpp
SortIncludesTest.cpp
)
--- /dev/null
+//===- unittest/Format/SortImportsTestJS.cpp - JS import sort unit tests --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatTestUtils.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Debug.h"
+#include "gtest/gtest.h"
+
+#define DEBUG_TYPE "format-test"
+
+namespace clang {
+namespace format {
+namespace {
+
+class SortImportsTestJS : public ::testing::Test {
+protected:
+ std::string sort(StringRef Code, unsigned Offset = 0, unsigned Length = 0) {
+ StringRef FileName = "input.js";
+ if (Length == 0U)
+ Length = Code.size() - Offset;
+ std::vector<tooling::Range> Ranges(1, tooling::Range(Offset, Length));
+ std::string Sorted =
+ applyAllReplacements(Code, sortIncludes(Style, Code, Ranges, FileName));
+ return applyAllReplacements(Sorted,
+ reformat(Style, Sorted, Ranges, FileName));
+ }
+
+ void verifySort(llvm::StringRef Expected, llvm::StringRef Code,
+ unsigned Offset = 0, unsigned Length = 0) {
+ std::string Result = sort(Code, Offset, Length);
+ EXPECT_EQ(Expected.str(), Result) << "Expected:\n"
+ << Expected << "\nActual:\n"
+ << Result;
+ }
+
+ FormatStyle Style = getGoogleStyle(FormatStyle::LK_JavaScript);
+};
+
+TEST_F(SortImportsTestJS, AlreadySorted) {
+ verifySort("import {sym} from 'a';\n"
+ "import {sym} from 'b';\n"
+ "import {sym} from 'c';\n"
+ "\n"
+ "let x = 1;",
+ "import {sym} from 'a';\n"
+ "import {sym} from 'b';\n"
+ "import {sym} from 'c';\n"
+ "\n"
+ "let x = 1;");
+}
+
+TEST_F(SortImportsTestJS, BasicSorting) {
+ verifySort("import {sym} from 'a';\n"
+ "import {sym} from 'b';\n"
+ "import {sym} from 'c';\n"
+ "\n"
+ "let x = 1;",
+ "import {sym} from 'a';\n"
+ "import {sym} from 'c';\n"
+ "import {sym} from 'b';\n"
+ "let x = 1;");
+}
+
+TEST_F(SortImportsTestJS, SeparateMainCodeBody) {
+ verifySort("import {sym} from 'a';"
+ "\n"
+ "let x = 1;\n",
+ "import {sym} from 'a'; let x = 1;\n");
+}
+
+TEST_F(SortImportsTestJS, Comments) {
+ verifySort("/** @fileoverview This is a great file. */\n"
+ "// A very important import follows.\n"
+ "import {sym} from 'a'; /* more comments */\n"
+ "import {sym} from 'b'; // from //foo:bar\n",
+ "/** @fileoverview This is a great file. */\n"
+ "import {sym} from 'b'; // from //foo:bar\n"
+ "// A very important import follows.\n"
+ "import {sym} from 'a'; /* more comments */\n");
+}
+
+TEST_F(SortImportsTestJS, SortStar) {
+ verifySort("import * as foo from 'a';\n"
+ "import {sym} from 'a';\n"
+ "import * as bar from 'b';\n",
+ "import {sym} from 'a';\n"
+ "import * as foo from 'a';\n"
+ "import * as bar from 'b';\n");
+}
+
+TEST_F(SortImportsTestJS, AliasesSymbols) {
+ verifySort("import {sym1 as alias1} from 'b';\n"
+ "import {sym2 as alias2, sym3 as alias3} from 'c';\n",
+ "import {sym2 as alias2, sym3 as alias3} from 'c';\n"
+ "import {sym1 as alias1} from 'b';\n");
+}
+
+TEST_F(SortImportsTestJS, GroupImports) {
+ verifySort("import {a} from 'absolute';\n"
+ "\n"
+ "import {b} from '../parent';\n"
+ "import {b} from '../parent/nested';\n"
+ "\n"
+ "import {b} from './relative/path';\n"
+ "import {b} from './relative/path/nested';\n"
+ "\n"
+ "let x = 1;\n",
+ "import {b} from './relative/path/nested';\n"
+ "import {b} from './relative/path';\n"
+ "import {b} from '../parent/nested';\n"
+ "import {b} from '../parent';\n"
+ "import {a} from 'absolute';\n"
+ "let x = 1;\n");
+}
+
+TEST_F(SortImportsTestJS, Exports) {
+ verifySort("import {S} from 'bpath';\n"
+ "\n"
+ "import {T} from './cpath';\n"
+ "\n"
+ "export {A, B} from 'apath';\n"
+ "export {P} from '../parent';\n"
+ "export {R} from './relative';\n"
+ "export {S};\n"
+ "\n"
+ "let x = 1;\n"
+ "export y = 1;\n",
+ "export {R} from './relative';\n"
+ "import {T} from './cpath';\n"
+ "export {S};\n"
+ "export {A, B} from 'apath';\n"
+ "import {S} from 'bpath';\n"
+ "export {P} from '../parent';\n"
+ "let x = 1;\n"
+ "export y = 1;\n");
+ verifySort("import {S} from 'bpath';\n"
+ "\n"
+ "export {T} from 'epath';\n",
+ "export {T} from 'epath';\n"
+ "import {S} from 'bpath';\n");
+}
+
+TEST_F(SortImportsTestJS, SideEffectImports) {
+ verifySort("import 'ZZside-effect';\n"
+ "import 'AAside-effect';\n"
+ "\n"
+ "import {A} from 'absolute';\n"
+ "\n"
+ "import {R} from './relative';\n",
+ "import {R} from './relative';\n"
+ "import 'ZZside-effect';\n"
+ "import {A} from 'absolute';\n"
+ "import 'AAside-effect';\n");
+}
+
+TEST_F(SortImportsTestJS, AffectedRange) {
+ // Sort excluding a suffix.
+ verifySort("import {sym} from 'b';\n"
+ "import {sym} from 'c';\n"
+ "import {sym} from 'a';\n"
+ "let x = 1;",
+ "import {sym} from 'c';\n"
+ "import {sym} from 'b';\n"
+ "import {sym} from 'a';\n"
+ "let x = 1;",
+ 0, 30);
+ // Sort excluding a prefix.
+ verifySort("import {sym} from 'c';\n"
+ "import {sym} from 'a';\n"
+ "import {sym} from 'b';\n"
+ "\n"
+ "let x = 1;",
+ "import {sym} from 'c';\n"
+ "import {sym} from 'b';\n"
+ "import {sym} from 'a';\n"
+ "\n"
+ "let x = 1;",
+ 30, 0);
+ // Sort a range within imports.
+ verifySort("import {sym} from 'c';\n"
+ "import {sym} from 'a';\n"
+ "import {sym} from 'b';\n"
+ "import {sym} from 'c';\n"
+ "let x = 1;",
+ "import {sym} from 'c';\n"
+ "import {sym} from 'b';\n"
+ "import {sym} from 'a';\n"
+ "import {sym} from 'c';\n"
+ "let x = 1;",
+ 24, 30);
+}
+
+} // end namespace
+} // end namespace format
+} // end namespace clang