FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
const FormatStyle &Style,
encoding::Encoding Encoding)
- : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
- LessStashed(false), Column(0), TrailingWhitespace(0),
- SourceMgr(SourceMgr), ID(ID), Style(Style),
- IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
- Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
- MacroBlockBeginRegex(Style.MacroBlockBegin),
+ : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
+ Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+ Style(Style), IdentTable(getFormattingLangOpts(Style)),
+ Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
+ FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
MacroBlockEndRegex(Style.MacroBlockEnd) {
Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
getFormattingLangOpts(Style)));
Tokens.push_back(getNextToken());
if (Style.Language == FormatStyle::LK_JavaScript) {
tryParseJSRegexLiteral();
- tryParseTemplateString();
+ handleTemplateStrings();
}
tryMergePreviousTokens();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
-void FormatTokenLexer::tryParseTemplateString() {
+void FormatTokenLexer::handleTemplateStrings() {
+ // Inspects the most recently lexed token. A '{' pushes a NORMAL state; a
+ // '`' opens a template string; a '}' that returns to TEMPLATE_STRING resumes
+ // one. When inside a template string, the raw buffer is scanned up to the
+ // closing '`' or the next '${', and the lexer is reset past that point.
FormatToken *BacktickToken = Tokens.back();
- if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+
+ if (BacktickToken->is(tok::l_brace)) {
+ StateStack.push(LexerState::NORMAL);
return;
+ }
+ if (BacktickToken->is(tok::r_brace)) {
+ // An unbalanced '}' must not pop the base state: calling top() on an
+ // empty std::stack is undefined behavior.
+ if (StateStack.size() <= 1)
+ return;
+ StateStack.pop();
+ if (StateStack.top() != LexerState::TEMPLATE_STRING)
+ return;
+ // If back in TEMPLATE_STRING, fall through and continue lexing the
+ // remainder of the template string that follows this '}'.
+ } else if (BacktickToken->is(tok::unknown) &&
+ BacktickToken->TokenText == "`") {
+ StateStack.push(LexerState::TEMPLATE_STRING);
+ } else {
+ return; // Not actually a template
+ }
// 'Manually' lex ahead in the current file buffer.
const char *Offset = Lex->getBufferLocation();
const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
- for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
- if (*Offset == '\\')
+ for (; Offset != Lex->getBuffer().end(); ++Offset) {
+ if (Offset[0] == '`') {
+ // Closing backtick: leave the TEMPLATE_STRING state.
+ StateStack.pop();
+ break;
+ }
+ // Only skip an escaped character when one exists; otherwise the loop's
+ // own increment would step past the end of the buffer.
+ if (Offset[0] == '\\' && Offset + 1 != Lex->getBuffer().end()) {
++Offset; // Skip the escaped character.
+ } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
+ Offset[1] == '{') {
+ // '${' introduces an expression interpolation in the template string.
+ StateStack.push(LexerState::NORMAL);
+ ++Offset;
+ break;
+ }
}
StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
Style.TabWidth, Encoding);
}
- resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+ // An unterminated template string leaves Offset at the end of the buffer;
+ // resume lexing at end-of-file instead of one past the last character.
+ SourceLocation loc = Offset < Lex->getBuffer().end()
+ ? Lex->getSourceLocation(Offset + 1)
+ : SourceMgr.getLocForEndOfFile(ID);
+ resetLexer(SourceMgr.getFileOffset(loc));
}
bool FormatTokenLexer::tryMerge_TMacro() {
}
FormatToken *FormatTokenLexer::getNextToken() {
- if (GreaterStashed) {
- GreaterStashed = false;
- return getStashedToken();
- }
- if (LessStashed) {
- LessStashed = false;
+ if (StateStack.top() == LexerState::TOKEN_STASHED) {
+ StateStack.pop();
return getStashedToken();
}
} else if (FormatTok->Tok.is(tok::greatergreater)) {
FormatTok->Tok.setKind(tok::greater);
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- GreaterStashed = true;
+ StateStack.push(LexerState::TOKEN_STASHED);
} else if (FormatTok->Tok.is(tok::lessless)) {
FormatTok->Tok.setKind(tok::less);
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- LessStashed = true;
+ StateStack.push(LexerState::TOKEN_STASHED);
}
// Now FormatTok is the next non-whitespace token.
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
+#include <stack>
+
namespace clang {
namespace format {
+enum LexerState { // Lexing contexts kept on FormatTokenLexer's StateStack.
+ NORMAL, // Initial state; also pushed for '{' and for '${' in a template.
+ TEMPLATE_STRING, // Pushed when a '`' token opens a template string.
+ TOKEN_STASHED, // Pushed after splitting '>>'/'<<'; popped by getNextToken().
+};
+
class FormatTokenLexer {
public:
FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
// its text if successful.
void tryParseJSRegexLiteral();
- void tryParseTemplateString();
+ // Handles JavaScript template strings.
+ //
+ // JavaScript template strings use backticks ('`') as delimiters, and allow
+ // embedding expressions nested in ${expr-here}. Template strings can be
+ // nested recursively, i.e. expressions can contain template strings in turn.
+ //
+ // The code below parses starting from a backtick, up to a closing backtick or
+ // an opening ${. It also maintains a stack of lexing contexts to handle
+ // nested template parts by balancing curly braces.
+ void handleTemplateStrings();
bool tryMerge_TMacro();
FormatToken *FormatTok;
bool IsFirstToken;
- bool GreaterStashed, LessStashed;
+ std::stack<LexerState> StateStack;
unsigned Column;
unsigned TrailingWhitespace;
std::unique_ptr<Lexer> Lex;
if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
TT_FunctionLBrace, TT_ImplicitStringLiteral,
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
- TT_RegexLiteral))
+ TT_RegexLiteral, TT_TemplateString))
CurrentToken->Type = TT_Unknown;
CurrentToken->Role.reset();
CurrentToken->MatchingParen = nullptr;
return 100;
if (Left.is(TT_JsTypeColon))
return 35;
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return 100;
}
if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
} else if (Style.Language == FormatStyle::LK_JavaScript) {
if (Left.is(TT_JsFatArrow))
return true;
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return false;
+ if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
+ return false;
if (Right.is(tok::star) &&
Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
return false;