[llvm-rc] Have the tokenizer discard single & block comments.

author Zachary Turner <zturner@google.com>

Mon, 9 Oct 2017 15:46:13 +0000 (15:46 +0000)

committer Zachary Turner <zturner@google.com>

Mon, 9 Oct 2017 15:46:13 +0000 (15:46 +0000)
author Zachary Turner <zturner@google.com>
Mon, 9 Oct 2017 15:46:13 +0000 (15:46 +0000)
committer Zachary Turner <zturner@google.com>
Mon, 9 Oct 2017 15:46:13 +0000 (15:46 +0000)
diff --git a/test/tools/llvm-rc/Inputs/tokens.rc b/test/tools/llvm-rc/Inputs/tokens.rc

index 20619149bb0253da28d851b41157d8010c8dbbdd..217d6017a9d7435387e0aa7036a991059943068e 100644 (file)
--- a/test/tools/llvm-rc/Inputs/tokens.rc
+++ b/test/tools/llvm-rc/Inputs/tokens.rc
@@ -3,6 +3,14 @@ He11o LLVM
  
  "RC string test.",L"Another RC string test.'&{",42,100
  
+Block Comment Ident /*block /* // comment */ ifier
  
+Line Comment // Identifier /*
+
+/* Multi line
+   block
+   comment */
+
+Multiple /* comments */ on /* a */ single // line
  
           ":))"
diff --git a/test/tools/llvm-rc/tokenizer.test b/test/tools/llvm-rc/tokenizer.test

index 08c01a2fe73c9c35571d5f5c5086c9dab94a6f88..99cd0f24b500340ba4aa789ae00afbcaa8fd697e 100644 (file)
--- a/test/tools/llvm-rc/tokenizer.test
+++ b/test/tools/llvm-rc/tokenizer.test
@@ -34,4 +34,13 @@
  ; CHECK-NEXT:  Int: 42; int value = 42
  ; CHECK-NEXT:  Comma: ,
  ; CHECK-NEXT:  Int: 100; int value = 100
+; CHECK-NEXT:  Identifier: Block
+; CHECK-NEXT:  Identifier: Comment
+; CHECK-NEXT:  Identifier: Ident
+; CHECK-NEXT:  Identifier: ifier
+; CHECK-NEXT:  Identifier: Line
+; CHECK-NEXT:  Identifier: Comment
+; CHECK-NEXT:  Identifier: Multiple
+; CHECK-NEXT:  Identifier: on
+; CHECK-NEXT:  Identifier: single
  ; CHECK-NEXT:  String: ":))"
diff --git a/tools/llvm-rc/ResourceScriptToken.cpp b/tools/llvm-rc/ResourceScriptToken.cpp

index 061070b479e0a8816602205676f0246e2dbf5c08..5a3473a4b083ca3b5f69652e2b2455fb00de468c 100644 (file)
--- a/tools/llvm-rc/ResourceScriptToken.cpp
+++ b/tools/llvm-rc/ResourceScriptToken.cpp
@@ -121,6 +121,17 @@ private:
  
    bool canStartString() const;
  
+  // Check if tokenizer can start reading a single line comment (i.e. a comment
+  // that begins with '//')
+  bool canStartLineComment() const;
+
+  // Check if tokenizer can start reading a block comment (i.e. a comment that
+  // begins with '/*' and ends with '*/')
+  bool canStartBlockComment() const;
+
+  // Throw away the rest of the current line, including its line break(s).
+  void skipCurrentLine();
+
    bool streamEof() const;
  
    // Classify the token that is about to be read from the current position.
@@ -134,6 +145,14 @@ private:
    size_t DataLength, Pos;
  };
  
+void Tokenizer::skipCurrentLine() {
+  Pos = Data.find_first_of("\r\n", Pos); // Jump to the next CR/LF, if any.
+  Pos = Data.find_first_not_of("\r\n", Pos); // Then past the run of CR/LF.
+
+  if (Pos == StringRef::npos) // No further character found: end of input.
+    Pos = DataLength;
+}
+
  Expected<std::vector<RCToken>> Tokenizer::run() {
    Pos = 0;
    std::vector<RCToken> Result;
@@ -154,6 +173,10 @@ Expected<std::vector<RCToken>> Tokenizer::run() {
      if (Error TokenError = consumeToken(TokenKind))
        return std::move(TokenError);
  
+    // Comments are just deleted, don't bother saving them.
+    if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment)
+      continue;
+
      RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart));
      if (TokenKind == Kind::Identifier) {
        processIdentifier(Token);
@@ -195,6 +218,21 @@ Error Tokenizer::consumeToken(const Kind TokenKind) {
      advance();
      return Error::success();
  
+  case Kind::LineComment:
+    advance(2);
+    skipCurrentLine();
+    return Error::success();
+
+  case Kind::StartComment: {
+    advance(2);
+    auto EndPos = Data.find("*/", Pos);
+    if (EndPos == StringRef::npos)
+      return getStringError(
+          "Unclosed multi-line comment beginning at position " + Twine(Pos));
+    advance(EndPos - Pos);
+    advance(2);
+    return Error::success();
+  }
    case Kind::Identifier:
      while (!streamEof() && canContinueIdentifier())
        advance();
@@ -259,6 +297,16 @@ bool Tokenizer::canStartInt() const {
    return std::isdigit(Data[Pos]);
  }
  
+bool Tokenizer::canStartBlockComment() const {
+  assert(!streamEof()); // Must not be called at end of input.
+  return Data.drop_front(Pos).startswith("/*"); // Tests the opener only.
+}
+
+bool Tokenizer::canStartLineComment() const {
+  assert(!streamEof()); // Must not be called at end of input.
+  return Data.drop_front(Pos).startswith("//"); // Remaining input at Pos.
+}
+
  bool Tokenizer::canContinueInt() const {
    assert(!streamEof());
    return std::isalnum(Data[Pos]);
@@ -271,6 +319,11 @@ bool Tokenizer::canStartString() const {
  bool Tokenizer::streamEof() const { return Pos == DataLength; }
  
  Kind Tokenizer::classifyCurrentToken() const {
+  if (canStartBlockComment())
+    return Kind::StartComment;
+  if (canStartLineComment())
+    return Kind::LineComment;
+
    if (canStartInt())
      return Kind::Int;
    if (canStartString())
diff --git a/tools/llvm-rc/ResourceScriptTokenList.h b/tools/llvm-rc/ResourceScriptTokenList.h

index f8d7303e7a8af467d9a8a9a8ce09c17ffb5b38a0..2a7e15f93321efdd7a8f8cbe7646cdd584ac5aa9 100644 (file)
--- a/tools/llvm-rc/ResourceScriptTokenList.h
+++ b/tools/llvm-rc/ResourceScriptTokenList.h
@@ -18,6 +18,8 @@ TOKEN(Invalid)      // Invalid token. Should not occur in a valid script.
  TOKEN(Int)          // Integer (decimal, octal or hexadecimal).
  TOKEN(String)       // String value.
  TOKEN(Identifier)   // Script identifier (resource name or type).
+TOKEN(LineComment)  // Beginning of single-line comment.
+TOKEN(StartComment) // Beginning of multi-line comment.
  
  // Short tokens. They usually consist of exactly one character.
  // The definitions are of the form SHORT_TOKEN(TokenName, TokenChar).
author	Zachary Turner <zturner@google.com>
	Mon, 9 Oct 2017 15:46:13 +0000 (15:46 +0000)
committer	Zachary Turner <zturner@google.com>
	Mon, 9 Oct 2017 15:46:13 +0000 (15:46 +0000)
test/tools/llvm-rc/Inputs/tokens.rc		patch \| blob \| history
test/tools/llvm-rc/tokenizer.test		patch \| blob \| history
tools/llvm-rc/ResourceScriptToken.cpp		patch \| blob \| history
tools/llvm-rc/ResourceScriptTokenList.h		patch \| blob \| history