Comment parsing: don't parse comment marker followed by a digit as a command

author Dmitri Gribenko <gribozavr@gmail.com>

Fri, 14 Sep 2012 16:35:35 +0000 (16:35 +0000)

committer Dmitri Gribenko <gribozavr@gmail.com>

Fri, 14 Sep 2012 16:35:35 +0000 (16:35 +0000)
author Dmitri Gribenko <gribozavr@gmail.com>
Fri, 14 Sep 2012 16:35:35 +0000 (16:35 +0000)
committer Dmitri Gribenko <gribozavr@gmail.com>
Fri, 14 Sep 2012 16:35:35 +0000 (16:35 +0000)
diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp

index fde2c40fa592a484d04b57e0a34aeaf7b7f02340..31a09f71d993c643e4b8b80ad1edf26a74c3a11f 100644 (file)
--- a/lib/AST/CommentLexer.cpp
+++ b/lib/AST/CommentLexer.cpp
@@ -226,6 +226,11 @@ bool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
    return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
  }
  
+bool isCommandNameStartCharacter(char C) {
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z');
+}
+
  bool isCommandNameCharacter(char C) {
    return (C >= 'a' && C <= 'z') ||
           (C >= 'A' && C <= 'Z') ||
@@ -340,7 +345,7 @@ void Lexer::lexCommentText(Token &T) {
          }
  
          // Don't make zero-length commands.
-        if (!isCommandNameCharacter(*TokenPtr)) {
+        if (!isCommandNameStartCharacter(*TokenPtr)) {
            formTextToken(T, TokenPtr);
            return;
          }
diff --git a/unittests/AST/CommentLexer.cpp b/unittests/AST/CommentLexer.cpp

index 2ec741ba3d75f4e0f02e9a42b553221759bdb69e..cc4535a163c4e83506f4f6cc7079b97abb533419 100644 (file)
--- a/unittests/AST/CommentLexer.cpp
+++ b/unittests/AST/CommentLexer.cpp
@@ -322,7 +322,35 @@ TEST_F(CommentLexerTest, DoxygenCommand4) {
    }
  }
  
+// A command marker followed by a non-letter that is not a part of an escape
+// sequence.
  TEST_F(CommentLexerTest, DoxygenCommand5) {
+  const char *Source = "/// \\^ \\0";
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(6U, Toks.size());
+
+  ASSERT_EQ(tok::text,       Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
+
+  ASSERT_EQ(tok::text,       Toks[1].getKind());
+  ASSERT_EQ(StringRef("\\"), Toks[1].getText());
+
+  ASSERT_EQ(tok::text,       Toks[2].getKind());
+  ASSERT_EQ(StringRef("^ "), Toks[2].getText());
+
+  ASSERT_EQ(tok::text,       Toks[3].getKind());
+  ASSERT_EQ(StringRef("\\"), Toks[3].getText());
+
+  ASSERT_EQ(tok::text,       Toks[4].getKind());
+  ASSERT_EQ(StringRef("0"),  Toks[4].getText());
+
+  ASSERT_EQ(tok::newline,    Toks[5].getKind());
+}
+
+TEST_F(CommentLexerTest, DoxygenCommand6) {
    const char *Source = "/// \\brief Aaa.";
    std::vector<Token> Toks;
  
@@ -342,7 +370,7 @@ TEST_F(CommentLexerTest, DoxygenCommand5) {
    ASSERT_EQ(tok::newline,       Toks[3].getKind());
  }
  
-TEST_F(CommentLexerTest, DoxygenCommand6) {
+TEST_F(CommentLexerTest, DoxygenCommand7) {
    const char *Source = "/// \\em\\em \\em\t\\em\n";
    std::vector<Token> Toks;
  
@@ -374,7 +402,7 @@ TEST_F(CommentLexerTest, DoxygenCommand6) {
    ASSERT_EQ(tok::newline,    Toks[7].getKind());
  }
  
-TEST_F(CommentLexerTest, DoxygenCommand7) {
+TEST_F(CommentLexerTest, DoxygenCommand8) {
    const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
    std::vector<Token> Toks;
  
@@ -406,7 +434,7 @@ TEST_F(CommentLexerTest, DoxygenCommand7) {
    ASSERT_EQ(tok::newline,     Toks[7].getKind());
  }
  
-TEST_F(CommentLexerTest, DoxygenCommand8) {
+TEST_F(CommentLexerTest, DoxygenCommand9) {
    const char *Source = "// \\c\n";
    std::vector<Token> Toks;
author	Dmitri Gribenko <gribozavr@gmail.com>
	Fri, 14 Sep 2012 16:35:35 +0000 (16:35 +0000)
committer	Dmitri Gribenko <gribozavr@gmail.com>
	Fri, 14 Sep 2012 16:35:35 +0000 (16:35 +0000)
lib/AST/CommentLexer.cpp		patch | blob | history
unittests/AST/CommentLexer.cpp		patch | blob | history