From: Chris Lattner <sabre@nondot.org>
Date: Fri, 16 Jan 2009 22:39:25 +0000 (+0000)
Subject: Fix PR2477 - clang misparses "//*" in C89 mode
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8402c73dd880e8af46c826d873681820aebe32eb;p=clang

Fix PR2477 - clang misparses "//*" in C89 mode


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62368 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 0eb439a27b..fa29d0a38e 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1300,7 +1300,8 @@ LexNextToken:
     
     // If the next token is obviously a // or /* */ comment, skip it efficiently
     // too (without going through the big switch stmt).
-    if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode()) {
+    if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
+        Features.BCPLComment) {
       SkipBCPLComment(Result, CurPtr+2);
       goto SkipIgnoredUnits;
     } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
@@ -1480,18 +1481,32 @@ LexNextToken:
     // 6.4.9: Comments
     Char = getCharAndSize(CurPtr, SizeTmp);
     if (Char == '/') {         // BCPL comment.
-      if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
-        return; // KeepCommentMode
+      // Even if BCPL comments are disabled (e.g. in C89 mode), we generally
+      // want to lex this as a comment.  There is one problem with this though,
+      // that in one particular corner case, this can change the behavior of the
+      // resultant program.  For example, In  "foo //**/ bar", C89 would lex
+      // this as "foo / bar" and langauges with BCPL comments would lex it as
+      // "foo".  Check to see if the character after the second slash is a '*'.
+      // If so, we will lex that as a "/" instead of the start of a comment.
+      if (Features.BCPLComment ||
+          getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*') {
+        if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+          return; // KeepCommentMode
       
-      // It is common for the tokens immediately after a // comment to be
-      // whitespace (indentation for the next line).  Instead of going through
-      // the big switch, handle it efficiently now.
-      goto SkipIgnoredUnits;
-    } else if (Char == '*') {  // /**/ comment.
+        // It is common for the tokens immediately after a // comment to be
+        // whitespace (indentation for the next line).  Instead of going through
+        // the big switch, handle it efficiently now.
+        goto SkipIgnoredUnits;
+      }
+    }
+      
+    if (Char == '*') {  // /**/ comment.
       if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
         return; // KeepCommentMode
       goto LexNextToken;   // GCC isn't tail call eliminating.
-    } else if (Char == '=') {
+    }
+      
+    if (Char == '=') {
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
       Kind = tok::slashequal;
     } else {
diff --git a/test/Lexer/c90.c b/test/Lexer/c90.c
index 4bbddaf38b..d76c73ddc3 100644
--- a/test/Lexer/c90.c
+++ b/test/Lexer/c90.c
@@ -4,3 +4,10 @@
 enum { cast_hex = (long) (
       0x0p-1   /* expected-error {{hexadecimal floating constants are a C99 feature}} */
      ) };
+
+/* PR2477 */
+int test1(int a,int b) {return a//* This is a divide followed by block comment in c89 mode */
+b;}
+
+// comment accepted as extension    /* expected-error {{// comments are not allowed in this language}}
+