granicus.if.org Git - clang/commitdiff
[Lex] A source-file new-line in a raw string literal results in a new-line
author: David Majnemer <david.majnemer@gmail.com>
Wed, 23 Sep 2015 16:04:47 +0000 (16:04 +0000)
committer: David Majnemer <david.majnemer@gmail.com>
Wed, 23 Sep 2015 16:04:47 +0000 (16:04 +0000)
Our string literal parser copied any source-file new-line characters
into the execution string-literal.  This is incorrect if the source-file
new-line was a \r\n sequence, because a new-line in the execution
string-literal is merely \n.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@248392 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Lex/LiteralSupport.cpp
test/CXX/lex/lex.literal/lex.string/p4.cpp [new file with mode: 0644]

index beacbc9df15cea6ffe750f85f0eb30eb230d6e48..1a1b281bd914d6e4fcf0a901d1962a42447db846 100644 (file)
@@ -1417,10 +1417,23 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
       ThisTokEnd -= ThisTokBuf - Prefix;
       assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");
 
-      // Copy the string over
-      if (CopyStringFragment(StringToks[i], ThisTokBegin,
-                             StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf)))
-        hadError = true;
+      // C++14 [lex.string]p4: A source-file new-line in a raw string literal
+      // results in a new-line in the resulting execution string-literal.
+      StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
+      while (!RemainingTokenSpan.empty()) {
+        // Split the string literal on \r\n boundaries.
+        size_t CRLFPos = RemainingTokenSpan.find("\r\n");
+        StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
+        StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
+
+        // Copy everything before the \r\n sequence into the string literal.
+        if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
+          hadError = true;
+
+        // Point into the \n inside the \r\n sequence and operate on the
+        // remaining portion of the literal.
+        RemainingTokenSpan = AfterCRLF.substr(1);
+      }
     } else {
       if (ThisTokBuf[0] != '"') {
         // The file may have come from PCH and then changed after loading the
diff --git a/test/CXX/lex/lex.literal/lex.string/p4.cpp b/test/CXX/lex/lex.literal/lex.string/p4.cpp
new file mode 100644 (file)
index 0000000..f8561ba
--- /dev/null
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s\r
+// expected-no-diagnostics\r
+\r
+// NOTE: This file intentionally uses DOS-style line endings to test\r
+// that we don't propagate them into string literals as per [lex.string]p4.\r
+\r
+constexpr const char* p = R"(a\\r
+b\r
+c)";\r
+\r
+static_assert(p[0] == 'a',  "");\r
+static_assert(p[1] == '\\', "");\r
+static_assert(p[2] == '\n', "");\r
+static_assert(p[3] == 'b',  "");\r
+static_assert(p[4] == '\n', "");\r
+static_assert(p[5] == 'c',  "");\r
+static_assert(p[6] == '\0', "");\r