Lexing support for user-defined literals. Currently these lex as the same token kinds as the underlying literals.

author Richard Smith <richard-llvm@metafoo.co.uk>

Mon, 5 Mar 2012 04:02:15 +0000 (04:02 +0000)

committer Richard Smith <richard-llvm@metafoo.co.uk>

Mon, 5 Mar 2012 04:02:15 +0000 (04:02 +0000)
author Richard Smith <richard-llvm@metafoo.co.uk>
Mon, 5 Mar 2012 04:02:15 +0000 (04:02 +0000)
committer Richard Smith <richard-llvm@metafoo.co.uk>
Mon, 5 Mar 2012 04:02:15 +0000 (04:02 +0000)
diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td

index 12f23cea501252d5d082f2d3eb7801b3106c9721..96edbe040ffc10592927b898fe91dd3a36f28a93 100644 (file)
--- a/include/clang/Basic/DiagnosticLexKinds.td
+++ b/include/clang/Basic/DiagnosticLexKinds.td
@@ -133,6 +133,9 @@ def warn_cxx98_compat_unicode_literal : Warning<
    InGroup<CXX98Compat>, DefaultIgnore;
  def err_unsupported_string_concat : Error<
    "unsupported non-standard concatenation of string literals">;
+def err_string_concat_mixed_suffix : Error<
+  "differing user-defined suffixes ('%0' and '%1') in string literal "
+  "concatenation">;
  def err_bad_string_encoding : Error<
    "illegal character encoding in string literal">;
  def warn_bad_string_encoding : ExtWarn<
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h

index bad9844f66b2dda5f4d629b9a3c991e126850d88..df303949471a45b554579875bc6618ef724460db 100644 (file)
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -530,6 +530,8 @@ private:
    // Other lexer functions.
  
    void SkipBytes(unsigned Bytes, bool StartOfLine);
+
+  const char *LexUDSuffix(Token &Result, const char *CurPtr);
    
    // Helper functions to lex the remainder of a token of the specific type.
    void LexIdentifier         (Token &Result, const char *CurPtr);
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h

index 6142f006c56c83dcf531cef93add223dd583d0b9..90ca58bff9daa679346409f7b6e420b27b6466c3 100644 (file)
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -128,6 +128,7 @@ class CharLiteralParser {
    tok::TokenKind Kind;
    bool IsMultiChar;
    bool HadError;
+  SmallString<32> UDSuffixBuf;
  public:
    CharLiteralParser(const char *begin, const char *end,
                      SourceLocation Loc, Preprocessor &PP,
@@ -140,6 +141,7 @@ public:
    bool isUTF32() const { return Kind == tok::utf32_char_constant; }
    bool isMultiChar() const { return IsMultiChar; }
    uint64_t getValue() const { return Value; }
+  StringRef getUDSuffix() const { return UDSuffixBuf; }
  };
  
  /// StringLiteralParser - This decodes string escape characters and performs
@@ -157,6 +159,7 @@ class StringLiteralParser {
    tok::TokenKind Kind;
    SmallString<512> ResultBuf;
    char *ResultPtr; // cursor
+  SmallString<32> UDSuffixBuf;
  public:
    StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                        Preprocessor &PP, bool Complain = true);
@@ -196,6 +199,8 @@ public:
    bool isUTF32() const { return Kind == tok::utf32_string_literal; }
    bool isPascal() const { return Pascal; }
  
+  StringRef getUDSuffix() const { return UDSuffixBuf; }
+
  private:
    void init(const Token *StringToks, unsigned NumStringToks);
    bool CopyStringFragment(StringRef Fragment);
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp

index c9f73764c9272d405e33b4350fcd13dcf62bc850..2b24d1cc75e9196abb6563ba8dd480cdd97a239c 100644 (file)
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1078,6 +1078,12 @@ static void InitCharacterInfo() {
  }
  
  
+/// isIdentifierHead - Return true if this is the first character of an
+/// identifier, which is [a-zA-Z_].
+static inline bool isIdentifierHead(unsigned char c) {
+  return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
+}
+
  /// isIdentifierBody - Return true if this is the body character of an
  /// identifier, which is [a-zA-Z0-9_].
  static inline bool isIdentifierBody(unsigned char c) {
@@ -1543,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
    unsigned Size;
    char C = getCharAndSize(CurPtr, Size);
    char PrevCh = 0;
-  while (isNumberBody(C)) { // FIXME: UCNs?
+  while (isNumberBody(C)) { // FIXME: UCNs.
      CurPtr = ConsumeChar(CurPtr, Size, Result);
      PrevCh = C;
      C = getCharAndSize(CurPtr, Size);
@@ -1567,6 +1573,23 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
    Result.setLiteralData(TokStart);
  }
  
+/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
+/// in C++11.
+const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
+  assert(getFeatures().CPlusPlus0x && "ud-suffix only exists in C++11");
+
+  // Maximally munch an identifier. FIXME: UCNs.
+  unsigned Size;
+  char C = getCharAndSize(CurPtr, Size);
+  if (isIdentifierHead(C)) {
+    do {
+      CurPtr = ConsumeChar(CurPtr, Size, Result);
+      C = getCharAndSize(CurPtr, Size);
+    } while (isIdentifierBody(C));
+  }
+  return CurPtr;
+}
+
  /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
  /// either " or L" or u8" or u" or U".
  void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
@@ -1606,6 +1629,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
      C = getAndAdvanceChar(CurPtr, Result);
    }
  
+  // If we are in C++11, lex the optional ud-suffix.
+  if (getFeatures().CPlusPlus0x)
+    CurPtr = LexUDSuffix(Result, CurPtr);
+
    // If a nul character existed in the string, warn about it.
    if (NulCharacter && !isLexingRawMode())
      Diag(NulCharacter, diag::null_in_string);
@@ -1685,6 +1712,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
      }
    }
  
+  // If we are in C++11, lex the optional ud-suffix.
+  if (getFeatures().CPlusPlus0x)
+    CurPtr = LexUDSuffix(Result, CurPtr);
+
    // Update the location of token as well as BufferPtr.
    const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, Kind);
@@ -1768,6 +1799,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
      C = getAndAdvanceChar(CurPtr, Result);
    }
  
+  // If we are in C++11, lex the optional ud-suffix.
+  if (getFeatures().CPlusPlus0x)
+    CurPtr = LexUDSuffix(Result, CurPtr);
+
    // If a nul character existed in the character, warn about it.
    if (NulCharacter && !isLexingRawMode())
      Diag(NulCharacter, diag::null_in_char);
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp

index 547bd4e0c84ea55bc38b3b4ee76ad54d8f3cb4a4..e3ff77f4f040f568b2bfe7dff54744af4662543a 100644 (file)
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -731,7 +731,11 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
  }
  
  
-///       character-literal: [C++0x lex.ccon]
+///       user-defined-character-literal: [C++11 lex.ext]
+///         character-literal ud-suffix
+///       ud-suffix:
+///         identifier
+///       character-literal: [C++11 lex.ccon]
  ///         ' c-char-sequence '
  ///         u' c-char-sequence '
  ///         U' c-char-sequence '
@@ -744,7 +748,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
  ///           backslash \, or new-line character
  ///         escape-sequence
  ///         universal-character-name
-///       escape-sequence: [C++0x lex.ccon]
+///       escape-sequence:
  ///         simple-escape-sequence
  ///         octal-escape-sequence
  ///         hexadecimal-escape-sequence
@@ -757,7 +761,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
  ///       hexadecimal-escape-sequence:
  ///         \x hexadecimal-digit
  ///         hexadecimal-escape-sequence hexadecimal-digit
-///       universal-character-name:
+///       universal-character-name: [C++11 lex.charset]
  ///         \u hex-quad
  ///         \U hex-quad hex-quad
  ///       hex-quad:
@@ -780,8 +784,17 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
    assert(begin[0] == '\'' && "Invalid token lexed");
    ++begin;
  
+  // Remove an optional ud-suffix.
+  if (end[-1] != '\'') {
+    const char *UDSuffixEnd = end;
+    do {
+      --end;
+    } while (end[-1] != '\'');
+    UDSuffixBuf.assign(end, UDSuffixEnd);
+  }
+
    // Trim the ending quote.
-  assert(end[-1] == '\'' && "Invalid token lexed");
+  assert(end != begin && "Invalid token lexed");
    --end;
  
    // FIXME: The "Value" is an uint64_t so we can handle char literals of
@@ -1071,6 +1084,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
  
    Pascal = false;
  
+  SourceLocation UDSuffixTokLoc;
+
    for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
      const char *ThisTokBuf = &TokenBuf[0];
      // Get the spelling of the token, which eliminates trigraphs, etc.  We know
@@ -1085,7 +1100,39 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
        continue;
      }
  
-    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
+
+    // Remove an optional ud-suffix.
+    if (ThisTokEnd[-1] != '"') {
+      const char *UDSuffixEnd = ThisTokEnd;
+      do {
+        --ThisTokEnd;
+      } while (ThisTokEnd[-1] != '"');
+
+      StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
+
+      if (UDSuffixBuf.empty()) {
+        UDSuffixBuf.assign(UDSuffix);
+        UDSuffixTokLoc = StringToks[i].getLocation();
+      } else if (!UDSuffixBuf.equals(UDSuffix)) {
+        // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
+        // result of a concatenation involving at least one user-defined-string-
+        // literal, all the participating user-defined-string-literals shall
+        // have the same ud-suffix.
+        if (Diags) {
+          SourceLocation TokLoc = StringToks[i].getLocation();
+          Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+            << UDSuffixBuf << UDSuffix
+            << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+            << SourceRange(TokLoc, TokLoc);
+        }
+        hadError = true;
+      }
+    }
+
+    // Strip the end quote.
+    --ThisTokEnd;
+
      // TODO: Input character set mapping support.
  
      // Skip marker for wide or unicode strings.
diff --git a/test/CXX/lex/lex.literal/lex.ext/p1.cpp b/test/CXX/lex/lex.literal/lex.ext/p1.cpp

index 39812280c0901bdcfde672c8396ad4cc17b437f0..c167e822ebdf8bbe5a3bdfe37faab96a7b899896 100644 (file)
--- a/test/CXX/lex/lex.literal/lex.ext/p1.cpp
+++ b/test/CXX/lex/lex.literal/lex.ext/p1.cpp
@@ -1,7 +1,7 @@
  // RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s
  
-int * operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}}
-long double operator "" _p31(long double);
+void operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}}
+void operator "" _p31(long double);
  long double operator "" pi(long double); // expected-warning{{user-defined literals not starting with '_' are reserved by the implementation}}
  
  float hexfloat = 0x1p31; // allow hexfloats
diff --git a/test/CXX/over/over.oper/over.literal/p5.cpp b/test/CXX/over/over.oper/over.literal/p5.cpp

index 7a782fca91ece99c9c9596f0759de4de476438e6..66f3f97eaac3703f14a8f2727815d6b9cbe46ec7 100644 (file)
--- a/test/CXX/over/over.oper/over.literal/p5.cpp
+++ b/test/CXX/over/over.oper/over.literal/p5.cpp
@@ -7,9 +7,13 @@ template<char...> void operator "" _a();
  template<char... C> S<C...> operator "" _a();
  
  template<typename T> struct U {
+  friend int operator "" _a(const char *, size_t);
    // FIXME: It's not entirely clear whether this is intended to be legal.
    friend U operator "" _a(const T *, size_t); // expected-error {{parameter}}
  };
+template<char...> struct V {
+  friend void operator "" _b(); // expected-error {{parameter}}
+};
  
  template<char... C, int N = 0> void operator "" _b(); // expected-error {{parameter}}
  template<char... C> void operator "" _b(int N = 0); // expected-error {{parameter}}
diff --git a/test/CXX/over/over.oper/over.literal/p8.cpp b/test/CXX/over/over.oper/over.literal/p8.cpp

index fe94b5348b5026e5da463ce64a81cb960802f33b..69d4e761e559e586f5d421f27dc3c415169992a3 100644 (file)
--- a/test/CXX/over/over.oper/over.literal/p8.cpp
+++ b/test/CXX/over/over.oper/over.literal/p8.cpp
@@ -9,8 +9,8 @@ void operator "" _km(long double); // ok
  string operator "" _i18n(const char*, std::size_t); // ok
  // FIXME: This should be accepted once we support UCNs
  template<char...> int operator "" \u03C0(); // ok, UCN for lowercase pi // expected-error {{expected identifier}}
-// FIXME: This should be rejected once we lex user-defined literal suffices
-float operator ""E(const char *); // expected-warning {{hexfloat}}
+// FIXME: Accept this as an extension, with a fix-it to add the space
+float operator ""E(const char *); // expected-error {{must be '""'}} expected-error {{expected identifier}}
  float operator " " B(const char *); // expected-error {{must be '""'}} expected-warning {{hexfloat}}
  string operator "" 5X(const char *, std::size_t); // expected-error {{expected identifier}}
  double operator "" _miles(double); // expected-error {{parameter}}
diff --git a/test/SemaCXX/cxx98-compat.cpp b/test/SemaCXX/cxx98-compat.cpp

index 8c15f5adc1e2a7ba8dae9bc65787cee4fe20c5c6..e9ba0dffc3c69496556daf871f58b2076b6e564b 100644 (file)
--- a/test/SemaCXX/cxx98-compat.cpp
+++ b/test/SemaCXX/cxx98-compat.cpp
@@ -50,7 +50,7 @@ int InitList() {
    return { 0 }; // expected-warning {{generalized initializer lists are incompatible with C++98}}
  }
  
-int operator""_hello(const char *); // expected-warning {{literal operators are incompatible with C++98}}
+int operator"" _hello(const char *); // expected-warning {{literal operators are incompatible with C++98}}
  
  enum EnumFixed : int { // expected-warning {{enumeration types with a fixed underlying type are incompatible with C++98}}
  };
author	Richard Smith <richard-llvm@metafoo.co.uk>
	Mon, 5 Mar 2012 04:02:15 +0000 (04:02 +0000)
committer	Richard Smith <richard-llvm@metafoo.co.uk>
	Mon, 5 Mar 2012 04:02:15 +0000 (04:02 +0000)
include/clang/Basic/DiagnosticLexKinds.td		patch \| blob \| history
include/clang/Lex/Lexer.h		patch \| blob \| history
include/clang/Lex/LiteralSupport.h		patch \| blob \| history
lib/Lex/Lexer.cpp		patch \| blob \| history
lib/Lex/LiteralSupport.cpp		patch \| blob \| history
test/CXX/lex/lex.literal/lex.ext/p1.cpp		patch \| blob \| history
test/CXX/over/over.oper/over.literal/p5.cpp		patch \| blob \| history
test/CXX/over/over.oper/over.literal/p8.cpp		patch \| blob \| history
test/SemaCXX/cxx98-compat.cpp		patch \| blob \| history