def err_conflict_marker : Error<"version control conflict marker in file">;
+def err_raw_delim_too_long : Error<
+ "raw string delimiter longer than 16 characters"
+ "; use PREFIX( )PREFIX to delimit raw string">;
+def err_invalid_char_raw_delim : Error<
+ "invalid character '%0' character in raw string delimiter"
+ "; use PREFIX( )PREFIX to delimit raw string">;
+def err_unterminated_raw_string : Error<
+ "raw string missing terminating delimiter )%0\"">;
+
def ext_multichar_character_literal : ExtWarn<
"multi-character character constant">, InGroup<MultiChar>;
def ext_four_char_character_literal : Extension<
void LexNumericConstant (Token &Result, const char *CurPtr);
void LexStringLiteral (Token &Result, const char *CurPtr,
tok::TokenKind Kind);
+ void LexRawStringLiteral (Token &Result, const char *CurPtr,
+ tok::TokenKind Kind);
void LexAngledStringLiteral(Token &Result, const char *CurPtr);
void LexCharConstant (Token &Result, const char *CurPtr,
tok::TokenKind Kind);
private:
void init(const Token *StringToks, unsigned NumStringToks);
+ void CopyStringFragment(const StringRef &Fragment);
};
} // end namespace clang
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
+#include <cstring>
using namespace clang;
static void InitCharacterInfo();
CHAR_LETTER = 0x04, // a-z,A-Z
CHAR_NUMBER = 0x08, // 0-9
CHAR_UNDER = 0x10, // _
- CHAR_PERIOD = 0x20 // .
+ CHAR_PERIOD = 0x20, // .
+ CHAR_RAWDEL = 0x40 // {}[]#<>%:;?*+-/^&|~!=,"'
};
// Statically initialize CharInfo table based on ASCII character set
0 , 0 , 0 , 0 ,
//32 SP 33 ! 34 " 35 #
//36 $ 37 % 38 & 39 '
- CHAR_HORZ_WS, 0 , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
+ CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
+ 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
//40 ( 41 ) 42 * 43 +
//44 , 45 - 46 . 47 /
- 0 , 0 , 0 , 0 ,
- 0 , 0 , CHAR_PERIOD , 0 ,
+ 0 , 0 , CHAR_RAWDEL , CHAR_RAWDEL ,
+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
//48 0 49 1 50 2 51 3
//52 4 53 5 54 6 55 7
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
//56 8 57 9 58 : 59 ;
//60 < 61 = 62 > 63 ?
- CHAR_NUMBER , CHAR_NUMBER , 0 , 0 ,
- 0 , 0 , 0 , 0 ,
+ CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
//64 @ 65 A 66 B 67 C
//68 D 69 E 70 F 71 G
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
//88 X 89 Y 90 Z 91 [
//92 \ 93 ] 94 ^ 95 _
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
- 0 , 0 , 0 , CHAR_UNDER ,
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
+ 0 , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER ,
//96 ` 97 a 98 b 99 c
//100 d 101 e 102 f 103 g
0 , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
//120 x 121 y 122 z 123 {
-//124 | 125 } 126 ~ 127 DEL
- CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0 ,
- 0 , 0 , 0 , 0
+//124 | 125 } 126 ~ 127 DEL
+ CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
+ CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
};
static void InitCharacterInfo() {
true : false;
}
+/// isRawStringDelimBody - Return true if this is the body character of a
+/// raw string delimiter.
+static inline bool isRawStringDelimBody(unsigned char c) {
+ return (CharInfo[c] &
+ (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL)) ?
+ true : false;
+}
+
//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
Result.setLiteralData(TokStart);
}
+/// LexRawStringLiteral - Lex the remainder of a raw string literal, after
+/// having lexed R", LR", u8R", uR", or UR".
+void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
+ tok::TokenKind Kind) {
+ // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
+ // Between the initial and final double quote characters of the raw string,
+ // any transformations performed in phases 1 and 2 (trigraphs,
+ // universal-character-names, and line splicing) are reverted.
+
+ unsigned PrefixLen = 0;
+
+ while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
+ ++PrefixLen;
+
+ // If the last character was not a '(', then we didn't lex a valid delimiter.
+ if (CurPtr[PrefixLen] != '(') {
+ if (!isLexingRawMode()) {
+ const char *PrefixEnd = &CurPtr[PrefixLen];
+ if (PrefixLen == 16) {
+ Diag(PrefixEnd, diag::err_raw_delim_too_long);
+ } else {
+ Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
+ << StringRef(PrefixEnd, 1);
+ }
+ }
+
+ // Search for the next '"' in hopes of salvaging the lexer. Unfortunately,
+ // it's possible the '"' was intended to be part of the raw string, but
+ // there's not much we can do about that.
+ while (1) {
+ char C = *CurPtr++;
+
+ if (C == '"')
+ break;
+ if (C == 0 && CurPtr-1 == BufferEnd) {
+ --CurPtr;
+ break;
+ }
+ }
+
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return;
+ }
+
+ // Save prefix and move CurPtr past it
+ const char *Prefix = CurPtr;
+ CurPtr += PrefixLen + 1; // skip over prefix and '('
+
+ while (1) {
+ char C = *CurPtr++;
+
+ if (C == ')') {
+ // Check for prefix match and closing quote.
+ if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] == '"') {
+ CurPtr += PrefixLen + 1; // skip over prefix and '"'
+ break;
+ }
+ } else if (C == 0 && CurPtr-1 == BufferEnd) { // End of file.
+ if (!isLexingRawMode())
+ Diag(BufferPtr, diag::err_unterminated_raw_string)
+ << StringRef(Prefix, PrefixLen);
+ FormTokenWithChars(Result, CurPtr-1, tok::unknown);
+ return;
+ }
+ }
+
+ // Update the location of token as well as BufferPtr.
+ const char *TokStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, Kind);
+ Result.setLiteralData(TokStart);
+}
+
/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
/// after having lexed the '<' character. This is used for #include filenames.
void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::utf16_char_constant);
- // UTF-8 string literal
- if (Char == '8' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
- return LexStringLiteral(Result,
- ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result),
- tok::utf8_string_literal);
+ // UTF-16 raw string literal
+ if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf16_string_literal);
+
+ if (Char == '8') {
+ char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
+
+ // UTF-8 string literal
+ if (Char2 == '"')
+ return LexStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf8_string_literal);
+
+ if (Char2 == 'R') {
+ unsigned SizeTmp3;
+ char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
+ // UTF-8 raw string literal
+ if (Char3 == '"') {
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ SizeTmp3, Result),
+ tok::utf8_string_literal);
+ }
+ }
+ }
}
// treat u like the start of an identifier.
if (Char == '\'')
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::utf32_char_constant);
+
+ // UTF-32 raw string literal
+ if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::utf32_string_literal);
}
// treat U like the start of an identifier.
return LexIdentifier(Result, CurPtr);
+ case 'R': // Identifier or C++0x raw string literal
+ // Notify MIOpt that we read a non-whitespace/non-comment token.
+ MIOpt.ReadToken();
+
+ if (Features.CPlusPlus0x) {
+ Char = getCharAndSize(CurPtr, SizeTmp);
+
+ if (Char == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(CurPtr, SizeTmp, Result),
+ tok::string_literal);
+ }
+
+ // treat R like the start of an identifier.
+ return LexIdentifier(Result, CurPtr);
+
case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
tok::wide_string_literal);
+ // Wide raw string literal.
+ if (Features.CPlusPlus0x && Char == 'R' &&
+ getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
+ return LexRawStringLiteral(Result,
+ ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result),
+ tok::wide_string_literal);
+
// Wide character constant.
if (Char == '\'')
return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
// C99 6.4.2: Identifiers.
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
- case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': /*'U'*/
+ case 'O': case 'P': case 'Q': /*'R'*/case 'S': case 'T': /*'U'*/
case 'V': case 'W': case 'X': case 'Y': case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
}
+/// character-literal: [C++0x lex.ccon]
+/// ' c-char-sequence '
+/// u' c-char-sequence '
+/// U' c-char-sequence '
+/// L' c-char-sequence '
+/// c-char-sequence:
+/// c-char
+/// c-char-sequence c-char
+/// c-char:
+/// any member of the source character set except the single-quote ',
+/// backslash \, or new-line character
+/// escape-sequence
+/// universal-character-name
+/// escape-sequence: [C++0x lex.ccon]
+/// simple-escape-sequence
+/// octal-escape-sequence
+/// hexadecimal-escape-sequence
+/// simple-escape-sequence:
+/// one of \’ \" \? \\ \a \b \f \n \r \t \v
+/// octal-escape-sequence:
+/// \ octal-digit
+/// \ octal-digit octal-digit
+/// \ octal-digit octal-digit octal-digit
+/// hexadecimal-escape-sequence:
+/// \x hexadecimal-digit
+/// hexadecimal-escape-sequence hexadecimal-digit
+/// universal-character-name:
+/// \u hex-quad
+/// \U hex-quad hex-quad
+/// hex-quad:
+/// hex-digit hex-digit hex-digit hex-digit
+///
CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
SourceLocation Loc, Preprocessor &PP,
tok::TokenKind kind) {
}
-/// string-literal: [C99 6.4.5]
-/// " [s-char-sequence] "
-/// L" [s-char-sequence] "
+/// string-literal: [C++0x lex.string]
+/// encoding-prefix " [s-char-sequence] "
+/// encoding-prefix R raw-string
+/// encoding-prefix:
+/// u8
+/// u
+/// U
+/// L
/// s-char-sequence:
/// s-char
/// s-char-sequence s-char
/// s-char:
-/// any source character except the double quote ",
-/// backslash \, or newline character
-/// escape-character
-/// universal-character-name
-/// escape-character: [C99 6.4.4.4]
-/// \ escape-code
+/// any member of the source character set except the double-quote ",
+/// backslash \, or new-line character
+/// escape-sequence
/// universal-character-name
-/// escape-code:
-/// character-escape-code
-/// octal-escape-code
-/// hex-escape-code
-/// character-escape-code: one of
-/// n t b r f v a
-/// \ ' " ?
-/// octal-escape-code:
-/// octal-digit
-/// octal-digit octal-digit
-/// octal-digit octal-digit octal-digit
-/// hex-escape-code:
-/// x hex-digit
-/// hex-escape-code hex-digit
+/// raw-string:
+/// " d-char-sequence ( r-char-sequence ) d-char-sequence "
+/// r-char-sequence:
+/// r-char
+/// r-char-sequence r-char
+/// r-char:
+/// any member of the source character set, except a right parenthesis )
+/// followed by the initial d-char-sequence (which may be empty)
+/// followed by a double quote ".
+/// d-char-sequence:
+/// d-char
+/// d-char-sequence d-char
+/// d-char:
+/// any member of the basic source character set except:
+/// space, the left parenthesis (, the right parenthesis ),
+/// the backslash \, and the control characters representing horizontal
+/// tab, vertical tab, form feed, and newline.
+/// escape-sequence: [C++0x lex.ccon]
+/// simple-escape-sequence
+/// octal-escape-sequence
+/// hexadecimal-escape-sequence
+/// simple-escape-sequence:
+/// one of \’ \" \? \\ \a \b \f \n \r \t \v
+/// octal-escape-sequence:
+/// \ octal-digit
+/// \ octal-digit octal-digit
+/// \ octal-digit octal-digit octal-digit
+/// hexadecimal-escape-sequence:
+/// \x hexadecimal-digit
+/// hexadecimal-escape-sequence hexadecimal-digit
/// universal-character-name:
/// \u hex-quad
/// \U hex-quad hex-quad
++ThisTokBuf;
}
- assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
- ++ThisTokBuf;
+ // Check for raw string
+ if (ThisTokBuf[0] == 'R') {
+ ThisTokBuf += 2; // skip R"
- // Check if this is a pascal string
- if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
- ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
-
- // If the \p sequence is found in the first token, we have a pascal string
- // Otherwise, if we already have a pascal string, ignore the first \p
- if (i == 0) {
+ const char *Prefix = ThisTokBuf;
+ while (ThisTokBuf[0] != '(')
++ThisTokBuf;
- Pascal = true;
- } else if (Pascal)
- ThisTokBuf += 2;
- }
+ ++ThisTokBuf; // skip '('
+
+ // remove same number of characters from the end
+ if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
+ ThisTokEnd -= (ThisTokBuf - Prefix);
+
+ // Copy the string over
+ CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
+ } else {
+ assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+ ++ThisTokBuf; // skip "
+
+ // Check if this is a pascal string
+ if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
+ ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
- while (ThisTokBuf != ThisTokEnd) {
- // Is this a span of non-escape characters?
- if (ThisTokBuf[0] != '\\') {
- const char *InStart = ThisTokBuf;
- do {
+ // If the \p sequence is found in the first token, we have a pascal string
+ // Otherwise, if we already have a pascal string, ignore the first \p
+ if (i == 0) {
++ThisTokBuf;
- } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
-
- // Copy the character span over.
- unsigned Len = ThisTokBuf-InStart;
- if (CharByteWidth == 1) {
- memcpy(ResultPtr, InStart, Len);
- ResultPtr += Len;
- } else {
- // Note: our internal rep of wide char tokens is always little-endian.
- for (; Len; --Len, ++InStart) {
- *ResultPtr++ = InStart[0];
- // Add zeros at the end.
- for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
- *ResultPtr++ = 0;
- }
- }
- continue;
+ Pascal = true;
+ } else if (Pascal)
+ ThisTokBuf += 2;
}
- // Is this a Universal Character Name escape?
- if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
- EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
- hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
- CharByteWidth, Diags, Features);
- continue;
- }
- // Otherwise, this is a non-UCN escape character. Process it.
- unsigned ResultChar =
- ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
- FullSourceLoc(StringToks[i].getLocation(), SM),
- CharByteWidth*8, Diags);
- // Note: our internal rep of wide char tokens is always little-endian.
- *ResultPtr++ = ResultChar & 0xFF;
+ while (ThisTokBuf != ThisTokEnd) {
+ // Is this a span of non-escape characters?
+ if (ThisTokBuf[0] != '\\') {
+ const char *InStart = ThisTokBuf;
+ do {
+ ++ThisTokBuf;
+ } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+
+ // Copy the character span over.
+ CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
+ continue;
+ }
+ // Is this a Universal Character Name escape?
+ if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
+ EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
+ hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
+ CharByteWidth, Diags, Features);
+ continue;
+ }
+ // Otherwise, this is a non-UCN escape character. Process it.
+ unsigned ResultChar =
+ ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
+ FullSourceLoc(StringToks[i].getLocation(), SM),
+ CharByteWidth*8, Diags);
+
+ // Note: our internal rep of wide char tokens is always little-endian.
+ *ResultPtr++ = ResultChar & 0xFF;
- for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
- *ResultPtr++ = ResultChar >> i*8;
+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
+ *ResultPtr++ = ResultChar >> i*8;
+ }
}
}
}
+/// copyStringFragment - This function copies from Start to End into ResultPtr.
+/// Performs widening for multi-byte characters.
+void StringLiteralParser::CopyStringFragment(const StringRef &Fragment) {
+ // Copy the character span over.
+ if (CharByteWidth == 1) {
+ memcpy(ResultPtr, Fragment.data(), Fragment.size());
+ ResultPtr += Fragment.size();
+ } else {
+ // Note: our internal rep of wide char tokens is always little-endian.
+ for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
+ *ResultPtr++ = *I;
+ // Add zeros at the end.
+ for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
+ *ResultPtr++ = 0;
+ }
+ }
+}
+
+
/// getOffsetOfStringByte - This function returns the offset of the
/// specified byte of the string data represented by Token. This handles
/// advancing over escape sequences in the string.
using namespace clang;
+/// IsStringPrefix - Return true if Str is a string prefix.
+/// 'L', 'u', 'U', or 'u8'. Including raw versions.
+static bool IsStringPrefix(const StringRef &Str, bool CPlusPlus0x) {
+
+ if (Str[0] == 'L' ||
+ (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
+
+ if (Str.size() == 1)
+ return true; // "L", "u", "U", and "R"
+
+ // Check for raw flavors. Need to make sure the first character wasn't
+ // already R. Need CPlusPlus0x check for "LR".
+ if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)
+ return true; // "LR", "uR", "UR"
+
+ // Check for "u8" and "u8R"
+ if (Str[0] == 'u' && Str[1] == '8') {
+ if (Str.size() == 2) return true; // "u8"
+ if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R"
+ }
+ }
+
+ return false;
+}
+
/// IsIdentifierStringPrefix - Return true if the spelling of the token
-/// is literally 'L', 'u', 'U', or 'u8'.
+/// is literally 'L', 'u', 'U', or 'u8'. Including raw versions.
bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
const LangOptions &LangOpts = PP.getLangOptions();
if (!Tok.needsCleaning()) {
- if (Tok.getLength() != 1 && Tok.getLength() != 2)
+ if (Tok.getLength() < 1 || Tok.getLength() > 3)
return false;
SourceManager &SM = PP.getSourceManager();
const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
- if (Tok.getLength() == 1)
- return Ptr[0] == 'L' ||
- (LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
- if (Tok.getLength() == 2)
- return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
+ return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
+ LangOpts.CPlusPlus0x);
}
if (Tok.getLength() < 256) {
char Buffer[256];
const char *TokPtr = Buffer;
unsigned length = PP.getSpelling(Tok, TokPtr);
- if (length == 1)
- return TokPtr[0] == 'L' ||
- (LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
- if (length == 2)
- return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
- return false;
+ return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
}
- std::string TokStr = PP.getSpelling(Tok);
- return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
- TokStr == "u" ||
- TokStr == "U"));
+ return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
}
TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
-// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
+// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s
#include <stddef.h>
// CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
const char *g = u8"def";
+
+ // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"ghi\00", align 1
+ const char *h = R"foo(ghi)foo";
+
+ // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"jkl\00", align 1
+ const char *i = u8R"bar(jkl)bar";
+
+ // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"G\00H\00\00\00", align 2
+ const char16_t *j = uR"foo(GH)foo";
+
+ // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"I\00\00\00J\00\00\00\00\00\00\00", align 4
+ const char32_t *k = UR"bar(IJ)bar";
+
+ // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"K\00\00\00L\00\00\00\00\00\00\00", align 4
+ const wchar_t *l = LR"bar(KL)bar";
+
+ // CHECK-CPP0X: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1
+ const char *m = R"(abc\ndef)";
+
+ // CHECK-CPP0X: private unnamed_addr constant [8 x i8] c"abc\0Adef\00", align 1
+ const char *n = R"(abc
+def)";
+
#endif
}
--- /dev/null
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
+
+const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
+
+const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
--- /dev/null
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
+
+const char *str = R"foo(abc
+def)bar";
+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
+
+const char *str = R"foo(abc
+def)bar";
ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
char32_t *Ustr;
Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
+
+ char *Rstr;
+ Rstr = "a raw string"; // expected-warning{{conversion from string literal to 'char *' is deprecated}}
+ wchar_t *LRstr;
+ LRstr = LR"foo(a wide raw string)foo"; // expected-warning{{conversion from string literal to 'wchar_t *' is deprecated}}
+ char *u8Rstr;
+ u8Rstr = u8R"foo(a UTF-8 raw string)foo"; // expected-error {{assigning to 'char *' from incompatible type 'const char [19]'}}
+ char16_t *uRstr;
+ uRstr = uR"foo(a UTF-16 raw string)foo"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [20]'}}
+ char32_t *URstr;
+ URstr = UR"foo(a UTF-32 raw string)foo"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [20]'}}
}