/// char X[2] = "foobar";
/// In this case, getByteLength() will return 6, but the string literal will
/// have type "char[2]".
-class StringLiteral : public Expr {
+class StringLiteral final
+ : public Expr,
+ private llvm::TrailingObjects<StringLiteral, unsigned, SourceLocation,
+ char> {
+ friend class ASTStmtReader;
+ friend TrailingObjects;
+
+ /// StringLiteral is followed by several trailing objects. They are in order:
+ ///
+ /// * A single unsigned storing the length in characters of this string. The
+ /// length in bytes is this length times the width of a single character.
+ /// Always present and stored as a trailing objects because storing it in
+ /// StringLiteral would increase the size of StringLiteral by sizeof(void *)
+ /// due to alignment requirements. If you add some data to StringLiteral,
+ /// consider moving it inside StringLiteral.
+ ///
+ /// * An array of getNumConcatenated() SourceLocation, one for each of the
+ /// token this string is made of.
+ ///
+ /// * An array of getByteLength() char used to store the string data.
+
public:
enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32 };
private:
- friend class ASTStmtReader;
+ unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; }
+ unsigned numTrailingObjects(OverloadToken<SourceLocation>) const {
+ return getNumConcatenated();
+ }
- union {
- const char *asChar;
- const uint16_t *asUInt16;
- const uint32_t *asUInt32;
- } StrData;
- unsigned Length;
- unsigned CharByteWidth : 4;
- unsigned Kind : 3;
- unsigned IsPascal : 1;
- unsigned NumConcatenated;
- SourceLocation TokLocs[1];
-
- StringLiteral(QualType Ty) :
- Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary, false, false, false,
- false) {}
+ unsigned numTrailingObjects(OverloadToken<char>) const {
+ return getByteLength();
+ }
+
+ char *getStrDataAsChar() { return getTrailingObjects<char>(); }
+ const char *getStrDataAsChar() const { return getTrailingObjects<char>(); }
+
+ const uint16_t *getStrDataAsUInt16() const {
+ return reinterpret_cast<const uint16_t *>(getTrailingObjects<char>());
+ }
+
+ const uint32_t *getStrDataAsUInt32() const {
+ return reinterpret_cast<const uint32_t *>(getTrailingObjects<char>());
+ }
+
+ /// Build a string literal.
+ StringLiteral(const ASTContext &Ctx, StringRef Str, StringKind Kind,
+ bool Pascal, QualType Ty, const SourceLocation *Loc,
+ unsigned NumConcatenated);
+
+ /// Build an empty string literal.
+ StringLiteral(EmptyShell Empty, unsigned NumConcatenated, unsigned Length,
+ unsigned CharByteWidth);
/// Map a target and string kind to the appropriate character width.
static unsigned mapCharByteWidth(TargetInfo const &Target, StringKind SK);
+ /// Set one of the string literal token.
+ void setStrTokenLoc(unsigned TokNum, SourceLocation L) {
+ assert(TokNum < getNumConcatenated() && "Invalid tok number");
+ getTrailingObjects<SourceLocation>()[TokNum] = L;
+ }
+
public:
/// This is the "fully general" constructor that allows representation of
/// strings formed from multiple concatenated tokens.
- static StringLiteral *Create(const ASTContext &C, StringRef Str,
+ static StringLiteral *Create(const ASTContext &Ctx, StringRef Str,
StringKind Kind, bool Pascal, QualType Ty,
- const SourceLocation *Loc, unsigned NumStrs);
+ const SourceLocation *Loc,
+ unsigned NumConcatenated);
/// Simple constructor for string literals made from one token.
- static StringLiteral *Create(const ASTContext &C, StringRef Str,
+ static StringLiteral *Create(const ASTContext &Ctx, StringRef Str,
StringKind Kind, bool Pascal, QualType Ty,
SourceLocation Loc) {
- return Create(C, Str, Kind, Pascal, Ty, &Loc, 1);
+ return Create(Ctx, Str, Kind, Pascal, Ty, &Loc, 1);
}
/// Construct an empty string literal.
- static StringLiteral *CreateEmpty(const ASTContext &C, unsigned NumStrs);
+ static StringLiteral *CreateEmpty(const ASTContext &Ctx,
+ unsigned NumConcatenated, unsigned Length,
+ unsigned CharByteWidth);
StringRef getString() const {
- assert(CharByteWidth==1
- && "This function is used in places that assume strings use char");
- return StringRef(StrData.asChar, getByteLength());
+ assert(getCharByteWidth() == 1 &&
+ "This function is used in places that assume strings use char");
+ return StringRef(getStrDataAsChar(), getByteLength());
}
/// Allow access to clients that need the byte representation, such as
/// ASTWriterStmt::VisitStringLiteral().
StringRef getBytes() const {
// FIXME: StringRef may not be the right type to use as a result for this.
- if (CharByteWidth == 1)
- return StringRef(StrData.asChar, getByteLength());
- if (CharByteWidth == 4)
- return StringRef(reinterpret_cast<const char*>(StrData.asUInt32),
- getByteLength());
- assert(CharByteWidth == 2 && "unsupported CharByteWidth");
- return StringRef(reinterpret_cast<const char*>(StrData.asUInt16),
- getByteLength());
+ return StringRef(getStrDataAsChar(), getByteLength());
}
void outputString(raw_ostream &OS) const;
uint32_t getCodeUnit(size_t i) const {
- assert(i < Length && "out of bounds access");
- if (CharByteWidth == 1)
- return static_cast<unsigned char>(StrData.asChar[i]);
- if (CharByteWidth == 4)
- return StrData.asUInt32[i];
- assert(CharByteWidth == 2 && "unsupported CharByteWidth");
- return StrData.asUInt16[i];
+ assert(i < getLength() && "out of bounds access");
+ switch (getCharByteWidth()) {
+ case 1:
+ return static_cast<unsigned char>(getStrDataAsChar()[i]);
+ case 2:
+ return getStrDataAsUInt16()[i];
+ case 4:
+ return getStrDataAsUInt32()[i];
+ }
+ llvm_unreachable("Unsupported character width!");
}
- unsigned getByteLength() const { return CharByteWidth*Length; }
- unsigned getLength() const { return Length; }
- unsigned getCharByteWidth() const { return CharByteWidth; }
-
- /// Sets the string data to the given string data.
- void setString(const ASTContext &C, StringRef Str,
- StringKind Kind, bool IsPascal);
-
- StringKind getKind() const { return static_cast<StringKind>(Kind); }
+ unsigned getByteLength() const { return getCharByteWidth() * getLength(); }
+ unsigned getLength() const { return *getTrailingObjects<unsigned>(); }
+ unsigned getCharByteWidth() const { return StringLiteralBits.CharByteWidth; }
+ StringKind getKind() const {
+ return static_cast<StringKind>(StringLiteralBits.Kind);
+ }
- bool isAscii() const { return Kind == Ascii; }
- bool isWide() const { return Kind == Wide; }
- bool isUTF8() const { return Kind == UTF8; }
- bool isUTF16() const { return Kind == UTF16; }
- bool isUTF32() const { return Kind == UTF32; }
- bool isPascal() const { return IsPascal; }
+ bool isAscii() const { return getKind() == Ascii; }
+ bool isWide() const { return getKind() == Wide; }
+ bool isUTF8() const { return getKind() == UTF8; }
+ bool isUTF16() const { return getKind() == UTF16; }
+ bool isUTF32() const { return getKind() == UTF32; }
+ bool isPascal() const { return StringLiteralBits.IsPascal; }
bool containsNonAscii() const {
for (auto c : getString())
/// getNumConcatenated - Get the number of string literal tokens that were
/// concatenated in translation phase #6 to form this string literal.
- unsigned getNumConcatenated() const { return NumConcatenated; }
+ unsigned getNumConcatenated() const {
+ return StringLiteralBits.NumConcatenated;
+ }
+ /// Get one of the string literal token.
SourceLocation getStrTokenLoc(unsigned TokNum) const {
- assert(TokNum < NumConcatenated && "Invalid tok number");
- return TokLocs[TokNum];
- }
- void setStrTokenLoc(unsigned TokNum, SourceLocation L) {
- assert(TokNum < NumConcatenated && "Invalid tok number");
- TokLocs[TokNum] = L;
+ assert(TokNum < getNumConcatenated() && "Invalid tok number");
+ return getTrailingObjects<SourceLocation>()[TokNum];
}
/// getLocationOfByte - Return a source location that points to the specified
unsigned *StartTokenByteOffset = nullptr) const;
typedef const SourceLocation *tokloc_iterator;
- tokloc_iterator tokloc_begin() const { return TokLocs; }
- tokloc_iterator tokloc_end() const { return TokLocs + NumConcatenated; }
- SourceLocation getBeginLoc() const LLVM_READONLY { return TokLocs[0]; }
- SourceLocation getEndLoc() const LLVM_READONLY {
- return TokLocs[NumConcatenated - 1];
+ tokloc_iterator tokloc_begin() const {
+ return getTrailingObjects<SourceLocation>();
+ }
+
+ tokloc_iterator tokloc_end() const {
+ return getTrailingObjects<SourceLocation>() + getNumConcatenated();
}
+ SourceLocation getBeginLoc() const LLVM_READONLY { return *tokloc_begin(); }
+ SourceLocation getEndLoc() const LLVM_READONLY { return *(tokloc_end() - 1); }
+
static bool classof(const Stmt *T) {
return T->getStmtClass() == StringLiteralClass;
}
unsigned IsExact : 1;
};
+ class StringLiteralBitfields {
+ friend class ASTStmtReader;
+ friend class StringLiteral;
+
+ unsigned : NumExprBits;
+
+ /// The kind of this string literal.
+ /// One of the enumeration values of StringLiteral::StringKind.
+ unsigned Kind : 3;
+
+ /// The width of a single character in bytes. Only values of 1, 2,
+ /// and 4 bytes are supported. StringLiteral::mapCharByteWidth maps
+ /// the target + string kind to the appropriate CharByteWidth.
+ unsigned CharByteWidth : 3;
+
+ unsigned IsPascal : 1;
+
+ /// The number of concatenated token this string is made of.
+ /// This is the number of trailing SourceLocation.
+ unsigned NumConcatenated;
+ };
+
class CharacterLiteralBitfields {
friend class CharacterLiteral;
PredefinedExprBitfields PredefinedExprBits;
DeclRefExprBitfields DeclRefExprBits;
FloatingLiteralBitfields FloatingLiteralBits;
+ StringLiteralBitfields StringLiteralBits;
CharacterLiteralBitfields CharacterLiteralBits;
UnaryOperatorBitfields UnaryOperatorBits;
UnaryExprOrTypeTraitExprBitfields UnaryExprOrTypeTraitExprBits;
return CharByteWidth;
}
-StringLiteral *StringLiteral::Create(const ASTContext &C, StringRef Str,
- StringKind Kind, bool Pascal, QualType Ty,
- const SourceLocation *Loc,
- unsigned NumStrs) {
- assert(C.getAsConstantArrayType(Ty) &&
+StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
+ StringKind Kind, bool Pascal, QualType Ty,
+ const SourceLocation *Loc,
+ unsigned NumConcatenated)
+ : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary, false, false, false,
+ false) {
+ assert(Ctx.getAsConstantArrayType(Ty) &&
"StringLiteral must be of constant array type!");
+ unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
+ unsigned ByteLength = Str.size();
+ assert((ByteLength % CharByteWidth == 0) &&
+ "The size of the data must be a multiple of CharByteWidth!");
+
+ // Avoid the expensive division. The compiler should be able to figure it
+ // out by itself. However as of clang 7, even with the appropriate
+ // llvm_unreachable added just here, it is not able to do so.
+ unsigned Length;
+ switch (CharByteWidth) {
+ case 1:
+ Length = ByteLength;
+ break;
+ case 2:
+ Length = ByteLength / 2;
+ break;
+ case 4:
+ Length = ByteLength / 4;
+ break;
+ default:
+ llvm_unreachable("Unsupported character width!");
+ }
- // Allocate enough space for the StringLiteral plus an array of locations for
- // any concatenated string tokens.
- void *Mem =
- C.Allocate(sizeof(StringLiteral) + sizeof(SourceLocation) * (NumStrs - 1),
- alignof(StringLiteral));
- StringLiteral *SL = new (Mem) StringLiteral(Ty);
+ StringLiteralBits.Kind = Kind;
+ StringLiteralBits.CharByteWidth = CharByteWidth;
+ StringLiteralBits.IsPascal = Pascal;
+ StringLiteralBits.NumConcatenated = NumConcatenated;
+ *getTrailingObjects<unsigned>() = Length;
- // OPTIMIZE: could allocate this appended to the StringLiteral.
- SL->setString(C,Str,Kind,Pascal);
+ // Initialize the trailing array of SourceLocation.
+ // This is safe since SourceLocation is POD-like.
+ std::memcpy(getTrailingObjects<SourceLocation>(), Loc,
+ NumConcatenated * sizeof(SourceLocation));
- SL->TokLocs[0] = Loc[0];
- SL->NumConcatenated = NumStrs;
+ // Initialize the trailing array of char holding the string data.
+ std::memcpy(getTrailingObjects<char>(), Str.data(), ByteLength);
+}
- if (NumStrs != 1)
- memcpy(&SL->TokLocs[1], Loc+1, sizeof(SourceLocation)*(NumStrs-1));
- return SL;
+StringLiteral::StringLiteral(EmptyShell Empty, unsigned NumConcatenated,
+ unsigned Length, unsigned CharByteWidth)
+ : Expr(StringLiteralClass, Empty) {
+ StringLiteralBits.CharByteWidth = CharByteWidth;
+ StringLiteralBits.NumConcatenated = NumConcatenated;
+ *getTrailingObjects<unsigned>() = Length;
}
-StringLiteral *StringLiteral::CreateEmpty(const ASTContext &C,
- unsigned NumStrs) {
- void *Mem =
- C.Allocate(sizeof(StringLiteral) + sizeof(SourceLocation) * (NumStrs - 1),
- alignof(StringLiteral));
- StringLiteral *SL =
- new (Mem) StringLiteral(C.adjustStringLiteralBaseType(QualType()));
- SL->CharByteWidth = 0;
- SL->Length = 0;
- SL->NumConcatenated = NumStrs;
- return SL;
+StringLiteral *StringLiteral::Create(const ASTContext &Ctx, StringRef Str,
+ StringKind Kind, bool Pascal, QualType Ty,
+ const SourceLocation *Loc,
+ unsigned NumConcatenated) {
+ void *Mem = Ctx.Allocate(totalSizeToAlloc<unsigned, SourceLocation, char>(
+ 1, NumConcatenated, Str.size()),
+ alignof(StringLiteral));
+ return new (Mem)
+ StringLiteral(Ctx, Str, Kind, Pascal, Ty, Loc, NumConcatenated);
+}
+
+StringLiteral *StringLiteral::CreateEmpty(const ASTContext &Ctx,
+ unsigned NumConcatenated,
+ unsigned Length,
+ unsigned CharByteWidth) {
+ void *Mem = Ctx.Allocate(totalSizeToAlloc<unsigned, SourceLocation, char>(
+ 1, NumConcatenated, Length * CharByteWidth),
+ alignof(StringLiteral));
+ return new (Mem)
+ StringLiteral(EmptyShell(), NumConcatenated, Length, CharByteWidth);
}
void StringLiteral::outputString(raw_ostream &OS) const {
OS << '"';
}
-void StringLiteral::setString(const ASTContext &C, StringRef Str,
- StringKind Kind, bool IsPascal) {
- //FIXME: we assume that the string data comes from a target that uses the same
- // code unit size and endianness for the type of string.
- this->Kind = Kind;
- this->IsPascal = IsPascal;
-
- CharByteWidth = mapCharByteWidth(C.getTargetInfo(),Kind);
- assert((Str.size()%CharByteWidth == 0)
- && "size of data must be multiple of CharByteWidth");
- Length = Str.size()/CharByteWidth;
-
- switch(CharByteWidth) {
- case 1: {
- char *AStrData = new (C) char[Length];
- std::memcpy(AStrData,Str.data(),Length*sizeof(*AStrData));
- StrData.asChar = AStrData;
- break;
- }
- case 2: {
- uint16_t *AStrData = new (C) uint16_t[Length];
- std::memcpy(AStrData,Str.data(),Length*sizeof(*AStrData));
- StrData.asUInt16 = AStrData;
- break;
- }
- case 4: {
- uint32_t *AStrData = new (C) uint32_t[Length];
- std::memcpy(AStrData,Str.data(),Length*sizeof(*AStrData));
- StrData.asUInt32 = AStrData;
- break;
- }
- default:
- llvm_unreachable("unsupported CharByteWidth");
- }
-}
-
/// getLocationOfByte - Return a source location that points to the specified
/// byte of this string literal.
///
void ASTStmtReader::VisitStringLiteral(StringLiteral *E) {
VisitExpr(E);
- unsigned Len = Record.readInt();
- assert(Record.peekInt() == E->getNumConcatenated() &&
- "Wrong number of concatenated tokens!");
- Record.skipInts(1);
- auto kind = static_cast<StringLiteral::StringKind>(Record.readInt());
- bool isPascal = Record.readInt();
- // Read string data
- auto B = &Record.peekInt();
- SmallString<16> Str(B, B + Len);
- E->setString(Record.getContext(), Str, kind, isPascal);
- Record.skipInts(Len);
-
- // Read source locations
- for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
+ // NumConcatenated, Length and CharByteWidth are set by the empty
+ // ctor since they are needed to allocate storage for the trailing objects.
+ unsigned NumConcatenated = Record.readInt();
+ unsigned Length = Record.readInt();
+ unsigned CharByteWidth = Record.readInt();
+ assert((NumConcatenated == E->getNumConcatenated()) &&
+ "Wrong number of concatenated tokens!");
+ assert((Length == E->getLength()) && "Wrong Length!");
+ assert((CharByteWidth == E->getCharByteWidth()) && "Wrong character width!");
+ E->StringLiteralBits.Kind = Record.readInt();
+ E->StringLiteralBits.IsPascal = Record.readInt();
+
+ // The character width is originally computed via mapCharByteWidth.
+ // Check that the deserialized character width is consistant with the result
+ // of calling mapCharByteWidth.
+ assert((CharByteWidth ==
+ StringLiteral::mapCharByteWidth(Record.getContext().getTargetInfo(),
+ E->getKind())) &&
+ "Wrong character width!");
+
+ // Deserialize the trailing array of SourceLocation.
+ for (unsigned I = 0; I < NumConcatenated; ++I)
E->setStrTokenLoc(I, ReadSourceLocation());
+
+ // Deserialize the trailing array of char holding the string data.
+ char *StrData = E->getStrDataAsChar();
+ for (unsigned I = 0; I < Length * CharByteWidth; ++I)
+ StrData[I] = Record.readInt();
}
void ASTStmtReader::VisitCharacterLiteral(CharacterLiteral *E) {
break;
case EXPR_STRING_LITERAL:
- S = StringLiteral::CreateEmpty(Context,
- Record[ASTStmtReader::NumExprFields + 1]);
+ S = StringLiteral::CreateEmpty(
+ Context,
+ /* NumConcatenated=*/Record[ASTStmtReader::NumExprFields + 0],
+ /* Length=*/Record[ASTStmtReader::NumExprFields + 1],
+ /* CharByteWidth=*/Record[ASTStmtReader::NumExprFields + 2]);
break;
case EXPR_CHARACTER_LITERAL:
void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) {
VisitExpr(E);
- Record.push_back(E->getByteLength());
+
+ // Store the various bits of data of StringLiteral.
Record.push_back(E->getNumConcatenated());
+ Record.push_back(E->getLength());
+ Record.push_back(E->getCharByteWidth());
Record.push_back(E->getKind());
Record.push_back(E->isPascal());
- // FIXME: String data should be stored as a blob at the end of the
- // StringLiteral. However, we can't do so now because we have no
- // provision for coping with abbreviations when we're jumping around
- // the AST file during deserialization.
- Record.append(E->getBytes().begin(), E->getBytes().end());
+
+ // Store the trailing array of SourceLocation.
for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
Record.AddSourceLocation(E->getStrTokenLoc(I));
+
+ // Store the trailing array of char holding the string data.
+ StringRef StrData = E->getBytes();
+ for (unsigned I = 0, N = E->getByteLength(); I != N; ++I)
+ Record.push_back(StrData[I]);
+
Code = serialization::EXPR_STRING_LITERAL;
}