#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <algorithm>
using namespace clang;
MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine &Diags)
: MicrosoftMangleContext(Context, Diags) {}
bool shouldMangleCXXName(const NamedDecl *D) override;
+ bool shouldMangleStringLiteral(const StringLiteral *SL) override;
void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override;
void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD, raw_ostream &) override;
void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk,
void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) override;
void mangleDynamicAtExitDestructor(const VarDecl *D,
raw_ostream &Out) override;
+ void mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) override;
bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) {
// Lambda closure types are already numbered.
if (isLambda(ND))
return true;
}
+bool
+MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral *SL) {
+ return SL->isAscii() || SL->isWide();
+ // TODO: This needs to be updated when MSVC gains support for Unicode
+ // literals.
+}
+
void MicrosoftCXXNameMangler::mangle(const NamedDecl *D,
StringRef Prefix) {
// MSVC doesn't mangle C++ names the same way it mangles extern "C" names.
mangleInitFiniStub(D, Out, 'F');
}
+void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
+ raw_ostream &Out) {
+ // <char-type> ::= 0 # char
+ // ::= 1 # wchar_t
+ // ::= ??? # char16_t/char32_t will need a mangling too...
+ //
+ // <literal-length> ::= <non-negative integer> # the length of the literal
+ //
+ // <encoded-crc> ::= <hex digit>+ @ # crc of the literal including
+ // # null-terminator
+ //
+ // <encoded-string> ::= <simple character> # uninteresting character
+ // ::= '?$' <hex digit> <hex digit> # these two nibbles
+ // # encode the byte for the
+ // # character
+ // ::= '?' [a-z] # \xe1 - \xfa
+ // ::= '?' [A-Z] # \xc1 - \xda
+ // ::= '?' [0-9] # [,/\:. \n\t'-]
+ //
+ // <literal> ::= '??_C@_' <char-type> <literal-length> <encoded-crc>
+ // <encoded-string> '@'
+ MicrosoftCXXNameMangler Mangler(*this, Out);
+ Mangler.getStream() << "\01??_C@_";
+
+ // <char-type>: The "kind" of string literal is encoded into the mangled name.
+ // TODO: This needs to be updated when MSVC gains support for unicode
+ // literals.
+ if (SL->isAscii())
+ Mangler.getStream() << '0';
+ else if (SL->isWide())
+ Mangler.getStream() << '1';
+ else
+ llvm_unreachable("unexpected string literal kind!");
+
+ // <literal-length>: The next part of the mangled name consists of the length
+ // of the string.
+ // The StringLiteral does not consider the NUL terminator byte(s) but the
+ // mangling does.
+ // N.B. The length is in terms of bytes, not characters.
+ Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth());
+
+ // We will use the "Rocksoft^tm Model CRC Algorithm" to describe the
+ // properties of our CRC:
+ // Width : 32
+ // Poly : 04C11DB7
+ // Init : FFFFFFFF
+ // RefIn : True
+ // RefOut : True
+ // XorOut : 00000000
+ // Check : 340BC6D9
+ uint32_t CRC = 0xFFFFFFFFU;
+
+ auto UpdateCRC = [&CRC](char Byte) {
+ for (unsigned i = 0; i < 8; ++i) {
+ bool Bit = CRC & 0x80000000U;
+ if (Byte & (1U << i))
+ Bit = !Bit;
+ CRC <<= 1;
+ if (Bit)
+ CRC ^= 0x04C11DB7U;
+ }
+ };
+
+ // CRC all the bytes of the StringLiteral.
+ for (char Byte : SL->getBytes())
+ UpdateCRC(Byte);
+
+ // The NUL terminator byte(s) were not present earlier,
+ // we need to manually process those bytes into the CRC.
+ for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth();
+ ++NullTerminator)
+ UpdateCRC('\x00');
+
+ // The literature refers to the process of reversing the bits in the final CRC
+ // output as "reflection".
+ CRC = llvm::reverseBits(CRC);
+
+ // <encoded-crc>: The CRC is encoded utilizing the standard number mangling
+ // scheme.
+ Mangler.mangleNumber(CRC);
+
+ // <encoded-crc>: The mangled name also contains the first 32 _characters_
+ // (including null-terminator bytes) of the StringLiteral.
+ // Each character is encoded by splitting them into bytes and then encoding
+ // the constituent bytes.
+ auto MangleCharacter = [&Mangler](char Byte) {
+ // There are five different manglings for characters:
+ // - ?[0-9]: The set of [,/\:. \n\t'-].
+ // - [a-zA-Z0-9_$]: A one-to-one mapping.
+ // - ?[a-z]: The range from \xe1 to \xfa.
+ // - ?[A-Z]: The range from \xc1 to \xda.
+ // - ?$XX: A fallback which maps nibbles.
+ static const char SpecialMap[] = {',', '/', '\\', ':', '.',
+ ' ', '\n', '\t', '\'', '-'};
+ const char *SpecialMapI =
+ std::find(std::begin(SpecialMap), std::end(SpecialMap), Byte);
+ if (SpecialMapI != std::end(SpecialMap)) {
+ Mangler.getStream() << '?' << SpecialMapI - SpecialMap;
+ } else if ((Byte >= 'a' && Byte <= 'z') || (Byte >= 'A' && Byte <= 'Z') ||
+ (Byte >= '0' && Byte <= '9') || Byte == '_' || Byte == '$') {
+ Mangler.getStream() << Byte;
+ } else if (Byte >= '\xe1' && Byte <= '\xfa') {
+ Mangler.getStream() << '?' << (char)('a' + Byte - '\xe1');
+ } else if (Byte >= '\xc1' && Byte <= '\xda') {
+ Mangler.getStream() << '?' << (char)('A' + Byte - '\xc1');
+ } else {
+ Mangler.getStream() << "?$";
+ Mangler.getStream() << (char)('A' + ((Byte >> 4) & 0xf));
+ Mangler.getStream() << (char)('A' + (Byte & 0xf));
+ }
+ };
+
+ // Enforce our 32 character max.
+ unsigned MaxBytes = 32 * SL->getCharByteWidth();
+ StringRef Bytes = SL->getBytes().substr(0, MaxBytes);
+ size_t BytesLength = Bytes.size();
+
+ if (SL->isAscii()) {
+ // A character maps directly to a byte for ASCII StringLiterals.
+ for (char Byte : Bytes)
+ MangleCharacter(Byte);
+ } else if (SL->isWide()) {
+ // The ordering of bytes in a wide StringLiteral is like so:
+ // A B C D ...
+ // However, they are mangled in the following order:
+ // B A D C ...
+ for (size_t i = 0; i != BytesLength;) {
+ char FirstByte = Bytes[i];
+ ++i;
+ if (i != BytesLength) {
+ char SecondByte = Bytes[i];
+ ++i;
+ MangleCharacter(SecondByte);
+ }
+ MangleCharacter(FirstByte);
+ }
+ } else {
+ llvm_unreachable("unexpected string literal kind!");
+ // TODO: This needs to be updated when MSVC gains support for Unicode
+ // literals.
+ }
+
+ // We should also encode the NUL terminator(s) if we encoded less than 32
+ // characters.
+ if (BytesLength < MaxBytes) {
+ size_t PaddingBytes = SL->getCharByteWidth();
+ size_t BytesLeft = MaxBytes - BytesLength;
+ if (BytesLeft < PaddingBytes)
+ PaddingBytes = BytesLeft;
+ for (unsigned i = 0; i < PaddingBytes; ++i)
+ MangleCharacter('\x00');
+ }
+
+ Mangler.getStream() << '@';
+}
+
MicrosoftMangleContext *
MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) {
return new MicrosoftMangleContextImpl(Context, Diags);
llvm::Constant *
CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) {
CharUnits Align = getContext().getAlignOfGlobalVarInChars(S->getType());
- if (S->isAscii() || S->isUTF8()) {
- SmallString<64> Str(S->getString());
-
- // Resize the string to the right size, which is indicated by its type.
- const ConstantArrayType *CAT = Context.getAsConstantArrayType(S->getType());
- Str.resize(CAT->getSize().getZExtValue());
- return GetAddrOfConstantString(Str, /*GlobalName*/ 0, Align.getQuantity());
+
+ llvm::StringMapEntry<llvm::GlobalVariable *> *Entry = nullptr;
+ llvm::GlobalVariable *GV = nullptr;
+ if (!LangOpts.WritableStrings) {
+ llvm::StringMap<llvm::GlobalVariable *> *ConstantStringMap = nullptr;
+ switch (S->getCharByteWidth()) {
+ case 1:
+ ConstantStringMap = &Constant1ByteStringMap;
+ break;
+ case 2:
+ ConstantStringMap = &Constant2ByteStringMap;
+ break;
+ case 4:
+ ConstantStringMap = &Constant4ByteStringMap;
+ break;
+ default:
+ llvm_unreachable("unhandled byte width!");
+ }
+ Entry = &ConstantStringMap->GetOrCreateValue(S->getBytes());
+ GV = Entry->getValue();
+ }
+
+ if (!GV) {
+ StringRef GlobalVariableName;
+ llvm::GlobalValue::LinkageTypes LT;
+ if (!LangOpts.WritableStrings &&
+ getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) {
+ LT = llvm::GlobalValue::LinkOnceODRLinkage;
+
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ getCXXABI().getMangleContext().mangleStringLiteral(S, Out);
+ Out.flush();
+
+ size_t Length = Buffer.size();
+ char *Name = MangledNamesAllocator.Allocate<char>(Length);
+ std::copy(Buffer.begin(), Buffer.end(), Name);
+ GlobalVariableName = StringRef(Name, Length);
+ } else {
+ LT = llvm::GlobalValue::PrivateLinkage;;
+ GlobalVariableName = ".str";
+ }
+
+ // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant);
+
+ llvm::Constant *C = GetConstantArrayFromStringLiteral(S);
+ GV = new llvm::GlobalVariable(
+ getModule(), C->getType(), !LangOpts.WritableStrings, LT, C,
+ GlobalVariableName, /*InsertBefore=*/nullptr,
+ llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+ GV->setUnnamedAddr(true);
+ if (Entry)
+ Entry->setValue(GV);
}
- // FIXME: the following does not memoize wide strings.
- llvm::Constant *C = GetConstantArrayFromStringLiteral(S);
- llvm::GlobalVariable *GV =
- new llvm::GlobalVariable(getModule(),C->getType(),
- !LangOpts.WritableStrings,
- llvm::GlobalValue::PrivateLinkage,
- C,".str");
+ if (Align.getQuantity() > GV->getAlignment())
+ GV->setAlignment(Align.getQuantity());
- GV->setAlignment(Align.getQuantity());
- GV->setUnnamedAddr(true);
return GV;
}
llvm::Constant *C =
llvm::ConstantDataArray::getString(CGM.getLLVMContext(), str, false);
- // OpenCL v1.1 s6.5.3: a string literal is in the constant address space.
+ // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
unsigned AddrSpace = 0;
if (CGM.getLangOpts().OpenCL)
AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
return GenerateStringLiteral(Str, false, *this, GlobalName, Alignment);
llvm::StringMapEntry<llvm::GlobalVariable *> &Entry =
- ConstantStringMap.GetOrCreateValue(Str);
+ Constant1ByteStringMap.GetOrCreateValue(Str);
if (llvm::GlobalVariable *GV = Entry.getValue()) {
if (Alignment > GV->getAlignment()) {