From: Reid Kleckner
Date: Thu, 4 Sep 2014 20:04:38 +0000 (+0000)
Subject: MS inline asm: Allow __asm blocks to set a return value
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3be1b0824b89cba958cd5e07739ac8b64e137127;p=clang

MS inline asm: Allow __asm blocks to set a return value

If control falls off the end of a function after an __asm block, MSVC
assumes that the inline assembly filled the EAX and possibly EDX
registers with an appropriate return value. This functionality is used
in inline functions returning 64-bit integers in system headers, so we
need some amount of compatibility.

This is implemented in Clang by adding extra output constraints to
every inline asm block, and storing the resulting output registers into
the return value slot. If we see an asm block somewhere in the function
body, we emit a normal epilogue instead of marking the end of the
function as unreachable.

Normal returns in functions not using this functionality will overwrite
the return value slot, and in most cases LLVM should be able to
eliminate the dead stores.

Fixes PR17201.

Reviewed By: majnemer

Differential Revision: http://reviews.llvm.org/D5177

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@217187 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 02ade5c858..1e5d181262 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -1901,7 +1901,19 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
     }
   }
 
-  unsigned NumConstraints = S.getNumOutputs() + S.getNumInputs();
+  // If this is a Microsoft-style asm blob, store the return registers (EAX:EDX)
+  // to the return value slot. Only do this when returning in registers.
+  if (isa<MSAsmStmt>(&S)) {
+    const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo();
+    if (RetAI.isDirect() || RetAI.isExtend()) {
+      // Make a fake lvalue for the return value slot.
+      LValue ReturnSlot = MakeAddrLValue(ReturnValue, FnRetTy);
+      CGM.getTargetCodeGenInfo().addReturnRegisterOutputs(
+          *this, ReturnSlot, Constraints, ResultRegTypes, ResultTruncRegTypes,
+          ResultRegDests, AsmString, S.getNumOutputs());
+      SawAsmBlock = true;
+    }
+  }
 
   for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
     const Expr *InputExpr = S.getInputExpr(i);
@@ -1974,9 +1986,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
     StringRef Clobber = S.getClobber(i);
 
     if (Clobber != "memory" && Clobber != "cc")
-    Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
+      Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
 
-    if (i != 0 || NumConstraints != 0)
+    if (!Constraints.empty())
       Constraints += ',';
 
     Constraints += "~{";
@@ -2035,6 +2047,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
     }
   }
 
+  assert(RegResults.size() == ResultRegTypes.size());
+  assert(RegResults.size() == ResultTruncRegTypes.size());
+  assert(RegResults.size() == ResultRegDests.size());
   for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
     llvm::Value *Tmp = RegResults[i];
 
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index f077a0b4b6..f49e9ec126 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -39,7 +39,7 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
               CGBuilderInserterTy(this)),
       CapturedStmtInfo(nullptr), SanOpts(&CGM.getLangOpts().Sanitize),
       IsSanitizerScope(false), CurFuncIsThunk(false), AutoreleaseResult(false),
-      BlockInfo(nullptr), BlockPointer(nullptr),
+      SawAsmBlock(false), BlockInfo(nullptr), BlockPointer(nullptr),
       LambdaThisCaptureField(nullptr), NormalCleanupDest(nullptr),
       NextCleanupDestIndex(1), FirstBlockInfo(nullptr), EHResumeBlock(nullptr),
       ExceptionSlot(nullptr), EHSelectorSlot(nullptr),
@@ -878,7 +878,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
     // C11 6.9.1p12:
     //   If the '}' that terminates a function is reached, and the value of the
     //   function call is used by the caller, the behavior is undefined.
-    if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() &&
+    if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
         !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
       if (SanOpts->Return) {
         SanitizerScope SanScope(this);
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 765a757090..c03b9ab26b 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -265,6 +265,10 @@ public:
   /// In ARC, whether we should autorelease the return value.
   bool AutoreleaseResult;
 
+  /// Whether we processed a Microsoft-style asm block during CodeGen. These can
+  /// potentially set the return value.
+  bool SawAsmBlock;
+
   const CodeGen::CGBlockInfo *BlockInfo;
   llvm::Value *BlockPointer;
 
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 0da0e0843b..9bf1dbbd0d 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -15,6 +15,7 @@
 #include "TargetInfo.h"
 #include "ABIInfo.h"
 #include "CGCXXABI.h"
+#include "CGValue.h"
 #include "CodeGenFunction.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
@@ -593,6 +594,14 @@ public:
     return X86AdjustInlineAsmType(CGF, Constraint, Ty);
   }
 
+  void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
+                                std::string &Constraints,
+                                std::vector<llvm::Type *> &ResultRegTypes,
+                                std::vector<llvm::Type *> &ResultTruncRegTypes,
+                                std::vector<LValue> &ResultRegDests,
+                                std::string &AsmString,
+                                unsigned NumOutputs) const override;
+
   llvm::Constant *
   getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
     unsigned Sig = (0xeb << 0) |  // jmp rel8
@@ -606,6 +615,85 @@
 }
 
+/// Rewrite input constraint references after adding some output constraints.
+/// In the case where there is one output and one input and we add one output,
+/// we need to replace all operand references greater than or equal to 1:
+///     mov $0, $1
+///     mov eax, $1
+/// The result will be:
+///     mov $0, $2
+///     mov eax, $2
+static void rewriteInputConstraintReferences(unsigned FirstIn,
+                                             unsigned NumNewOuts,
+                                             std::string &AsmString) {
+  std::string Buf;
+  llvm::raw_string_ostream OS(Buf);
+  size_t Pos = 0;
+  while (Pos < AsmString.size()) {
+    size_t DollarStart = AsmString.find('$', Pos);
+    if (DollarStart == std::string::npos)
+      DollarStart = AsmString.size();
+    size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
+    if (DollarEnd == std::string::npos)
+      DollarEnd = AsmString.size();
+    OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
+    Pos = DollarEnd;
+    size_t NumDollars = DollarEnd - DollarStart;
+    if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
+      // We have an operand reference.
+      size_t DigitStart = Pos;
+      size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
+      if (DigitEnd == std::string::npos)
+        DigitEnd = AsmString.size();
+      StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
+      unsigned OperandIndex;
+      if (!OperandStr.getAsInteger(10, OperandIndex)) {
+        if (OperandIndex >= FirstIn)
+          OperandIndex += NumNewOuts;
+        OS << OperandIndex;
+      } else {
+        OS << OperandStr;
+      }
+      Pos = DigitEnd;
+    }
+  }
+  AsmString = std::move(OS.str());
+}
+
+/// Add output constraints for EAX:EDX because they are return registers.
+void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
+    CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
+    std::vector<llvm::Type *> &ResultRegTypes,
+    std::vector<llvm::Type *> &ResultTruncRegTypes,
+    std::vector<LValue> &ResultRegDests, std::string &AsmString,
+    unsigned NumOutputs) const {
+  uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());
+
+  // Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
+  // larger.
+  if (!Constraints.empty())
+    Constraints += ',';
+  if (RetWidth <= 32) {
+    Constraints += "={eax}";
+    ResultRegTypes.push_back(CGF.Int32Ty);
+  } else {
+    // Use the 'A' constraint for EAX:EDX.
+    Constraints += "=A";
+    ResultRegTypes.push_back(CGF.Int64Ty);
+  }
+
+  // Truncate EAX or EAX:EDX to an integer of the appropriate size.
+  llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
+  ResultTruncRegTypes.push_back(CoerceTy);
+
+  // Coerce the integer by bitcasting the return slot pointer.
+  ReturnSlot.setAddress(CGF.Builder.CreateBitCast(ReturnSlot.getAddress(),
+                                                  CoerceTy->getPointerTo()));
+  ResultRegDests.push_back(ReturnSlot);
+
+  rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
+}
+
 /// shouldReturnTypeInRegister - Determine if the given type should be
 /// passed in a register (for the Darwin ABI).
 bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index 5b18f6b1e1..f14965009d 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H
 #define LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H
 
+#include "CGValue.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/SmallString.h"
@@ -129,6 +130,14 @@ public:
     return Ty;
   }
 
+  /// Adds constraints and types for result registers.
+  virtual void addReturnRegisterOutputs(
+      CodeGen::CodeGenFunction &CGF, CodeGen::LValue ReturnValue,
+      std::string &Constraints, std::vector<llvm::Type *> &ResultRegTypes,
+      std::vector<llvm::Type *> &ResultTruncRegTypes,
+      std::vector<CodeGen::LValue> &ResultRegDests, std::string &AsmString,
+      unsigned NumOutputs) const {}
+
   /// doesReturnSlotInterfereWithArgs - Return true if the target uses an
   /// argument slot for an 'sret' type.
   virtual bool doesReturnSlotInterfereWithArgs() const { return true; }
diff --git a/test/CodeGen/ms-inline-asm.c b/test/CodeGen/ms-inline-asm.c
index eabbd6d320..0a7ac11043 100644
--- a/test/CodeGen/ms-inline-asm.c
+++ b/test/CodeGen/ms-inline-asm.c
@@ -66,7 +66,7 @@ int t8() {
   __asm int 4
   return 10;
 // CHECK: t8
-// CHECK: call void asm sideeffect inteldialect "int $$4\0A\09int $$4", "~{dirflag},~{fpsr},~{flags}"()
+// CHECK: call i32 asm sideeffect inteldialect "int $$4\0A\09int $$4", "={eax},~{dirflag},~{fpsr},~{flags}"()
 // CHECK: ret i32 10
 }
 
@@ -88,10 +88,11 @@ unsigned t10(void) {
   }
   return j;
 // CHECK: t10
+// CHECK: [[r:%[a-zA-Z0-9]+]] = alloca i32, align 4
 // CHECK: [[I:%[a-zA-Z0-9]+]] = alloca i32, align 4
 // CHECK: [[J:%[a-zA-Z0-9]+]] = alloca i32, align 4
 // CHECK: store i32 1, i32* [[I]], align 4
-// CHECK: call void asm sideeffect inteldialect "mov eax, dword ptr $1\0A\09mov dword ptr $0, eax", "=*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}})
+// CHECK: call i32 asm sideeffect inteldialect "mov eax, dword ptr $2\0A\09mov dword ptr $0, eax", "=*m,={eax},*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}})
 // CHECK: [[RET:%[a-zA-Z0-9]+]] = load i32* [[J]], align 4
 // CHECK: ret i32 [[RET]]
 }
@@ -112,7 +113,7 @@ unsigned t12(void) {
   }
   return j + m;
 // CHECK: t12
-// CHECK: call void asm sideeffect inteldialect "mov eax, dword ptr $2\0A\09mov dword ptr $0, eax\0A\09mov eax, dword ptr $3\0A\09mov dword ptr $1, eax", "=*m,=*m,*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}})
+// CHECK: call i32 asm sideeffect inteldialect "mov eax, dword ptr $3\0A\09mov dword ptr $0, eax\0A\09mov eax, dword ptr $4\0A\09mov dword ptr $1, eax", "=*m,=*m,={eax},*m,*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}})
 }
 
 void t13() {
@@ -319,11 +320,11 @@ int *t30() {
   int *res;
   __asm lea edi, results
-// CHECK: lea edi, dword ptr $1
+// CHECK: lea edi, dword ptr $2
   __asm mov res, edi
 // CHECK: mov dword ptr $0, edi
   return res;
-// CHECK: "=*m,*m,~{edi},~{dirflag},~{fpsr},~{flags}"(i32** %{{.*}}, [2 x i32]* @{{.*}})
+// CHECK: "=*m,={eax},*m,~{edi},~{dirflag},~{fpsr},~{flags}"(i32** %{{.*}}, [2 x i32]* @{{.*}})
 }
 
 void t31() {
diff --git a/test/CodeGenCXX/ms-inline-asm-return.cpp b/test/CodeGenCXX/ms-inline-asm-return.cpp
new file mode 100644
index 0000000000..1674b40245
--- /dev/null
+++ b/test/CodeGenCXX/ms-inline-asm-return.cpp
@@ -0,0 +1,99 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -fasm-blocks | FileCheck %s
+
+// Check that we take EAX or EAX:EDX and return it from these functions for MSVC
+// compatibility.
+
+extern "C" {
+
+long long f_i64() {
+  __asm {
+    mov eax, 1
+    mov edx, 1
+  }
+}
+// CHECK-LABEL: define i64 @f_i64()
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=A,~{eax},{{.*}}"
+// CHECK: ret i64 %[[r]]
+
+int f_i32() {
+  __asm {
+    mov eax, 1
+    mov edx, 1
+  }
+}
+// CHECK-LABEL: define i32 @f_i32()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: ret i32 %[[r]]
+
+short f_i16() {
+  __asm {
+    mov eax, 1
+    mov edx, 1
+  }
+}
+// CHECK-LABEL: define signext i16 @f_i16()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r_i16:[^ ]*]] = trunc i32 %[[r]] to i16
+// CHECK: ret i16 %[[r_i16]]
+
+char f_i8() {
+  __asm {
+    mov eax, 1
+    mov edx, 1
+  }
+}
+// CHECK-LABEL: define signext i8 @f_i8()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
+// CHECK: ret i8 %[[r_i8]]
+
+bool f_i1() {
+  __asm {
+    mov eax, 1
+    mov edx, 1
+  }
+}
+// CHECK-LABEL: define zeroext i1 @f_i1()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
+// CHECK: store i8 %[[r_i8]], i8* %{{.*}}
+// CHECK: %[[r_i1:[^ ]*]] = load i1* %{{.*}}
+// CHECK: ret i1 %[[r_i1]]
+
+struct FourChars {
+  char a, b, c, d;
+};
+FourChars f_s4() {
+  __asm {
+    mov eax, 0x01010101
+  }
+}
+// CHECK-LABEL: define i32 @f_s4()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$0x01010101", "={eax},~{eax},{{.*}}"
+// CHECK: store i32 %[[r]], i32* %{{.*}}
+// CHECK: %[[r_i32:[^ ]*]] = load i32* %{{.*}}
+// CHECK: ret i32 %[[r_i32]]
+
+struct EightChars {
+  char a, b, c, d, e, f, g, h;
+};
+EightChars f_s8() {
+  __asm {
+    mov eax, 0x01010101
+    mov edx, 0x01010101
+  }
+}
+// CHECK-LABEL: define i64 @f_s8()
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$0x01010101\0A\09mov edx, $$0x01010101", "=A,~{eax},{{.*}}"
+// CHECK: store i64 %[[r]], i64* %{{.*}}
+// CHECK: %[[r_i64:[^ ]*]] = load i64* %{{.*}}
+// CHECK: ret i64 %[[r_i64]]
+
+} // extern "C"
+
+int main() {
+  __asm xor eax, eax
+}
+// CHECK-LABEL: define i32 @main()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "={eax},{{.*}}"
+// CHECK: ret i32 %[[r]]
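
A minimal sketch of the kind of MSVC-compatible source this change is meant to
handle, assuming x86-32 with -fasm-blocks (or clang-cl); the function and
variable names are illustrative and do not come from the patch or its tests:

  // Hypothetical example (not from the patch): the __asm block leaves a 64-bit
  // result in EDX:EAX, and control falls off the end of the function without a
  // return statement, as MSVC-targeted system headers sometimes do.
  unsigned long long mul32x32(unsigned long a, unsigned long b) {
    __asm {
      mov eax, a
      // EDX:EAX = EAX * b, i.e. the full 64-bit product a * b
      mul b
    }
    // No return statement on purpose: the caller reads the value from EDX:EAX.
  }

With this patch, Clang appends an "=A" (EAX:EDX) output constraint to the asm
block for a 64-bit return type (or "={eax}" for 32 bits and smaller), stores
the register value into the return value slot, and emits a normal epilogue, so
falling off the end returns the product instead of hitting unreachable.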