From: Michael Zolotukhin Date: Tue, 8 Sep 2015 23:52:33 +0000 (+0000) Subject: Introduce __builtin_nontemporal_store and __builtin_nontemporal_load. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2db8497948ecd5fe0552cef154e55a72a8eb083c;p=clang Introduce __builtin_nontemporal_store and __builtin_nontemporal_load. Summary: Currently clang provides no general way to generate nontemporal loads/stores. There are some architecture specific builtins for doing so (e.g. in x86), but there is no way to generate non-temporal store on, e.g. AArch64. This patch adds generic builtins which are expanded to a simple store with '!nontemporal' attribute in IR. Differential Revision: http://reviews.llvm.org/D12313 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@247104 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/Builtins.def b/include/clang/Basic/Builtins.def index 853c2c51b5..ef9e902f1d 100644 --- a/include/clang/Basic/Builtins.def +++ b/include/clang/Basic/Builtins.def @@ -1245,6 +1245,10 @@ BUILTIN(__builtin_operator_delete, "vv*", "n") BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn") BUILTIN(__builtin___get_unsafe_stack_ptr, "v*", "Fn") +// Nontemporal loads/stores builtins +BUILTIN(__builtin_nontemporal_store, "v.", "t") +BUILTIN(__builtin_nontemporal_load, "v.", "t") + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 6ac5748925..2c3b22a21b 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -6200,6 +6200,12 @@ def err_atomic_load_store_uses_lib : Error< "atomic %select{load|store}0 requires runtime support that is not " "available for this target">; +def err_nontemporal_builtin_must_be_pointer : Error< + "address argument to nontemporal builtin must be a pointer (%0 invalid)">; +def err_nontemporal_builtin_must_be_pointer_intfltptr_or_vector : Error< + "address argument to nontemporal builtin must be a pointer to integer, float, " + "pointer, or a vector of such types (%0 invalid)">; + def err_deleted_function_use : Error<"attempt to use a deleted function">; def err_kern_type_not_void_return : Error< diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index bc4348253f..9e9e2bbacb 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -8851,6 +8851,7 @@ private: bool SemaBuiltinLongjmp(CallExpr *TheCall); bool SemaBuiltinSetjmp(CallExpr *TheCall); ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult); + ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult); ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult, AtomicExpr::AtomicOp Op); bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index c35f25ad10..729c0a18b0 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -111,6 +111,28 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, return EmitFromInt(CGF, Result, T, ValueType); } +static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Val = CGF.EmitScalarExpr(E->getArg(0)); + Value *Address = CGF.EmitScalarExpr(E->getArg(1)); + + // Convert the type of the pointer to a pointer to the stored type. + Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); + Value *BC = CGF.Builder.CreateBitCast( + Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); + LV.setNontemporal(true); + CGF.EmitStoreOfScalar(Val, LV, false); + return nullptr; +} + +static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Address = CGF.EmitScalarExpr(E->getArg(0)); + + LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); + LV.setNontemporal(true); + return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); +} + static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E) { @@ -1143,6 +1165,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } + case Builtin::BI__builtin_nontemporal_load: + return RValue::get(EmitNontemporalLoad(*this, E)); + case Builtin::BI__builtin_nontemporal_store: + return RValue::get(EmitNontemporalStore(*this, E)); case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: { // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 6635e570c6..b7d1ef19b1 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -1160,7 +1160,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), Loc, lvalue.getAlignmentSource(), lvalue.getTBAAInfo(), - lvalue.getTBAABaseType(), lvalue.getTBAAOffset()); + lvalue.getTBAABaseType(), lvalue.getTBAAOffset(), + lvalue.isNontemporal()); } static bool hasBooleanRepresentation(QualType Ty) { @@ -1226,7 +1227,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, AlignmentSource AlignSource, llvm::MDNode *TBAAInfo, QualType TBAABaseType, - uint64_t TBAAOffset) { + uint64_t TBAAOffset, + bool isNontemporal) { // For better performance, handle vector loads differently. if (Ty->isVectorType()) { const llvm::Type *EltTy = Addr.getElementType(); @@ -1258,6 +1260,11 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, } llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile); + if (isNontemporal) { + llvm::MDNode *Node = llvm::MDNode::get( + Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + } if (TBAAInfo) { llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); @@ -1330,7 +1337,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, AlignmentSource AlignSource, llvm::MDNode *TBAAInfo, bool isInit, QualType TBAABaseType, - uint64_t TBAAOffset) { + uint64_t TBAAOffset, + bool isNontemporal) { // Handle vectors differently to get better performance. if (Ty->isVectorType()) { @@ -1365,6 +1373,12 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, } llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile); + if (isNontemporal) { + llvm::MDNode *Node = + llvm::MDNode::get(Store->getContext(), + llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + } if (TBAAInfo) { llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); @@ -1378,7 +1392,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), lvalue.getAlignmentSource(), lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(), - lvalue.getTBAAOffset()); + lvalue.getTBAAOffset(), lvalue.isNontemporal()); } /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index 195571ba07..3ccc4cda89 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -202,6 +202,10 @@ class LValue { unsigned AlignSource : 2; + // This flag shows if a nontemporal load/stores should be used when accessing + // this lvalue. + bool Nontemporal : 1; + Expr *BaseIvarExp; /// Used by struct-path-aware TBAA. @@ -228,6 +232,7 @@ private: // Initialize Objective-C flags. this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; this->ImpreciseLifetime = false; + this->Nontemporal = false; this->ThreadLocalRef = false; this->BaseIvarExp = nullptr; @@ -277,6 +282,8 @@ public: void setARCPreciseLifetime(ARCPreciseLifetime_t value) { ImpreciseLifetime = (value == ARCImpreciseLifetime); } + bool isNontemporal() const { return Nontemporal; } + void setNontemporal(bool Value) { Nontemporal = Value; } bool isObjCWeak() const { return Quals.getObjCGCAttr() == Qualifiers::Weak; diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 86154e923e..3925932983 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2449,7 +2449,8 @@ public: AlignmentSource::Type, llvm::MDNode *TBAAInfo = nullptr, QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0); + uint64_t TBAAOffset = 0, + bool isNontemporal = false); /// EmitLoadOfScalar - Load a scalar value from an address, taking /// care to appropriately convert from the memory representation to @@ -2465,7 +2466,7 @@ public: AlignmentSource AlignSource = AlignmentSource::Type, llvm::MDNode *TBAAInfo = nullptr, bool isInit = false, QualType TBAABaseTy = QualType(), - uint64_t TBAAOffset = 0); + uint64_t TBAAOffset = 0, bool isNontemporal = false); /// EmitStoreOfScalar - Store a scalar value to an address, taking /// care to appropriately convert from the memory representation to diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 92c94560d5..c6a00b1dad 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -441,6 +441,9 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__sync_swap_8: case Builtin::BI__sync_swap_16: return SemaBuiltinAtomicOverloaded(TheCallResult); + case Builtin::BI__builtin_nontemporal_load: + case Builtin::BI__builtin_nontemporal_store: + return SemaBuiltinNontemporalOverloaded(TheCallResult); #define BUILTIN(ID, TYPE, ATTRS) #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \ case Builtin::BI##ID: \ @@ -2210,6 +2213,78 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) { return TheCallResult; } +/// SemaBuiltinNontemporalOverloaded - We have a call to +/// __builtin_nontemporal_store or __builtin_nontemporal_load, which is an +/// overloaded function based on the pointer type of its last argument. +/// +/// This function goes through and does final semantic checking for these +/// builtins. +ExprResult Sema::SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult) { + CallExpr *TheCall = (CallExpr *)TheCallResult.get(); + DeclRefExpr *DRE = + cast(TheCall->getCallee()->IgnoreParenCasts()); + FunctionDecl *FDecl = cast(DRE->getDecl()); + unsigned BuiltinID = FDecl->getBuiltinID(); + assert((BuiltinID == Builtin::BI__builtin_nontemporal_store || + BuiltinID == Builtin::BI__builtin_nontemporal_load) && + "Unexpected nontemporal load/store builtin!"); + bool isStore = BuiltinID == Builtin::BI__builtin_nontemporal_store; + unsigned numArgs = isStore ? 2 : 1; + + // Ensure that we have the proper number of arguments. + if (checkArgCount(*this, TheCall, numArgs)) + return ExprError(); + + // Inspect the last argument of the nontemporal builtin. This should always + // be a pointer type, from which we imply the type of the memory access. + // Because it is a pointer type, we don't have to worry about any implicit + // casts here. + Expr *PointerArg = TheCall->getArg(numArgs - 1); + ExprResult PointerArgResult = + DefaultFunctionArrayLvalueConversion(PointerArg); + + if (PointerArgResult.isInvalid()) + return ExprError(); + PointerArg = PointerArgResult.get(); + TheCall->setArg(numArgs - 1, PointerArg); + + const PointerType *pointerType = PointerArg->getType()->getAs(); + if (!pointerType) { + Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_must_be_pointer) + << PointerArg->getType() << PointerArg->getSourceRange(); + return ExprError(); + } + + QualType ValType = pointerType->getPointeeType(); + + // Strip any qualifiers off ValType. + ValType = ValType.getUnqualifiedType(); + if (!ValType->isIntegerType() && !ValType->isAnyPointerType() && + !ValType->isBlockPointerType() && !ValType->isFloatingType() && + !ValType->isVectorType()) { + Diag(DRE->getLocStart(), + diag::err_nontemporal_builtin_must_be_pointer_intfltptr_or_vector) + << PointerArg->getType() << PointerArg->getSourceRange(); + return ExprError(); + } + + if (!isStore) { + TheCall->setType(ValType); + return TheCallResult; + } + + ExprResult ValArg = TheCall->getArg(0); + InitializedEntity Entity = InitializedEntity::InitializeParameter( + Context, ValType, /*consume*/ false); + ValArg = PerformCopyInitialization(Entity, SourceLocation(), ValArg); + if (ValArg.isInvalid()) + return ExprError(); + + TheCall->setArg(0, ValArg.get()); + TheCall->setType(Context.VoidTy); + return TheCallResult; +} + /// CheckObjCString - Checks that the argument to the builtin /// CFString constructor is correct /// Note: It might also make sense to do the UTF-16 conversion here (would diff --git a/test/CodeGen/Nontemporal.cpp b/test/CodeGen/Nontemporal.cpp new file mode 100644 index 0000000000..4ddb9a1cbb --- /dev/null +++ b/test/CodeGen/Nontemporal.cpp @@ -0,0 +1,48 @@ +// Test frontend handling of nontemporal builtins. +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s + +signed char sc; +unsigned char uc; +signed short ss; +unsigned short us; +signed int si; +unsigned int ui; +signed long long sll; +unsigned long long ull; +float f1, f2; +double d1, d2; +float __attribute__((vector_size(16))) vf1, vf2; +char __attribute__((vector_size(8))) vc1, vc2; +bool b1, b2; + +void test_all_sizes(void) // CHECK-LABEL: test_all_sizes +{ + __builtin_nontemporal_store(true, &b1); // CHECK: store i8 1, i8* @b1, align 1, !nontemporal + __builtin_nontemporal_store(b1, &b2); // CHECK: store i8{{.*}}, align 1, !nontemporal + __builtin_nontemporal_store(1, &uc); // CHECK: store i8{{.*}}align 1, !nontemporal + __builtin_nontemporal_store(1, &sc); // CHECK: store i8{{.*}}align 1, !nontemporal + __builtin_nontemporal_store(1, &us); // CHECK: store i16{{.*}}align 2, !nontemporal + __builtin_nontemporal_store(1, &ss); // CHECK: store i16{{.*}}align 2, !nontemporal + __builtin_nontemporal_store(1, &ui); // CHECK: store i32{{.*}}align 4, !nontemporal + __builtin_nontemporal_store(1, &si); // CHECK: store i32{{.*}}align 4, !nontemporal + __builtin_nontemporal_store(1, &ull); // CHECK: store i64{{.*}}align 8, !nontemporal + __builtin_nontemporal_store(1, &sll); // CHECK: store i64{{.*}}align 8, !nontemporal + __builtin_nontemporal_store(1.0, &f1); // CHECK: store float{{.*}}align 4, !nontemporal + __builtin_nontemporal_store(1.0, &d1); // CHECK: store double{{.*}}align 8, !nontemporal + __builtin_nontemporal_store(vf1, &vf2); // CHECK: store <4 x float>{{.*}}align 16, !nontemporal + __builtin_nontemporal_store(vc1, &vc2); // CHECK: store <8 x i8>{{.*}}align 8, !nontemporal + + b1 = __builtin_nontemporal_load(&b2); // CHECK: load i8{{.*}}align 1, !nontemporal + uc = __builtin_nontemporal_load(&sc); // CHECK: load i8{{.*}}align 1, !nontemporal + sc = __builtin_nontemporal_load(&uc); // CHECK: load i8{{.*}}align 1, !nontemporal + us = __builtin_nontemporal_load(&ss); // CHECK: load i16{{.*}}align 2, !nontemporal + ss = __builtin_nontemporal_load(&us); // CHECK: load i16{{.*}}align 2, !nontemporal + ui = __builtin_nontemporal_load(&si); // CHECK: load i32{{.*}}align 4, !nontemporal + si = __builtin_nontemporal_load(&ui); // CHECK: load i32{{.*}}align 4, !nontemporal + ull = __builtin_nontemporal_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal + sll = __builtin_nontemporal_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal + f1 = __builtin_nontemporal_load(&f2); // CHECK: load float{{.*}}align 4, !nontemporal + d1 = __builtin_nontemporal_load(&d2); // CHECK: load double{{.*}}align 8, !nontemporal + vf2 = __builtin_nontemporal_load(&vf1); // CHECK: load <4 x float>{{.*}}align 16, !nontemporal + vc2 = __builtin_nontemporal_load(&vc1); // CHECK: load <8 x i8>{{.*}}align 8, !nontemporal +}