/// would typically be allowed using throughput or size cost models.
bool hasDivRemOp(Type *DataType, bool IsSigned) const;
+  /// Return true if the given instruction (assumed to be a memory access
+  /// instruction) has a volatile variant in the given address space. If so,
+  /// we can avoid an addrspacecast to the generic AS for volatile loads and
+  /// stores. The default implementation returns false, which prevents address
+  /// space inference for volatile loads/stores.
+ bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
+
/// Return true if target doesn't mind addresses in vectors.
bool prefersVectorizedAddressing() const;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
virtual bool isLegalMaskedGather(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
+ virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
+ bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
+ return Impl.hasVolatileVariant(I, AddrSpace);
+ }
bool prefersVectorizedAddressing() override {
return Impl.prefersVectorizedAddressing();
}
bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
+ bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
+
bool prefersVectorizedAddressing() { return true; }
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
+bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
+ unsigned AddrSpace) const {
+ return TTIImpl->hasVolatileVariant(I, AddrSpace);
+}
+
bool TargetTransformInfo::prefersVectorizedAddressing() const {
return TTIImpl->prefersVectorizedAddressing();
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
+ // Volatile loads/stores are only supported for shared and global address
+ // spaces, or for generic AS that maps to them.
+ if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
+ AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
+ AddrSpace == llvm::ADDRESS_SPACE_SHARED))
+ return false;
+
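+    // Only plain loads and stores have volatile variants in PTX
+    // (ld.volatile / st.volatile).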
+    switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::Load:
+ case Instruction::Store:
+ return true;
+ }
+ }
};
} // end namespace llvm
// Changes the flat address expressions in function F to point to specific
// address spaces if InferredAddrSpace says so. Postorder is the postorder of
// all flat expressions in the use-def graph of function F.
- bool
- rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace,
- Function *F) const;
+ bool rewriteWithNewAddressSpaces(
+ const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
// Changes the address spaces of the flat address expressions who are inferred
// to point to a specific address space.
- return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
+ return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
}
// Constants need to be tracked through RAUW to handle cases with nested
/// \p returns true if \p U is the pointer operand of a memory instruction with
/// a single pointer operand that can have its address space changed by simply
-/// mutating the use to a new value.
-static bool isSimplePointerUseValidToReplace(Use &U) {
+/// mutating the use to a new value. If the memory instruction is volatile,
+/// return true only if the target allows the memory instruction to be volatile
+/// in the new address space.
+static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
+ Use &U, unsigned AddrSpace) {
User *Inst = U.getUser();
unsigned OpNo = U.getOperandNo();
+ bool VolatileIsAllowed = false;
+ if (auto *I = dyn_cast<Instruction>(Inst))
+ VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);
if (auto *LI = dyn_cast<LoadInst>(Inst))
- return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
+ return OpNo == LoadInst::getPointerOperandIndex() &&
+ (VolatileIsAllowed || !LI->isVolatile());
if (auto *SI = dyn_cast<StoreInst>(Inst))
- return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
+ return OpNo == StoreInst::getPointerOperandIndex() &&
+ (VolatileIsAllowed || !SI->isVolatile());
if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
- return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
+ return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
+ (VolatileIsAllowed || !RMW->isVolatile());
if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
- !CmpX->isVolatile();
+ (VolatileIsAllowed || !CmpX->isVolatile());
}
return false;
}
bool InferAddressSpaces::rewriteWithNewAddressSpaces(
- ArrayRef<WeakTrackingVH> Postorder,
+ const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
// pointer operands converted to the new address space. Since the pointer
// to the next instruction.
I = skipToNextUser(I, E);
- if (isSimplePointerUseValidToReplace(U)) {
+ if (isSimplePointerUseValidToReplace(
+ TTI, U, V->getType()->getPointerAddressSpace())) {
// If V is used as the pointer operand of a compatible memory operation,
// sets the pointer operand to NewV. This replacement does not change
// the element type, so the resultant load/store is still valid.
--- /dev/null
+# This test generates all variants of load/store instructions and verifies that
+# LLVM generates correct PTX for them.
+
+# RUN: python %s > %t.ll
+# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
+# RUN: llc < %t.ll -march=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll
+
+from itertools import product
+from string import Template
+
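+# PTX type suffix used in the generated ld/st for each tested LLVM type.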
+llvm_type_to_ptx_type = {
+ "i8": "u8",
+ "i16": "u16",
+ "i32": "u32",
+ "i64": "u64",
+ "half": "b16",
+ "<2 x half>": "b32",
+ "float": "f32",
+ "double": "f64"
+}
+
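+# PTX register prefix expected in the CHECK lines for each tested LLVM type.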
+llvm_type_to_ptx_reg = {
+ "i8": "r",
+ "i16": "r",
+ "i32": "r",
+ "i64": "rd",
+ "half": "h",
+ "<2 x half>": "hh",
+ "float": "f",
+ "double": "fd"
+}
+
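+# LLVM address space numbers used by the NVPTX backend; "" is the generic AS.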
+addrspace_id = {
+ "": 0,
+ ".global": 1,
+ ".shared": 3,
+ ".const": 4,
+ ".local": 5,
+ ".param": 101
+}
+
+
+def gen_load_tests():
+ load_template = """
+define ${type} @ld${_volatile}${_space}.${ptx_type}(${type} addrspace(${asid})* %ptr) {
+; CHECK_P32: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%r{{[0-9]+}}]
+; CHECK_P64: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK: ret
+ %p = ${generic_ptr}
+ %a = load ${volatile} ${type}, ${type}* %p
+ ret ${type} %a
+}
+"""
+ for op_type, volatile, space in product(
+ ["i8", "i16", "i32", "i64", "half", "float", "double", "<2 x half>"],
+ [True, False], # volatile
+ ["", ".shared", ".global", ".const", ".local", ".param"]):
+
+    # Volatile is only supported for the generic, global and shared address
+    # spaces; skip the remaining combinations.
+    if volatile and space not in ["", ".global", ".shared"]:
+      continue
+
+    # For the combinations that are left, the expected PTX keeps the address
+    # space suffix right after the optional ".volatile".
+    volatile_as = space
+
+ params = {
+ "type": op_type,
+ "volatile": "volatile" if volatile else "",
+ "_volatile": ".volatile" if volatile else "",
+ "_volatile_as": volatile_as,
+ "_space": space,
+ "ptx_reg": llvm_type_to_ptx_reg[op_type],
+ "ptx_type": llvm_type_to_ptx_type[op_type],
+ "asid": addrspace_id[space],
+ }
+
+    # LLVM does not accept a no-op addrspacecast (e.g. "addrspacecast Type
+    # addrspace(0)* %ptr to Type*"), so the generic-pointer tests construct %p
+    # with a no-op select instead.
+ if space:
+ generic_ptr_template = ("addrspacecast ${type} addrspace(${asid})* %ptr "
+ "to ${type}*")
+ else:
+ generic_ptr_template = "select i1 true, ${type}* %ptr, ${type}* %ptr"
+ params["generic_ptr"] = Template(generic_ptr_template).substitute(params)
+
+ print(Template(load_template).substitute(params))
+
+
+def main():
+ gen_load_tests()
+
+
+main()
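
Only the load generator appears in this hunk, even though the header comment
mentions stores as well. For illustration, a matching store generator might
look like the sketch below. The name gen_store_tests, the exact CHECK patterns,
and the choice to skip stores to .const and .param are assumptions made for
this sketch, not taken from the actual patch; it reuses the imports and the
type and address space tables defined above.

def gen_store_tests():
  # Sketch only: mirrors gen_load_tests for store instructions.
  store_template = """
define void @st${_volatile}${_space}.${ptx_type}(${type} addrspace(${asid})* %ptr, ${type} %v) {
; CHECK_P32: st${_volatile}${_volatile_as}.${ptx_type} [%r{{[0-9]+}}], %${ptx_reg}{{[0-9]+}}
; CHECK_P64: st${_volatile}${_volatile_as}.${ptx_type} [%rd{{[0-9]+}}], %${ptx_reg}{{[0-9]+}}
; CHECK: ret
  %p = ${generic_ptr}
  store ${volatile} ${type} %v, ${type}* %p
  ret void
}
"""
  for op_type, volatile, space in product(
      ["i8", "i16", "i32", "i64", "half", "float", "double", "<2 x half>"],
      [True, False],  # volatile
      ["", ".shared", ".global", ".local"]):  # no stores to .const/.param

    # Same restriction as for loads: volatile is only supported for the
    # generic, global and shared address spaces.
    if volatile and space not in ["", ".global", ".shared"]:
      continue
    volatile_as = space

    params = {
      "type": op_type,
      "volatile": "volatile" if volatile else "",
      "_volatile": ".volatile" if volatile else "",
      "_volatile_as": volatile_as,
      "_space": space,
      "ptx_reg": llvm_type_to_ptx_reg[op_type],
      "ptx_type": llvm_type_to_ptx_type[op_type],
      "asid": addrspace_id[space],
    }

    # Reuse the same trick as gen_load_tests to obtain a generic pointer.
    if space:
      generic_ptr_template = ("addrspacecast ${type} addrspace(${asid})* %ptr "
                              "to ${type}*")
    else:
      generic_ptr_template = "select i1 true, ${type}* %ptr, ${type}* %ptr"
    params["generic_ptr"] = Template(generic_ptr_template).substitute(params)

    print(Template(store_template).substitute(params))

For the sketch to emit anything, main() would also have to call
gen_store_tests() after gen_load_tests().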