llvm::ConstantAsMetadata::get(Loc)));
}
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively mark all inline asm blocks in CUDA as convergent
+    // (meaning they may call an intrinsically convergent op, such as
+    // bar.sync, and so can't have certain optimizations applied around them).
+    Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+                         llvm::Attribute::Convergent);
+  }
+
// Extract all of the register value results from the asm.
std::vector<llvm::Value*> RegResults;
if (ResultRegTypes.size() == 1) {
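
For context, a minimal sketch (not part of this patch; the kernel and helper names are hypothetical) of the device-code pattern that motivates the conservative marking: the inline asm below hides bar.sync, a block-wide barrier, so the optimizer must not sink, hoist, or duplicate it across divergent control flow, which is exactly what the convergent attribute forbids.

__device__ void block_barrier() {
  // To LLVM this is just opaque asm with side effects; nothing but the
  // convergent attribute says it is a barrier every thread must reach.
  asm volatile("bar.sync 0;" ::: "memory");
}

__global__ void reduce_step(int *data) {
  int tid = threadIdx.x;
  if (tid < 128)            // divergent branch
    data[tid] += data[tid + 128];
  block_barrier();          // must not be moved into the divergent region
}
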
__host__ __device__ void bar() {
  // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
  baz();
+  // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
+  int x;
+  asm ("trap;" : "=l"(x));
+  // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
+  asm volatile ("trap;");
}
// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
// DEVICE: attributes [[BAZ_ATTR]] = {
// DEVICE-SAME: convergent
// DEVICE-SAME: }
// DEVICE: attributes [[CALL_ATTR]] = { convergent }
+// DEVICE: attributes [[ASM_ATTR]] = { convergent
// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
// HOST: attributes [[BAZ_ATTR]] = {