[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
+def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">, // Tied to this builtin name through GCCBuiltin.
+ Intrinsic<
+ [llvm_float_ty], [llvm_float_ty], [IntrNoMem] // Fixed float result and float operand; not overloaded like the llvm_anyfloat_ty defs around it.
+>;
+
def int_amdgcn_rsq_clamp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
DAG.getValueType(VT));
}
-static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, EVT VT) {
+static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) { // Reports the unsupported-intrinsic diagnostic at DL, then yields undef of type VT.
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
- "non-hsa intrinsic with hsa target");
+ "non-hsa intrinsic with hsa target",
+ DL.getDebugLoc()); // The diagnostic now carries DL's debug location.
+ DAG.getContext()->diagnose(BadIntrin); // Report through the context's diagnose().
+ return DAG.getUNDEF(VT); // The caller receives undef in place of a lowered value.
+}
+
+static SDValue emitRemovedIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) { // Diagnoses an intrinsic the subtarget does not support, at DL; yields undef of type VT.
+ DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
+ "intrinsic not supported on subtarget",
+ DL.getDebugLoc()); // Location reported with the diagnostic comes from DL.
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
case Intrinsic::amdgcn_rsq:
case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+ case Intrinsic::amdgcn_rsq_legacy: {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return emitRemovedIntrinsicError(DAG, DL, VT);
+
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+ }
case Intrinsic::amdgcn_rsq_clamp:
case AMDGPUIntrinsic::AMDGPU_rsq_clamped: { // Legacy name
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
}
case Intrinsic::r600_read_ngroups_x:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Y, false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Z, false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_X);
case Intrinsic::r600_read_local_size_y:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Y);
case Intrinsic::r600_read_local_size_z:
if (Subtarget->isAmdHsaOS())
- return emitNonHSAIntrinsicError(DAG, VT);
+ return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
+ if (N->getOperand(0).isUndef())
+ return DAG.getUNDEF(MVT::i1);
+
return SDValue();
}
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
return performFCanonicalizeCombine(N, DCI);
+ case AMDGPUISD::FRACT:
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::RSQ_LEGACY:
+ case AMDGPUISD::RSQ_CLAMP:
+ case AMDGPUISD::LDEXP: {
+ SDValue Src = N->getOperand(0);
+ if (Src.isUndef())
+ return Src;
+ break;
+ }
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i1 @llvm.amdgcn.class.f32(float, i32) #1
declare i1 @llvm.amdgcn.class.f64(double, i32) #1
ret void
}
+; FIXME: Why is the sext of the class result (presumably the v_cndmask checked below) still emitted?
+; SI-LABEL: {{^}}test_class_undef_f32:
+; SI-NOT: v_cmp_class
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1,
+; SI: buffer_store_dword
+define void @test_class_undef_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+ %result = call i1 @llvm.amdgcn.class.f32(float undef, i32 %b) #1 ; undef value operand: the checks expect no v_cmp_class for it
+ %sext = sext i1 %result to i32 ; the extension the FIXME above asks about
+ store i32 %sext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
ret void
}
+; GCN-LABEL: {{^}}v_fract_undef_f32:
+; GCN-NOT: v_fract_f32
+; GCN-NOT: v0
+; GCN: buffer_store_dword v0
+define void @v_fract_undef_f32(float addrspace(1)* %out) #1 {
+ %fract = call float @llvm.amdgcn.fract.f32(float undef) ; undef input: the checks expect no v_fract_f32
+ store float %fract, float addrspace(1)* %out ; checks expect v0 to be stored with no earlier mention of v0
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare float @llvm.amdgcn.ldexp.f32(float, i32) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
}
+
+; SI-LABEL: {{^}}test_ldexp_undef_f32:
+; SI-NOT: v_ldexp_f32
+define void @test_ldexp_undef_f32(float addrspace(1)* %out, i32 %b) nounwind {
+ %result = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %b) nounwind readnone ; undef first operand: the check expects no v_ldexp_f32
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=amdgcn -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
ret void
}
+; FUNC-LABEL: {{^}}rcp_undef_f32:
+; SI-NOT: v_rcp_f32
+define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
+ %rcp = call float @llvm.amdgcn.rcp.f32(float undef) #0 ; undef input: the check expects no v_rcp_f32
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
ret void
}
+; FUNC-LABEL: {{^}}rsq_clamp_undef_f32:
+; SI-NOT: v_rsq_clamp_f32
+define void @rsq_clamp_undef_f32(float addrspace(1)* %out) #0 {
+ %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef) ; undef input: the check expects no v_rsq_clamp_f32
+ store float %rsq_clamp, float addrspace(1)* %out
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
--- /dev/null
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.amdgcn.rsq.legacy(float) #0
+
+; FUNC-LABEL: {{^}}rsq_legacy_f32:
+; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float %src) #0 ; function argument as input; the check expects a source operand matching s[0-9]+
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_4.0
+; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 4.0
+define void @rsq_legacy_f32_constant_4.0(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float 4.0) #0 ; the check expects the instruction to remain, with 4.0 as its source operand
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_100.0
+; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
+define void @rsq_legacy_f32_constant_100.0(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float 100.0) #0 ; the check expects 100.0 as the 32-bit pattern 0x42c80000
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_legacy_undef_f32:
+; SI-NOT: v_rsq_legacy_f32
+define void @rsq_legacy_undef_f32(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float undef) ; undef input: the check expects no v_rsq_legacy_f32
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.amdgcn.rsq.f32(float) #0
ret void
}
+; FUNC-LABEL: {{^}}rsq_undef_f32:
+; SI-NOT: v_rsq_f32
+define void @rsq_undef_f32(float addrspace(1)* %out) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.f32(float undef) ; undef input: the check expects no v_rsq_f32
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
--- /dev/null
+; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+
+; ERROR: error: :1:42: in function rsq_legacy_f32 void (float addrspace(1)*, float): intrinsic not supported on subtarget
+
+declare float @llvm.amdgcn.rsq.legacy(float) #0
+
+define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) #1 {
+ %rsq = call float @llvm.amdgcn.rsq.legacy(float %src), !dbg !4 ; run with -mcpu=tonga; !4 is the line 1, column 42 location that the expected diagnostic text contains
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
+!1 = !DIFile(filename: "foo.cl", directory: "/dev/null")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !DILocation(line: 1, column: 42, scope: !5)
+!5 = distinct !DISubprogram(name: "rsq_legacy_f32", scope: null, line: 1, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0)