From 86c801f95b1316c3d939e2ad80abccdda44464f8 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 28 Jul 2016 23:29:33 +0000 Subject: [PATCH] Merging r276435: ------------------------------------------------------------------------ r276435 | arsenm | 2016-07-22 10:01:21 -0700 (Fri, 22 Jul 2016) | 4 lines AMDGPU: Fix i1 fp_to_int R600's i1 fp_to_uint selected but was incorrect according to what instcombine constant folds to. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@277082 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUInstructions.td | 3 ++- lib/Target/AMDGPU/R600ISelLowering.cpp | 25 ++++++++++++++++----- lib/Target/AMDGPU/R600ISelLowering.h | 3 ++- lib/Target/AMDGPU/SIInstructions.td | 10 +++++++++ test/CodeGen/AMDGPU/fp_to_sint.f64.ll | 23 ++++++++++++++++++- test/CodeGen/AMDGPU/fp_to_sint.ll | 28 ++++++++++++++++++++--- test/CodeGen/AMDGPU/fp_to_uint.f64.ll | 23 ++++++++++++++++++- test/CodeGen/AMDGPU/fp_to_uint.ll | 30 +++++++++++++++++++++++-- 8 files changed, 131 insertions(+), 14 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 6761b4b5df9..3944fdbd31e 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding -int FP32_NEG_ONE = 0xbf800000; int FP32_ONE = 0x3f800000; +int FP32_NEG_ONE = 0xbf800000; int FP64_ONE = 0x3ff0000000000000; +int FP64_NEG_ONE = 0xbff0000000000000; } def CONST : Constants; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 8f78edd76a5..8ccd176930a 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); @@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::FP_TO_UINT: if (N->getValueType(0) == MVT::i1) { - Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG)); return; } // Fall-through. Since we don't care about out of bounds values // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint // considers some extra cases which are not necessary here. case ISD::FP_TO_SINT: { + if (N->getValueType(0) == MVT::i1) { + Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG)); + return; + } + SDValue Result; if (expandFP_TO_SINT(N, Result, DAG)) Results.push_back(Result); @@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); } -SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { +SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + return DAG.getNode( + ISD::SETCC, + DL, + MVT::i1, + Op, DAG.getConstantFP(1.0f, DL, MVT::f32), + DAG.getCondCode(ISD::SETEQ)); +} + +SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode( ISD::SETCC, DL, MVT::i1, - Op, DAG.getConstantFP(0.0f, DL, MVT::f32), - DAG.getCondCode(ISD::SETNE) - ); + Op, DAG.getConstantFP(-1.0f, DL, MVT::f32), + DAG.getCondCode(ISD::SETEQ)); } SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index 2fb6ee25caa..9700ce14c6f 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -72,7 +72,8 @@ private: SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 6427db87cd6..79038c9a9e6 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -3391,6 +3391,16 @@ def : Pat < (V_CNDMASK_B32_e64 0, -1, $src), sub1) >; +class FPToI1Pat : Pat < + (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), + (i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)) +>; + +def : FPToI1Pat; +def : FPToI1Pat; +def : FPToI1Pat; +def : FPToI1Pat; + // If we need to perform a logical operation on i1 values, we need to // use vector comparisons since there is only one SCC register. Vector // comparisions still write to a pair of SGPRs, so treat these as diff --git a/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll index be23e10d708..1537d67cadc 100644 --- a/test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ b/test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -1,7 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare double @llvm.fabs.f64(double) #1 ; FUNC-LABEL: @fp_to_sint_f64_i32 ; SI: v_cvt_i32_f64_e32 @@ -54,3 +55,23 @@ define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in store i64 %cast, i64 addrspace(1)* %out, align 8 ret void } + +; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}} +define void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %conv = fptosi double %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}| +define void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %in.fabs = call double @llvm.fabs.f64(double %in) + %conv = fptosi double %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fp_to_sint.ll b/test/CodeGen/AMDGPU/fp_to_sint.ll index b39aeadc8cc..0cd0358bafd 100644 --- a/test/CodeGen/AMDGPU/fp_to_sint.ll +++ b/test/CodeGen/AMDGPU/fp_to_sint.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC -declare float @llvm.fabs.f32(float) #0 +declare float @llvm.fabs.f32(float) #1 ; FUNC-LABEL: {{^}}fp_to_sint_i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -17,7 +17,7 @@ define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { ; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: ; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { - %in.fabs = call float @llvm.fabs.f32(float %in) #0 + %in.fabs = call float @llvm.fabs.f32(float %in) %conv = fptosi float %in.fabs to i32 store i32 %conv, i32 addrspace(1)* %out ret void @@ -227,4 +227,26 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { ret void } -attributes #0 = { nounwind readnone } +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}} + +; EG: AND_INT +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, literal.y, +; EG-NEXT: -1082130432(-1.000000e+00) +define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %conv = fptosi float %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{[0-9]+}}| +define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %in.fabs = call float @llvm.fabs.f32(float %in) + %conv = fptosi float %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll index 760019ebdc0..d5bc416434d 100644 --- a/test/CodeGen/AMDGPU/fp_to_uint.f64.ll +++ b/test/CodeGen/AMDGPU/fp_to_uint.f64.ll @@ -1,7 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare double @llvm.fabs.f64(double) #1 ; SI-LABEL: {{^}}fp_to_uint_i32_f64: ; SI: v_cvt_u32_f64_e32 @@ -68,3 +69,23 @@ define void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> % store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32 ret void } + +; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}} +define void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %conv = fptoui double %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f64_to_i1: +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}| +define void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 { + %in.fabs = call double @llvm.fabs.f64(double %in) + %conv = fptoui double %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fp_to_uint.ll b/test/CodeGen/AMDGPU/fp_to_uint.ll index b7b6ccc238b..8a0f9fa2ac2 100644 --- a/test/CodeGen/AMDGPU/fp_to_uint.ll +++ b/test/CodeGen/AMDGPU/fp_to_uint.ll @@ -1,6 +1,8 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC + +declare float @llvm.fabs.f32(float) #1 ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32: ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -215,3 +217,27 @@ define void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void } + + +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}} + +; EG: AND_INT +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, 1.0, +define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %conv = fptoui float %in to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1: +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{[0-9]+}}| +define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 { + %in.fabs = call float @llvm.fabs.f32(float %in) + %conv = fptoui float %in.fabs to i1 + store i1 %conv, i1 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } -- 2.40.0