From f9d63f2d76c38ccfdb8f9abd0aa66ab9e8bd995d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 17 May 2019 12:20:01 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Legalize G_INTRINSIC_TRUNC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361027 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 71 +++++- lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 +- .../GlobalISel/legalize-intrinsic-trunc.mir | 230 ++++++++++++++++-- 3 files changed, 278 insertions(+), 26 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 64ae29ec998..b0c01255c1f 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -289,17 +289,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .legalFor({{S32, S32}, {S32, S64}}) .scalarize(0); - getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND}) + getActionDefinitionsBuilder(G_INTRINSIC_ROUND) .legalFor({S32, S64}) .scalarize(0); if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { - getActionDefinitionsBuilder(G_FRINT) + getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FRINT}) .legalFor({S32, S64}) .clampScalar(0, S32, S64) .scalarize(0); } else { - getActionDefinitionsBuilder(G_FRINT) + getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FRINT}) .legalFor({S32}) .customFor({S64}) .clampScalar(0, S32, S64) @@ -689,6 +689,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, return legalizeAddrSpaceCast(MI, MRI, MIRBuilder); case TargetOpcode::G_FRINT: return legalizeFrint(MI, MRI, MIRBuilder); + case TargetOpcode::G_INTRINSIC_TRUNC: + return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder); default: return false; } @@ -872,3 +874,66 @@ bool AMDGPULegalizerInfo::legalizeFrint( MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2); return true; } + +static MachineInstrBuilder extractF64Exponent(unsigned Hi, + MachineIRBuilder &B) { + const 
unsigned FractBits = 52;
+  const unsigned ExpBits = 11;
+  LLT S32 = LLT::scalar(32);
+
+  auto Const0 = B.buildConstant(S32, FractBits - 32);
+  auto Const1 = B.buildConstant(S32, ExpBits);
+
+  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
+    .addUse(Hi)
+    .addUse(Const0.getReg(0))
+    .addUse(Const1.getReg(0));
+
+  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
+}
+
+bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  B.setInstr(MI);
+
+  unsigned Src = MI.getOperand(1).getReg();
+  LLT Ty = MRI.getType(Src);
+  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
+
+  LLT S1 = LLT::scalar(1);
+  LLT S32 = LLT::scalar(32);
+  LLT S64 = LLT::scalar(64);
+
+  // TODO: Should this use extract since the low half is unused?
+  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+  unsigned Hi = Unmerge.getReg(1);
+
+  // Extract the upper half, since this is where we will find the sign and
+  // exponent.
+  auto Exp = extractF64Exponent(Hi, B);
+
+  const unsigned FractBits = 52;
+
+  // Extract the sign bit.
+  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
+  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
+
+  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
+
+  const auto Zero32 = B.buildConstant(S32, 0);
+
+  // Extend back to 64-bits.
+ auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)}); + + auto Shr = B.buildAShr(S64, FractMask, Exp); + auto Not = B.buildNot(S64, Shr); + auto Tmp0 = B.buildAnd(S64, Src, Not); + auto FiftyOne = B.buildConstant(S32, FractBits - 1); + + auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32); + auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne); + + auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0); + B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1); + return true; +} diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 13ac2408a2e..3591f4f35de 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -40,7 +40,8 @@ public: MachineIRBuilder &MIRBuilder) const; bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const; - + bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const; }; } // End llvm namespace. 
#endif diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir index 12936ec01dd..d1d9528e1fc 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir @@ -1,5 +1,53 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer -o - %s | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s + +--- +name: test_intrinsic_trunc_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_intrinsic_trunc_s16 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; CI-LABEL: name: test_intrinsic_trunc_s16 + ; CI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; CI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; CI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; CI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-LABEL: name: test_intrinsic_trunc_s16 + ; VI: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_intrinsic_trunc_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_INTRINSIC_TRUNC %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... --- name: test_intrinsic_trunc_s32 @@ -7,9 +55,18 @@ body: | bb.0: liveins: $vgpr0 - ; CHECK-LABEL: name: test_intrinsic_trunc_s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: $vgpr0 = COPY [[COPY]](s32) + ; SI-LABEL: name: test_intrinsic_trunc_s32 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: $vgpr0 = COPY [[COPY]](s32) + ; CI-LABEL: name: test_intrinsic_trunc_s32 + ; CI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CI: $vgpr0 = COPY [[COPY]](s32) + ; VI-LABEL: name: test_intrinsic_trunc_s32 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: $vgpr0 = COPY [[COPY]](s32) + ; GFX9-LABEL: name: test_intrinsic_trunc_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: $vgpr0 = COPY [[COPY]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_INTRINSIC_TRUNC %0 $vgpr0 = COPY %0 @@ -21,28 +78,104 @@ body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_intrinsic_trunc_s64 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: 
[[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[INTRINSIC_TRUNC]](s64)
+    ; SI-LABEL: name: test_intrinsic_trunc_s64
+    ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
+    ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]]
+    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32)
+    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]]
+    ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]]
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51
+    ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]]
+    ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]]
+    ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]]
+    ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]]
+    ; SI: $vgpr0_vgpr1 = COPY [[SELECT1]](s64)
+    ; CI-LABEL: name: test_intrinsic_trunc_s64
+    ; CI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]]
+    ; CI: $vgpr0_vgpr1 = COPY [[INTRINSIC_TRUNC]](s64)
+    ; VI-LABEL: name: test_intrinsic_trunc_s64
+    ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; VI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = 
G_INTRINSIC_TRUNC [[COPY]] + ; VI: $vgpr0_vgpr1 = COPY [[INTRINSIC_TRUNC]](s64) + ; GFX9-LABEL: name: test_intrinsic_trunc_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX9: $vgpr0_vgpr1 = COPY [[INTRINSIC_TRUNC]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_INTRINSIC_TRUNC %0 $vgpr0_vgpr1 = COPY %1 ... +--- +name: test_intrinsic_trunc_v2s16 +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_intrinsic_trunc_v2s16 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; SI: $vgpr0 = COPY [[COPY]](<2 x s16>) + ; CI-LABEL: name: test_intrinsic_trunc_v2s16 + ; CI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CI: $vgpr0 = COPY [[COPY]](<2 x s16>) + ; VI-LABEL: name: test_intrinsic_trunc_v2s16 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; VI: $vgpr0 = COPY [[COPY]](<2 x s16>) + ; GFX9-LABEL: name: test_intrinsic_trunc_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: $vgpr0 = COPY [[COPY]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_INTRINSIC_TRUNC %0 + $vgpr0 = COPY %0 +... 
+ --- name: test_intrinsic_trunc_v2s32 body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: test_intrinsic_trunc_v2s32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; CHECK: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-LABEL: name: test_intrinsic_trunc_v2s32 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; SI: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CI-LABEL: name: test_intrinsic_trunc_v2s32 + ; CI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; CI: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32) + ; CI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-LABEL: name: test_intrinsic_trunc_v2s32 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; VI: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; VI: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-LABEL: name: test_intrinsic_trunc_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]]
+    ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s32), [[INTRINSIC_TRUNC1]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_INTRINSIC_TRUNC %0
     $vgpr0_vgpr1 = COPY %1
@@ -54,13 +187,66 @@
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_intrinsic_trunc_v2s64
-    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-    ; CHECK: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]]
-    ; CHECK: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s64), [[INTRINSIC_TRUNC1]](s64)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; SI-LABEL: name: test_intrinsic_trunc_v2s64
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+    ; SI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
+    ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]]
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]]
+    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495
+    ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32)
+    ; SI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]]
+    ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]]
+    ; SI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51
+    ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]]
+    ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]]
+    ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]]
+    ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]]
+    ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; SI: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32)
+    ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]]
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]]
+    ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND2]](s32)
+    ; SI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32)
+    ; SI: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]]
+    ; SI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]]
+    ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]]
+    ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]]
+    ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND3]]
+    ; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT2]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; CI-LABEL: name: test_intrinsic_trunc_v2s64
+    ; CI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; CI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]]
+    ; CI: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]]
+    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s64), [[INTRINSIC_TRUNC1]](s64)
+    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; VI-LABEL: name: test_intrinsic_trunc_v2s64
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; VI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]]
+    ; VI: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s64), [[INTRINSIC_TRUNC1]](s64)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    ; GFX9-LABEL: name: test_intrinsic_trunc_v2s64
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]]
+    ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INTRINSIC_TRUNC]](s64), [[INTRINSIC_TRUNC1]](s64)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
     %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(<2 x s64>) = G_INTRINSIC_TRUNC %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
-- 
2.40.0