From 4bafedb149fd42cb6f2c3bb5614f9d3cefc1bdad Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 7 Feb 2019 18:14:39 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Legalize fsqrt git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353438 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 12 + lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + .../AMDGPU/GlobalISel/legalize-fsqrt.mir | 288 ++++++++++++++++++ .../AMDGPU/GlobalISel/regbankselect-fsqrt.mir | 35 +++ 4 files changed, 336 insertions(+) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir create mode 100644 test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 3a157235bdd..419e7112316 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -161,6 +161,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .scalarize(0) .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64); + if (ST.has16BitInsts()) { + getActionDefinitionsBuilder(G_FSQRT) + .legalFor({S32, S64, S16}) + .scalarize(0) + .clampScalar(0, S16, S64); + } else { + getActionDefinitionsBuilder(G_FSQRT) + .legalFor({S32, S64}) + .scalarize(0) + .clampScalar(0, S32, S64); + } + getActionDefinitionsBuilder(G_FPTRUNC) .legalFor({{S32, S64}, {S16, S32}}) .scalarize(0); diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 0b33feb91e3..bb7a492fd8a 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -598,6 +598,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_FPTOUI: case AMDGPU::G_FMUL: case AMDGPU::G_FMA: + case AMDGPU::G_FSQRT: case AMDGPU::G_SITOFP: case AMDGPU::G_UITOFP: case AMDGPU::G_FPTRUNC: diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir new file mode 100644 index 00000000000..579b59361bc --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -0,0 +1,288 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: test_fsqrt_s32 +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fsqrt_s32 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-LABEL: name: test_fsqrt_s32 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-LABEL: name: test_fsqrt_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_FSQRT %0 +... +--- +name: test_fsqrt_s64 +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fsqrt_s64 + ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI-LABEL: name: test_fsqrt_s64 + ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-LABEL: name: test_fsqrt_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_FSQRT %0 +... +--- +name: test_fsqrt_s16 +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fsqrt_s16 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-LABEL: name: test_fsqrt_s16 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_fsqrt_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_FSQRT %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_fsqrt_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_fsqrt_v2s32 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-LABEL: name: test_fsqrt_v2s32 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-LABEL: name: test_fsqrt_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_FSQRT %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_fsqrt_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; SI-LABEL: name: test_fsqrt_v3s32 + ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-LABEL: name: test_fsqrt_v3s32 + ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; VI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-LABEL: name: test_fsqrt_v3s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; GFX9: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = G_FSQRT %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... + +--- +name: test_fsqrt_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; SI-LABEL: name: test_fsqrt_v2s64 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; SI: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-LABEL: name: test_fsqrt_v2s64 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-LABEL: name: test_fsqrt_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = G_FSQRT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_fsqrt_v2s16 +body: | + bb.0: + liveins: $vgpr0 + + ; SI-LABEL: name: test_fsqrt_v2s16 + ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16) + ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; VI-LABEL: name: test_fsqrt_v2s16 + ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-LABEL: name: test_fsqrt_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_FSQRT %0 + $vgpr0 = COPY %1 +... + +--- +name: test_fsqrt_v3s16 +body: | + bb.0: + + ; SI-LABEL: name: test_fsqrt_v3s16 + ; SI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16) + ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) + ; VI-LABEL: name: test_fsqrt_v3s16 + ; VI: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16) + ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) + ; GFX9-LABEL: name: test_fsqrt_v3s16 + ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16) + ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>) + %0:_(<3 x s16>) = G_IMPLICIT_DEF + %1:_(<3 x s16>) = G_FSQRT %0 + S_NOP 0, implicit %1 +... + +--- +name: test_fsqrt_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; SI-LABEL: name: test_fsqrt_v4s16 + ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) + ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] + ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) + ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; VI-LABEL: name: test_fsqrt_v4s16 + ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] + ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] + ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[UV3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) + ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; GFX9-LABEL: name: test_fsqrt_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[UV]] + ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[UV1]] + ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[UV2]] + ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FSQRT]](s16), [[FSQRT1]](s16), [[FSQRT2]](s16), [[FSQRT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = G_FSQRT %0 + $vgpr0_vgpr1 = COPY %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir new file mode 100644 index 00000000000..a2e3a432059 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsqrt.mir @@ -0,0 +1,35 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: fsqrt_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: fsqrt_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY]] + ; CHECK: $vgpr0 = COPY [[FSQRT]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_FSQRT %0 + $vgpr0 = COPY %1 +... + +--- +name: fsqrt_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: fsqrt_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[FSQRT:%[0-9]+]]:vgpr(s32) = G_FSQRT [[COPY]] + ; CHECK: $vgpr0 = COPY [[FSQRT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_FSQRT %0 + $vgpr0 = COPY %1 +... -- 2.50.1