From: Matt Arsenault Date: Fri, 8 Feb 2019 15:06:24 +0000 (+0000) Subject: AMDGPU/GlobalISel: Fix shift legalization for non-power-of-2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7772407272537fb86a35a0c5f17247ac3d7615cc;p=llvm AMDGPU/GlobalISel: Fix shift legalization for non-power-of-2 clampScalar doesn't do anything for non-power-of-2 in range. There should probably be a combination rule to reduce the number of matching rules. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353526 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0002aece557..e4406f255fa 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -452,12 +452,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, Shifts.clampScalar(1, S16, S32); Shifts.clampScalar(0, S16, S64); + Shifts.widenScalarToNextPow2(0, 16); } else { // Make sure we legalize the shift amount type first, as the general // expansion for the shifted type will produce much worse code if it hasn't // been truncated already. Shifts.clampScalar(1, S32, S32); Shifts.clampScalar(0, S32, S64); + Shifts.widenScalarToNextPow2(0, 32); } Shifts.scalarize(0); diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index 5bc8db84473..f2d381904e9 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -292,6 +292,7 @@ body: | ... --- + name: test_shl_v2s32_v2s32 body: | bb.0: @@ -674,6 +675,90 @@ body: | $vgpr0_vgpr1 = COPY %2 ... +--- +name: test_shl_s7_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_shl_s7_s7 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[AND]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: $vgpr0 = COPY [[COPY4]](s32) + ; VI-LABEL: name: test_shl_s7_s7 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_shl_s7_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SHL %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: test_shl_i24_i32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_shl_i24_i32 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: $vgpr0 = COPY [[COPY3]](s32) + ; VI-LABEL: name: test_shl_i24_i32 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; VI: $vgpr0 = COPY [[COPY3]](s32) + ; GFX9-LABEL: name: test_shl_i24_i32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX9: $vgpr0 = COPY [[COPY3]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s24) = G_TRUNC %0 + %3:_(s24) = G_SHL %2, %1 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + --- name: test_shl_s128_s128 body: |