From: Matt Arsenault Date: Mon, 7 Oct 2019 19:10:44 +0000 (+0000) Subject: AMDGPU/GlobalISel: Fix selection of 16-bit shifts X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f9eb3dcabce8263121b4fcbf57ea10addf3e71e1;p=llvm AMDGPU/GlobalISel: Fix selection of 16-bit shifts git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373945 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index e5f215fbcd7..15604aa1353 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -752,19 +752,22 @@ multiclass Bits_OpsRev_i16_Pats ; def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)) + !if(!eq(PreservesHI16,1), (ClearHI16 (inst VSrc_b32:$src1, VSrc_b32:$src0)), + (inst VSrc_b32:$src1, VSrc_b32:$src0)) >; def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, - !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)), + !if(!eq(PreservesHI16,1), (ClearHI16 (inst VSrc_b32:$src1, VSrc_b32:$src0)), + (inst VSrc_b32:$src1, VSrc_b32:$src0)), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir index 1a90e609f7b..e1ce7872e93 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -10,51 +10,258 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX8: remark: :0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv) +# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_ASHR %2:sgpr, %3:sgpr(s16) (in function: ashr_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: ashr_s16_vv_zext_to_s64) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_s32_vs) +# ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv) +--- +name: ashr_s16_s16_ss +legalized: true +regBankSelected: true -# ERR-NOT: remark +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX8-LABEL: name: ashr_s16_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:sgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 +... --- -name: ashr_s16_ss +name: ashr_s16_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: ashr_s16_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX9-LABEL: name: ashr_s16_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX10-LABEL: name: ashr_s16_s16_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: ashr_s16_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_s32_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_ASHR %2, %1 + S_ENDPGM 0, implicit %3 +... + +--- +name: ashr_s16_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX9-LABEL: name: ashr_s16_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX10-LABEL: name: ashr_s16_s16_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: ashr_s16_s16_vv_zext_to_s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_s16_vv_zext_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9-LABEL: name: ashr_s16_s16_vv_zext_to_s32 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX10-LABEL: name: ashr_s16_s16_vv_zext_to_s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_AND_B32_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + %5:vgpr(s32) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: ashr_s16_vv_zext_to_s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: ashr_s16_vv_zext_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ZEXT]](s64) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + %5:vgpr(s64) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: ashr_s16_s32_ss legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GFX6-LABEL: name: ashr_s16_ss - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_ss - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_ss + + ; GFX8-LABEL: name: ashr_s16_s32_ss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_ss + ; GFX9-LABEL: name: ashr_s16_s32_ss ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_ss + ; GFX10-LABEL: name: ashr_s16_s32_ss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -68,38 +275,26 @@ body: | ... --- -name: ashr_s16_sv +name: ashr_s16_s32_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: ashr_s16_sv - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_sv - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_sv + ; GFX8-LABEL: name: ashr_s16_s32_sv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_sv + ; GFX9-LABEL: name: ashr_s16_s32_sv ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_sv + ; GFX10-LABEL: name: ashr_s16_s32_sv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -113,90 +308,67 @@ body: | ... --- -name: ashr_s16_vs +name: ashr_s16_s16_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: ashr_s16_vs - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_vs - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_vs - ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_vs - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_vs - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_ASHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX8-LABEL: name: ashr_s16_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX9-LABEL: name: ashr_s16_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I16_e64_]] + ; GFX10-LABEL: name: ashr_s16_s16_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_ASHRREV_I16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_ASHR %2, %3 + S_ENDPGM 0, implicit %4 ... --- -name: ashr_s16_vv +name: ashr_s16_s32_vs legalized: true regBankSelected: true body: | bb.0: - liveins: $vgpr0, $vgpr1 - ; GFX6-LABEL: name: ashr_s16_vv - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX7-LABEL: name: ashr_s16_vv - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX8-LABEL: name: ashr_s16_vv + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: ashr_s16_s32_vs ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX9-LABEL: name: ashr_s16_vv + ; GFX9-LABEL: name: ashr_s16_s32_vs ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) - ; GFX10-LABEL: name: ashr_s16_vv + ; GFX10-LABEL: name: ashr_s16_s32_vs ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_ASHR %2, %1 S_ENDPGM 0, implicit %3 diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir index 2a2f600c5b7..65e9af77196 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -10,51 +10,258 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX8: remark: :0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv) +# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_LSHR %2:sgpr, %3:sgpr(s16) (in function: lshr_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: lshr_s16_vv_zext_to_s64) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_s32_vs) +# ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv) +--- +name: lshr_s16_s16_ss +legalized: true +regBankSelected: true -# ERR-NOT: remark +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX8-LABEL: name: lshr_s16_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:sgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 +... --- -name: lshr_s16_ss +name: lshr_s16_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: lshr_s16_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX9-LABEL: name: lshr_s16_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX10-LABEL: name: lshr_s16_s16_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: lshr_s16_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_s32_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_LSHR %2, %1 + S_ENDPGM 0, implicit %3 +... + +--- +name: lshr_s16_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX9-LABEL: name: lshr_s16_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX10-LABEL: name: lshr_s16_s16_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: lshr_s16_s16_vv_zext_to_s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_s16_vv_zext_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9-LABEL: name: lshr_s16_s16_vv_zext_to_s32 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX10-LABEL: name: lshr_s16_s16_vv_zext_to_s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_AND_B32_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + %5:vgpr(s32) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: lshr_s16_vv_zext_to_s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: lshr_s16_vv_zext_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ZEXT]](s64) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + %5:vgpr(s64) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: lshr_s16_s32_ss legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GFX6-LABEL: name: lshr_s16_ss - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_ss - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_ss + + ; GFX8-LABEL: name: lshr_s16_s32_ss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_ss + ; GFX9-LABEL: name: lshr_s16_s32_ss ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_ss + ; GFX10-LABEL: name: lshr_s16_s32_ss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -68,38 +275,26 @@ body: | ... --- -name: lshr_s16_sv +name: lshr_s16_s32_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: lshr_s16_sv - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_sv - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_sv + ; GFX8-LABEL: name: lshr_s16_s32_sv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_sv + ; GFX9-LABEL: name: lshr_s16_s32_sv ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_sv + ; GFX10-LABEL: name: lshr_s16_s32_sv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -113,90 +308,67 @@ body: | ... --- -name: lshr_s16_vs +name: lshr_s16_s16_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: lshr_s16_vs - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_vs - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_vs - ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_vs - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_vs - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_LSHR %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX8-LABEL: name: lshr_s16_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX9-LABEL: name: lshr_s16_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B16_e64_]] + ; GFX10-LABEL: name: lshr_s16_s16_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHRREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_LSHR %2, %3 + S_ENDPGM 0, implicit %4 ... --- -name: lshr_s16_vv +name: lshr_s16_s32_vs legalized: true regBankSelected: true body: | bb.0: - liveins: $vgpr0, $vgpr1 - ; GFX6-LABEL: name: lshr_s16_vv - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX7-LABEL: name: lshr_s16_vv - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX8-LABEL: name: lshr_s16_vv + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: lshr_s16_s32_vs ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX9-LABEL: name: lshr_s16_vv + ; GFX9-LABEL: name: lshr_s16_s32_vs ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) - ; GFX10-LABEL: name: lshr_s16_vv + ; GFX10-LABEL: name: lshr_s16_s32_vs ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_LSHR %2, %1 S_ENDPGM 0, implicit %3 diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir index d41cdee3904..07b4f9a3dcd 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -10,51 +10,258 @@ # RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t # ERR-NOT: remark -# ERR-GFX8: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs) -# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv) +# ERR: remark: :0:0: cannot select: %4:sgpr(s16) = G_SHL %2:sgpr, %3:sgpr(s16) (in function: shl_s16_s16_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_s32_vv) +# ERR-NEXT: remark: :0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: shl_s16_vv_zext_to_s64) +# ERR-NEXT: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_s32_ss) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_s32_sv) +# ERR-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_s32_vs) +# ERR-NOT: remark -# ERR-GFX910: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs) -# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv) +--- +name: shl_s16_s16_ss +legalized: true +regBankSelected: true -# ERR-NOT: remark +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX8-LABEL: name: shl_s16_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:sgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 +... --- -name: shl_s16_ss +name: shl_s16_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: shl_s16_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX9-LABEL: name: shl_s16_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-LABEL: name: shl_s16_s16_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: shl_s16_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_s32_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_SHL %2, %1 + S_ENDPGM 0, implicit %3 +... + +--- +name: shl_s16_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX9-LABEL: name: shl_s16_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-LABEL: name: shl_s16_s16_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: shl_s16_s16_vv_zext_to_s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_s16_vv_zext_to_s32 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX9-LABEL: name: shl_s16_s16_vv_zext_to_s32 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_BFE_U32_]] + ; GFX10-LABEL: name: shl_s16_s16_vv_zext_to_s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: [[V_BFE_U32_:%[0-9]+]]:vgpr_32 = V_BFE_U32 [[V_AND_B32_e64_]], 0, 16, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_BFE_U32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + %5:vgpr(s32) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: shl_s16_vv_zext_to_s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX8-LABEL: name: shl_s16_vv_zext_to_s64 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16) + ; GFX8: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX9-LABEL: name: shl_s16_vv_zext_to_s64 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16) + ; GFX9: S_ENDPGM 0, implicit [[ZEXT]](s64) + ; GFX10-LABEL: name: shl_s16_vv_zext_to_s64 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX10: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16) + ; GFX10: S_ENDPGM 0, implicit [[ZEXT]](s64) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + %5:vgpr(s64) = G_ZEXT %4 + S_ENDPGM 0, implicit %5 +... + +--- +name: shl_s16_s32_ss legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GFX6-LABEL: name: shl_s16_ss - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_ss - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_ss + + ; GFX8-LABEL: name: shl_s16_s32_ss ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_ss + ; GFX9-LABEL: name: shl_s16_s32_ss ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_ss + ; GFX10-LABEL: name: shl_s16_s32_ss ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -68,38 +275,26 @@ body: | ... --- -name: shl_s16_sv +name: shl_s16_s32_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: shl_s16_sv - ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_sv - ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_sv + ; GFX8-LABEL: name: shl_s16_s32_sv ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_sv + ; GFX9-LABEL: name: shl_s16_s32_sv ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_sv + ; GFX10-LABEL: name: shl_s16_s32_sv ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) @@ -113,90 +308,67 @@ body: | ... --- -name: shl_s16_vs +name: shl_s16_s16_sv legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GFX6-LABEL: name: shl_s16_vs - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_vs - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_vs - ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_vs - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_vs - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) - %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr(s32) = COPY $sgpr0 - %2:vgpr(s16) = G_TRUNC %0 - %3:vgpr(s16) = G_SHL %2, %1 - S_ENDPGM 0, implicit %3 + ; GFX8-LABEL: name: shl_s16_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX9-LABEL: name: shl_s16_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B16_e64_]] + ; GFX10-LABEL: name: shl_s16_s16_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX10: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[V_LSHLREV_B16_e64_]], [[V_MOV_B32_e32_]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_TRUNC %1 + %4:vgpr(s16) = G_SHL %2, %3 + S_ENDPGM 0, implicit %4 ... --- -name: shl_s16_vv +name: shl_s16_s32_vs legalized: true regBankSelected: true body: | bb.0: - liveins: $vgpr0, $vgpr1 - ; GFX6-LABEL: name: shl_s16_vv - ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX7-LABEL: name: shl_s16_vv - ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) - ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) - ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX8-LABEL: name: shl_s16_vv + liveins: $sgpr0, $vgpr0 + ; GFX8-LABEL: name: shl_s16_s32_vs ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX9-LABEL: name: shl_s16_vv + ; GFX9-LABEL: name: shl_s16_s32_vs ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) - ; GFX10-LABEL: name: shl_s16_vv + ; GFX10-LABEL: name: shl_s16_s32_vs ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 + %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s16) = G_TRUNC %0 %3:vgpr(s16) = G_SHL %2, %1 S_ENDPGM 0, implicit %3