From: Simon Pilgrim Date: Fri, 13 Jan 2017 13:16:19 +0000 (+0000) Subject: [X86][AVX512] Add support for variable ASHR v2i64/v4i64 without VLX X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=51af2c4d9f70967d94550207820321f5d75a1b29;p=llvm [X86][AVX512] Add support for variable ASHR v2i64/v4i64 without VLX Use v8i64 variable ASHR instructions if we don't have VLX. This is a reduced version of D28537 that just adds support for variable shifts - I'll continue with that patch (for just constant/uniform shifts) once I've fixed the type legalization issue in avx512-cvt.ll. Differential Revision: https://reviews.llvm.org/D28604 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291901 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8a054462e5f..886782e49c5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21346,7 +21346,7 @@ static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget, if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI()) return false; - if (VT.is512BitVector() || Subtarget.hasVLX()) + if (Subtarget.hasAVX512()) return true; bool LShift = VT.is128BitVector() || VT.is256BitVector(); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 230d1700b8d..a473073ead2 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4932,6 +4932,7 @@ multiclass avx512_var_shift_mb opc, string OpcodeStr, SDNode OpNode, SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; } + multiclass avx512_var_shift_sizes opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in @@ -4955,12 +4956,13 @@ multiclass avx512_var_shift_types opc, string OpcodeStr, } // Use 512bit version to implement 128/256 bit in case NoVLX. 
-multiclass avx512_var_shift_w_lowering { - let Predicates = [HasBWI, NoVLX] in { +multiclass avx512_var_shift_lowering p> { + let Predicates = p in { def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), (_.info256.VT _.info256.RC:$src2))), (EXTRACT_SUBREG - (!cast(NAME#"WZrr") + (!cast(OpcodeStr#"Zrr") (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), sub_ymm)>; @@ -4968,13 +4970,12 @@ multiclass avx512_var_shift_w_lowering { def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), (_.info128.VT _.info128.RC:$src2))), (EXTRACT_SUBREG - (!cast(NAME#"WZrr") + (!cast(OpcodeStr#"Zrr") (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), sub_xmm)>; } } - multiclass avx512_var_shift_w opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in @@ -4990,19 +4991,22 @@ multiclass avx512_var_shift_w opc, string OpcodeStr, } defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>, - avx512_var_shift_w<0x12, "vpsllvw", shl>, - avx512_var_shift_w_lowering; + avx512_var_shift_w<0x12, "vpsllvw", shl>; defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>, - avx512_var_shift_w<0x11, "vpsravw", sra>, - avx512_var_shift_w_lowering; + avx512_var_shift_w<0x11, "vpsravw", sra>; defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, - avx512_var_shift_w<0x10, "vpsrlvw", srl>, - avx512_var_shift_w_lowering; + avx512_var_shift_w<0x10, "vpsrlvw", srl>; + defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; + // Special handing for handling VPSRAV intrinsics. 
multiclass avx512_var_shift_int_lowering p> { diff --git a/test/CodeGen/X86/vector-shift-ashr-128.ll b/test/CodeGen/X86/vector-shift-ashr-128.ll index 9f0d4a7d726..77a25b311e4 100644 --- a/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -83,11 +83,10 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; ; AVX512-LABEL: var_shift_v2i64: ; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX512-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3 -; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsubq %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: # kill: %XMM1 %XMM1 %ZMM1 +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: var_shift_v2i64: @@ -649,11 +648,10 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; ; AVX512-LABEL: splatvar_shift_v2i64: ; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX512-NEXT: vpsrlq %xmm1, %xmm2, %xmm2 -; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1 +; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_shift_v2i64: @@ -1085,10 +1083,10 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind { ; ; AVX512-LABEL: constant_shift_v2i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [1,7] 
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: constant_shift_v2i64: diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll index aee2857157b..27ed4592943 100644 --- a/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -71,11 +71,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { ; ; AVX512-LABEL: var_shift_v4i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 -; AVX512-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3 -; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpsubq %ymm3, %ymm0, %ymm0 +; AVX512-NEXT: # kill: %YMM1 %YMM1 %ZMM1 +; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: var_shift_v4i64: @@ -491,11 +490,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind { ; ; AVX512-LABEL: splatvar_shift_v4i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 -; AVX512-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 -; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0 +; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1 +; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: splatvar_shift_v4i64: @@ -836,10 +834,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind { ; ; AVX512-LABEL: constant_shift_v4i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2] -; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: # kill: %YMM0 %YMM0 
%ZMM0 +; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [1,7,31,62] +; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; AVX512-NEXT: retq ; ; AVX512VL-LABEL: constant_shift_v4i64: