if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
- if (VT.is512BitVector() || Subtarget.hasVLX())
+ if (Subtarget.hasAVX512())
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
+
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
}
// Use the 512-bit instruction to implement the 128-bit and 256-bit forms of
// OpNode when the given predicates hold (instantiated with NoVLX below).
//   _         - AVX512VLVectorVTInfo bundle supplying info128/info256/info512.
//   OpcodeStr - instruction name prefix; "Zrr" is appended and the result is
//               looked up with !cast<Instruction>.
//   OpNode    - the SDNode being matched (e.g. shl, sra, srl).
//   p         - predicate list guarding both patterns.
-multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
- let Predicates = [HasBWI, NoVLX] in {
+multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
+ SDNode OpNode, list<Predicate> p> {
+ let Predicates = p in {
// 256-bit case: place each operand in the sub_ymm part of an otherwise
// undefined (IMPLICIT_DEF) 512-bit value, run the 512-bit instruction, then
// extract the sub_ymm part of the result.
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
(_.info256.VT _.info256.RC:$src2))),
(EXTRACT_SUBREG
- (!cast<Instruction>(NAME#"WZrr")
+ (!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
// 128-bit case: same widening scheme, using the sub_xmm subregister.
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
(_.info128.VT _.info128.RC:$src2))),
(EXTRACT_SUBREG
- (!cast<Instruction>(NAME#"WZrr")
+ (!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
}
-
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
}
// Variable-shift instantiations for shl / sra / srl: each combines
// avx512_var_shift_types with the word-element variant avx512_var_shift_w.
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
- avx512_var_shift_w<0x12, "vpsllvw", shl>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;
+ avx512_var_shift_w<0x12, "vpsllvw", shl>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
- avx512_var_shift_w<0x11, "vpsravw", sra>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
+ avx512_var_shift_w<0x11, "vpsravw", sra>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
- avx512_var_shift_w<0x10, "vpsrlvw", srl>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+
// Variable rotates (rotr / rotl) reuse avx512_var_shift_types.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
// Anonymous lowering patterns that implement 128/256-bit operations with the
// 512-bit "Zrr" instruction: i64 sra under [HasAVX512, NoVLX], and i16
// shl/sra/srl under [HasBWI, NoVLX].
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
+
// Special handling for VPSRAV intrinsics.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> {
;
; AVX512-LABEL: var_shift_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX512-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
-; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: var_shift_v2i64:
;
; AVX512-LABEL: splatvar_shift_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX512-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
-; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v2i64:
;
; AVX512-LABEL: constant_shift_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
-; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [1,7]
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: constant_shift_v2i64:
;
; AVX512-LABEL: var_shift_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX512-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: var_shift_v4i64:
;
; AVX512-LABEL: splatvar_shift_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX512-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
-; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
-; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: splatvar_shift_v4i64:
;
; AVX512-LABEL: constant_shift_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
-; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [1,7,31,62]
+; AVX512-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; AVX512VL-LABEL: constant_shift_v4i64: