case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::VSRAV: return "X86ISD::VSRAV";
case X86ISD::VROTLI: return "X86ISD::VROTLI";
case X86ISD::VROTRI: return "X86ISD::VROTRI";
case X86ISD::VPPERM: return "X86ISD::VPPERM";
// Vector shift elements
VSHL, VSRL, VSRA,
+ // Vector variable shift right arithmetic.
+ // Unlike ISD::SRA, a shift count greater than or equal to the element
+ // size fills the destination element with the sign bit.
+ VSRAV,
+
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
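To make the VSRAV/ISD::SRA difference concrete, here is a minimal scalar model of one 32-bit lane (an illustrative sketch; `VsravLane` is a hypothetical name, not an LLVM API):

#include <cstdint>

// Models one 32-bit VSRAV lane: an out-of-range count fills the element
// with the sign bit, whereas ISD::SRA leaves such shifts undefined.
int32_t VsravLane(int32_t Val, uint32_t Cnt) {
  if (Cnt >= 32)
    return Val >> 31; // all sign bits: -1 for negative Val, 0 otherwise
  return Val >> Cnt;  // in-range counts behave exactly like ISD::SRA
}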
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
avx512_var_shift_w<0x12, "vpsllvw", shl>,
avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;
+
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
avx512_var_shift_w<0x11, "vpsravw", sra>,
avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
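+// CodeGenOnly "_Int" copies select the X86vsrav node that the psrav
+// intrinsics are lowered to, leaving the sra-based patterns intact.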
+let isCodeGenOnly = 1 in
+ defm VPSRAV_Int : avx512_var_shift_types<0x46, "vpsrav", X86vsrav>,
+ avx512_var_shift_w<0x11, "vpsravw", X86vsrav>;
+
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
avx512_var_shift_w<0x10, "vpsrlvw", srl>,
avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>]>>;
+def X86vsrav : SDNode<"X86ISD::VSRAV" , SDTIntShiftOp>;
+
def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
{ X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
{ X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
{ X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
+ { X86::VPSRAVD_Intrr, X86::VPSRAVD_Intrm, 0 },
+ { X86::VPSRAVD_IntYrr, X86::VPSRAVD_IntYrm, 0 },
{ X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
{ X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
{ X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
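+ // CodeGenOnly copy to select the X86vsrav node for the psrav intrinsics.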
+ let isCodeGenOnly = 1 in
+ defm VPSRAVD_Int : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx2_psrai_w, VSHIFT, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx2_psrav_d_256, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
X86_INTRINSIC_DATA(avx2_psrl_d, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrl_q, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx2_psrl_w, INTR_TYPE_2OP, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
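The intrinsic-table changes above are what keep constant folding honest: mapping the psrav intrinsics to ISD::SRA let the generic folder treat out-of-range lane counts as undefined, while the hardware sign-fills them. A minimal sketch of the defined behavior (illustrative helper name, not an LLVM API):

#include <cstdint>
#include <cstdio>

// Folds one lane with VSRAV semantics: fully defined for any count.
int32_t FoldVsravLane(int32_t Val, uint32_t Cnt) {
  return Cnt >= 32 ? (Val >> 31) : (Val >> Cnt);
}

int main() {
  // The -12 lane shifted by 35 from the tests below: sign-fill gives -1,
  // where an ISD::SRA-based fold could legally return anything.
  printf("%d\n", FoldVsravLane(-12, 35)); // prints -1
}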
%res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) {
+; AVX2-LABEL: test_x86_avx2_psrav_d_const:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
+; AVX2-NEXT: vpsravd LCPI90_1, %xmm0, %xmm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [2,9,4294967284,23]
+; AVX512VL-NEXT: vpsravd LCPI90_1, %xmm0, %xmm0
+; AVX512VL-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrav_d_256:
%res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
+
+define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) {
+; AVX2-LABEL: test_x86_avx2_psrav_d_256_const:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
+; AVX2-NEXT: vpsravd LCPI92_1, %ymm0, %ymm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
+; AVX512VL-NEXT: vpsravd LCPI92_1, %ymm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
+ ret <8 x i32> %res
+}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
ret <32 x i16> %res4
}
+define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
+; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
+; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,
+ <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>,
+ <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
ret <8 x i32> %res4
}
+define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
+; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_0-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI510_1-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
ret <2 x i64> %res4
}
+define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
+; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_0-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI512_1-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
+ ret <2 x i64> %res
+}
+
declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {