; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldq $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldq $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 127)
+ %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 47)
%1 = bitcast i8 %__U to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
ret <8 x i64> %2
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshldd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
- %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 63)
+ %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 15)
%3 = bitcast i16 %__U to <16 x i1>
%4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
%5 = bitcast <16 x i32> %4 to <8 x i64>
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshldw $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldw $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%1 = bitcast <8 x i64> %__B to <32 x i16>
- %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 127)
+ %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 3)
%3 = bitcast <8 x i64> %__S to <32 x i16>
%4 = bitcast i32 %__U to <32 x i1>
%5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshldw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%1 = bitcast <8 x i64> %__B to <32 x i16>
- %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 63)
+ %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 7)
%3 = bitcast i32 %__U to <32 x i1>
%4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
%5 = bitcast <32 x i16> %4 to <8 x i64>
define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpshldw $31, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%1 = bitcast <8 x i64> %__B to <32 x i16>
- %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 31)
+ %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 15)
%3 = bitcast <32 x i16> %2 to <8 x i64>
ret <8 x i64> %3
}
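; A quick sanity check of the shift-left semantics, assuming the VPSHLDW
; definition in the Intel SDM (the immediate is masked to the lane width, so
; only imm8[3:0] matters for 16-bit lanes): each lane keeps the upper half of
; (concat(a, b) << (imm & 15)). For a = 0x00FF, b = 0xF000 and the $15
; immediate used above:
;   (0x00FF << 15) | (0xF000 >> 1) = 0x8000 | 0x7800 = 0xF800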
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdq $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdq $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 127)
+ %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 47)
%1 = bitcast i8 %__U to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
ret <8 x i64> %2
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshrdd $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdd $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
- %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 127)
+ %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 7)
%3 = bitcast <8 x i64> %__S to <16 x i32>
%4 = bitcast i16 %__U to <16 x i1>
%5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshrdd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
- %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 63)
+ %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 15)
%3 = bitcast i16 %__U to <16 x i1>
%4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
%5 = bitcast <16 x i32> %4 to <8 x i64>
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshrdw $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X86-NEXT: vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdw $127, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT: vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%1 = bitcast <8 x i64> %__B to <32 x i16>
- %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 127)
+ %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 3)
%3 = bitcast <8 x i64> %__S to <32 x i16>
%4 = bitcast i32 %__U to <32 x i1>
%5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshrdw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <32 x i16>
%1 = bitcast <8 x i64> %__B to <32 x i16>
- %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 63)
+ %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 15)
%3 = bitcast i32 %__U to <32 x i1>
%4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
%5 = bitcast <32 x i16> %4 to <8 x i64>
define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86: # %bb.0:
-; X86-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
+; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x06]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
+; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64: # %bb.0:
-; X64-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
+; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
+; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
- %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
+ %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 %x4)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 -1)
%res2 = add <32 x i16> %res, %res1
ret <32 x i16> %res2
}
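; Note the shape of these legacy-intrinsic tests: the operation is issued once
; with the real mask %x4 and once with an all-ones mask (i32 -1), and the two
; results are combined with vpaddw so that a single return value checks both
; the masked and the unmasked encodings.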
define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86: # %bb.0:
-; X86-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
+; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x06]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
+; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64: # %bb.0:
-; X64-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
+; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
+; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
- %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
+ %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 %x4)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6, <32 x i16> %x3, i32 -1)
%res2 = add <32 x i16> %res, %res1
ret <32 x i16> %res2
}
define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86: # %bb.0:
-; X86-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
+; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x06]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
+; X86-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64: # %bb.0:
-; X64-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
+; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
+; X64-NEXT: vpshldw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x06]
; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
+ %1 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6)
%2 = bitcast i32 %x4 to <32 x i1>
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
- %4 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
+ %4 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6)
%res2 = add <32 x i16> %3, %4
ret <32 x i16> %res2
}
define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86: # %bb.0:
-; X86-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
+; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x06]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
+; X86-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64: # %bb.0:
-; X64-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
+; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
+; X64-NEXT: vpshrdw $6, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x06]
; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
+ %1 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6)
%2 = bitcast i32 %x4 to <32 x i1>
%3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
- %4 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
+ %4 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 6)
%res2 = add <32 x i16> %3, %4
ret <32 x i16> %res2
}
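; The shift-right form mirrors the shift-left one; per my reading of the SDM,
; VPSHRDW writes the low half of (concat(src2, src1) >> (imm & 15)) into each
; lane, so with the operand order of the intrinsic calls above the $6 case
; computes (%x0 >> 6) | (%x1 << 10).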
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X86-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldq $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
+ %0 = tail call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 47)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldq $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
+ %0 = tail call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 47)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
ret <2 x i64> %2
}
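; For the sub-512-bit variants the i8 write mask is first bitcast to <8 x i1>
; and the live lanes are then picked out with a shufflevector (the %extract
; above); only those lanes participate in the select.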
-declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32) #3
+declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32)
define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X86-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldd $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
+ %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 7)
%3 = bitcast <4 x i64> %__S to <8 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
+ %2 = tail call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %0, <8 x i32> %1, i32 15)
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
%5 = bitcast <8 x i32> %4 to <4 x i64>
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldd $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
+ %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 7)
%3 = bitcast <2 x i64> %__S to <4 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
+ %2 = tail call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %0, <4 x i32> %1, i32 15)
%3 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
; X86-LABEL: test_mm256_mask_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X86-NEXT: vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldw $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: vpshldw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <16 x i16>
%1 = bitcast <4 x i64> %__B to <16 x i16>
- %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
+ %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 3)
%3 = bitcast <4 x i64> %__S to <16 x i16>
%4 = bitcast i16 %__U to <16 x i1>
%5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
; X86-LABEL: test_mm256_maskz_shldi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <16 x i16>
%1 = bitcast <4 x i64> %__B to <16 x i16>
- %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
+ %2 = tail call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %0, <16 x i16> %1, i32 7)
%3 = bitcast i16 %__U to <16 x i1>
%4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
%5 = bitcast <16 x i16> %4 to <4 x i64>
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldw $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <8 x i16>
%1 = bitcast <2 x i64> %__B to <8 x i16>
- %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
+ %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 3)
%3 = bitcast <2 x i64> %__S to <8 x i16>
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shldi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshldw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <8 x i16>
%1 = bitcast <2 x i64> %__B to <8 x i16>
- %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
+ %2 = tail call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %0, <8 x i16> %1, i32 7)
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
%5 = bitcast <8 x i16> %4 to <2 x i64>
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X86-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdq $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 127)
+ %0 = tail call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %__A, <4 x i64> %__B, i32 47)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__S
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdi_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdq $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
- %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 127)
+ %0 = tail call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %__A, <2 x i64> %__B, i32 47)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__S
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X86-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdd $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 127)
+ %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 7)
%3 = bitcast <4 x i64> %__S to <8 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i32> %2, <8 x i32> %3
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdd $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
- %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 63)
+ %2 = tail call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %0, <8 x i32> %1, i32 15)
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> zeroinitializer
%5 = bitcast <8 x i32> %4 to <4 x i64>
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdd $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 127)
+ %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 7)
%3 = bitcast <2 x i64> %__S to <4 x i32>
%4 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdd $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
- %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 63)
+ %2 = tail call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %0, <4 x i32> %1, i32 15)
%3 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = select <4 x i1> %extract, <4 x i32> %2, <4 x i32> zeroinitializer
; X86-LABEL: test_mm256_mask_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X86-NEXT: vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdw $127, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT: vpshrdw $3, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <16 x i16>
%1 = bitcast <4 x i64> %__B to <16 x i16>
- %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 127)
+ %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 3)
%3 = bitcast <4 x i64> %__S to <16 x i16>
%4 = bitcast i16 %__U to <16 x i1>
%5 = select <16 x i1> %4, <16 x i16> %2, <16 x i16> %3
; X86-LABEL: test_mm256_maskz_shrdi_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdw $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <16 x i16>
%1 = bitcast <4 x i64> %__B to <16 x i16>
- %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 63)
+ %2 = tail call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %0, <16 x i16> %1, i32 7)
%3 = bitcast i16 %__U to <16 x i1>
%4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
%5 = bitcast <16 x i16> %4 to <4 x i64>
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdw $127, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <8 x i16>
%1 = bitcast <2 x i64> %__B to <8 x i16>
- %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 127)
+ %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 3)
%3 = bitcast <2 x i64> %__S to <8 x i16>
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i16> %2, <8 x i16> %3
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: kmovd %eax, %k1
-; X86-NEXT: vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shrdi_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpshrdw $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <8 x i16>
%1 = bitcast <2 x i64> %__B to <8 x i16>
- %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 63)
+ %2 = tail call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %0, <8 x i16> %1, i32 7)
%3 = bitcast i8 %__U to <8 x i1>
%4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
%5 = bitcast <8 x i16> %4 to <2 x i64>
define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86: # %bb.0:
-; X86-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16]
+; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16]
+; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64: # %bb.0:
-; X64-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16]
+; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16]
+; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 %x4)
- %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 -1)
+ %res = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 -1)
%res2 = add <8 x i16> %res, %res1
ret <8 x i16> %res2
}
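; The xmm/ymm tests differ from the zmm ones in the vpaddw tail: with 128- or
; 256-bit operands, no masking, and low register numbers, the backend shrinks
; the EVEX encoding to the shorter VEX form, which is what the "EVEX TO VEX
; Compression" remark in the check lines refers to.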
define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86: # %bb.0:
-; X86-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16]
+; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16]
+; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64: # %bb.0:
-; X64-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16]
+; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16]
+; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 %x4)
- %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 -1)
+ %res = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 -1)
%res2 = add <16 x i16> %res, %res1
ret <16 x i16> %res2
}
define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86: # %bb.0:
-; X86-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16]
+; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16]
+; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64: # %bb.0:
-; X64-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16]
+; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16]
+; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 %x4)
- %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 -1)
+ %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 -1)
%res2 = add <8 x i16> %res, %res1
ret <8 x i16> %res2
}
define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86: # %bb.0:
-; X86-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16]
+; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16]
+; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64: # %bb.0:
-; X64-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16]
+; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16]
+; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 %x4)
- %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 -1)
+ %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 -1)
%res2 = add <16 x i16> %res, %res1
ret <16 x i16> %res2
}
define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86: # %bb.0:
-; X86-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16]
+; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16]
+; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64: # %bb.0:
-; X64-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16]
+; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16]
+; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22)
+ %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
%2 = bitcast i8 %x4 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
- %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22)
+ %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
%res2 = add <8 x i16> %3, %4
ret <8 x i16> %res2
}
define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86: # %bb.0:
-; X86-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16]
+; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16]
+; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64: # %bb.0:
-; X64-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16]
+; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16]
+; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22)
+ %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
%2 = bitcast i16 %x4 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
- %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22)
+ %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
%res2 = add <16 x i16> %3, %4
ret <16 x i16> %res2
}
define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86: # %bb.0:
-; X86-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16]
+; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16]
+; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64: # %bb.0:
-; X64-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16]
+; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16]
+; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22)
+ %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
%2 = bitcast i8 %x4 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
- %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22)
+ %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6)
%res2 = add <8 x i16> %3, %4
ret <8 x i16> %res2
}
define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86: # %bb.0:
-; X86-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16]
+; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16]
+; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64: # %bb.0:
-; X64-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16]
+; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x06]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16]
+; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22)
+ %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
%2 = bitcast i16 %x4 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
- %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22)
+ %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6)
%res2 = add <16 x i16> %3, %4
ret <16 x i16> %res2
}
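; For reference, later LLVM releases express these fixed-amount intrinsics as
; the generic funnel shifts; a minimal sketch of that mapping for the 16-bit
; forms, assuming the usual fshl/fshr operand order (with the shift amount
; broadcast as a constant vector):
;   vpshldw $6  <=>  @llvm.fshl.v16i16(%x0, %x1, splat 6)
;   vpshrdw $6  <=>  @llvm.fshr.v16i16(%x1, %x0, splat 6)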