case X86::VALIGNDZ128rri:
case X86::VALIGNDZ128rmi:
case X86::VALIGNQZ128rri:
- case X86::VALIGNQZ128rmi:
+ case X86::VALIGNQZ128rmi: {
assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
"Unexpected new opcode!");
unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
Imm.setImm(Imm.getImm() * Scale);
break;
}
+ case X86::VSHUFF32X4Z256rmi:
+ case X86::VSHUFF32X4Z256rri:
+ case X86::VSHUFF64X2Z256rmi:
+ case X86::VSHUFF64X2Z256rri:
+ case X86::VSHUFI32X4Z256rmi:
+ case X86::VSHUFI32X4Z256rri:
+ case X86::VSHUFI64X2Z256rmi:
+ case X86::VSHUFI64X2Z256rri: {
+ assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
+ NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
+ "Unexpected new opcode!");
+ MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+ int64_t ImmVal = Imm.getImm();
+ // Set bit 5, move bit 1 to bit 4, copy bit 0.
+ Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
+ break;
+ }
+ }
}
;
; SKX-LABEL: test_perm2f128:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
;
; SKX-LABEL: test_perm2i128:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
-; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
+; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
-; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
+; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16]
-; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
+; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16]
; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
-; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16]
+; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
;
; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT: retq
%ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
ret <32 x i8> %shuffle
}
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
ret <32 x i8> %shuffle
}
}
define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3254:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3254:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3254:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3254:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3276:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3276:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3276:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3276:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
ret <4 x double> %shuffle
}
;
; AVX512VL-LABEL: shuffle_v4i64_3254:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
;
; AVX512VL-LABEL: shuffle_v4i64_3276:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
}
define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_7654fedc:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_7654fedc:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_7654fedc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
ret <8 x float> %shuffle
}
define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_fedc7654:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_fedc7654:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_fedc7654:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %shuffle
}
;
; AVX512VL-LABEL: shuffle_v8i32_7654fedc:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
;
; AVX512VL-LABEL: shuffle_v8i32_fedc7654:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
;
; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT: retq
%a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
}
define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
-; AVX1OR2-LABEL: concat_v8f32_4567CDEF_bc:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: concat_v8f32_4567CDEF_bc:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: concat_v8f32_4567CDEF_bc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: retq
%a0 = bitcast <8 x float> %f0 to <4 x i64>
%a1 = bitcast <8 x float> %f1 to <8 x i32>
%a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
{"VALIGNQZ128rri", "VPALIGNRrri", true},
{"VALIGNDZ128rmi", "VPALIGNRrmi", true},
{"VALIGNQZ128rmi", "VPALIGNRrmi", true},
+ {"VSHUFF32X4Z256rmi", "VPERM2F128rm", false},
+ {"VSHUFF32X4Z256rri", "VPERM2F128rr", false},
+ {"VSHUFF64X2Z256rmi", "VPERM2F128rm", false},
+ {"VSHUFF64X2Z256rri", "VPERM2F128rr", false},
+ {"VSHUFI32X4Z256rmi", "VPERM2I128rm", false},
+ {"VSHUFI32X4Z256rri", "VPERM2I128rr", false},
+ {"VSHUFI64X2Z256rmi", "VPERM2I128rm", false},
+ {"VSHUFI64X2Z256rri", "VPERM2I128rr", false},
};
// Print the manually added entries