; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the lower half of %vec2 (mask indices 8-11 = %vec2 elements 0-3).
define <8 x float> @test2_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) {
; GENERIC-LABEL: test2_8xfloat_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_8xfloat_shuff_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x float> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the lower half of %vec2 (mask indices 8-11 = %vec2 elements 0-3).
; NOTE(review): the mask is identical to the one in test2_8xfloat_shuff_mask0.
define <8 x float> @test2_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) {
; GENERIC-LABEL: test2_8xfloat_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_8xfloat_shuff_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x float> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the upper half of the loaded vector
; (mask indices 12-15 = %vec2 elements 4-7).
define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
; GENERIC-LABEL: test_8xfloat_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_shuff_mem_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <8 x float>, <8 x float>* %vec2p
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the lower half of the loaded vector
; (mask indices 8-11 = %vec2 elements 0-3).
define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
; GENERIC-LABEL: test_8xfloat_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_shuff_mem_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <8 x float>, <8 x float>* %vec2p
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the lower half of %vec2 (mask indices 4-5 = %vec2 elements 0-1).
define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) {
; GENERIC-LABEL: test_4xdouble_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_shuff_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x double> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the upper half of %vec2 (mask indices 6-7 = %vec2 elements 2-3).
define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) {
; GENERIC-LABEL: test_4xdouble_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_shuff_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
ret <4 x double> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
; GENERIC-LABEL: test_4xdouble_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_shuff_mem_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <4 x double>, <4 x double>* %vec2p
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
; NOTE(review): the mask is identical to the one in test_4xdouble_shuff_mem_mask0.
define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
; GENERIC-LABEL: test_4xdouble_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_shuff_mem_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
+; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <4 x double>, <4 x double>* %vec2p
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the upper half of %vec2 (mask indices 12-15 = %vec2 elements 4-7).
define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) {
; GENERIC-LABEL: test_8xi32_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_shuff_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
ret <8 x i32> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the lower half of %vec2 (mask indices 8-11 = %vec2 elements 0-3).
define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) {
; GENERIC-LABEL: test_8xi32_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_shuff_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x i32> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the upper half of the loaded vector
; (mask indices 12-15 = %vec2 elements 4-7).
define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) {
; GENERIC-LABEL: test_8xi32_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_shuff_mem_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <8 x i32>, <8 x i32>* %vec2p
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the lower half of the loaded vector
; (mask indices 8-11 = %vec2 elements 0-3).
define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) {
; GENERIC-LABEL: test_8xi32_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_shuff_mem_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <8 x i32>, <8 x i32>* %vec2p
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the lower half of %vec2 (mask indices 4-5 = %vec2 elements 0-1).
define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) {
; GENERIC-LABEL: test_4xi64_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_shuff_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x i64> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the upper half of %vec2 (mask indices 6-7 = %vec2 elements 2-3).
define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) {
; GENERIC-LABEL: test_4xi64_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_shuff_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
ret <4 x i64> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) {
; GENERIC-LABEL: test_4xi64_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_shuff_mem_mask0:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <4 x i64>, <4 x i64>* %vec2p
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
; NOTE(review): the mask is identical to the one in test_4xi64_shuff_mem_mask0.
define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) {
; GENERIC-LABEL: test_4xi64_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_shuff_mem_mask3:
; SKX: # BB#0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
+; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec2 = load <4 x i64>, <4 x i64>* %vec2p
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the lower half of %vec2 (mask indices 8-11 = %vec2 elements 0-3).
define <8 x float> @test_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_shuff_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x float> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the lower half of %vec2 (mask indices 8-11 = %vec2 elements 0-3).
; NOTE(review): the mask is identical to the one in test_8xfloat_shuff_mask0.
define <8 x float> @test_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) {
; CHECK-LABEL: test_8xfloat_shuff_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x float> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the upper half of the loaded vector
; (mask indices 12-15 = %vec2 elements 4-7).
define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_shuff_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
%vec2 = load <8 x float>, <8 x float>* %vec2p
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the lower half of the loaded vector
; (mask indices 8-11 = %vec2 elements 0-3).
define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
; CHECK-LABEL: test_8xfloat_shuff_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
; CHECK-NEXT: retq
%vec2 = load <8 x float>, <8 x float>* %vec2p
%res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the lower half of %vec2 (mask indices 4-5 = %vec2 elements 0-1).
define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_shuff_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x double> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the upper half of %vec2 (mask indices 6-7 = %vec2 elements 2-3).
define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) {
; CHECK-LABEL: test_4xdouble_shuff_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: retq
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
ret <4 x double> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_shuff_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
%vec2 = load <4 x double>, <4 x double>* %vec2p
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
; NOTE(review): the mask is identical to the one in test_4xdouble_shuff_mem_mask0.
define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
; CHECK-LABEL: test_4xdouble_shuff_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
%vec2 = load <4 x double>, <4 x double>* %vec2p
%res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the upper half of %vec2 (mask indices 12-15 = %vec2 elements 4-7).
define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) {
; CHECK-LABEL: test_8xi32_shuff_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
ret <8 x i32> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 4-7)
; followed by the lower half of %vec2 (mask indices 8-11 = %vec2 elements 0-3).
define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) {
; CHECK-LABEL: test_8xi32_shuff_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x i32> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the upper half of the loaded vector
; (mask indices 12-15 = %vec2 elements 4-7).
define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) {
; CHECK-LABEL: test_8xi32_shuff_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
%vec2 = load <8 x i32>, <8 x i32>* %vec2p
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 4-7) followed by the lower half of the loaded vector
; (mask indices 8-11 = %vec2 elements 0-3).
define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) {
; CHECK-LABEL: test_8xi32_shuff_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
; CHECK-NEXT: retq
%vec2 = load <8 x i32>, <8 x i32>* %vec2p
%res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the lower half of %vec2 (mask indices 4-5 = %vec2 elements 0-1).
define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) {
; CHECK-LABEL: test_4xi64_shuff_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; CHECK-NEXT: retq
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x i64> %res
; Shuffle under test: the result is the upper half of %vec1 (elements 2-3)
; followed by the upper half of %vec2 (mask indices 6-7 = %vec2 elements 2-3).
define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) {
; CHECK-LABEL: test_4xi64_shuff_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; CHECK-NEXT: retq
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
ret <4 x i64> %res
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) {
; CHECK-LABEL: test_4xi64_shuff_mem_mask0:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
%vec2 = load <4 x i64>, <4 x i64>* %vec2p
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Memory-operand variant: %vec2 is loaded from %vec2p. The result is the upper
; half of %vec1 (elements 2-3) followed by the upper half of the loaded vector
; (mask indices 6-7 = %vec2 elements 2-3).
; NOTE(review): the mask is identical to the one in test_4xi64_shuff_mem_mask0.
define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) {
; CHECK-LABEL: test_4xi64_shuff_mem_mask3:
; CHECK: # BB#0:
-; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
+; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
; CHECK-NEXT: retq
%vec2 = load <4 x i64>, <4 x i64>* %vec2p
%res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>