[X86] Teach EVEX->VEX pass to turn SHUFI32X4/SHUFF32X4/SHUFI64X/SHUFF64X2 into VPERM2...

author Craig Topper <craig.topper@intel.com>

Sat, 4 Nov 2017 18:10:03 +0000 (18:10 +0000)

committer Craig Topper <craig.topper@intel.com>

Sat, 4 Nov 2017 18:10:03 +0000 (18:10 +0000)
author Craig Topper <craig.topper@intel.com>
Sat, 4 Nov 2017 18:10:03 +0000 (18:10 +0000)
committer Craig Topper <craig.topper@intel.com>
Sat, 4 Nov 2017 18:10:03 +0000 (18:10 +0000)
diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp

index 744510a3a3ba635ed98127e76d07b53b6f105e0f..6dd4631a48448dbc1381e18b320494b5a7031870 100644 (file)
--- a/lib/Target/X86/X86EvexToVex.cpp
+++ b/lib/Target/X86/X86EvexToVex.cpp
@@ -171,7 +171,7 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
    case X86::VALIGNDZ128rri:
    case X86::VALIGNDZ128rmi:
    case X86::VALIGNQZ128rri:
-  case X86::VALIGNQZ128rmi:
+  case X86::VALIGNQZ128rmi: {
      assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
             "Unexpected new opcode!");
      unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
@@ -180,6 +180,24 @@ static void performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
      Imm.setImm(Imm.getImm() * Scale);
      break;
    }
+  case X86::VSHUFF32X4Z256rmi:
+  case X86::VSHUFF32X4Z256rri:
+  case X86::VSHUFF64X2Z256rmi:
+  case X86::VSHUFF64X2Z256rri:
+  case X86::VSHUFI32X4Z256rmi:
+  case X86::VSHUFI32X4Z256rri:
+  case X86::VSHUFI64X2Z256rmi:
+  case X86::VSHUFI64X2Z256rri: {
+    assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
+            NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
+           "Unexpected new opcode!");
+    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1);
+    int64_t ImmVal = Imm.getImm();
+    // Set bit 5, move bit 1 to bit 4, copy bit 0.
+    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
+    break;
+  }
+  }
  }
  
  
diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll

index a3e6a18fbc904c7ebece63ee14e8720d543025cf..44d13db65c9d646a52d77af4d8ffb638d5b7cc58 100644 (file)
--- a/test/CodeGen/X86/avx-schedule.ll
+++ b/test/CodeGen/X86/avx-schedule.ll
@@ -3447,8 +3447,8 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou
  ;
  ; SKX-LABEL: test_perm2f128:
  ; SKX:       # BB#0:
-; SKX-NEXT:    vshuff64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT:    vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
  ; SKX-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
  ; SKX-NEXT:    retq # sched: [7:1.00]
  ;
diff --git a/test/CodeGen/X86/avx2-schedule.ll b/test/CodeGen/X86/avx2-schedule.ll

index 8febe046d810677b56a5a61b799424e64864ae3b..cec8ca94409d23331a65ee40a827a6e4be09df82 100644 (file)
--- a/test/CodeGen/X86/avx2-schedule.ll
+++ b/test/CodeGen/X86/avx2-schedule.ll
@@ -2531,8 +2531,8 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
  ;
  ; SKX-LABEL: test_perm2i128:
  ; SKX:       # BB#0:
-; SKX-NEXT:    vshufi64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
+; SKX-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
+; SKX-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
  ; SKX-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
  ; SKX-NEXT:    retq # sched: [7:1.00]
  ;
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll

index 9098ca308971d03cb2f952849c4d92c8f1392bac..35fecf8955c06428dadd10d326cb38ddb9e7219c 100644 (file)
--- a/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2729,8 +2729,8 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x
  ; CHECK-NEXT:    ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7]
  ; CHECK-NEXT:    vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16]
  ; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT:    vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16]
-; CHECK-NEXT:    ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT:    vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
+; CHECK-NEXT:    ## ymm0 = ymm0[0,1],ymm1[2,3]
  ; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
  ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0]
  ; CHECK-NEXT:    retq ## encoding: [0xc3]
@@ -2752,7 +2752,7 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4
  ; CHECK-NEXT:    ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3]
  ; CHECK-NEXT:    vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16]
  ; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
-; CHECK-NEXT:    vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16]
+; CHECK-NEXT:    vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30]
  ; CHECK-NEXT:    ## ymm0 = ymm0[0,1],ymm1[2,3]
  ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
  ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
@@ -2773,8 +2773,8 @@ define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32
  ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; CHECK-NEXT:    vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16]
  ; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; CHECK-NEXT:    vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16]
-; CHECK-NEXT:    ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT:    vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
+; CHECK-NEXT:    ## ymm0 = ymm0[0,1],ymm1[2,3]
  ; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
  ; CHECK-NEXT:    retq ## encoding: [0xc3]
    %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
@@ -2791,7 +2791,7 @@ define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64
  ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
  ; CHECK-NEXT:    vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16]
  ; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3]
-; CHECK-NEXT:    vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16]
+; CHECK-NEXT:    vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30]
  ; CHECK-NEXT:    ## ymm0 = ymm0[0,1],ymm1[2,3]
  ; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
  ; CHECK-NEXT:    retq ## encoding: [0xc3]
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll

index cbc190d0db35fe6eb5b7a7afcfa2fc10009d803f..7ef5bee54204141c01920fef7913ba4fce924058 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -3975,7 +3975,7 @@ define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_b
  ;
  ; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
  ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
  ; AVX512VL-NEXT:    retq
    %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll

index c5c2312b161e38a35b546fd02b08ab4f48270406..3c69f6160ddb4cd329c03a81ec20952591f1face 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1682,17 +1682,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_
  ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
  ; AVX1-NEXT:    retq
  ;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT:    retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX2OR512VL:       # BB#0:
+; AVX2OR512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT:    retq
    %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
    ret <32 x i8> %shuffle
  }
@@ -1708,17 +1702,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_
  ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
  ; AVX1-NEXT:    retq
  ;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT:    retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
+; AVX2OR512VL:       # BB#0:
+; AVX2OR512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2OR512VL-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT:    retq
    %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
    ret <32 x i8> %shuffle
  }
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll

index 6e0e80b401650c011cadc8c965e3dc97dd4a482b..56567c7e794e47972cd67e29a3c22dd9e08035fa 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -418,45 +418,21 @@ define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
  }
  
  define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3254:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: shuffle_v4f64_3254:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3254:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT:    retq
+; ALL-LABEL: shuffle_v4f64_3254:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    retq
    %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
    ret <4 x double> %shuffle
  }
  
  define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3276:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: shuffle_v4f64_3276:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3276:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT:    retq
+; ALL-LABEL: shuffle_v4f64_3276:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    retq
    %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
    ret <4 x double> %shuffle
  }
@@ -1077,7 +1053,7 @@ define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
  ;
  ; AVX512VL-LABEL: shuffle_v4i64_3254:
  ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
  ; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
  ; AVX512VL-NEXT:    retq
    %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
@@ -1099,7 +1075,7 @@ define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
  ;
  ; AVX512VL-LABEL: shuffle_v4i64_3276:
  ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
  ; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
  ; AVX512VL-NEXT:    retq
    %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll

index 38891b465776d9689c0f4fa9f1864a7270cb042d..e4234c0584533dff84f998fa9a513e95411a5eb8 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -768,33 +768,21 @@ define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
  }
  
  define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_7654fedc:
-; AVX1OR2:       # BB#0:
-; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_7654fedc:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT:    retq
+; ALL-LABEL: shuffle_v8f32_7654fedc:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
    %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
    ret <8 x float> %shuffle
  }
  
  define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_fedc7654:
-; AVX1OR2:       # BB#0:
-; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_fedc7654:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshuff64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
-; AVX512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT:    retq
+; ALL-LABEL: shuffle_v8f32_fedc7654:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
    %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
    ret <8 x float> %shuffle
  }
@@ -1809,7 +1797,7 @@ define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
  ;
  ; AVX512VL-LABEL: shuffle_v8i32_7654fedc:
  ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
  ; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
  ; AVX512VL-NEXT:    retq
    %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
@@ -1825,7 +1813,7 @@ define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
  ;
  ; AVX512VL-LABEL: shuffle_v8i32_fedc7654:
  ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
  ; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
  ; AVX512VL-NEXT:    retq
    %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
@@ -2208,7 +2196,7 @@ define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
  ;
  ; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
  ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
  ; AVX512VL-NEXT:    retq
    %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
@@ -2220,15 +2208,10 @@ define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
  }
  
  define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
-; AVX1OR2-LABEL: concat_v8f32_4567CDEF_bc:
-; AVX1OR2:       # BB#0:
-; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1OR2-NEXT:    retq
-;
-; AVX512VL-LABEL: concat_v8f32_4567CDEF_bc:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT:    retq
+; ALL-LABEL: concat_v8f32_4567CDEF_bc:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT:    retq
    %a0 = bitcast <8 x float> %f0 to <4 x i64>
    %a1 = bitcast <8 x float> %f1 to <8 x i32>
    %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp

index 848e59c079039c9aceadb43482223111b4579dd4..05f30facd5478ef58ae9a0f725ab465fc2e4ab3d 100644 (file)
--- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -155,6 +155,14 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
        {"VALIGNQZ128rri",      "VPALIGNRrri",     true},
        {"VALIGNDZ128rmi",      "VPALIGNRrmi",     true},
        {"VALIGNQZ128rmi",      "VPALIGNRrmi",     true},
+      {"VSHUFF32X4Z256rmi",   "VPERM2F128rm",    false},
+      {"VSHUFF32X4Z256rri",   "VPERM2F128rr",    false},
+      {"VSHUFF64X2Z256rmi",   "VPERM2F128rm",    false},
+      {"VSHUFF64X2Z256rri",   "VPERM2F128rr",    false},
+      {"VSHUFI32X4Z256rmi",   "VPERM2I128rm",    false},
+      {"VSHUFI32X4Z256rri",   "VPERM2I128rr",    false},
+      {"VSHUFI64X2Z256rmi",   "VPERM2I128rm",    false},
+      {"VSHUFI64X2Z256rri",   "VPERM2I128rr",    false},
    };
  
    // Print the manually added entries
author	Craig Topper <craig.topper@intel.com>
	Sat, 4 Nov 2017 18:10:03 +0000 (18:10 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sat, 4 Nov 2017 18:10:03 +0000 (18:10 +0000)
lib/Target/X86/X86EvexToVex.cpp		patch \| blob \| history
test/CodeGen/X86/avx-schedule.ll		patch \| blob \| history
test/CodeGen/X86/avx2-schedule.ll		patch \| blob \| history
test/CodeGen/X86/avx512vl-intrinsics.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v16.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v32.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v4.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-256-v8.ll		patch \| blob \| history
utils/TableGen/X86EVEX2VEXTablesEmitter.cpp		patch \| blob \| history