From: Craig Topper Date: Tue, 25 Jun 2019 17:31:52 +0000 (+0000) Subject: [X86] Remove isel patterns that look for (vzext_movl (scalar_to_vector (load))) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e7815eaa374164e72cbc3cb6c10c7ad934d2f7ba;p=llvm [X86] Remove isel patterns that look for (vzext_movl (scalar_to_vector (load))) I believe these all get canonicalized to vzext_movl. The only case where that wasn't true was when the load was loadi32 and the load was an extload aligned to 32 bits. But that was fixed in r364207. Differential Revision: https://reviews.llvm.org/D63701 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364337 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index aeb03ab9b1c..31cbb2d5584 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4313,16 +4313,6 @@ let Predicates = [HasAVX512, OptForSpeed] in { let Predicates = [HasAVX512] in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 - def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (VMOVSSZrm addr:$src)>; - - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 - def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (VMOVSDZrm addr:$src)>; - // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8f32 (X86vzload addr:$src)), @@ -4357,8 +4347,6 @@ let Predicates = [HasAVX512] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), (VMOVDI2PDIZrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), - (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v4i32 (X86vzload addr:$src)), (VMOVDI2PDIZrm addr:$src)>; def : Pat<(v8i32 (X86vzload addr:$src)), @@ -8608,8 +8596,6 @@ let Predicates = [HasVLX] in { EVEX_CD8<32, CD8VH>; // Pattern match vcvtph2ps of a scalar i64 load. - def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), - (VCVTPH2PSZ128rm addr:$src)>; def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), (VCVTPH2PSZ128rm addr:$src)>; def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert @@ -9636,8 +9622,6 @@ multiclass AVX512_pmovx_patterns_base { let Predicates = [HasVLX, HasBWI] in { def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BWZ256rm) addr:$src)>; - def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#BWZ256rm) addr:$src)>; def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BWZ256rm) addr:$src)>; } @@ -9645,15 +9629,11 @@ multiclass AVX512_pmovx_patterns_base { let Predicates = [HasVLX] in { def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))), (!cast(OpcPrefix#WDZ256rm) addr:$src)>; - def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#WDZ256rm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WDZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))), (!cast(OpcPrefix#DQZ256rm) addr:$src)>; - def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#DQZ256rm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#DQZ256rm) addr:$src)>; } @@ -9686,8 +9666,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; - def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))), @@ -9696,8 +9674,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BDZ128rm) addr:$src)>; - def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))), @@ -9705,8 +9681,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))), @@ -9716,8 +9690,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; - def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))), @@ -9725,8 +9697,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#WQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))), @@ -9736,8 +9706,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))), @@ -9746,8 +9714,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BDZ256rm) addr:$src)>; - def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))), @@ -9755,8 +9721,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BQZ256rm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))), @@ -9764,8 +9728,6 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#WQZ256rm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 85fae57a6ef..814dfa18f01 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -939,13 +939,6 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>; def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>; def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>; -def vzmovl_v2i64 : PatFrag<(ops node:$src), - (bitconvert (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; -def vzmovl_v4i32 : PatFrag<(ops node:$src), - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 node:$src))))))>; - def vzload_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzload node:$src)))>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index cf9acd965e6..15b260ebe60 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -263,20 +263,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { // Patterns let Predicates = [UseAVX] in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 - def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (VMOVSSrm addr:$src)>; - def : Pat<(v4f32 (X86vzload addr:$src)), - (VMOVSSrm addr:$src)>; - - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 - def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (VMOVSDrm addr:$src)>; - def : Pat<(v2f64 (X86vzload addr:$src)), - (VMOVSDrm addr:$src)>; - // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8f32 (X86vzload addr:$src)), @@ -313,16 +299,6 @@ let Predicates = [UseSSE1] in { def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>; } - - // MOVSSrm already zeros the high parts of the register. - def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (MOVSSrm addr:$src)>; -} - -let Predicates = [UseSSE2] in { - // MOVSDrm already zeros the high parts of the register. - def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (MOVSDrm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -4135,8 +4111,6 @@ let Predicates = [UseAVX] in { // These instructions also write zeros in the high part of a 256-bit register. def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), (VMOVDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), - (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzload addr:$src)), (VMOVDI2PDIrm addr:$src)>; def : Pat<(v8i32 (X86vzload addr:$src)), @@ -4151,8 +4125,6 @@ let Predicates = [UseSSE2] in { (MOV64toPQIrr GR64:$src)>; def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), (MOVDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), - (MOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzload addr:$src)), (MOVDI2PDIrm addr:$src)>; } @@ -4944,8 +4916,6 @@ multiclass SS41I_pmovx_avx2_patterns(OpcPrefix#BWYrm) addr:$src)>; - def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#BWYrm) addr:$src)>; def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BWYrm) addr:$src)>; } @@ -4969,15 +4939,11 @@ multiclass SS41I_pmovx_avx2_patterns(OpcPrefix#WDYrm) addr:$src)>; - def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#WDYrm) addr:$src)>; def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WDYrm) addr:$src)>; def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#BDYrm) addr:$src)>; - def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))), @@ -4985,15 +4951,11 @@ multiclass SS41I_pmovx_avx2_patterns(OpcPrefix#DQYrm) addr:$src)>; - def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#DQYrm) addr:$src)>; def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#DQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#BQYrm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#BQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))), @@ -5001,8 +4963,6 @@ multiclass SS41I_pmovx_avx2_patterns(OpcPrefix#WQYrm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#WQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))), @@ -5057,8 +5017,6 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#BWrm) addr:$src)>; def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#BWrm) addr:$src)>; - def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#BWrm) addr:$src)>; def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BWrm) addr:$src)>; def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))), @@ -5067,8 +5025,6 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#BDrm) addr:$src)>; - def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#BDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))), @@ -5076,8 +5032,6 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#BQrm) addr:$src)>; - def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#BQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#BQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))), @@ -5087,8 +5041,6 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#WDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#WDrm) addr:$src)>; - def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#WDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))), @@ -5096,8 +5048,6 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#WQrm) addr:$src)>; - def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))), - (!cast(OpcPrefix#WQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#WQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))), @@ -5107,8 +5057,6 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#DQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#DQrm) addr:$src)>; - def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))), - (!cast(OpcPrefix#DQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), (!cast(OpcPrefix#DQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))), @@ -7314,8 +7262,6 @@ let Predicates = [HasF16C, NoVLX] in { WriteCvtPS2PHYSt>, VEX_L; // Pattern match vcvtph2ps of a scalar i64 load. - def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), - (VCVTPH2PSrm addr:$src)>; def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), (VCVTPH2PSrm addr:$src)>; def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert