(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+
+ let AddedComplexity = -20 in {
+ def : Pat<(sra (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ (SARX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ (SARX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ (SHRX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ (SHRX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ (SHLX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ (SHLX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
}
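For context (not part of the patch itself), here is a sketch of the kind of IR the new masked-shift patterns above are meant to match, assuming BMI2 is enabled (e.g. llc -mattr=+bmi2); the function name and body are hypothetical, not taken from the patch's test files. The `and` with 31 is redundant with the hardware's implicit 5-bit count masking, so the pattern can drop it and fold the load straight into the SARX/SHRX/SHLX memory form.

; Hypothetical example: 32-bit shift of a loaded value by a masked amount,
; the shape matched by the (and GR8:$src2, immShift32) patterns above.
define i32 @shl32p_masked(i32* %p, i32 %shamt) nounwind {
  %x = load i32, i32* %p
  %amt = and i32 %shamt, 31     ; redundant: the shift already masks to 5 bits
  %shl = shl i32 %x, %amt
  ret i32 %shl
}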
// (anyext (setcc_carry)) -> (setcc_carry)
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
- // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
- //
+ // Artificially lower the complexity of these memory patterns so that we'll favor
// mov (%ecx), %esi
// shl $imm, $esi
//
// over
//
- // movb $imm %al
+ // movb $imm, %al
// shlx %al, (%ecx), %esi
- //
- // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
- // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
+ let AddedComplexity = -20 in {
+ def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
+ (SARX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
+ (SARX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
+ (SHRX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
+ (SHRX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
+ (SHLX32rm addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
+ (SHLX64rm addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
}
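As a rough illustration of what the negative AddedComplexity is protecting (again a hypothetical sketch, not one of the patch's tests): for a constant shift of a loaded value, the plain load + immediate-shift form should still win over materializing the count in a register for shlxl.

; Hypothetical constant-shift case: expected to stay on the
;   movl (%eax), %eax
;   shll $5, %eax
; style sequence rather than using shlxl with a materialized shift count.
define i32 @shl32p_const(i32* %p) nounwind {
  %x = load i32, i32* %p
  %shl = shl i32 %x, 5
  ret i32 %shl
}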
define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl32p:
; BMI2: # BB#0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; BMI2-NEXT: shlxl %eax, (%ecx), %eax
+; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: shlxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI264-LABEL: shl32p:
define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: lshr32p:
; BMI2: # BB#0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; BMI2-NEXT: shrxl %eax, (%ecx), %eax
+; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: shrxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI264-LABEL: lshr32p:
define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: ashr32p:
; BMI2: # BB#0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; BMI2-NEXT: sarxl %eax, (%ecx), %eax
+; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: sarxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI264-LABEL: ashr32p: