[X86] Add patterns for rotr by immediate to fix PR41057.

author Craig Topper <craig.topper@intel.com>

Thu, 14 Mar 2019 07:07:26 +0000 (07:07 +0000)

committer Craig Topper <craig.topper@intel.com>

Thu, 14 Mar 2019 07:07:26 +0000 (07:07 +0000)
author Craig Topper <craig.topper@intel.com>
Thu, 14 Mar 2019 07:07:26 +0000 (07:07 +0000)
committer Craig Topper <craig.topper@intel.com>
Thu, 14 Mar 2019 07:07:26 +0000 (07:07 +0000)
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td

index 085305626470489494951e72c867b5d44ece7214..c808803974442804516c39551c3122ea3c0ea16f 100644 (file)
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -585,16 +585,16 @@ def ROR64ri  : RIi8<0xC1, MRM1r, (outs GR64:$dst),
  // Rotate by 1
  def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                   "ror{b}\t$dst",
-                 [(set GR8:$dst, (rotl GR8:$src1, (i8 7)))]>;
+                 [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
  def ROR16r1  : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
                   "ror{w}\t$dst",
-                 [(set GR16:$dst, (rotl GR16:$src1, (i8 15)))]>, OpSize16;
+                 [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize16;
  def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
                   "ror{l}\t$dst",
-                 [(set GR32:$dst, (rotl GR32:$src1, (i8 31)))]>, OpSize32;
+                 [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>, OpSize32;
  def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
                    "ror{q}\t$dst",
-                  [(set GR64:$dst, (rotl GR64:$src1, (i8 63)))]>;
+                  [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
  } // Constraints = "$src = $dst", SchedRW
  
  let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
@@ -633,18 +633,18 @@ def ROR64mi  : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
  // Rotate by 1
  def ROR8m1   : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
                   "ror{b}\t$dst",
-                 [(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst)]>;
+                 [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
  def ROR16m1  : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
                   "ror{w}\t$dst",
-                 [(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst)]>,
+                 [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
                   OpSize16;
  def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
                   "ror{l}\t$dst",
-                 [(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst)]>,
+                 [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
                   OpSize32;
  def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
                   "ror{q}\t$dst",
-                 [(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst)]>,
+                 [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
                   Requires<[In64BitMode]>;
  } // SchedRW
  
@@ -806,6 +806,34 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
  
  } // Defs = [EFLAGS]
  
+// Use the opposite rotate if it allows us to use the rotate by 1 instruction.
+def : Pat<(rotl GR8:$src1,  (i8 7)),  (ROR8r1  GR8:$src1)>;
+def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
+def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
+def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
+def : Pat<(rotr GR8:$src1,  (i8 7)),  (ROL8r1  GR8:$src1)>;
+def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
+def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
+def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+
+def : Pat<(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst),
+          (ROR8m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst),
+          (ROR16m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst),
+          (ROR32m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst),
+          (ROR64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(store (rotr (loadi8 addr:$dst), (i8 7)), addr:$dst),
+          (ROL8m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi16 addr:$dst), (i8 15)), addr:$dst),
+          (ROL16m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
+          (ROL32m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
+          (ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
  // Sandy Bridge and newer Intel processors support faster rotates using
  // SHLD to avoid a partial flag update on the normal rotate instructions.
  let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
@@ -813,6 +841,11 @@ let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
              (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
    def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
              (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+
+  def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+            (SHRD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
+  def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+            (SHRD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
  }
  
  def ROT32L2R_imm8  : SDNodeXForm<imm, [{
@@ -870,19 +903,29 @@ let Predicates = [HasBMI2] in {
  
    // Prefer RORX which is non-destructive and doesn't update EFLAGS.
    let AddedComplexity = 10 in {
+    def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+              (RORX32ri GR32:$src, imm:$shamt)>;
+    def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+              (RORX64ri GR64:$src, imm:$shamt)>;
+
      def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
                (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
      def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
                (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
    }
  
+  def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+            (RORX32mi addr:$src, imm:$shamt)>;
+  def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+            (RORX64mi addr:$src, imm:$shamt)>;
+
    def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
              (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
    def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
              (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
  
    // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
-  // immedidate shift, i.e. the following code is considered better
+  // immediate shift, i.e. the following code is considered better
    //
    //  mov %edi, %esi
    //  shl $imm, %esi
diff --git a/test/CodeGen/X86/funnel-shift-rot.ll b/test/CodeGen/X86/funnel-shift-rot.ll

index a66d771382a686462fdd7313fb22da74f4a39b87..29e1b3e065bb032d6092d2b75f41579ce77cdbbc 100644 (file)
--- a/test/CodeGen/X86/funnel-shift-rot.ll
+++ b/test/CodeGen/X86/funnel-shift-rot.ll
@@ -222,13 +222,13 @@ define i8 @rotr_i8_const_shift7(i8 %x) nounwind {
  ; X32-SSE2-LABEL: rotr_i8_const_shift7:
  ; X32-SSE2:       # %bb.0:
  ; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-SSE2-NEXT:    rorb $7, %al
+; X32-SSE2-NEXT:    rolb %al
  ; X32-SSE2-NEXT:    retl
  ;
  ; X64-AVX2-LABEL: rotr_i8_const_shift7:
  ; X64-AVX2:       # %bb.0:
  ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    rorb $7, %al
+; X64-AVX2-NEXT:    rolb %al
  ; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
  ; X64-AVX2-NEXT:    retq
    %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 7)
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll

index c7ad8f63ccd08e1fd024e48fac3fac241b6bd7ae..d6c5ae2213961a5eca5a793123ebbcc383e53d07 100644 (file)
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -472,67 +472,157 @@ define i32 @fshl_load(i32* %p) nounwind {
  }
  
  define i32 @fshr(i32 %x) nounwind {
-; CHECK32-LABEL: fshr:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    rorl $7, %eax
-; CHECK32-NEXT:    retl
+; X86-LABEL: fshr:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    rorl $7, %eax
+; X86-NEXT:    retl
  ;
-; CHECK64-LABEL: fshr:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    rorl $7, %eax
-; CHECK64-NEXT:    retq
+; SHLD-LABEL: fshr:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    shrdl $7, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxl $7, {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rorl $7, %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl %edi, %eax
+; SHLD64-NEXT:    shrdl $7, %edi, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $7, %edi, %eax
+; BMI264-NEXT:    retq
    %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 7)
    ret i32 %f
  }
  declare i32 @llvm.fshr.i32(i32, i32, i32)
  
  define i32 @fshr1(i32 %x) nounwind {
-; CHECK32-LABEL: fshr1:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    rorl $1, %eax
-; CHECK32-NEXT:    retl
+; X86-LABEL: fshr1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    rorl $1, %eax
+; X86-NEXT:    retl
  ;
-; CHECK64-LABEL: fshr1:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    rorl $1, %eax
-; CHECK64-NEXT:    retq
+; SHLD-LABEL: fshr1:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    shrdl $1, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr1:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxl $1, {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rorl $1, %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr1:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl %edi, %eax
+; SHLD64-NEXT:    shrdl $1, %edi, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr1:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $1, %edi, %eax
+; BMI264-NEXT:    retq
    %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 1)
    ret i32 %f
  }
  
  define i32 @fshr31(i32 %x) nounwind {
-; CHECK32-LABEL: fshr31:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    rorl $31, %eax
-; CHECK32-NEXT:    retl
+; X86-LABEL: fshr31:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    roll %eax
+; X86-NEXT:    retl
  ;
-; CHECK64-LABEL: fshr31:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    rorl $31, %eax
-; CHECK64-NEXT:    retq
+; SHLD-LABEL: fshr31:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    shrdl $31, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr31:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxl $31, {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr31:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    roll %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr31:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl %edi, %eax
+; SHLD64-NEXT:    shrdl $31, %edi, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr31:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $31, %edi, %eax
+; BMI264-NEXT:    retq
    %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 31)
    ret i32 %f
  }
  
  define i32 @fshr_load(i32* %p) nounwind {
-; CHECK32-LABEL: fshr_load:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movl (%eax), %eax
-; CHECK32-NEXT:    rorl $7, %eax
-; CHECK32-NEXT:    retl
+; X86-LABEL: fshr_load:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    rorl $7, %eax
+; X86-NEXT:    retl
  ;
-; CHECK64-LABEL: fshr_load:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl (%rdi), %eax
-; CHECK64-NEXT:    rorl $7, %eax
-; CHECK64-NEXT:    retq
+; SHLD-LABEL: fshr_load:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    movl (%eax), %eax
+; SHLD-NEXT:    shrdl $7, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr_load:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    rorxl $7, (%eax), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    rorl $7, %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr_load:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl (%rdi), %eax
+; SHLD64-NEXT:    shrdl $7, %eax, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr_load:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $7, (%rdi), %eax
+; BMI264-NEXT:    retq
    %x = load i32, i32* %p
    %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 7)
    ret i32 %f
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll

index 7259f640ba024bef693e9577e018421d0c23eadb..43ece70ad369633b826986037b347382ffa7f6f9 100644 (file)
--- a/test/CodeGen/X86/rot64.ll
+++ b/test/CodeGen/X86/rot64.ll
@@ -278,42 +278,86 @@ define i64 @fshl_load(i64* %p) nounwind {
  }
  
  define i64 @fshr(i64 %x) nounwind {
-; ALL-LABEL: fshr:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq %rdi, %rax
-; ALL-NEXT:    rorq $7, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rorq $7, %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shrdq $7, %rdi, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $7, %rdi, %rax
+; BMI2-NEXT:    retq
    %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 7)
    ret i64 %f
  }
  declare i64 @llvm.fshr.i64(i64, i64, i64)
  
  define i64 @fshr1(i64 %x) nounwind {
-; ALL-LABEL: fshr1:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq %rdi, %rax
-; ALL-NEXT:    rorq $1, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rorq $1, %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr1:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shrdq $1, %rdi, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr1:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $1, %rdi, %rax
+; BMI2-NEXT:    retq
    %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 1)
    ret i64 %f
  }
  
  define i64 @fshr63(i64 %x) nounwind {
-; ALL-LABEL: fshr63:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq %rdi, %rax
-; ALL-NEXT:    rorq $63, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr63:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr63:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shrdq $63, %rdi, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr63:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $63, %rdi, %rax
+; BMI2-NEXT:    retq
    %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 63)
    ret i64 %f
  }
  
  define i64 @fshr_load(i64* %p) nounwind {
-; ALL-LABEL: fshr_load:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq (%rdi), %rax
-; ALL-NEXT:    rorq $7, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    rorq $7, %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr_load:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq (%rdi), %rax
+; SHLD-NEXT:    shrdq $7, %rax, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr_load:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $7, (%rdi), %rax
+; BMI2-NEXT:    retq
    %x = load i64, i64* %p
    %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 7)
    ret i64 %f
author	Craig Topper <craig.topper@intel.com>
	Thu, 14 Mar 2019 07:07:26 +0000 (07:07 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Thu, 14 Mar 2019 07:07:26 +0000 (07:07 +0000)
lib/Target/X86/X86InstrShiftRotate.td		patch \| blob \| history
test/CodeGen/X86/funnel-shift-rot.ll		patch \| blob \| history
test/CodeGen/X86/rot32.ll		patch \| blob \| history
test/CodeGen/X86/rot64.ll		patch \| blob \| history