/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
- OpndItins itins,
- bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
let isCommutable = 1 in {
def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,
Operand memopr, ComplexPattern mem_cpat,
- OpndItins itins,
- bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr>,
+ RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm>,
+ RC:$src1, mem_cpat:$src2))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ OpNode, FR32, f32mem, SSEPackedSingle, itins.s, 0>,
+ XS, VEX_4V, VEX_LIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;
+ OpNode, FR64, f64mem, SSEPackedDouble, itins.d, 0>,
+ XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, itins.s>, XS;
+ OpNode, FR32, f32mem, SSEPackedSingle,
+ itins.s>, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, itins.d>, XD;
+ OpNode, FR64, f64mem, SSEPackedDouble,
+ itins.d>, XD;
}
}
SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
- itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG;
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
- itins.d, 0>, XD, VEX_4V, VEX_LIG;
+ SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
- itins.s>, XS;
+ SSEPackedSingle, itins.s>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
- itins.d>, XD;
+ SSEPackedDouble, itins.d>, XD;
}
}
(Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
}
-
+
// With SSE 4.1, insertps/blendi are preferred to movsd, so match those too.
let Predicates = [UseSSE41] in {
// extracted scalar math op with insert via insertps
FR32:$src))), (iPTR 0))),
(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
-
+
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR64:$src))), (i8 1))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
-
+
// vector math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
(Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop, Operand vec_memop,
ComplexPattern mem_cpat, Intrinsic Intr,
- SDNode OpNode, OpndItins itins, Predicate target,
- string Suffix> {
+ SDNode OpNode, Domain d, OpndItins itins,
+ Predicate target, string Suffix> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
- [(set RC:$dst, (OpNode RC:$src1))], itins.rr>, Sched<[itins.Sched]>,
+ [(set RC:$dst, (OpNode RC:$src1))], itins.rr, d>, Sched<[itins.Sched]>,
Requires<[target]>;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
- [(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm>,
+ [(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>,
Requires<[target, OptForSize]>;
// because the high elements of the destination are unchanged in SSE.
def : Pat<(Intr VR128:$src),
(!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;
- def : Pat<(Intr (load addr:$src)),
+ def : Pat<(Intr (load addr:$src)),
(vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
addr:$src), VR128))>;
def : Pat<(Intr mem_cpat:$src),
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop, Operand vec_memop,
ComplexPattern mem_cpat,
- Intrinsic Intr, SDNode OpNode, OpndItins itins,
- Predicate target, string Suffix> {
+ Intrinsic Intr, SDNode OpNode, Domain d,
+ OpndItins itins, Predicate target, string Suffix> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], itins.rr>, Sched<[itins.Sched]>;
- let mayLoad = 1 in
+ [], itins.rr, d>, Sched<[itins.Sched]>;
+ let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ [], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let isCodeGenOnly = 1 in {
// todo: uncomment when all r_Int forms are added to X86InstrInfo.cpp
- //def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
+ //def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
// (ins VR128:$src1, VR128:$src2),
// !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// []>, Sched<[itins.Sched.Folded]>;
let mayLoad = 1 in
- def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
+ def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, vec_memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
- def : Pat<(vt (OpNode mem_cpat:$src)),
+ def : Pat<(vt (OpNode mem_cpat:$src)),
(!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
mem_cpat:$src)>;
// (VT (IMPLICIT_DEF)), VR128:$src)>;
def : Pat<(Intr VR128:$src),
(vt (COPY_TO_REGCLASS(
- !cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)),
+ !cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)),
(ScalarVT (COPY_TO_REGCLASS VR128:$src, RC))), VR128))>;
def : Pat<(Intr mem_cpat:$src),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}
let Predicates = [target, OptForSize] in
- def : Pat<(ScalarVT (OpNode (load addr:$src))),
+ def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
}
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- itins, UseSSE1, "SS">, XS;
+ SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem, ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
+ SSEPackedSingle, itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, itins, UseSSE2, "SD">, XD;
+ OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem, sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG;
+ OpNode, SSEPackedDouble, itins, UseAVX, "SD">,
+ XD, VEX_4V, VEX_LIG;
}
// Square root.
; CHECK-LABEL: clampTo3k_a:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_a:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; FINITE-LABEL: clampTo3k_a:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_a(double %x) nounwind readnone {
entry:
; FINITE-LABEL: clampTo3k_b:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_b(double %x) nounwind readnone {
entry:
; CHECK-LABEL: clampTo3k_c:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_c:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; FINITE-LABEL: clampTo3k_c:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_c(double %x) nounwind readnone {
entry:
; FINITE-LABEL: clampTo3k_d:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_d(double %x) nounwind readnone {
entry:
; CHECK-LABEL: clampTo3k_e:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_e:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
; FINITE-LABEL: clampTo3k_e:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_e(double %x) nounwind readnone {
entry:
; FINITE-LABEL: clampTo3k_f:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_f(double %x) nounwind readnone {
entry:
; CHECK-LABEL: clampTo3k_g:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_g:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
; FINITE-LABEL: clampTo3k_g:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_g(double %x) nounwind readnone {
entry:
; FINITE-LABEL: clampTo3k_h:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_h(double %x) nounwind readnone {
entry:
ret <4 x float> %3
}
+define <4 x float> @test_sqrt_ss(<4 x float> %a) {
+; SSE2-LABEL: test_sqrt_ss:
+; SSE2: # BB#0:
+; SSE2-NEXT: sqrtss %xmm0, %xmm1
+; SSE2-NEXT: movss %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_sqrt_ss:
+; SSE41: # BB#0:
+; SSE41-NEXT: sqrtss %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_sqrt_ss:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: retq
+ %1 = extractelement <4 x float> %a, i32 0
+ %2 = call float @llvm.sqrt.f32(float %1)
+ %3 = insertelement <4 x float> %a, float %2, i32 0
+ ret <4 x float> %3
+}
+declare float @llvm.sqrt.f32(float)
+
define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE: # BB#0:
ret <2 x double> %3
}
+define <2 x double> @test_sqrt_sd(<2 x double> %a) {
+; SSE-LABEL: test_sqrt_sd:
+; SSE: # BB#0:
+; SSE-NEXT: sqrtsd %xmm0, %xmm1
+; SSE-NEXT: movsd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_sqrt_sd:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
+; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = extractelement <2 x double> %a, i32 0
+ %2 = call double @llvm.sqrt.f64(double %1)
+ %3 = insertelement <2 x double> %a, double %2, i32 0
+ ret <2 x double> %3
+}
+declare double @llvm.sqrt.f64(double)
+
define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE: # BB#0:
; SSE-LABEL: test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_sd:
; SSE-LABEL: test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_sd:
; SSE-LABEL: test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_sd:
; SSE-LABEL: test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_sd:
}
; With SSE4.1 or greater, the shuffles in the following tests may
-; be lowered to X86Blendi nodes.
+; be lowered to X86Blendi nodes.
define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
; SSE-LABEL: blend_add_ss:
; SSE-LABEL: insert_test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_sd:
; SSE-LABEL: insert_test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_sd:
; SSE-LABEL: insert_test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_sd:
; SSE-LABEL: insert_test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_sd:
; SSE-LABEL: insert_test4_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_sd:
; SSE-LABEL: insert_test4_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_sd:
; SSE-LABEL: insert_test4_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_sd:
; SSE-LABEL: insert_test4_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_sd: