[X86] Fix the execution domain for the scalar SQRT intrinsic instructions.

author Craig Topper <craig.topper@gmail.com>
Sun, 26 Feb 2017 06:45:35 +0000 (06:45 +0000)
committer Craig Topper <craig.topper@gmail.com>
Sun, 26 Feb 2017 06:45:35 +0000 (06:45 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 9764b05a8595d15ddf715700ad36cdb7c0bea9bf..0c1697982891f67d4f739b3010d0d4f4f84a21cd 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3333,7 +3333,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
              Sched<[itins.Sched.Folded, ReadAfterLd]>,
              Requires<[target, OptForSize]>;
  
-  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
+  let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
    def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
              []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
@@ -3377,7 +3377,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
    def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
-  let isCodeGenOnly = 1 in {
+  let isCodeGenOnly = 1, ExeDomain = d in {
    def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
                  (ins VR128:$src1, VR128:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

index 972a33f13cd005d9f32eac469c5fca0351a4e35e..3071155172e35359c73046408ad3aef126942abc 100644 (file)
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2934,13 +2934,13 @@ define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwin
  ; X32-LABEL: test_mm_sqrt_sd:
  ; X32:       # BB#0:
  ; X32-NEXT:    sqrtsd %xmm0, %xmm1
-; X32-NEXT:    movaps %xmm1, %xmm0
+; X32-NEXT:    movapd %xmm1, %xmm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm_sqrt_sd:
  ; X64:       # BB#0:
  ; X64-NEXT:    sqrtsd %xmm0, %xmm1
-; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm1, %xmm0
  ; X64-NEXT:    retq
    %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
    %ext0 = extractelement <2 x double> %call, i32 0
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll

index 026f8e9196986a1095c202f158c0080411525ef7..b0a8744f5d8042a094103ed4bb46206283d1c9ac 100644 (file)
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -1512,21 +1512,21 @@ define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
  ; SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
  ; SSE:       ## BB#0:
  ; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SSE-NEXT:    movaps (%eax), %xmm0 ## encoding: [0x0f,0x28,0x00]
+; SSE-NEXT:    movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
  ; SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
  ; SSE-NEXT:    retl ## encoding: [0xc3]
  ;
  ; AVX2-LABEL: test_x86_sse2_sqrt_sd_vec_load:
  ; AVX2:       ## BB#0:
  ; AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; AVX2-NEXT:    vmovaps (%eax), %xmm0 ## encoding: [0xc5,0xf8,0x28,0x00]
+; AVX2-NEXT:    vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
  ; AVX2-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
  ; AVX2-NEXT:    retl ## encoding: [0xc3]
  ;
  ; SKX-LABEL: test_x86_sse2_sqrt_sd_vec_load:
  ; SKX:       ## BB#0:
  ; SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SKX-NEXT:    vmovaps (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
+; SKX-NEXT:    vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
  ; SKX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
  ; SKX-NEXT:    retl ## encoding: [0xc3]
    %a1 = load <2 x double>, <2 x double>* %a0, align 16
author	Craig Topper <craig.topper@gmail.com>
	Sun, 26 Feb 2017 06:45:35 +0000 (06:45 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Sun, 26 Feb 2017 06:45:35 +0000 (06:45 +0000)
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/sse2-intrinsics-fast-isel.ll		patch | blob | history
test/CodeGen/X86/sse2-intrinsics-x86.ll		patch | blob | history