From: Craig Topper
Date: Thu, 12 Jul 2018 19:58:06 +0000 (+0000)
Subject: [X86][FastISel] Support EVEX version of sqrt.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7cf5f05b8cd9473b32584723900535c6075bdb47;p=llvm

[X86][FastISel] Support EVEX version of sqrt.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336939 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 4199eb2ad27..d65d81b17f4 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2799,17 +2799,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
     // is not generated by FastISel yet.
     // FIXME: Update this code once tablegen can handle it.
-    static const uint16_t SqrtOpc[2][2] = {
-      {X86::SQRTSSr, X86::VSQRTSSr},
-      {X86::SQRTSDr, X86::VSQRTSDr}
+    static const uint16_t SqrtOpc[3][2] = {
+      { X86::SQRTSSr, X86::SQRTSDr },
+      { X86::VSQRTSSr, X86::VSQRTSDr },
+      { X86::VSQRTSSZr, X86::VSQRTSDZr },
     };
-    bool HasAVX = Subtarget->hasAVX();
+    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
+                        Subtarget->hasAVX() ? 1 :
+                        0;
     unsigned Opc;
-    const TargetRegisterClass *RC;
     switch (VT.SimpleTy) {
     default: return false;
-    case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
-    case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
+    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
     }
 
     const Value *SrcVal = II->getArgOperand(0);
@@ -2818,8 +2820,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (SrcReg == 0)
       return false;
 
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
     unsigned ImplicitDefReg = 0;
-    if (HasAVX) {
+    if (AVXLevel > 0) {
       ImplicitDefReg = createResultReg(RC);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 1b54562703e..fce52bf6699 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -2614,10 +2614,15 @@ define float @test_mm_sqrt_ss_scalar(float %a0) {
 ; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
 ;
-; X64-AVX-LABEL: test_mm_sqrt_ss_scalar:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
-; X64-AVX-NEXT: retq # encoding: [0xc3]
+; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0]
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0]
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
   %sqrt = call float @llvm.sqrt.f32(float %a0)
   ret float %sqrt
 }
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 03acbaafe82..be389890bb7 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -4959,10 +4959,15 @@ define double @test_mm_sqrt_sd_scalar(double %a0) nounwind {
 ; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
 ;
-; X64-AVX-LABEL: test_mm_sqrt_sd_scalar:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
-; X64-AVX-NEXT: retq # encoding: [0xc3]
+; X64-AVX1-LABEL: test_mm_sqrt_sd_scalar:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_sqrt_sd_scalar:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
   %sqrt = call double @llvm.sqrt.f64(double %a0)
   ret double %sqrt
 }