From: Craig Topper
Date: Fri, 30 Sep 2016 04:31:33 +0000 (+0000)
Subject: [AVX-512] Always use the full 32 register vector classes for addRegisterClass regardl...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a466cc0b4e5420c59427268690d193967dd92ced;p=llvm

[AVX-512] Always use the full 32 register vector classes for
addRegisterClass regardless of whether AVX512/VLX is enabled or not.

If AVX512 is disabled, the registers should already be marked reserved.
Pattern predicates and register classes on instructions should take care
of most of the rest. Loads/stores and physical register copies for
XMM16-31 and YMM16-31 without VLX have already been taken care of.

I'm a little unclear why this changed the register allocation of the SSE2
run of the sad.ll test, but the registers selected appear to be valid
after this change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282835 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 13a30822954..7b3f2f29d18 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -485,10 +485,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
     // f32 and f64 use SSE.
     // Set up the FP register classes.
-    addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
-                                                     : &X86::FR32RegClass);
-    addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
-                                                     : &X86::FR64RegClass);
+    addRegisterClass(MVT::f32, &X86::FR32XRegClass);
+    addRegisterClass(MVT::f64, &X86::FR64XRegClass);
     for (auto VT : { MVT::f32, MVT::f64 }) {
       // Use ANDPD to simulate FABS.
@@ -517,8 +515,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   } else if (UseX87 && X86ScalarSSEf32) {
     // Use SSE for f32, x87 for f64.
     // Set up the FP register classes.
-    addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
-                                                     : &X86::FR32RegClass);
+    addRegisterClass(MVT::f32, &X86::FR32XRegClass);
     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
     // Use ANDPS to simulate FABS.
@@ -721,8 +718,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   }
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
-    addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
+    addRegisterClass(MVT::v4f32, &X86::VR128XRegClass);
     setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
     setOperationAction(ISD::FABS, MVT::v4f32, Custom);
@@ -735,19 +731,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   }
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
-    addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
+    addRegisterClass(MVT::v2f64, &X86::VR128XRegClass);
     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
     // registers cannot be used even for integer operations.
-    addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
-    addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
-    addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
-    addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                    : &X86::VR128RegClass);
+    addRegisterClass(MVT::v16i8, &X86::VR128XRegClass);
+    addRegisterClass(MVT::v8i16, &X86::VR128XRegClass);
+    addRegisterClass(MVT::v4i32, &X86::VR128XRegClass);
+    addRegisterClass(MVT::v2i64, &X86::VR128XRegClass);
     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
@@ -955,18 +946,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
     bool HasInt256 = Subtarget.hasInt256();
-    addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                    : &X86::VR256RegClass);
-    addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                     : &X86::VR256RegClass);
-    addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                    : &X86::VR256RegClass);
-    addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                    : &X86::VR256RegClass);
-    addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                    : &X86::VR256RegClass);
-    addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
-                                                    : &X86::VR256RegClass);
+    addRegisterClass(MVT::v32i8, &X86::VR256XRegClass);
+    addRegisterClass(MVT::v16i16, &X86::VR256XRegClass);
+    addRegisterClass(MVT::v8i32, &X86::VR256XRegClass);
+    addRegisterClass(MVT::v8f32, &X86::VR256XRegClass);
+    addRegisterClass(MVT::v4i64, &X86::VR256XRegClass);
+    addRegisterClass(MVT::v4f64, &X86::VR256XRegClass);
     for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
       setOperationAction(ISD::FFLOOR, VT, Legal);
diff --git a/test/CodeGen/X86/sad.ll b/test/CodeGen/X86/sad.ll
index 07c07485c88..4c57d3bec8a 100644
--- a/test/CodeGen/X86/sad.ll
+++ b/test/CodeGen/X86/sad.ll
@@ -155,12 +155,12 @@ define i32 @sad_32i8() nounwind {
 ; SSE2-NEXT: pxor %xmm1, %xmm1
 ; SSE2-NEXT: pxor %xmm13, %xmm13
 ; SSE2-NEXT: pxor %xmm15, %xmm15
-; SSE2-NEXT: pxor %xmm5, %xmm5
 ; SSE2-NEXT: pxor %xmm14, %xmm14
+; SSE2-NEXT: pxor %xmm6, %xmm6
 ; SSE2-NEXT: .p2align 4, 0x90
 ; SSE2-NEXT: .LBB1_1: # %vector.body
 ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill
 ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp) # 16-byte Spill
 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill
 ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill
@@ -252,11 +252,9 @@ define i32 @sad_32i8() nounwind {
 ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
 ; SSE2-NEXT: paddd %xmm3, %xmm4
 ; SSE2-NEXT: paddd %xmm6, %xmm0
-; SSE2-NEXT: paddd %xmm7, %xmm14
-; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
-; SSE2-NEXT: paddd %xmm5, %xmm3
-; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) # 16-byte Spill
-; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm5 # 16-byte Reload
+; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload
+; SSE2-NEXT: paddd %xmm7, %xmm6
+; SSE2-NEXT: paddd %xmm5, %xmm14
 ; SSE2-NEXT: paddd %xmm8, %xmm1
 ; SSE2-NEXT: movdqa -{{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
 ; SSE2-NEXT: paddd %xmm2, %xmm3
@@ -266,9 +264,9 @@ define i32 @sad_32i8() nounwind {
 ; SSE2-NEXT: jne .LBB1_1
 ; SSE2-NEXT: # BB#2: # %middle.block
 ; SSE2-NEXT: paddd %xmm15, %xmm4
-; SSE2-NEXT: paddd %xmm14, %xmm1
+; SSE2-NEXT: paddd %xmm6, %xmm1
 ; SSE2-NEXT: paddd %xmm13, %xmm0
-; SSE2-NEXT: paddd %xmm5, %xmm2
+; SSE2-NEXT: paddd %xmm14, %xmm2
 ; SSE2-NEXT: paddd %xmm4, %xmm1
 ; SSE2-NEXT: paddd %xmm2, %xmm1
 ; SSE2-NEXT: paddd %xmm0, %xmm1
diff --git a/test/CodeGen/X86/vector-half-conversions.ll b/test/CodeGen/X86/vector-half-conversions.ll
index 57705ad7101..d64b37c2ffc 100644
--- a/test/CodeGen/X86/vector-half-conversions.ll
+++ b/test/CodeGen/X86/vector-half-conversions.ll
@@ -3350,69 +3350,69 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind {
 ;
 ; AVX512F-LABEL: cvt_16f32_to_16i16:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm2
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
+; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm14
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm3, %ymm3
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm4 = xmm0[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm4, %ymm4
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm5, %ymm5
+; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm7
+; AVX512F-NEXT: vextractf128 $1, %ymm7, %xmm8
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm6 = xmm8[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm6, %ymm6
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm9 = xmm8[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm9, %ymm9
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm10 = xmm8[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm10, %ymm10
+; AVX512F-NEXT: vcvtps2ph $4, %zmm8, %ymm8
+; AVX512F-NEXT: vpermilps {{.*#+}} xmm11 = xmm7[3,1,2,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm11, %ymm11
+; AVX512F-NEXT: vpermilpd {{.*#+}} xmm12 = xmm7[1,0]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm12, %ymm12
+; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm13
+; AVX512F-NEXT: vmovd %xmm13, %eax
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm7, %ymm7
+; AVX512F-NEXT: vmovd %eax, %xmm2
+; AVX512F-NEXT: vmovd %xmm7, %eax
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm12, %eax
+; AVX512F-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm11, %eax
+; AVX512F-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm8, %eax
+; AVX512F-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm10, %eax
+; AVX512F-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm9, %eax
+; AVX512F-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm6, %eax
+; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm6
+; AVX512F-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vmovd %xmm6, %eax
+; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
+; AVX512F-NEXT: vmovd %eax, %xmm6
+; AVX512F-NEXT: vmovd %xmm0, %eax
+; AVX512F-NEXT: vpinsrw $1, %eax, %xmm6, %xmm0
+; AVX512F-NEXT: vmovd %xmm5, %eax
+; AVX512F-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm4
+; AVX512F-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm4, %eax
+; AVX512F-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm3, %eax
+; AVX512F-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vmovd %xmm14, %eax
 ; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm1
-; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3
+; AVX512F-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2
-; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm2, %eax
-; AVX512F-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vmovd %eax, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1
-; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm0
-; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0
-; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1
-; AVX512F-NEXT: vmovd %xmm0, %eax
-; AVX512F-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX512F-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT: retq
 ;
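
A note on the mechanism the commit message leans on: the register allocator never hands out a register the subtarget reports as reserved, so it is safe to register the full 32-register classes everywhere and let reserved-register marking (plus pattern predicates on instructions) keep XMM16-31/YMM16-31 away from targets without AVX-512/VLX. Below is a minimal, self-contained C++ sketch of that "expose the full class, reserve what the target lacks" idea. It is not LLVM code; every name in it (ToyRegisterAllocator, NumXMMRegs, and so on) is made up purely for illustration.

// Illustrative sketch only -- NOT LLVM's API. Models the idea of exposing the
// full 32-register class and keeping the upper 16 registers away from
// non-AVX-512 targets by marking them reserved, rather than by switching to a
// smaller register class.
#include <bitset>
#include <iostream>
#include <optional>
#include <string>

constexpr unsigned NumXMMRegs = 32; // XMM0-XMM31, as in the full VR128X class.

class ToyRegisterAllocator {
  std::bitset<NumXMMRegs> Reserved; // Registers the allocator must never use.
  std::bitset<NumXMMRegs> InUse;

public:
  explicit ToyRegisterAllocator(bool HasAVX512) {
    // Without AVX-512 only XMM0-XMM15 exist, so XMM16-XMM31 are reserved.
    if (!HasAVX512)
      for (unsigned R = 16; R < NumXMMRegs; ++R)
        Reserved.set(R);
  }

  // Hand out the lowest-numbered register that is neither reserved nor in use.
  std::optional<unsigned> allocate() {
    for (unsigned R = 0; R < NumXMMRegs; ++R) {
      if (!Reserved.test(R) && !InUse.test(R)) {
        InUse.set(R);
        return R;
      }
    }
    return std::nullopt; // Out of registers.
  }
};

int main() {
  ToyRegisterAllocator SSE2Alloc(/*HasAVX512=*/false);
  ToyRegisterAllocator AVX512Alloc(/*HasAVX512=*/true);

  // Both allocators see the same 32-entry class; the non-AVX-512 one simply
  // never returns XMM16-XMM31 because those registers are reserved.
  for (unsigned I = 0; I < 20; ++I) {
    auto A = SSE2Alloc.allocate();
    auto B = AVX512Alloc.allocate();
    std::cout << "alloc " << I << ": sse2="
              << (A ? "xmm" + std::to_string(*A) : std::string("<none>"))
              << " avx512="
              << (B ? "xmm" + std::to_string(*B) : std::string("<none>"))
              << "\n";
  }
  return 0;
}

Built with any C++17 compiler, the non-AVX-512 allocator hands out xmm0 through xmm15 and then reports none left, while the AVX-512 one continues into xmm16 and beyond -- the behavior the reserved-register mechanism is expected to give the real backend once the full classes are registered unconditionally.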