From: Craig Topper Date: Thu, 16 May 2019 16:53:50 +0000 (+0000) Subject: [X86] Use 0x9 instead of 0x1 as the immediate in some masked floor pattern. Similarly... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a5ac3e61acd77460126b5ea82824e65837698a3f;p=llvm [X86] Use 0x9 instead of 0x1 as the immediate in some masked floor pattern. Similarly change 0x2 to 0xA for ceil. This suppresses exceptions which is what we should be doing for ceil and floor. We already use the correct immediate in patterns without masking. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360915 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5eed0e12bf3..73abd964aa1 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -9410,13 +9410,13 @@ multiclass avx512_masked_scalar_imm; + v4f32x_info, fp32imm0, 0x09, HasAVX512>; defm : avx512_masked_scalar_imm; + v4f32x_info, fp32imm0, 0x0A, HasAVX512>; defm : avx512_masked_scalar_imm; + v2f64x_info, fp64imm0, 0x09, HasAVX512>; defm : avx512_masked_scalar_imm; + v2f64x_info, fp64imm0, 0x0A, HasAVX512>; //------------------------------------------------- diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll index 2dfc554e38d..448c5efce17 100644 --- a/test/CodeGen/X86/vec_floor.ll +++ b/test/CodeGen/X86/vec_floor.ll @@ -1374,7 +1374,7 @@ define <4 x float> @floor_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w ; AVX512-LABEL: floor_mask_ss: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = and i8 %k, 1 @@ -1415,7 +1415,7 @@ define <4 x float> @floor_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwi ; AVX512-LABEL: floor_maskz_ss: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = and i8 %k, 1 %nmask = icmp eq i8 %mask, 0 @@ -1452,7 +1452,7 @@ define <2 x double> @floor_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double ; AVX512-LABEL: floor_mask_sd: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovapd %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = and i8 %k, 1 @@ -1493,7 +1493,7 @@ define <2 x double> @floor_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) nou ; AVX512-LABEL: floor_maskz_sd: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = and i8 %k, 1 %nmask = icmp eq i8 %mask, 0 @@ -1530,7 +1530,7 @@ define <4 x float> @floor_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x flo ; AVX512-LABEL: floor_mask_ss_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 @@ -1573,7 +1573,7 @@ define <4 x float> @floor_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k) ; AVX512-LABEL: floor_maskz_ss_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 %s = extractelement <4 x float> %x, i64 0 @@ -1609,7 +1609,7 @@ define <2 x double> @floor_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x ; AVX512-LABEL: floor_mask_sd_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovapd %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 @@ -1652,7 +1652,7 @@ define <2 x double> @floor_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 ; AVX512-LABEL: floor_maskz_sd_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 %s = extractelement <2 x double> %x, i64 0 @@ -1684,7 +1684,7 @@ define <4 x float> @floor_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x flo ; AVX512-LABEL: floor_mask_ss_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %mask1 = fcmp oeq <4 x float> %x, %y @@ -1717,7 +1717,7 @@ define <4 x float> @floor_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwin ; AVX512-LABEL: floor_maskz_ss_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscaless $9, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask1 = fcmp oeq <4 x float> %x, %y %mask = extractelement <4 x i1> %mask1, i64 0 @@ -1750,7 +1750,7 @@ define <2 x double> @floor_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x ; AVX512-LABEL: floor_mask_sd_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovapd %xmm2, %xmm0 ; AVX512-NEXT: retq %mask1 = fcmp oeq <2 x double> %x, %y @@ -1783,7 +1783,7 @@ define <2 x double> @floor_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) noun ; AVX512-LABEL: floor_maskz_sd_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscalesd $9, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask1 = fcmp oeq <2 x double> %x, %y %mask = extractelement <2 x i1> %mask1, i64 0 @@ -2350,7 +2350,7 @@ define <4 x float> @ceil_mask_ss(<4 x float> %x, <4 x float> %y, <4 x float> %w, ; AVX512-LABEL: ceil_mask_ss: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = and i8 %k, 1 @@ -2391,7 +2391,7 @@ define <4 x float> @ceil_maskz_ss(<4 x float> %x, <4 x float> %y, i8 %k) nounwin ; AVX512-LABEL: ceil_maskz_ss: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = and i8 %k, 1 %nmask = icmp eq i8 %mask, 0 @@ -2428,7 +2428,7 @@ define <2 x double> @ceil_mask_sd(<2 x double> %x, <2 x double> %y, <2 x double> ; AVX512-LABEL: ceil_mask_sd: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovapd %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = and i8 %k, 1 @@ -2469,7 +2469,7 @@ define <2 x double> @ceil_maskz_sd(<2 x double> %x, <2 x double> %y, i8 %k) noun ; AVX512-LABEL: ceil_maskz_sd: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = and i8 %k, 1 %nmask = icmp eq i8 %mask, 0 @@ -2506,7 +2506,7 @@ define <4 x float> @ceil_mask_ss_trunc(<4 x float> %x, <4 x float> %y, <4 x floa ; AVX512-LABEL: ceil_mask_ss_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 @@ -2549,7 +2549,7 @@ define <4 x float> @ceil_maskz_ss_trunc(<4 x float> %x, <4 x float> %y, i16 %k) ; AVX512-LABEL: ceil_maskz_ss_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 %s = extractelement <4 x float> %x, i64 0 @@ -2585,7 +2585,7 @@ define <2 x double> @ceil_mask_sd_trunc(<2 x double> %x, <2 x double> %y, <2 x d ; AVX512-LABEL: ceil_mask_sd_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovapd %xmm2, %xmm0 ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 @@ -2628,7 +2628,7 @@ define <2 x double> @ceil_maskz_sd_trunc(<2 x double> %x, <2 x double> %y, i16 % ; AVX512-LABEL: ceil_maskz_sd_trunc: ; AVX512: ## %bb.0: ; AVX512-NEXT: kmovw %edi, %k1 -; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask = trunc i16 %k to i1 %s = extractelement <2 x double> %x, i64 0 @@ -2660,7 +2660,7 @@ define <4 x float> @ceil_mask_ss_mask8(<4 x float> %x, <4 x float> %y, <4 x floa ; AVX512-LABEL: ceil_mask_ss_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovaps %xmm2, %xmm0 ; AVX512-NEXT: retq %mask1 = fcmp oeq <4 x float> %x, %y @@ -2693,7 +2693,7 @@ define <4 x float> @ceil_maskz_ss_mask8(<4 x float> %x, <4 x float> %y) nounwind ; AVX512-LABEL: ceil_maskz_ss_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscaless $10, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask1 = fcmp oeq <4 x float> %x, %y %mask = extractelement <4 x i1> %mask1, i64 0 @@ -2726,7 +2726,7 @@ define <2 x double> @ceil_mask_sd_mask8(<2 x double> %x, <2 x double> %y, <2 x d ; AVX512-LABEL: ceil_mask_sd_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1} +; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm2 {%k1} ; AVX512-NEXT: vmovapd %xmm2, %xmm0 ; AVX512-NEXT: retq %mask1 = fcmp oeq <2 x double> %x, %y @@ -2759,7 +2759,7 @@ define <2 x double> @ceil_maskz_sd_mask8(<2 x double> %x, <2 x double> %y) nounw ; AVX512-LABEL: ceil_maskz_sd_mask8: ; AVX512: ## %bb.0: ; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 -; AVX512-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: vrndscalesd $10, %xmm0, %xmm1, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %mask1 = fcmp oeq <2 x double> %x, %y %mask = extractelement <2 x i1> %mask1, i64 0