From: Craig Topper Date: Mon, 9 Sep 2019 17:48:05 +0000 (+0000) Subject: [X86] Allow _MM_FROUND_CUR_DIRECTION and _MM_FROUND_NO_EXC to be used together on... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a503e8648e12b2328f7fb931a08664857123e224;p=llvm [X86] Allow _MM_FROUND_CUR_DIRECTION and _MM_FROUND_NO_EXC to be used together on instructions that only support SAE and not embedded rounding. Current for SAE instructions we only allow _MM_FROUND_CUR_DIRECTION(bit 2) or _MM_FROUND_NO_EXC(bit 3) to be used as the immediate passed to the inrinsics. But these instructions don't perform rounding so _MM_FROUND_CUR_DIRECTION is just sort of a default placeholder when you don't want to suppress exceptions. Using _MM_FROUND_NO_EXC by itself is really bit equivalent to (_MM_FROUND_NO_EXC | _MM_FROUND_TO_NEAREST_INT) since _MM_FROUND_TO_NEAREST_INT is 0. Since we aren't rounding on these instructions we should also accept (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) as equivalent to (_MM_FROUND_NO_EXC). icc allows this, but gcc does not. Differential Revision: https://reviews.llvm.org/D67289 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371430 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0a8219214f4..2195f40c247 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -22706,8 +22706,16 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return false; }; auto isRoundModeSAE = [](SDValue Rnd) { - if (auto *C = dyn_cast(Rnd)) - return C->getAPIntValue() == X86::STATIC_ROUNDING::NO_EXC; + if (auto *C = dyn_cast(Rnd)) { + unsigned RC = C->getZExtValue(); + if (RC & X86::STATIC_ROUNDING::NO_EXC) { + // Clear the NO_EXC bit and check remaining bits. + RC ^= X86::STATIC_ROUNDING::NO_EXC; + // As a convenience we allow no other bits or explicitly + // current direction. + return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION; + } + } return false; }; diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 6f0aba31cf1..b2d6ce4dfc9 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -755,7 +755,7 @@ define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) { ; CHECK: # %bb.0: ; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} - %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) + %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 12) ret <8 x double> %res } declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone