From 3f86a859d85ea1962b082b5a33dd7f6592cb25c0 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 2 Jun 2017 20:02:59 +0000 Subject: [PATCH] [X86] Correctly broadcast NaN-like integers as float on AVX. Since r288804, we try to lower build_vectors on AVX using broadcasts of float/double. However, when we broadcast integer values that happen to have a NaN float bit pattern, we lose the NaN payload, thereby changing the integer value being broadcast. This is caused by ConstantFP::get, to which we pass the splat i32 as a float (by bitcasting it using bitsToFloat). ConstantFP::get takes a double parameter, so we end up lossily converting a single-precision NaN to double-precision. Instead, avoid any kind of conversion by directly building an APFloat from the splatted APInt. Note that this also fixes another piece of code (broadcast of subvectors), which currently isn't susceptible to the same problem. Also note that we could really just use APInt and ConstantInt throughout: the constant pool type doesn't matter much. Still, for consistency, use the appropriate type. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304590 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 24 +++++++------- .../X86/broadcast-elm-cross-splat-vec.ll | 32 +++++++++++++++++++ 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a348a489aff..dff602323bd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6542,12 +6542,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue, APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); Constant *Const; if (VT.isFloatingPoint()) { - assert((ScalarSize == 32 || ScalarSize == 64) && - "Unsupported floating point scalar size"); - if (ScalarSize == 32) - Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat()); - else - Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble()); + if (ScalarSize == 32) { + Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); + } else { + assert(ScalarSize == 64 && "Unsupported floating point scalar size"); + Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); + } } else Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); ConstantVec.push_back(Const); @@ -6633,11 +6633,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // AVX have support for 32 and 64 bit broadcast for floats only. // No 64bit integer in 32bit subtarget. MVT CVT = MVT::getFloatingPointVT(SplatBitSize); - Constant *C = SplatBitSize == 32 - ? ConstantFP::get(Type::getFloatTy(*Ctx), - SplatValue.bitsToFloat()) - : ConstantFP::get(Type::getDoubleTy(*Ctx), - SplatValue.bitsToDouble()); + // Lower the splat via APFloat directly, to avoid any conversion. + Constant *C = + SplatBitSize == 32 + ? 
ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEsingle(), SplatValue)) + : ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEdouble(), SplatValue)); SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; diff --git a/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll index b13965a30ed..bbe31c5c2ac 100644 --- a/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll +++ b/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll @@ -1203,3 +1203,35 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) { ret <8 x double> %res2 } + + +; ALL: .LCPI38 +; ALL-NEXT: .long 4290379776 # 0xffba0000 + +; AVX: .LCPI38 +; AVX-NEXT: .long 4290379776 # float NaN + +define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) { +; ALL32-LABEL: f8xi16_i32_NaN: +; ALL32: # BB#0: +; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1 +; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL32-NEXT: retl +; +; ALL64-LABEL: f8xi16_i32_NaN: +; ALL64: # BB#0: +; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 +; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL64-NEXT: retq +; +; AVX-LABEL: f8xi16_i32_NaN: +; AVX: # BB#0: +; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 + %res1 = add <8 x i16> , %a + %res2 = and <8 x i16> , %res1 + ret <8 x i16> %res2 +} -- 2.50.1