From: Uriel Korach Date: Sun, 10 Sep 2017 08:40:13 +0000 (+0000) Subject: adding autoUpgrade support to broadcast[f|i]32x2 intrinsics X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=986634ba4eb441534de457c401d47189311d49e8;p=llvm adding autoUpgrade support to broadcast[f|i]32x2 intrinsics git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312879 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index c9c2d3ec11b..44455b6527f 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4414,26 +4414,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_vbroadcast_sd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx512_mask_broadcastf32x2_256 : - Intrinsic<[llvm_v8f32_ty], - [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcastf32x2_512 : - Intrinsic<[llvm_v16f32_ty], - [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x2_128 : - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x2_256 : - Intrinsic<[llvm_v8i32_ty], - [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_broadcasti32x2_512 : - Intrinsic<[llvm_v16i32_ty], - [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_broadcastmw_512 : GCCBuiltin<"__builtin_ia32_broadcastmw512">, Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 6184ce59dc1..188fdcf6cd5 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -72,7 +72,9 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { // like to use this information to remove upgrade code for some older // intrinsics. It is currently undecided how we will determine that future // point. - if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1 + if (Name.startswith("avx512.mask.broadcastf32x2") || // Added in 6.0 + Name.startswith("avx512.mask.broadcasti32x2") || // Added in 6.0 + Name.startswith("sse2.pcmpeq.") || // Added in 3.1 Name.startswith("sse2.pcmpgt.") || // Added in 3.1 Name.startswith("avx2.pcmpeq.") || // Added in 3.1 Name.startswith("avx2.pcmpgt.") || // Added in 3.1 @@ -803,6 +805,20 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, return Res; } +static Value *upgradeBroadcastf32x2(IRBuilder<> &Builder, CallInst &CI) { + Value *Op0 = CI.getArgOperand(0); + Value *RetArg = CI.getReturnedArgOperand(); + + llvm::VectorType *Ty = RetArg->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + uint32_t Indices[NumElts]; + for(unsigned i = 0; i < NumElts; ++i) + Indices[i] = i % 2; + + Value *Res = Builder.CreateShuffleVector(Op0,UndefValue::get(Ty),Indices); + return EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1)); +} + static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, unsigned CC, bool Signed) { Value *Op0 = CI.getArgOperand(0); @@ -1059,6 +1075,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Name.startswith("avx2.pmaxs") || Name.startswith("avx512.mask.pmaxs"))) { Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT); + } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf32x2") || + Name.startswith("avx512.mask.broadcasti32x2"))) { + Rep =upgradeBroadcastf32x2(Builder, *CI); } else if (IsX86 && (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" || Name == "sse41.pmaxud" || diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 699b6effac3..4672c95e9fa 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -478,16 +478,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FADDS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FADDS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, BRCST32x2_TO_VEC, - X86ISD::VBROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, BRCST32x2_TO_VEC, - X86ISD::VBROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, BRCST32x2_TO_VEC, - X86ISD::VBROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, BRCST32x2_TO_VEC, - X86ISD::VBROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, BRCST32x2_TO_VEC, - X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM,