From: Craig Topper Date: Mon, 16 Feb 2015 00:42:49 +0000 (+0000) Subject: [X86] Teach clang to lower __builtin_ia32_psrldqi256 and __builtin_ia32_pslldqi256... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=526511f2e41ec45f4bdc5ed96ba59e6f38f4fa79;p=clang [X86] Teach clang to lower __builtin_ia32_psrldqi256 and __builtin_ia32_pslldqi256 to vector shuffles the backend recognizes. This is a step towards removing the corresponding intrinsics from the backend. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@229348 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index fe46e6fce9..75c8716ecf 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -6025,6 +6025,60 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. return llvm::Constant::getNullValue(ConvertType(E->getType())); } + case X86::BI__builtin_ia32_pslldqi256: { + // Shift value is in bits so divide by 8. + unsigned shiftVal = cast(Ops[1])->getZExtValue() >> 3; + + // If pslldq is shifting the vector more than 15 bytes, emit zero. + if (shiftVal >= 16) + return llvm::Constant::getNullValue(ConvertType(E->getType())); + + SmallVector Indices; + // 256-bit pslldq operates on 128-bit lanes so we need to handle that + for (unsigned l = 0; l != 32; l += 16) { + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = 32 + i - shiftVal; + if (Idx < 32) Idx -= 16; // end of lane, switch operand. + Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); + } + } + + llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); + Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); + Value *Zero = llvm::Constant::getNullValue(VecTy); + + Value *SV = llvm::ConstantVector::get(Indices); + SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq"); + llvm::Type *ResultType = ConvertType(E->getType()); + return Builder.CreateBitCast(SV, ResultType, "cast"); + } + case X86::BI__builtin_ia32_psrldqi256: { + // Shift value is in bits so divide by 8. + unsigned shiftVal = cast(Ops[1])->getZExtValue() >> 3; + + // If psrldq is shifting the vector more than 15 bytes, emit zero. + if (shiftVal >= 16) + return llvm::Constant::getNullValue(ConvertType(E->getType())); + + SmallVector Indices; + // 256-bit psrldq operates on 128-bit lanes so we need to handle that + for (unsigned l = 0; l != 32; l += 16) { + for (unsigned i = 0; i != 16; ++i) { + unsigned Idx = i + shiftVal; + if (Idx >= 16) Idx += 16; // end of lane, switch operand. + Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); + } + } + + llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); + Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); + Value *Zero = llvm::Constant::getNullValue(VecTy); + + Value *SV = llvm::ConstantVector::get(Indices); + SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); + llvm::Type *ResultType = ConvertType(E->getType()); + return Builder.CreateBitCast(SV, ResultType, "cast"); + } case X86::BI__builtin_ia32_movntps: case X86::BI__builtin_ia32_movntps256: case X86::BI__builtin_ia32_movntpd: diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c index 04825ffa2f..a30f590ad1 100644 --- a/test/CodeGen/avx2-builtins.c +++ b/test/CodeGen/avx2-builtins.c @@ -462,7 +462,7 @@ __m256i test_mm256_sign_epi32(__m256i a, __m256i b) { } __m256i test_mm256_slli_si256(__m256i a) { - // CHECK: @llvm.x86.avx2.psll.dq + // CHECK: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> return _mm256_slli_si256(a, 3); } @@ -517,7 +517,7 @@ __m256i test_mm256_sra_epi32(__m256i a, __m128i b) { } __m256i test_mm256_srli_si256(__m256i a) { - // CHECK: @llvm.x86.avx2.psrl.dq + // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> return _mm256_srli_si256(a, 3); }