From: Bill Wendling
Date: Tue, 28 Sep 2010 01:28:56 +0000 (+0000)
Subject: Accidentally committed some temporary changes on my branch when reverting patches.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=28cab383fd9e7647d2186340eca769303cc4fbdb;p=clang

Accidentally committed some temporary changes on my branch when reverting patches.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@114936 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 1dd9b4365b..443b7c089f 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1869,33 +1869,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   }
 }
 
-llvm::Value *
-CodeGenFunction::BuildVector(const llvm::SmallVectorImpl<llvm::Value*> &Ops) {
-  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
-         "Not a power-of-two sized vector!");
-  bool AllConstants = true;
-  for (unsigned I = 0, E = Ops.size(); I != E && AllConstants; ++I)
-    AllConstants &= isa<Constant>(Ops[I]);
-
-  // If this is a constant vector, create a ConstantVector.
-  if (AllConstants) {
-    std::vector<llvm::Constant*> CstOps;
-    for (unsigned I = 0, E = Ops.size(); I != E; ++I)
-      CstOps.push_back(cast<llvm::Constant>(Ops[I]));
-    return ConstantVector::get(CstOps);
-  }
-
-  // Otherwise, insertelement the values to build the vector.
-  Value *Result =
-    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
-
-  for (unsigned I = 0, E = Ops.size(); I != E; ++I)
-    Result = Builder.CreateInsertElement(Result, Ops[I],
-               llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), I));
-
-  return Result;
-}
-
 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
@@ -2013,11 +1986,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     llvm::Function *F = CGM.getIntrinsic(ID);
     return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), name);
   }
-  case X86::BI__builtin_ia32_vec_init_v8qi:
-  case X86::BI__builtin_ia32_vec_init_v4hi:
-  case X86::BI__builtin_ia32_vec_init_v2si:
-    return Builder.CreateBitCast(BuildVector(Ops),
-                                 llvm::Type::getX86_MMXTy(VMContext));
   case X86::BI__builtin_ia32_cmpps: {
     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
     return Builder.CreateCall(F, &Ops[0], &Ops[0] + Ops.size(), "cmpps");
@@ -2067,6 +2035,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
     return Builder.CreateStore(Ops[1], Ops[0]);
   }
+  case X86::BI__builtin_ia32_palignr: {
+    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+
+    // If palignr is shifting the pair of input vectors less than 9 bytes,
+    // emit a shuffle instruction.
+    if (shiftVal <= 8) {
+      llvm::SmallVector<llvm::Constant*, 8> Indices;
+      for (unsigned i = 0; i != 8; ++i)
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
+
+      Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
+    }
+
+    // If palignr is shifting the pair of input vectors more than 8 but less
+    // than 16 bytes, emit a logical right shift of the destination.
+    if (shiftVal < 16) {
+      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
+      const llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
+
+      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
+
+      // create i32 constant
+      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
+      return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "palignr");
+    }
+
+    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
+    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+  }
   case X86::BI__builtin_ia32_palignr128: {
     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 8298443b6c..8dc8ac1e3d 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -536,9 +536,6 @@ public:
 
   llvm::BasicBlock *getInvokeDestImpl();
 
-  // Build a vector out of the supplied Values.
-  llvm::Value *BuildVector(const llvm::SmallVectorImpl<llvm::Value*> &Ops);
-
 public:
   /// ObjCEHValueStack - Stack of Objective-C exception values, used for
   /// rethrows.
diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h
index fefb42fd74..bad9e1c059 100644
--- a/lib/Headers/mmintrin.h
+++ b/lib/Headers/mmintrin.h
@@ -43,13 +43,14 @@ _mm_empty(void)
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cvtsi32_si64(int __i)
 {
-    return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
+    return (__m64)(__v2si){__i, 0};
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_cvtsi64_si32(__m64 __m)
 {
-    return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
+    __v2si __mmx_var2 = (__v2si)__m;
+    return __mmx_var2[0];
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
@@ -85,55 +86,59 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 8+4, 5,
+                                          8+5, 6, 8+6, 7, 8+7);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 4+2, 3,
+                                          4+3);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
+    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 2+1);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8+0, 1,
+                                          8+1, 2, 8+2, 3, 8+3);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4+0, 1,
+                                          4+1);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
+    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2+0);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_add_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)((__v8qi)__m1 + (__v8qi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_add_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)((__v4hi)__m1 + (__v4hi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_add_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
+    return (__m64)((__v2si)__m1 + (__v2si)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
@@ -163,19 +168,19 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_sub_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)((__v8qi)__m1 - (__v8qi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_sub_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)((__v4hi)__m1 - (__v4hi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_sub_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
+    return (__m64)((__v2si)__m1 - (__v2si)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
@@ -217,7 +222,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)((__v4hi)__m1 * (__v4hi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
@@ -247,13 +252,13 @@ _mm_slli_pi32(__m64 __m, int __count)
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_sll_si64(__m64 __m, __m64 __count)
 {
-    return (__m64)__builtin_ia32_psllq(__m, __count);
+    return __builtin_ia32_psllq(__m, __count);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_slli_si64(__m64 __m, int __count)
 {
-    return (__m64)__builtin_ia32_psllqi(__m, __count);
+    return __builtin_ia32_psllqi(__m, __count);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
@@ -313,67 +318,67 @@ _mm_srl_si64(__m64 __m, __m64 __count)
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_srli_si64(__m64 __m, int __count)
 {
-    return (__m64)__builtin_ia32_psrlqi(__m, __count);
+    return __builtin_ia32_psrlqi(__m, __count);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_and_si64(__m64 __m1, __m64 __m2)
 {
-    return __builtin_ia32_pand(__m1, __m2);
+    return __m1 & __m2;
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_andnot_si64(__m64 __m1, __m64 __m2)
 {
-    return __builtin_ia32_pandn(__m1, __m2);
+    return ~__m1 & __m2;
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_or_si64(__m64 __m1, __m64 __m2)
 {
-    return __builtin_ia32_por(__m1, __m2);
+    return __m1 | __m2;
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_xor_si64(__m64 __m1, __m64 __m2)
 {
-    return __builtin_ia32_pxor(__m1, __m2);
+    return __m1 ^ __m2;
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)((__v8qi)__m1 == (__v8qi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)((__v4hi)__m1 == (__v4hi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
+    return (__m64)((__v2si)__m1 == (__v2si)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)((__v8qi)__m1 > (__v8qi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
+    return (__m64)((__v2si)__m1 > (__v2si)__m2);
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
@@ -385,58 +390,57 @@ _mm_setzero_si64(void)
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_set_pi32(int __i1, int __i0)
 {
-    return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
+    return (__m64)(__v2si){ __i0, __i1 };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
 {
-    return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
+    return (__m64)(__v4hi){ __s0, __s1, __s2, __s3 };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
             char __b1, char __b0)
 {
-    return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
-                                               __b4, __b5, __b6, __b7);
+    return (__m64)(__v8qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7 };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_set1_pi32(int __i)
 {
-    return _mm_set_pi32(__i, __i);
+    return (__m64)(__v2si){ __i, __i };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
-_mm_set1_pi16(short __w)
+_mm_set1_pi16(short __s)
 {
-    return _mm_set_pi16(__w, __w, __w, __w);
+    return (__m64)(__v4hi){ __s, __s, __s, __s };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_set1_pi8(char __b)
 {
-    return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
+    return (__m64)(__v8qi){ __b, __b, __b, __b, __b, __b, __b, __b };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_setr_pi32(int __i1, int __i0)
 {
-    return _mm_set_pi32(__i1, __i0);
+    return (__m64)(__v2si){ __i1, __i0 };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
-_mm_setr_pi16(short __w3, short __w2, short __w1, short __w0)
+_mm_setr_pi16(short __s3, short __s2, short __s1, short __s0)
 {
-    return _mm_set_pi16(__w3, __w2, __w1, __w0);
+    return (__m64)(__v4hi){ __s3, __s2, __s1, __s0 };
 }
 
 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_setr_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
              char __b1, char __b0)
 {
-    return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
+    return (__m64)(__v8qi){ __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0 };
 }
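
Note on the mmintrin.h hunks above: the rewritten helpers build __m64 values with compound literals such as (__m64)(__v2si){ __i, 0 } and use element-wise operators (+, -, *, ==, >, &, |, ^) and __builtin_shufflevector on Clang's extended vector types instead of MMX-specific builtins, so they lower to generic vector IR. The following standalone sketch shows the same vector-extension idiom when compiled with Clang; the typedef names and values are illustrative stand-ins for the header's __v4hi/__v2si types, not part of the commit.

  #include <stdio.h>

  /* Local stand-ins for the header's __v4hi/__v2si vector types. */
  typedef short v4hi __attribute__((__vector_size__(8)));
  typedef int   v2si __attribute__((__vector_size__(8)));

  int main(void) {
    /* Compound literals build the vector, as _mm_set_pi16/_mm_set_pi32 now do. */
    v4hi a = (v4hi){ 1, 2, 3, 4 };
    v4hi b = (v4hi){ 4, 3, 3, 1 };

    /* Element-wise arithmetic stands in for __builtin_ia32_paddw and friends. */
    v4hi sum = a + b;

    /* Element-wise comparison yields all-ones (-1) or 0 per lane, like pcmpeqw. */
    v4hi eq = (v4hi)(a == b);

    printf("sum: %d %d %d %d\n", sum[0], sum[1], sum[2], sum[3]);
    printf("eq:  %d %d %d %d\n", eq[0], eq[1], eq[2], eq[3]);
    return 0;
  }

The _mm_unpack*_pi* changes work the same way: __builtin_shufflevector selects lanes from the concatenation of the two operands, so index 8+1 names element 1 of the second __v8qi argument.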