BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "")
BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "")
BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "")
-BUILTIN(__builtin_ia32_loadups, "V4ffC*", "")
BUILTIN(__builtin_ia32_storeups, "vf*V4f", "")
BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "")
BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "")
BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "")
BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "")
BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "")
-BUILTIN(__builtin_ia32_loadupd, "V2ddC*", "")
BUILTIN(__builtin_ia32_storeupd, "vd*V2d", "")
BUILTIN(__builtin_ia32_movmskpd, "iV2d", "")
BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "")
BUILTIN(__builtin_ia32_clflush, "vvC*", "")
BUILTIN(__builtin_ia32_lfence, "v", "")
BUILTIN(__builtin_ia32_mfence, "v", "")
-BUILTIN(__builtin_ia32_loaddqu, "V16ccC*", "")
BUILTIN(__builtin_ia32_storedqu, "vc*V16c", "")
BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "")
BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "")
// If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
return llvm::Constant::getNullValue(ConvertType(E->getType()));
}
- case X86::BI__builtin_ia32_loadups:
- case X86::BI__builtin_ia32_loadupd:
- case X86::BI__builtin_ia32_loaddqu: {
- const llvm::Type *VecTy = ConvertType(E->getType());
- const llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), 128);
-
- Value *BC = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(IntTy),
- "cast");
- LoadInst *LI = Builder.CreateLoad(BC);
- LI->setAlignment(1); // Unaligned load.
- return Builder.CreateBitCast(LI, VecTy, "loadu.cast");
- }
case X86::BI__builtin_ia32_movntps:
case X86::BI__builtin_ia32_movntpd:
case X86::BI__builtin_ia32_movntdq:
#endif
tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
(void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
- tmp_V4f = __builtin_ia32_loadups(tmp_fCp);
(void) __builtin_ia32_storeups(tmp_fp, tmp_V4f);
(void) __builtin_ia32_storehps(tmp_V2ip, tmp_V4f);
(void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f);
tmp_V4f = __builtin_ia32_sqrtps(tmp_V4f);
tmp_V4f = __builtin_ia32_sqrtss(tmp_V4f);
(void) __builtin_ia32_maskmovdqu(tmp_V16c, tmp_V16c, tmp_cp);
- tmp_V2d = __builtin_ia32_loadupd(tmp_dCp);
(void) __builtin_ia32_storeupd(tmp_dp, tmp_V2d);
tmp_i = __builtin_ia32_movmskpd(tmp_V2d);
tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c);
(void) __builtin_ia32_clflush(tmp_vCp);
(void) __builtin_ia32_lfence();
(void) __builtin_ia32_mfence();
- tmp_V16c = __builtin_ia32_loaddqu(tmp_cCp);
(void) __builtin_ia32_storedqu(tmp_cp, tmp_V16c);
tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, tmp_i);
tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, tmp_i);