// x86 builtin declaration entries. Each entry invokes
// BUILTIN(name, signature-string, attribute-string); every entry in this span
// passes an empty attribute string.
// NOTE(review): the signature strings look like Clang's builtin type encoding
// (return type first, then argument types; e.g. "V4f" = vector of 4 floats,
// a trailing "Ic" = constant-integer immediate) -- confirm against the
// encoding key at the top of Builtins.def.
// NOTE(review): lines beginning with '-' look like unified-diff deletion
// markers, i.e. this text appears to be a patch hunk rather than the .def file
// itself -- confirm before treating them as live entries. The four marked
// entries in this span are the blends whose signature ends in "Ic"
// (pblendw128, pblendw256, pblendd128, pblendd256).
BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "")
BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "")
-BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIc", "")
BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "")
BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "")
BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "")
BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "")
BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "")
-BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIc", "")
BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "")
BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "")
BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "")
BUILTIN(__builtin_ia32_vbroadcastss_ps256, "V8fV4f", "")
BUILTIN(__builtin_ia32_vbroadcastsd_pd256, "V4dV2d", "")
BUILTIN(__builtin_ia32_vbroadcastsi256, "V4LLiV2LLi", "")
-BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIc", "")
-BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIc", "")
BUILTIN(__builtin_ia32_pbroadcastb256, "V32cV16c", "")
BUILTIN(__builtin_ia32_pbroadcastw256, "V16sV8s", "")
BUILTIN(__builtin_ia32_pbroadcastd256, "V8iV4i", "")
tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
#ifdef USE_SSE4
tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
- tmp_V8s = __builtin_ia32_pblendw128(tmp_V8s, tmp_V8s, imm_i_0_256);
tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);
tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f);
tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i);