TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "nc", "sse3")
TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "nc", "sse3")
TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "nc", "sse4.1")
// SSE 4.2
TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "nc", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "n", "avx")
TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "n", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "nc", "avx")
// AVX2
TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "nc", "avx2")
return Builder.CreateBitCast(BuildVector(Ops),
llvm::Type::getX86_MMXTy(getLLVMContext()));
case X86::BI__builtin_ia32_vec_ext_v2si:
- return Builder.CreateExtractElement(Ops[0],
- cast<ConstantInt>(Ops[1])->getZExtValue());
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di:
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateExtractElement(Ops[0], Ops[1]);
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di:
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]);
case X86::BI_mm_setcsr:
case X86::BI__builtin_ia32_ldmxcsr: {
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
/// element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 32 bits of extended
/// packed data.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_extract_epi32(__m256i __a, const int __imm)
-{
- __v8si __b = (__v8si)__a;
- return __b[__imm & 7];
-}
+#define _mm256_extract_epi32(X, N) \
+ (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))
/// Takes a [16 x i16] vector and returns the vector element value
/// indexed by the immediate constant operand.
/// element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
/// packed data.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_extract_epi16(__m256i __a, const int __imm)
-{
- __v16hi __b = (__v16hi)__a;
- return (unsigned short)__b[__imm & 15];
-}
+#define _mm256_extract_epi16(X, N) \
+ (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \
+ (int)(N))
/// Takes a [32 x i8] vector and returns the vector element value
/// indexed by the immediate constant operand.
/// element is extracted and returned.
/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
/// packed data.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_extract_epi8(__m256i __a, const int __imm)
-{
- __v32qi __b = (__v32qi)__a;
- return (unsigned char)__b[__imm & 31];
-}
+#define _mm256_extract_epi8(X, N) \
+ (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \
+ (int)(N))
#ifdef __x86_64__
/// Takes a [4 x i64] vector and returns the vector element value
/// element is extracted and returned.
/// \returns A 64-bit integer containing the extracted 64 bits of extended
/// packed data.
-static __inline long long __DEFAULT_FN_ATTRS
-_mm256_extract_epi64(__m256i __a, const int __imm)
-{
- __v4di __b = (__v4di)__a;
- return __b[__imm & 3];
-}
+#define _mm256_extract_epi64(X, N) \
+ (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))
#endif
/// Takes a [8 x i32] vector and replaces the vector element value
/// replaced.
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_insert_epi32(__m256i __a, int __b, int const __imm)
-{
- __v8si __c = (__v8si)__a;
- __c[__imm & 7] = __b;
- return (__m256i)__c;
-}
+#define _mm256_insert_epi32(X, I, N) \
+ (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
+ (int)(I), (int)(N))
/// Takes a [16 x i16] vector and replaces the vector element value
/// replaced.
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_insert_epi16(__m256i __a, int __b, int const __imm)
-{
- __v16hi __c = (__v16hi)__a;
- __c[__imm & 15] = __b;
- return (__m256i)__c;
-}
+#define _mm256_insert_epi16(X, I, N) \
+ (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
+ (int)(I), (int)(N))
/// Takes a [32 x i8] vector and replaces the vector element value
/// indexed by the immediate constant operand with a new value. Returns the
/// replaced.
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_insert_epi8(__m256i __a, int __b, int const __imm)
-{
- __v32qi __c = (__v32qi)__a;
- __c[__imm & 31] = __b;
- return (__m256i)__c;
-}
+#define _mm256_insert_epi8(X, I, N) \
+ (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
+ (int)(I), (int)(N))
#ifdef __x86_64__
/// Takes a [4 x i64] vector and replaces the vector element value
/// replaced.
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
-{
- __v4di __c = (__v4di)__a;
- __c[__imm & 3] = __b;
- return (__m256i)__c;
-}
+#define _mm256_insert_epi64(X, I, N) \
+ (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
+ (long long)(I), (int)(N))
#endif
/* Conversion */
/// 111: assign values from bits [127:112] of \a __a.
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_extract_epi16(__m128i __a, int __imm)
-{
- __v8hi __b = (__v8hi)__a;
- return (unsigned short)__b[__imm & 7];
-}
+#define _mm_extract_epi16(a, imm) \
+ (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
+ (int)(imm))
/// Constructs a 128-bit integer vector by first making a copy of the
/// 128-bit integer vector parameter, and then inserting the lower 16 bits
/// An immediate value specifying the bit offset in the result at which the
/// lower 16 bits of \a __b are written.
/// \returns A 128-bit integer vector containing the constructed values.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_insert_epi16(__m128i __a, int __b, int __imm)
-{
- __v8hi __c = (__v8hi)__a;
- __c[__imm & 7] = __b;
- return (__m128i)__c;
-}
+#define _mm_insert_epi16(a, b, imm) \
+ (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
+ (int)(imm))
/// Copies the values of the most significant bits from each 8-bit
/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
/// 11: Bits [127:96] of parameter \a X are returned.
/// \returns A 32-bit integer containing the extracted 32 bits of float data.
#define _mm_extract_ps(X, N) (__extension__ \
- ({ union { int __i; float __f; } __t; \
- __v4sf __a = (__v4sf)(__m128)(X); \
- __t.__f = __a[(N) & 3]; \
- __t.__i;}))
+ ({ union { int __i; float __f; } __t; \
+ __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \
+ __t.__i;}))
/* Miscellaneous insert and extract macros. */
/* Extract a single-precision float from X at index N into D. */
-#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \
- (D) = __a[N]; }))
+#define _MM_EXTRACT_FLOAT(D, X, N) \
+ { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }
/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
an index suitable for _mm_insert_ps. */
/// 1110: Bits [119:112] of the result are used for insertion. \n
/// 1111: Bits [127:120] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
-#define _mm_insert_epi8(X, I, N) (__extension__ \
- ({ __v16qi __a = (__v16qi)(__m128i)(X); \
- __a[(N) & 15] = (I); \
- (__m128i)__a;}))
+#define _mm_insert_epi8(X, I, N) \
+ (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
+ (int)(I), (int)(N))
/// Constructs a 128-bit vector of [4 x i32] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 32-bit
/// 10: Bits [95:64] of the result are used for insertion. \n
/// 11: Bits [127:96] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
-#define _mm_insert_epi32(X, I, N) (__extension__ \
- ({ __v4si __a = (__v4si)(__m128i)(X); \
- __a[(N) & 3] = (I); \
- (__m128i)__a;}))
+#define _mm_insert_epi32(X, I, N) \
+ (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
+ (int)(I), (int)(N))
#ifdef __x86_64__
/// Constructs a 128-bit vector of [2 x i64] by first making a copy of
/// 0: Bits [63:0] of the result are used for insertion. \n
/// 1: Bits [127:64] of the result are used for insertion. \n
/// \returns A 128-bit integer vector containing the constructed values.
-#define _mm_insert_epi64(X, I, N) (__extension__ \
- ({ __v2di __a = (__v2di)(__m128i)(X); \
- __a[(N) & 1] = (I); \
- (__m128i)__a;}))
+#define _mm_insert_epi64(X, I, N) \
+ (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
+ (long long)(I), (int)(N))
#endif /* __x86_64__ */
/* Extract int from packed integer array at index. This returns the element
/// \returns An unsigned integer, whose lower 8 bits are selected from the
/// 128-bit integer vector parameter and the remaining bits are assigned
/// zeros.
-#define _mm_extract_epi8(X, N) (__extension__ \
- ({ __v16qi __a = (__v16qi)(__m128i)(X); \
- (int)(unsigned char) __a[(N) & 15];}))
+#define _mm_extract_epi8(X, N) \
+ (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
+ (int)(N))
/// Extracts a 32-bit element from the 128-bit integer vector of
/// [4 x i32], using the immediate value parameter \a N as a selector.
/// 11: Bits [127:96] of the parameter \a X are exracted.
/// \returns An integer, whose lower 32 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
-#define _mm_extract_epi32(X, N) (__extension__ \
- ({ __v4si __a = (__v4si)(__m128i)(X); \
- (int)__a[(N) & 3];}))
+#define _mm_extract_epi32(X, N) \
+ (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))
#ifdef __x86_64__
/// Extracts a 64-bit element from the 128-bit integer vector of
/// 0: Bits [63:0] are returned. \n
/// 1: Bits [127:64] are returned. \n
/// \returns A 64-bit integer.
-#define _mm_extract_epi64(X, N) (__extension__ \
- ({ __v2di __a = (__v2di)(__m128i)(X); \
- (long long)__a[(N) & 1];}))
+#define _mm_extract_epi64(X, N) \
+ (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))
#endif /* __x86_64 */
/* SSE4 128-bit Packed Integer Comparisons. */
switch (BuiltinID) {
default:
return false;
- case X86::BI_mm_prefetch:
- i = 1; l = 0; u = 7;
- break;
case X86::BI__builtin_ia32_vec_ext_v2si:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
i = 1; l = 0; u = 1;
break;
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ i = 2; l = 0; u = 1;
+ break;
case X86::BI__builtin_ia32_vec_ext_v4hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v4di:
i = 1; l = 0; u = 3;
break;
+ case X86::BI_mm_prefetch:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ i = 1; l = 0; u = 7;
+ break;
case X86::BI__builtin_ia32_sha1rnds4:
case X86::BI__builtin_ia32_vec_set_v4hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v4di:
i = 2; l = 0; u = 3;
break;
case X86::BI__builtin_ia32_vpermil2pd:
case X86::BI__builtin_ia32_vpcomw:
case X86::BI__builtin_ia32_vpcomd:
case X86::BI__builtin_ia32_vpcomq:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
i = 2; l = 0; u = 7;
break;
case X86::BI__builtin_ia32_roundps:
case X86::BI__builtin_ia32_roundpd:
case X86::BI__builtin_ia32_roundps256:
case X86::BI__builtin_ia32_roundpd256:
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
i = 1; l = 0; u = 15;
break;
case X86::BI__builtin_ia32_roundss:
case X86::BI__builtin_ia32_rangeps512_mask:
case X86::BI__builtin_ia32_getmantsd_round_mask:
case X86::BI__builtin_ia32_getmantss_round_mask:
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
i = 2; l = 0; u = 15;
break;
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ i = 1; l = 0; u = 31;
+ break;
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpss:
case X86::BI__builtin_ia32_cmppd:
case X86::BI__builtin_ia32_cmppd512_mask:
case X86::BI__builtin_ia32_cmpsd_mask:
case X86::BI__builtin_ia32_cmpss_mask:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
i = 2; l = 0; u = 31;
break;
case X86::BI__builtin_ia32_vcvtps2ph:
int test_mm256_extract_epi8(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi8
- // CHECK: and i32 %{{.*}}, 31
- // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}}
+ // CHECK: extractelement <32 x i8> %{{.*}}, i32 31
// CHECK: zext i8 %{{.*}} to i32
- return _mm256_extract_epi8(A, 32);
+ return _mm256_extract_epi8(A, 31);
}
int test_mm256_extract_epi16(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi16
- // CHECK: and i32 %{{.*}}, 15
- // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}}
+ // CHECK: extractelement <16 x i16> %{{.*}}, i32 15
// CHECK: zext i16 %{{.*}} to i32
- return _mm256_extract_epi16(A, 16);
+ return _mm256_extract_epi16(A, 15);
}
int test_mm256_extract_epi32(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi32
- // CHECK: and i32 %{{.*}}, 7
- // CHECK: extractelement <8 x i32> %{{.*}}, i32 %{{.*}}
- return _mm256_extract_epi32(A, 8);
+ // CHECK: extractelement <8 x i32> %{{.*}}, i32 7
+ return _mm256_extract_epi32(A, 7);
}
long long test_mm256_extract_epi64(__m256i A) {
// CHECK-LABEL: test_mm256_extract_epi64
- // CHECK: and i32 %{{.*}}, 3
- // CHECK: extractelement <4 x i64> %{{.*}}, i32 %{{.*}}
- return _mm256_extract_epi64(A, 5);
+ // CHECK: extractelement <4 x i64> %{{.*}}, i32 3
+ return _mm256_extract_epi64(A, 3);
}
__m128d test_mm256_extractf128_pd(__m256d A) {
__m256i test_mm256_insert_epi8(__m256i x, char b) {
// CHECK-LABEL: test_mm256_insert_epi8
- // CHECK: and i32 %{{.*}}, 31
- // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 %{{.*}}
- return _mm256_insert_epi8(x, b, 17);
+ // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+ return _mm256_insert_epi8(x, b, 14);
}
__m256i test_mm256_insert_epi16(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi16
- // CHECK: and i32 %{{.*}}, 15
- // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 %{{.*}}
+ // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 4
return _mm256_insert_epi16(x, b, 4);
}
__m256i test_mm256_insert_epi32(__m256i x, int b) {
// CHECK-LABEL: test_mm256_insert_epi32
- // CHECK: and i32 %{{.*}}, 7
- // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}
+ // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5
return _mm256_insert_epi32(x, b, 5);
}
__m256i test_mm256_insert_epi64(__m256i x, long long b) {
// CHECK-LABEL: test_mm256_insert_epi64
- // CHECK: and i32 %{{.*}}, 3
- // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 %{{.*}}
+ // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2
return _mm256_insert_epi64(x, b, 2);
}
// Lowering to pextrw requires optimization.
int test_mm_extract_epi16(__m128i A) {
// CHECK-LABEL: test_mm_extract_epi16
- // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
- // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
+ // CHECK: extractelement <8 x i16> %{{.*}}, i32 1
// CHECK: zext i16 %{{.*}} to i32
- return _mm_extract_epi16(A, 9);
+ return _mm_extract_epi16(A, 1);
}
__m128i test_mm_insert_epi16(__m128i A, int B) {
// CHECK-LABEL: test_mm_insert_epi16
- // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
- // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
- return _mm_insert_epi16(A, B, 8);
+ // CHECK: insertelement <8 x i16> %{{.*}}, i32 0
+ return _mm_insert_epi16(A, B, 0);
}
void test_mm_lfence() {
__m128i test_mm_insert_epi8(__m128i x, char b) {
// CHECK-LABEL: test_mm_insert_epi8
- // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0
- return _mm_insert_epi8(x, b, 16);
+ // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+ return _mm_insert_epi8(x, b, 1);
}
__m128i test_mm_insert_epi32(__m128i x, int b) {
// CHECK-LABEL: test_mm_insert_epi32
- // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
- return _mm_insert_epi32(x, b, 4);
+ // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
+ return _mm_insert_epi32(x, b, 1);
}
__m128i test_mm_insert_epi64(__m128i x, long long b) {
// CHECK-LABEL: test_mm_insert_epi64
- // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0
- return _mm_insert_epi64(x, b, 2);
+ // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
+ return _mm_insert_epi64(x, b, 1);
}
__m128 test_mm_insert_ps(__m128 x, __m128 y) {
#if NEED_SSE42
int baz(__m256i a) {
- return _mm256_extract_epi32(a, 3); // expected-error {{always_inline function '_mm256_extract_epi32' requires target feature 'sse4.2', but would be inlined into function 'baz' that is compiled without support for 'sse4.2'}}
+ return _mm256_extract_epi32(a, 3); // expected-error {{'__builtin_ia32_vec_ext_v8si' needs target feature avx}}
}
#endif
-// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -O1 -target-cpu core2 -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -O1 -target-cpu corei7 -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
void test1() {