Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
}
+ // These exist so that the builtin that takes an immediate can be bounds
+ // checked by clang to avoid passing bad immediates to the backend. Since
+ // AVX has a larger immediate than SSE, we would need separate builtins to
+ // do the different bounds checking. Rather than create a clang-specific,
+ // SSE-only builtin, this implements eight separate builtins to match the
+ // gcc implementation.
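+ // For example, __builtin_ia32_cmpltss(a, b) below lowers to a call to
+ // llvm.x86.sse.cmp.ss(a, b, i8 1), with the immediate hard-coded here
+ // instead of taken from user code.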
+ auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
+ Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Ops);
+ };
+
+ // For the vector forms of FP comparisons, translate the builtins directly to
+ // IR.
+ // TODO: The builtins could be removed if the SSE header files used vector
+ // extension comparisons directly (vector ordered/unordered may need
+ // additional support via __builtin_isnan()).
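+ // As a hypothetical sketch (not part of this patch), the header could do:
+ //   static __inline__ __m128 _mm_cmpeq_ps(__m128 __a, __m128 __b) {
+ //     return (__m128)((__v4sf)__a == (__v4sf)__b);
+ //   }
+ // which would emit the same fcmp/sext/bitcast sequence with no builtin.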
+ llvm::VectorType *V2F64 =
+ llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2);
+ llvm::VectorType *V4F32 =
+ llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4);
+
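+ // For example, getVectorFCmpIR(CmpInst::FCMP_OEQ, V4F32) emits:
+ //   %cmp = fcmp oeq <4 x float> %a, %b
+ //   %sext = sext <4 x i1> %cmp to <4 x i32>
+ //   %bc = bitcast <4 x i32> %sext to <4 x float>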
+ auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
+ llvm::VectorType *FPVecTy) {
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+ Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+ return Builder.CreateBitCast(Sext, FPVecTy);
+ };
+
switch (BuiltinID) {
default: return nullptr;
case X86::BI__builtin_cpu_supports: {
Ops[0]);
return Builder.CreateExtractValue(Call, 1);
}
- // SSE comparison intrisics
+
+ // SSE packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
+ return getVectorFCmpIR(CmpInst::FCMP_OEQ, V4F32);
case X86::BI__builtin_ia32_cmpltps:
+ return getVectorFCmpIR(CmpInst::FCMP_OLT, V4F32);
case X86::BI__builtin_ia32_cmpleps:
+ return getVectorFCmpIR(CmpInst::FCMP_OLE, V4F32);
case X86::BI__builtin_ia32_cmpunordps:
+ return getVectorFCmpIR(CmpInst::FCMP_UNO, V4F32);
case X86::BI__builtin_ia32_cmpneqps:
+ return getVectorFCmpIR(CmpInst::FCMP_UNE, V4F32);
case X86::BI__builtin_ia32_cmpnltps:
+ return getVectorFCmpIR(CmpInst::FCMP_UGE, V4F32);
case X86::BI__builtin_ia32_cmpnleps:
+ return getVectorFCmpIR(CmpInst::FCMP_UGT, V4F32);
case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
+ return getVectorFCmpIR(CmpInst::FCMP_ORD, V4F32);
case X86::BI__builtin_ia32_cmpeqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OEQ, V2F64);
case X86::BI__builtin_ia32_cmpltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLT, V2F64);
case X86::BI__builtin_ia32_cmplepd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLE, V2F64);
case X86::BI__builtin_ia32_cmpunordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNO, V2F64);
case X86::BI__builtin_ia32_cmpneqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNE, V2F64);
case X86::BI__builtin_ia32_cmpnltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGE, V2F64);
case X86::BI__builtin_ia32_cmpnlepd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGT, V2F64);
case X86::BI__builtin_ia32_cmpordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_ORD, V2F64);
+
+ // SSE scalar comparison intrinsics
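+ // The i8 immediate encodes the predicate: 0 = EQ, 1 = LT, 2 = LE,
+ // 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE, 7 = ORD.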
+ case X86::BI__builtin_ia32_cmpeqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
+ case X86::BI__builtin_ia32_cmpltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
+ case X86::BI__builtin_ia32_cmpless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
+ case X86::BI__builtin_ia32_cmpunordss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
+ case X86::BI__builtin_ia32_cmpneqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
+ case X86::BI__builtin_ia32_cmpnltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
+ case X86::BI__builtin_ia32_cmpnless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
+ case X86::BI__builtin_ia32_cmpordss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
case X86::BI__builtin_ia32_cmpeqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
case X86::BI__builtin_ia32_cmpltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
case X86::BI__builtin_ia32_cmplesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
case X86::BI__builtin_ia32_cmpunordsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
case X86::BI__builtin_ia32_cmpneqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
case X86::BI__builtin_ia32_cmpnltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
case X86::BI__builtin_ia32_cmpnlesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
case X86::BI__builtin_ia32_cmpordsd:
- // These exist so that the builtin that takes an immediate can be bounds
- // checked by clang to avoid passing bad immediates to the backend. Since
- // AVX has a larger immediate than SSE we would need separate builtins to
- // do the different bounds checking. Rather than create a clang specific
- // SSE only builtin, this implements eight separate builtins to match gcc
- // implementation.
-
- // Choose the immediate.
- unsigned Imm;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpeqsd:
- Imm = 0;
- break;
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmpltsd:
- Imm = 1;
- break;
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmplesd:
- Imm = 2;
- break;
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpunordsd:
- Imm = 3;
- break;
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpneqsd:
- Imm = 4;
- break;
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnltsd:
- Imm = 5;
- break;
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpnlesd:
- Imm = 6;
- break;
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpordpd:
- case X86::BI__builtin_ia32_cmpordsd:
- Imm = 7;
- break;
- }
-
- // Choose the intrinsic ID.
- const char *name;
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpordps:
- name = "cmpps";
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
- name = "cmpss";
- ID = Intrinsic::x86_sse_cmp_ss;
- break;
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpordpd:
- name = "cmppd";
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmpeqsd:
- case X86::BI__builtin_ia32_cmpltsd:
- case X86::BI__builtin_ia32_cmplesd:
- case X86::BI__builtin_ia32_cmpunordsd:
- case X86::BI__builtin_ia32_cmpneqsd:
- case X86::BI__builtin_ia32_cmpnltsd:
- case X86::BI__builtin_ia32_cmpnlesd:
- case X86::BI__builtin_ia32_cmpordsd:
- name = "cmpsd";
- ID = Intrinsic::x86_sse2_cmp_sd;
- break;
- }
-
- Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops, name);
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
}
}
__m128d test_mm_i32gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
// CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
return _mm_i32gather_pd(b, c, 2);
}
__m128 test_mm_i32gather_ps(float const *b, __m128i c) {
// CHECK-LABEL: test_mm_i32gather_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
// CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm_i32gather_ps(b, c, 2);
}
__m128d test_mm_i64gather_pd(double const *b, __m128i c) {
// CHECK-LABEL: test_mm_i64gather_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
// CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
return _mm_i64gather_pd(b, c, 2);
}
__m128 test_mm_i64gather_ps(float const *b, __m128i c) {
// CHECK-LABEL: test_mm_i64gather_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
// CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> undef, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm_i64gather_ps(b, c, 2);
}
__m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
// CHECK-LABEL: test_mm256_i64gather_ps
- // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
// CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
return _mm256_i64gather_ps(b, c, 2);
}
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpeq_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpeq_ps(__a, __b);
}
__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpge_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpge_ps(__a, __b);
}
__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpgt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpgt_ps(__a, __b);
}
__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmple_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmple_ps(__a, __b);
}
__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmplt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmplt_ps(__a, __b);
}
__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpneq_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
+ // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpneq_ps(__a, __b);
}
__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpnge_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnge_ps(__a, __b);
}
__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpngt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpngt_ps(__a, __b);
}
__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpnle_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnle_ps(__a, __b);
}
__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpnlt_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpnlt_ps(__a, __b);
}
__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpord_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
+ // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpord_ps(__a, __b);
}
__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
// CHECK-LABEL: @test_mm_cmpunord_ps
- // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
+ // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+ // CHECK-NEXT: ret <4 x float> [[BC]]
return _mm_cmpunord_ps(__a, __b);
}
__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpeq_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
+ // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpeq_pd(A, B);
}
__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpge_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpge_pd(A, B);
}
__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpgt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpgt_pd(A, B);
}
__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmple_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+ // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmple_pd(A, B);
}
__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmplt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
+ // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmplt_pd(A, B);
}
__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpneq_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
+ // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpneq_pd(A, B);
}
__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnge_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpnge_pd(A, B);
}
__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpngt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpngt_pd(A, B);
}
__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnle_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
+ // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpnle_pd(A, B);
}
__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpnlt_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
+ // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpnlt_pd(A, B);
}
__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpord_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
+ // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpord_pd(A, B);
}
__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_cmpunord_pd
- // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
+ // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
+ // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+ // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+ // CHECK-NEXT: ret <2 x double> [[BC]]
return _mm_cmpunord_pd(A, B);
}