From: Simon Pilgrim
Date: Sun, 20 Jan 2019 16:40:44 +0000 (+0000)
Subject: [X86] Replace VPCOM/VPCOMU with generic integer comparisons (llvm)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dab40ce6097a6e1809045c008fbc860ceeb54e11;p=llvm

[X86] Replace VPCOM/VPCOMU with generic integer comparisons (llvm)

These intrinsics can always be replaced with generic integer comparisons without any regression in codegen, even for -O0/-fast-isel cases.

Noticed while cleaning up vector integer comparison costs for PR40376.

A future commit will remove/autoupgrade the existing VPCOM/VPCOMU llvm intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351688 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index d0617820aa7..ea0b03b68a7 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -1908,28 +1908,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">, + def int_x86_xop_vpcomb : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">, + def int_x86_xop_vpcomw : Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">, + def int_x86_xop_vpcomd : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">, + def int_x86_xop_vpcomq : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">, + def int_x86_xop_vpcomub : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">, + def int_x86_xop_vpcomuw : Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vpcomud : GCCBuiltin<"__builtin_ia32_vpcomud">, + def int_x86_xop_vpcomud : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">, + def int_x86_xop_vpcomuq : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; diff --git a/test/CodeGen/X86/xop-intrinsics-fast-isel.ll b/test/CodeGen/X86/xop-intrinsics-fast-isel.ll index 43f59708db5..037c8f123aa 100644 --- a/test/CodeGen/X86/xop-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/xop-intrinsics-fast-isel.ll @@ -577,11 +577,11 @@ define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> - 
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %arg0, <16 x i8> %arg1, i8 0) + %cmp = icmp ult <16 x i8> %arg0, %arg1 + %res = sext <16 x i1> %cmp to <16 x i8> %bc = bitcast <16 x i8> %res to <2 x i64> ret <2 x i64> %bc } -declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_com_epu16: @@ -590,11 +590,11 @@ define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> - %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0) + %cmp = icmp ult <8 x i16> %arg0, %arg1 + %res = sext <8 x i1> %cmp to <8 x i16> %bc = bitcast <8 x i16> %res to <2 x i64> ret <2 x i64> %bc } -declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_com_epu32: @@ -603,21 +603,21 @@ define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> - %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %arg0, <4 x i32> %arg1, i8 0) + %cmp = icmp ult <4 x i32> %arg0, %arg1 + %res = sext <4 x i1> %cmp to <4 x i32> %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc } -declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_com_epu64: ; ALL: # %bb.0: ; ALL-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0 ; ALL-NEXT: ret{{[l|q]}} - %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) + %cmp = icmp ult <2 x i64> %a0, %a1 + %res = sext <2 x i1> %cmp to <2 x i64> ret <2 x i64> %res } -declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone 
define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_com_epi8: @@ -626,11 +626,11 @@ define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> - %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %arg0, <16 x i8> %arg1, i8 0) + %cmp = icmp slt <16 x i8> %arg0, %arg1 + %res = sext <16 x i1> %cmp to <16 x i8> %bc = bitcast <16 x i8> %res to <2 x i64> ret <2 x i64> %bc } -declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_com_epi16: @@ -639,11 +639,11 @@ define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> - %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0) + %cmp = icmp slt <8 x i16> %arg0, %arg1 + %res = sext <8 x i1> %cmp to <8 x i16> %bc = bitcast <8 x i16> %res to <2 x i64> ret <2 x i64> %bc } -declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_com_epi32: @@ -652,21 +652,21 @@ define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> - %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %arg0, <4 x i32> %arg1, i8 0) + %cmp = icmp slt <4 x i32> %arg0, %arg1 + %res = sext <4 x i1> %cmp to <4 x i32> %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc } -declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) { ; ALL-LABEL: test_mm_com_epi64: ; ALL: # %bb.0: ; ALL-NEXT: vpcomltq %xmm1, %xmm0, %xmm0 ; 
ALL-NEXT: ret{{[l|q]}} - %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) + %cmp = icmp slt <2 x i64> %a0, %a1 + %res = sext <2 x i1> %cmp to <2 x i64> ret <2 x i64> %res } -declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) { ; ALL-LABEL: test_mm_permute2_pd: