From: Craig Topper Date: Sun, 24 Sep 2017 17:28:14 +0000 (+0000) Subject: [X86] Add IFMA instructions to the load folding tables and make them commutable for... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f878b2615510b28751030dbc6b88a57ac814f08d;p=llvm [X86] Add IFMA instructions to the load folding tables and make them commutable for the multiply operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314080 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 1155b6dcb49..a8b7c80cdab 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6484,7 +6484,7 @@ multiclass avx512_pmadd52_rm opc, string OpcodeStr, SDNode OpNode, defm r: AVX512_maskable_3src, + (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1, 1>, AVX512FMA3Base; defm m: AVX512_maskable_3src@test_int_x86_avx512_maskz_vpmadd52l_uq_512(<8 x i64> %x0, <8 x %res6 = add <8 x i64> %res5, %res4 ret <8 x i64> %res6 } + +define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr) { +; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq + + %x2 = load <8 x i64>, <8 x i64>* %x2ptr + %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2) { +; CHECK-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 +; CHECK-NEXT: retq + + %x1 = load <8 x i64>, <8 x i64>* %x1ptr + %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + + %x2 = load <8 x i64>, <8 x i64>* %x2ptr + %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} +; CHECK-NEXT: retq + + %x1 = load <8 x i64>, <8 x i64>* %x1ptr + %res = call <8 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2ptr, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + + %x2 = load <8 x i64>, <8 x i64>* %x2ptr + %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute(<8 x i64> %x0, <8 x i64>* %x1ptr, <8 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmadd52huq (%rdi), %zmm1, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + + %x1 = load <8 x i64>, <8 x i64>* %x1ptr + %res = call <8 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +}