From: Craig Topper
Date: Sat, 11 Feb 2017 06:24:03 +0000 (+0000)
Subject: [AVX-512] Add VPSADBW instructions to load folding tables.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7334434419ad3de2b3105f93e557a5e2819a9f5d;p=llvm

[AVX-512] Add VPSADBW instructions to load folding tables.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294827 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index f6fac236368..639838c20b7 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1940,6 +1940,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPMULUDQZrr,     X86::VPMULUDQZrm,     0 },
     { X86::VPORDZrr,        X86::VPORDZrm,        0 },
     { X86::VPORQZrr,        X86::VPORQZrm,        0 },
+    { X86::VPSADBWZ512rr,   X86::VPSADBWZ512rm,   0 },
     { X86::VPSHUFBZrr,      X86::VPSHUFBZrm,      0 },
     { X86::VPSLLDZrr,       X86::VPSLLDZrm,       0 },
     { X86::VPSLLQZrr,       X86::VPSLLQZrm,       0 },
@@ -2133,6 +2134,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPORDZ256rr,     X86::VPORDZ256rm,     0 },
     { X86::VPORQZ128rr,     X86::VPORQZ128rm,     0 },
     { X86::VPORQZ256rr,     X86::VPORQZ256rm,     0 },
+    { X86::VPSADBWZ128rr,   X86::VPSADBWZ128rm,   0 },
+    { X86::VPSADBWZ256rr,   X86::VPSADBWZ256rm,   0 },
     { X86::VPSHUFBZ128rr,   X86::VPSHUFBZ128rm,   0 },
     { X86::VPSHUFBZ256rr,   X86::VPSHUFBZ256rm,   0 },
     { X86::VPSLLDZ128rr,    X86::VPSLLDZ128rm,    0 },
diff --git a/test/CodeGen/X86/stack-folding-int-avx512.ll b/test/CodeGen/X86/stack-folding-int-avx512.ll
index ef390ff8dfa..ae56a9a8750 100644
--- a/test/CodeGen/X86/stack-folding-int-avx512.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx512.ll
@@ -928,6 +928,15 @@ define <8 x i64> @stack_fold_pmovzxwq_maskz_zmm(<8 x i16> %a0, i8 %mask) {
   ret <8 x i64> %4
 }
 
+define <8 x i64> @stack_fold_psadbw(<64 x i8> %a0, <64 x i8> %a1) {
+  ;CHECK-LABEL: stack_fold_psadbw
+  ;CHECK: vpsadbw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %a0, <64 x i8> %a1)
+  ret <8 x i64> %2
+}
+declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) nounwind readnone
+
 define <64 x i8> @stack_fold_pshufb_zmm(<64 x i8> %a0, <64 x i8> %a1) {
   ;CHECK-LABEL: stack_fold_pshufb_zmm
   ;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
diff --git a/test/CodeGen/X86/stack-folding-int-avx512vl.ll b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
index 9665703b445..84ead8c0bed 100644
--- a/test/CodeGen/X86/stack-folding-int-avx512vl.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
@@ -1100,6 +1100,24 @@ define <4 x i64> @stack_fold_pmuludq_ymm_maskz(<8 x i32> %a0, <8 x i32> %a1, i8
   ret <4 x i64> %5
 }
 
+define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
+  ;CHECK-LABEL: stack_fold_psadbw
+  ;CHECK: vpsadbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
+  ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i64> @stack_fold_psadbw_ymm(<32 x i8> %a0, <32 x i8> %a1) {
+  ;CHECK-LABEL: stack_fold_psadbw_ymm
+  ;CHECK: vpsadbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
+  ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
+
 define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
   ;CHECK-LABEL: stack_fold_pshufb
   ;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload