From: Craig Topper Date: Sat, 26 Nov 2016 07:21:00 +0000 (+0000) Subject: [AVX-512] Add masked 512-bit integer add/sub instructions to load folding tables. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=73b5f3ece91612b065317131f5384d71913281b6;p=llvm [AVX-512] Add masked 512-bit integer add/sub instructions to load folding tables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287972 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index fd145870eab..91ea11d37ee 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2141,12 +2141,28 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 }, { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 }, { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 }, + { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 }, + { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 }, + { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 }, + { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 }, + { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 }, + { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 }, + { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 }, + { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 }, { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 }, { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 }, { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 }, { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 }, { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 }, { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 }, + { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 }, + { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 }, + { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 }, + { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 }, + { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 }, + { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 }, + { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 }, + { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 }, { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 }, { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 }, { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, @@ -2259,12 +2275,28 
@@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 }, { X86::VORPDZrrk, X86::VORPDZrmk, 0 }, { X86::VORPSZrrk, X86::VORPSZrmk, 0 }, + { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 }, + { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 }, + { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 }, + { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 }, + { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 }, + { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 }, + { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 }, + { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 }, { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 }, { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 }, { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 }, { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 }, { X86::VPORDZrrk, X86::VPORDZrmk, 0 }, { X86::VPORQZrrk, X86::VPORQZrmk, 0 }, + { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 }, + { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 }, + { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 }, + { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 }, + { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 }, + { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 }, + { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 }, + { X86::VPSUBWZrrk, X86::VPSUBWZrmk, 0 }, { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 }, { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 }, { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 }, diff --git a/test/CodeGen/X86/stack-folding-int-avx512.ll b/test/CodeGen/X86/stack-folding-int-avx512.ll index 289ecef2cf1..21d6a65bffc 100644 --- a/test/CodeGen/X86/stack-folding-int-avx512.ll +++ b/test/CodeGen/X86/stack-folding-int-avx512.ll @@ -16,6 +16,28 @@ define <64 x i8> @stack_fold_paddb(<64 x i8> %a0, <64 x i8> %a1) { ret <64 x i8> %2 } +define <64 x i8> @stack_fold_paddb_mask(<64 x i8> %a0, <64 x i8> %a1, <64 x i8>* %a2, i64 %mask) { + ;CHECK-LABEL: stack_fold_paddb_mask + ;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm0},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = add <64 x i8> %a0, %a1 + %3 = bitcast i64 %mask to <64 x i1> + ; load needed to keep the operation from being scheduled about the asm block + %4 = load <64 x i8>, <64 x i8>* %a2 + %5 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> %4 + ret <64 x i8> %5 +} + +define <64 x i8> @stack_fold_paddb_maskz(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { + ;CHECK-LABEL: stack_fold_paddb_maskz + ;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = add <64 x i8> %a0, %a1 + %3 = bitcast i64 %mask to <64 x i1> + %4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer + ret <64 x i8> %4 +} + define <16 x i32> @stack_fold_paddd(<16 x i32> %a0, <16 x i32> %a1) { ;CHECK-LABEL: stack_fold_paddd ;CHECK: vpaddd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload