From: Craig Topper Date: Sun, 27 Nov 2016 08:55:31 +0000 (+0000) Subject: [AVX-512] Add masked EVEX vpmovzx/sx instructions to load folding tables. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9a7445365cd5d6f0660c55fd955a49c5edb7e9d1;p=llvm [AVX-512] Add masked EVEX vpmovzx/sx instructions to load folding tables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287995 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e966e2ae980..7ef7d7657ce 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2028,6 +2028,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 }, { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 }, + // AVX-512 masked foldable instructions + { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 }, + { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, 0 }, + { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 }, + { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 }, + { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 }, + { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 }, + { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 }, + { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, 0 }, + { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 }, + { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 }, + { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 }, + { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 }, + + // AVX-512VL 256-bit masked foldable instructions + { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, 0 }, + { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, 0 }, + { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 }, + { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 }, + { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 }, + { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, 0 }, + { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, 0 }, + { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, 0 }, + { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 }, + { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 }, + { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 }, + { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, 0 }, + + // AVX-512VL 128-bit masked foldable instructions + { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, 0 }, + { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, 0 }, + { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, 0 }, + { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, 0 }, + { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, 0 }, + { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, 0 }, + { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, 0 }, + { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, 0 }, + { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, 0 }, + { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, 0 }, + { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, 0 }, + { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, 0 }, + // AES foldable instructions { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 }, @@ -2305,6 +2347,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 }, { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 }, { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 }, + + // AVX-512 masked foldable instructions + { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 }, + { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, 0 }, + { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 }, + { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 }, + { X86::VPMOVSXWDZrrk, 
X86::VPMOVSXWDZrmk, 0 }, + { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 }, + { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 }, + { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, 0 }, + { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 }, + { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 }, + { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 }, + { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 }, + + // AVX-512VL 256-bit masked foldable instructions + { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, 0 }, + { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, 0 }, + { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 }, + { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 }, + { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 }, + { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, 0 }, + { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, 0 }, + { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, 0 }, + { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 }, + { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 }, + { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 }, + { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, 0 }, + + // AVX-512VL 128-bit masked foldable instructions + { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, 0 }, + { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, 0 }, + { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, 0 }, + { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, 0 }, + { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, 0 }, + { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, 0 }, + { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, 0 }, + { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, 0 }, + { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, 0 }, + { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, 0 }, + { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, 0 }, + { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, 0 }, }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { diff --git a/test/CodeGen/X86/stack-folding-int-avx512.ll b/test/CodeGen/X86/stack-folding-int-avx512.ll index dcfaacecb67..600bfe43413 100644 --- a/test/CodeGen/X86/stack-folding-int-avx512.ll +++ b/test/CodeGen/X86/stack-folding-int-avx512.ll @@ -583,6 +583,26 @@ define <8 x i64> @stack_fold_pmovsxwq_zmm(<8 x i16> %a0) { ret <8 x i64> %2 } +define <8 x i64> @stack_fold_pmovsxwq_mask_zmm(<8 x i64> %passthru, <8 x i16> %a0, i8 %mask) { + ;CHECK-LABEL: stack_fold_pmovsxwq_mask_zmm + ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = sext <8 x i16> %a0 to <8 x i64> + %3 = bitcast i8 %mask to <8 x i1> + %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %passthru + ret <8 x i64> %4 +} + +define <8 x i64> @stack_fold_pmovsxwq_maskz_zmm(<8 x i16> %a0, i8 %mask) { + ;CHECK-LABEL: stack_fold_pmovsxwq_maskz_zmm + ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = sext <8 x i16> %a0 to <8 x i64> + %3 = bitcast 
i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  ret <8 x i64> %4
+}
+
 define <16 x i32> @stack_fold_pmovzxbd_zmm(<16 x i8> %a0) {
   ;CHECK-LABEL: stack_fold_pmovzxbd_zmm
   ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
@@ -631,3 +651,23 @@ define <8 x i64> @stack_fold_pmovzxwq_zmm(<8 x i16> %a0) {
   %2 = zext <8 x i16> %a0 to <8 x i64>
   ret <8 x i64> %2
 }
+
+define <8 x i64> @stack_fold_pmovzxwq_mask_zmm(<8 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_mask_zmm
+  ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = zext <8 x i16> %a0 to <8 x i64>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> %passthru
+  ret <8 x i64> %4
+}
+
+define <8 x i64> @stack_fold_pmovzxwq_maskz_zmm(<8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_maskz_zmm
+  ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = zext <8 x i16> %a0 to <8 x i64>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> zeroinitializer
+  ret <8 x i64> %4
+}
diff --git a/test/CodeGen/X86/stack-folding-int-avx512vl.ll b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
index 09304e3734d..6446ee1ac0a 100644
--- a/test/CodeGen/X86/stack-folding-int-avx512vl.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
@@ -908,3 +908,27 @@ define <4 x i64> @stack_fold_pmovzxwq_ymm(<8 x i16> %a0) {
   %3 = zext <4 x i16> %2 to <4 x i64>
   ret <4 x i64> %3
 }
+
+define <4 x i64> @stack_fold_pmovzxwq_maskz_ymm(<8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_maskz_ymm
+  ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = zext <4 x i16> %2 to <4 x i64>
+  %4 = bitcast i8 %mask to <8 x i1>
+  %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> zeroinitializer
+  ret <4 x i64> %6
+}
+
+define <4 x i64> @stack_fold_pmovzxwq_mask_ymm(<4 x i64> %passthru, <8 x i16> %a0, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_pmovzxwq_mask_ymm
+  ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = zext <4 x i16> %2 to <4 x i64>
+  %4 = bitcast i8 %mask to <8 x i1>
+  %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %passthru
+  ret <4 x i64> %6
+}