From: Craig Topper Date: Mon, 6 Feb 2017 03:17:58 +0000 (+0000) Subject: [AVX-512] Add VSHUFPS/PD to load folding tables. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1464e37f6d9783fdf1588955e03bb92ceb7a69e6;p=llvm [AVX-512] Add VSHUFPS/PD to load folding tables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294168 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index be03b8fe884..ca29885f521 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2115,6 +2115,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 }, { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 }, { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 }, + { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 }, + { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 }, + { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 }, + { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 }, { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 }, { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 }, { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 }, @@ -2415,6 +2419,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 }, { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 }, { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 }, + { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 }, + { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 }, { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 }, { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE }, @@ -2502,6 +2508,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 }, { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 }, { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 }, + { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 }, + { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 }, { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 }, { X86::VSUBPSZ256rrkz, 
X86::VSUBPSZ256rmkz, 0 }, { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 }, @@ -2579,6 +2587,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 }, { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 }, { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 }, + { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 }, + { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 }, { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 }, { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 }, { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 }, @@ -2782,6 +2792,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 }, { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 }, { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 }, + { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 }, + { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 }, { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 }, { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 }, { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE }, @@ -2883,6 +2895,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 }, { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 }, { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 }, + { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 }, + { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 }, { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 }, { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 }, { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 }, @@ -2974,6 +2988,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 }, { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 }, { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 }, + { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 }, + { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 }, { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 }, { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 }, { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 }, diff --git 
a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll index c6ae85dda43..292829a01cb 100644 --- a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll +++ b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll @@ -402,6 +402,45 @@ define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) { ret <8 x float> %6 } +define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) { + ;CHECK-LABEL: stack_fold_shufps + ;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> + ret <4 x float> %2 +} + +define <4 x float> @stack_fold_shufps_mask(<4 x float>* %passthru, <4 x float> %a0, <4 x float> %a1, i8 %mask) { + ;CHECK-LABEL: stack_fold_shufps_mask + ;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> + %3 = bitcast i8 %mask to <8 x i1> + %4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %5 = load <4 x float>, <4 x float>* %passthru + %6 = select <4 x i1> %4, <4 x float> %2, <4 x float> %5 + ret <4 x float> %6 +} + +define <4 x float> @stack_fold_shufps_maskz(<4 x float> %a0, <4 x float> %a1, i8 %mask) { + 
;CHECK-LABEL: stack_fold_shufps_maskz + ;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> + %3 = bitcast i8 %mask to <8 x i1> + %4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %5 = select <4 x i1> %4, <4 x float> %2, <4 x float> zeroinitializer + ret <4 x float> %5 +} + +define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) { + ;CHECK-LABEL: stack_fold_shufps_ymm + ;CHECK: vshufps $148, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 13, i32 14> + ret <8 x float> %2 +} + define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) { ;CHECK-LABEL: stack_fold_subpd ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload