From: Craig Topper
Date: Mon, 6 Feb 2017 05:12:14 +0000 (+0000)
Subject: [AVX-512] Add VPSLLDQ/VPSRLDQ to load folding tables.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e7499853bfaf3547a65348484220a1d65762e4da;p=llvm

[AVX-512] Add VPSLLDQ/VPSRLDQ to load folding tables.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294170 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index abd6160a8b2..bc786b1e9c9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -904,6 +904,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
     { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
     { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
+    { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 },
+    { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 },
 
     // AVX-512 foldable instructions (256-bit versions)
     { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
@@ -941,6 +943,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
     { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
     { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
+    { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 },
+    { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 },
 
     // AVX-512 foldable instructions (128-bit versions)
     { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
@@ -975,6 +979,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
     { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
     { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
+    { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 },
+    { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 },
 
     // F16C foldable instructions
     { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
diff --git a/test/CodeGen/X86/stack-folding-int-avx512.ll b/test/CodeGen/X86/stack-folding-int-avx512.ll
index 0bbbe7b54b3..c99e8136929 100644
--- a/test/CodeGen/X86/stack-folding-int-avx512.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx512.ll
@@ -1154,3 +1154,19 @@ define <64 x i8> @stack_fold_palignr_maskz(<64 x i8> %a0, <64 x i8> %a1, i64 %ma
   %4 = select <64 x i1> %3, <64 x i8> %2, <64 x i8> zeroinitializer
   ret <64 x i8> %4
 }
+
+define <64 x i8> @stack_fold_pslldq(<64 x i8> %a, <64 x i8> %b) {
+  ;CHECK-LABEL: stack_fold_pslldq
+  ;CHECK: vpslldq $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 64, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 64, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 64, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 64, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
+  ret <64 x i8> %2
+}
+
+define <64 x i8> @stack_fold_psrldq(<64 x i8> %a, <64 x i8> %b) {
+  ;CHECK-LABEL: stack_fold_psrldq
+  ;CHECK: vpsrldq $2, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 64, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 64, i32 64, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 64, i32 64, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64, i32 64>
+  ret <64 x i8> %2
+}
diff --git a/test/CodeGen/X86/stack-folding-int-avx512vl.ll b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
index ff82be346b0..272f9251fe9 100644
--- a/test/CodeGen/X86/stack-folding-int-avx512vl.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
@@ -1533,3 +1533,35 @@ define <32 x i8> @stack_fold_palignr_maskz(<32 x i8> %a0, <32 x i8> %a1, i32 %ma
   %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> zeroinitializer
   ret <32 x i8> %4
 }
+
+define <16 x i8> @stack_fold_pslldq(<16 x i8> %a) {
+  ;CHECK-LABEL: stack_fold_pslldq
+  ;CHECK: vpslldq $12, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+  ret <16 x i8> %2
+}
+
+define <32 x i8> @stack_fold_pslldq_ymm(<32 x i8> %a) {
+  ;CHECK-LABEL: stack_fold_pslldq_ymm
+  ;CHECK: vpslldq $15, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48>
+  ret <32 x i8> %2
+}
+
+define <16 x i8> @stack_fold_psrldq(<16 x i8> %a) {
+  ;CHECK-LABEL: stack_fold_psrldq
+  ;CHECK: vpsrldq $12, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 29, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i8> %2
+}
+
+define <32 x i8> @stack_fold_psrldq_ymm(<32 x i8> %a) {
+  ;CHECK-LABEL: stack_fold_psrldq_ymm
+  ;CHECK: vpsrldq $15, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 47, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 63, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %2
+}