From 23a0d7597f1eca70f5b2c64b7f260f2ba01ee09c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 13 Jun 2017 07:13:47 +0000 Subject: [PATCH] [AVX-512] Mark masked version of vpcmpeq as being commutable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305275 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 1 + test/CodeGen/X86/stack-folding-int-avx512.ll | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d8702693884..56ae8f6cc79 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1631,6 +1631,7 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2)))))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = IsCommutable in def rrk : AVX512BI %a0, <16 x i32> %a1, <16 x i32>* ret i16 %7 } +define i16 @stack_fold_pcmpeqd_mask_commuted(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) { + ;CHECK-LABEL: stack_fold_pcmpeqd_mask_commuted + ;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"() + ; load and add are here to keep the operations below the side effecting block and to avoid folding the wrong load + %2 = load <16 x i32>, <16 x i32>* %a2 + %3 = add <16 x i32> %a1, %2 + %4 = bitcast i16 %mask to <16 x i1> + %5 = icmp eq <16 x i32> %a0, %3 + %6 = and <16 x i1> %4, %5 + %7 = bitcast <16 x i1> %6 to i16 + ret i16 %7 +} + define i16 @stack_fold_pcmpled_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) { ;CHECK-LABEL: stack_fold_pcmpled_mask ;CHECK: vpcmpled {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload -- 2.50.1