From 09d9f45186b04f3a6961e5abad5f765e3eafbb60 Mon Sep 17 00:00:00 2001
From: Ayman Musa
Date: Sun, 28 May 2017 12:39:37 +0000
Subject: [PATCH] [X86] Adding FoldGenRegForm helper field (for the memory
 folding tables TableGen backend) to the X86Inst class and setting its value
 for the relevant instructions.

Some register-register instructions can be encoded in two different ways; this happens when both register operands can be folded (separately). For example, MOV8rr and MOV8rr_REV perform exactly the same operation but are encoded differently. Here is the relevant information about these instructions from Intel's 64-ia-32-architectures-software-developer-manual:

Opcode  Instruction   Op/En  64-Bit Mode  Compat/Leg Mode  Description
8A /r   MOV r8,r/m8   RM     Valid        Valid            Move r/m8 to r8.
88 /r   MOV r/m8,r8   MR     Valid        Valid            Move r8 to r/m8.

To enable folding of either the output or the input register, two "encodings" had to be defined, and as a result we get two 8-bit register-register move instructions. The X86 backend defines both of them: one carries the regular name (MOV8rr), while the other carries a "_REV" suffix (MOV8rr_REV), must be marked with the isCodeGenOnly flag, and is never emitted by CodeGen.

Automatically generating the memory folding tables relies on matching instruction encodings, but in cases like this, where we want to map both memory forms of the 8-bit mov (MOV8rm & MOV8mr) to MOV8rr (not to MOV8rr_REV), we need a way to point from MOV8rr_REV to the appropriate "regular" instruction, which in this case is MOV8rr. The new field enables this "pointing" mechanism, which is used by the TableGen backend when generating the memory folding tables.

Differential Revision: https://reviews.llvm.org/D32683

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304087 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrAVX512.td     | 117 +++++++++++++++++++--------
 lib/Target/X86/X86InstrArithmetic.td |  24 +++---
 lib/Target/X86/X86InstrFMA.td        |  13 ++-
 lib/Target/X86/X86InstrFormats.td    |  10 +++
 lib/Target/X86/X86InstrInfo.td       |  21 +++--
 lib/Target/X86/X86InstrMMX.td        |   5 +-
 lib/Target/X86/X86InstrSSE.td        |  66 +++++++++------
 lib/Target/X86/X86InstrXOP.td        |   8 +-
 8 files changed, 175 insertions(+), 89 deletions(-)

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 5e9434830b0..d8702693884 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2693,22 +2693,22 @@ multiclass avx512_load_vl opc, string OpcodeStr, } multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, - PatFrag st_frag, PatFrag mstore> { + PatFrag st_frag, PatFrag mstore, string Name> { let hasSideEffects = 0 in { def rr_REV : AVX512PI, EVEX; + [], _.ExeDomain>, EVEX, FoldGenData; def rrk_REV : AVX512PI, EVEX, EVEX_K; + [], _.ExeDomain>, EVEX, EVEX_K, FoldGenData; def rrkz_REV : AVX512PI, EVEX, EVEX_KZ; + [], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData; } def mr : AVX512PI opc, string OpcodeStr, X86VectorVTInfo _, multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_unaligned, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_unaligned, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_unaligned, Name#Z128>, EVEX_V128; } } multiclass 
avx512_alignedstore_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_aligned512, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_aligned256, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_aligned128, Name#Z128>, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, - HasAVX512>, PS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVAPS">, + PS, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512>, avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVAPD">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>, + avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, + "VMOVUPS">, PS, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, + avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, + "VMOVUPD">, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512>, PD, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQA32">, + PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQA64">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, - HasBWI>, XD, EVEX_CD8<8, CD8VF>; + avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, + HasBWI, "VMOVDQU8">, + XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, - HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>; + HasBWI, "VMOVDQU16">, + XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, - HasAVX512>, XS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQU32">, + XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, - HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQU64">, + XS, VEX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need // to load or store from a ZMM register instead. 
These are converted in @@ -3354,17 +3366,52 @@ def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; -let hasSideEffects = 0 in -defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2), - "vmovss.s", "$src2, $src1", "$src1, $src2", []>, - XS, EVEX_4V, VEX_LIG; - -let hasSideEffects = 0 in -defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2), - "vmovsd.s", "$src2, $src1", "$src1, $src2", []>, - XD, EVEX_4V, VEX_LIG, VEX_W; +let hasSideEffects = 0 in { + def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrr">; + +let Constraints = "$src0 = $dst" in + def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, + VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrk">; + + def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrkz">; + + def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W, + FoldGenData<"VMOVSDZrr">; + +let Constraints = "$src0 = $dst" in + def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, + VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrk">; + + def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.KRCWM:$mask, VR128X:$src1, + FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrkz">; +} let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { @@ -8830,7 +8877,7 @@ multiclass avx512_extract_elt_w { def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX, TAPD; + EVEX, TAPD, FoldGenData; defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 66382014f6e..e38bbc9b3d3 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -964,10 +964,10 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_Rev; - def NAME#16rr_REV : BinOpRR_Rev; - def NAME#32rr_REV : BinOpRR_Rev; - def NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#32rr_REV : 
BinOpRR_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_Rev, FoldGenData; def NAME#8rm : BinOpRM_RF; def NAME#16rm : BinOpRM_RF; @@ -1049,10 +1049,10 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_RFF_Rev; - def NAME#16rr_REV : BinOpRR_RFF_Rev; - def NAME#32rr_REV : BinOpRR_RFF_Rev; - def NAME#64rr_REV : BinOpRR_RFF_Rev; + def NAME#8rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_RFF_Rev, FoldGenData; def NAME#8rm : BinOpRM_RFF; def NAME#16rm : BinOpRM_RFF; @@ -1129,10 +1129,10 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev; - def NAME#16rr_REV : BinOpRR_F_Rev; - def NAME#32rr_REV : BinOpRR_F_Rev; - def NAME#64rr_REV : BinOpRR_F_Rev; + def NAME#8rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_F_Rev, FoldGenData; def NAME#8rm : BinOpRM_F; def NAME#16rm : BinOpRM_F; diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 1941ae57f0f..3a3cdc9fa57 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -297,7 +297,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, - VEX_LIG; + VEX_LIG, FoldGenData; } multiclass fma4s_int opc, string OpcodeStr, Operand memop, @@ -321,6 +321,12 @@ let isCodeGenOnly = 1 in { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG; +let hasSideEffects = 0 in + def rr_Int_REV : FMA4, VEX_LIG, FoldGenData; } // isCodeGenOnly = 1 } @@ -372,12 +378,13 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4; + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, + FoldGenData; def Yrr_REV : FMA4, - VEX_L; + VEX_L, FoldGenData; } // isCodeGenOnly = 1 } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index c2fe786732d..bfcbf71d252 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -225,6 +225,12 @@ class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class XOP { Encoding OpEnc = EncXOP; } class XOP_4V : XOP { bit hasVEX_4V = 1; } +// Specify the alternative register form instruction to replace the current +// instruction in case it was picked during generation of memory folding tables +class FoldGenData { + string FoldGenRegForm = _RegisterForm; +} + class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, InstrItinClass itin, @@ -304,6 +310,10 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, CD8_EltSize, !srl(VectSize, CD8_Form{1-0}))), 0); + // Used in the memory folding generation (TableGen backend) to point to an alternative + // instruction to replace the current one in case it got picked during generation. + string FoldGenRegForm = ?; + // TSFlags layout should be kept in sync with X86BaseInfo.h. 
let TSFlags{6-0} = FormBits; let TSFlags{8-7} = OpSizeBits; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c3aa08c3ce5..fab70e918b8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1438,11 +1438,14 @@ def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), // Longer forms that use a ModR/M byte. Needed for disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8ri">; def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16ri">; def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32ri">; } } // SchedRW @@ -1565,13 +1568,17 @@ def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8rr">; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16rr">; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32rr">; def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV64rr">; } let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index dc3800ce381..2c047722db2 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -248,7 +248,8 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (MMX_X86movd2w (x86mmx VR64:$src)))], - IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; + IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>, + FoldGenData<"MMX_MOVD64rr">; let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -277,7 +278,7 @@ def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; + IIC_MMX_MOVQ_RR>, FoldGenData<"MMX_MOVQ64rr">; } } // SchedRW diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f73d85e7e01..a3e67720930 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -507,7 +507,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, multiclass sse12_move_rr { + string asm_opr, Domain d = GenericDomain, + string 
Name> { let isCommutable = 1 in def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), @@ -521,15 +522,17 @@ multiclass sse12_move_rr, Sched<[WriteFShuffle]>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>, + FoldGenData; } multiclass sse12_move { + Domain d = GenericDomain, string Name> { // AVX defm V#NAME : sse12_move_rr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d, + "V"#Name>, VEX_4V, VEX_LIG, VEX_WIG; def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -539,7 +542,7 @@ multiclass sse12_move; + "\t{$src2, $dst|$dst, $src2}", d, Name>; } def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -563,9 +566,9 @@ multiclass sse12_move_rm, XS; + SSEPackedSingle, "MOVSS">, XS; defm MOVSD : sse12_move, XD; + SSEPackedDouble, "MOVSD">, XD; let canFoldAsLoad = 1, isReMaterializable = 1 in { defm MOVSS : sse12_move_rm, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPSrr">; def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPDrr">; def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPSrr">; def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPDrr">; def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPSYrr">; def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPDYrr">; def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPSYrr">; def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPDYrr">; } // Aliases to help the assembler pick two byte VEX encodings by swapping the @@ -938,16 +949,16 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteFShuffle] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPSrr">; def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPDrr">; def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPSrr">; def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPDrr">; } let Predicates = [HasAVX, NoVLX] in { 
@@ -3752,17 +3763,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQArr">; def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQAYrr">; def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">; def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQUYrr">; } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, @@ -3820,11 +3833,12 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVDQArr">; def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>, + FoldGenData<"MOVDQUrr">; } } // SchedRW @@ -5915,7 +5929,7 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, Sched<[WriteShuffle]>; + []>, Sched<[WriteShuffle]>, FoldGenData; let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteShuffleLd, WriteRMW] in diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 53224431c0e..5dde2d07bab 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -111,7 +111,7 @@ multiclass xop3op opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - XOP_4V, VEX_W, Sched<[WriteVarVecShift]>; + XOP_4V, VEX_W, Sched<[WriteVarVecShift]>, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -282,7 +282,7 @@ multiclass xop4op opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -318,7 +318,7 @@ multiclass xop4op_int opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -357,7 +357,7 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W; + []>, VEX_W, FoldGenData; } let ExeDomain = SSEPackedDouble in { -- 2.40.0
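
The mechanism this patch introduces can be condensed into a short, standalone TableGen sketch. The class name FoldGenData and the field name FoldGenRegForm below match the patch; MyInst is a hypothetical stand-in for the real X86Inst class, and the two defs are simplified placeholders for MOV8rr/MOV8rr_REV, so this is only an illustration of how a "_REV" definition points back at its regular twin, not an excerpt of the actual .td files:

// Mix-in that records the name of the "regular" register form. The memory
// folding tables TableGen backend reads FoldGenRegForm and maps the memory
// forms of an instruction to that record instead of to the _REV record.
class FoldGenData<string _RegisterForm> {
  string FoldGenRegForm = _RegisterForm;
}

// Hypothetical stand-in for X86Inst: the field defaults to '?' (uninitialized),
// so only instructions that really have a reversed-encoding twin carry a value.
class MyInst<string asmstr> {
  string AsmString = asmstr;
  string FoldGenRegForm = ?;
}

def MOV8rr     : MyInst<"movb">;          // regular form, 88 /r (MR encoding)
def MOV8rr_REV : MyInst<"movb">,          // 8A /r (RM encoding), isCodeGenOnly in the backend
                 FoldGenData<"MOV8rr">;   // fold MOV8rm/MOV8mr against MOV8rr, not this record

Running llvm-tblgen -print-records over such a file shows FoldGenRegForm = "MOV8rr" only on the _REV record. The real patch achieves the same effect by appending FoldGenData<"MOV8rr"> to MOV8rr_REV in X86InstrInfo.td, and analogously for the SSE/AVX/AVX-512 "_REV" definitions shown in the hunks above.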