From: Simon Pilgrim Date: Tue, 28 Nov 2017 19:39:47 +0000 (+0000) Subject: [X86][SSE] Add SSE_HADDSUB/SSE_PABS/SSE_PALIGN OpndItins X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=219ddf4678749fb554e5bb758a57676929cc0179;p=llvm [X86][SSE] Add SSE_HADDSUB/SSE_PABS/SSE_PALIGN OpndItins Update multi-classes to take the scheduling OpndItins instead of hard coding it. Will be reused in the AVX512 equivalents. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319209 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ac465e3963e..59b00785af3 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4831,77 +4831,82 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { // SSE3 Instructions //===---------------------------------------------------------------------===// +let Sched = WriteFHAdd in +def SSE_HADDSUB : OpndItins< + IIC_SSE_HADDSUB_RR, IIC_SSE_HADDSUB_RM +>; + // Horizontal ops multiclass S3D_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, - bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, OpndItins itins, + PatFrag ld_frag, bit Is2Addr = 1> { def rr : S3DI, - Sched<[WriteFHAdd]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : S3DI, Sched<[WriteFHAddLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, - bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, OpndItins itins, + PatFrag ld_frag, bit Is2Addr = 1> { def rr : S3I, - Sched<[WriteFHAdd]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : S3I, Sched<[WriteFHAddLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - X86fhadd, loadv4f32, 0>, VEX_4V, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - X86fhsub, loadv4f32, 0>, VEX_4V, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - X86fhadd, loadv2f64, 0>, VEX_4V, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - X86fhsub, loadv2f64, 0>, VEX_4V, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in { defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd, - memopv4f32>; + SSE_HADDSUB, memopv4f32>; defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub, - memopv4f32>; + SSE_HADDSUB, memopv4f32>; } let ExeDomain = SSEPackedDouble in { defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd, - memopv2f64>; + SSE_HADDSUB, memopv2f64>; defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub, - memopv2f64>; + SSE_HADDSUB, memopv2f64>; } } @@ -4909,59 +4914,63 @@ let Constraints = "$src1 = $dst" in { // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in +def SSE_PABS : OpndItins< + IIC_SSE_PABS_RR, IIC_SSE_PABS_RM +>; /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm opc, string OpcodeStr, ValueType vt, - SDNode OpNode, PatFrag ld_frag> { + SDNode OpNode, OpndItins itins, PatFrag ld_frag> { def rr : SS38I, Sched<[WriteVecALU]>; + itins.rr>, Sched<[itins.Sched]>; def rm : SS38I, Sched<[WriteVecALULd]>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_y opc, string OpcodeStr, ValueType vt, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { def Yrr : SS38I, - Sched<[WriteVecALU]>; + [(set VR256:$dst, (vt (OpNode VR256:$src)))], itins.rr>, + Sched<[itins.Sched]>; def Yrm : SS38I, - Sched<[WriteVecALULd]>; + (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))], itins.rm>, + Sched<[itins.Sched.Folded]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; } -defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>; -defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>; -defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; +defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SSE_PABS, memopv2i64>; +defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SSE_PABS, memopv2i64>; +defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SSE_PABS, memopv2i64>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -5181,9 +5190,14 @@ defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16, // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// +let Sched = WriteShuffle in +def SSE_PALIGN : OpndItins< + IIC_SSE_PALIGNRR, IIC_SSE_PALIGNRM +>; + multiclass ssse3_palignr { + OpndItins itins, bit Is2Addr = 1> { let hasSideEffects = 0 in { def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), @@ -5192,7 +5206,7 @@ multiclass ssse3_palignr, Sched<[WriteShuffle]>; + itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1 in def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$src3), @@ -5203,19 +5217,19 @@ multiclass ssse3_palignr, Sched<[WriteShuffleLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, - i128mem, 0>, VEX_4V, VEX_WIG; + i128mem, SSE_PALIGN, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, - i256mem, 0>, VEX_4V, VEX_L, VEX_WIG; + i256mem, SSE_PALIGN, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, - i128mem>; + i128mem, SSE_PALIGN>; //===---------------------------------------------------------------------===// // SSSE3 - Thread synchronization