DAG/GlobalISel: Correct type profile of bitcount ops

author Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 13 Sep 2019 00:11:14 +0000 (00:11 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 13 Sep 2019 00:11:14 +0000 (00:11 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 13 Sep 2019 00:11:14 +0000 (00:11 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 13 Sep 2019 00:11:14 +0000 (00:11 +0000)
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td

index 3c53d550a00960cc020db7f46551b45e28da559c..1abb47afd9a7571a455d5ef26d98680982a4f04e 100644 (file)
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -137,9 +137,12 @@ def SDTFPSignOp : SDTypeProfile<1, 2, [     // fcopysign.
  def SDTFPTernaryOp : SDTypeProfile<1, 3, [  // fmadd, fnmsub, etc.
    SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>
  ]>;
-def SDTIntUnaryOp : SDTypeProfile<1, 1, [   // ctlz, cttz
+def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse
    SDTCisSameAs<0, 1>, SDTCisInt<0>
  ]>;
+def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [   // ctlz, cttz, ctpop (and *_zero_undef); result/operand int types may differ
+  SDTCisInt<0>, SDTCisInt<1>
+]>;
  def SDTIntExtendOp : SDTypeProfile<1, 1, [  // sext, zext, anyext
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
  ]>;
@@ -405,11 +408,11 @@ def zext_invec : SDNode<"ISD::ZERO_EXTEND_VECTOR_INREG", SDTExtInvec>;
  def abs        : SDNode<"ISD::ABS"        , SDTIntUnaryOp>;
  def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>;
  def bswap      : SDNode<"ISD::BSWAP"      , SDTIntUnaryOp>;
-def ctlz       : SDNode<"ISD::CTLZ"       , SDTIntUnaryOp>;
-def cttz       : SDNode<"ISD::CTTZ"       , SDTIntUnaryOp>;
-def ctpop      : SDNode<"ISD::CTPOP"      , SDTIntUnaryOp>;
-def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>;
-def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>;
+def ctlz       : SDNode<"ISD::CTLZ"       , SDTIntBitCountUnaryOp>;
+def cttz       : SDNode<"ISD::CTTZ"       , SDTIntBitCountUnaryOp>;
+def ctpop      : SDNode<"ISD::CTPOP"      , SDTIntBitCountUnaryOp>;
+def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>;
+def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>;
  def sext       : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
  def zext       : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
  def anyext     : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td

index e3dd0c22d332ab92e8b6d2361d1efff0f5da8010..2f89849e9a26a2f06eb6e34d331f6820602d9261 100644 (file)
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -812,7 +812,7 @@ def : GCNPat <
  >;
  }
  def : GCNPat <
-  (i16 (add (i16 (trunc (getDivergentFrag<ctpop>.ret i32:$popcnt))), i16:$val)),
+  (i16 (add (i16 (trunc (i32 (getDivergentFrag<ctpop>.ret i32:$popcnt)))), i16:$val)),
    (V_BCNT_U32_B32_e64 $popcnt, $val)
  >;
  
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td

index a9ba989b028c82aae0424f0bfb6cf67f41a6ca62..c54be2f16f1d6daebbcd691c0e1c96c24ef107e1 100644 (file)
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -1677,19 +1677,19 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
  //
  
  // Count leading zeros.
-def: Pat<(ctlz I32:$Rs),                      (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (ctlz I32:$Rs)),                (S2_cl0 I32:$Rs)>;
  def: Pat<(i32 (trunc (ctlz I64:$Rss))),       (S2_cl0p I64:$Rss)>;
  
  // Count trailing zeros.
-def: Pat<(cttz I32:$Rs),                      (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz I32:$Rs)),                (S2_ct0 I32:$Rs)>;
  def: Pat<(i32 (trunc (cttz I64:$Rss))),       (S2_ct0p I64:$Rss)>;
  
  // Count leading ones.
-def: Pat<(ctlz (not I32:$Rs)),                (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (ctlz (not I32:$Rs))),          (S2_cl1 I32:$Rs)>;
  def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
  
  // Count trailing ones.
-def: Pat<(cttz (not I32:$Rs)),                (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (cttz (not I32:$Rs))),           (S2_ct1 I32:$Rs)>;
  def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
  
  // Define leading/trailing patterns that require zero-extensions to 64 bits.
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td

index 62da3c79f4659145b80e5abf274da6acb6976715..e5580258d8c049a0a65d7ce4550ad016528795f5 100644 (file)
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2908,7 +2908,7 @@ def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
  // ctz instruction always returns a 32-bit value.  For ctlz.i64, convert the
  // ptx value to 64 bits to match the ISD node's semantics, unless we know we're
  // truncating back down to 32 bits.
-def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
  def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>;
  
  // For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the
@@ -2925,10 +2925,10 @@ def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>;
  // and then ctlz that value.  This way we don't have to subtract 16 from the
  // result.  Unfortunately today we don't have a way to generate
  // "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.
-def : Pat<(ctlz Int16Regs:$a),
+def : Pat<(i16 (ctlz Int16Regs:$a)),
            (SUBi16ri (CVT_u16_u32
             (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>;
-def : Pat<(i32 (zext (ctlz Int16Regs:$a))),
+def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
            (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
  
  // Population count
@@ -2953,7 +2953,7 @@ def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>;
  // If we know that we're storing into an i32, we can avoid the final trunc.
  def : Pat<(ctpop Int16Regs:$a),
            (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
-def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
+def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))),
            (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
  
  // fpround f32 -> f16
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td

index 2d4f687f72d2ac7e185b34ee182b7ba0f5ff21da..d18ab3b1370bafd067ec944e9af2e174ec58798f 100644 (file)
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -177,7 +177,7 @@ def LEAX_ADDri : F3_2<2, 0b000000,
  
  def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
  def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
-def : Pat<(ctpop i64:$src), (POPCrr $src)>;
+def : Pat<(i64 (ctpop i64:$src)), (POPCrr $src)>;
  
  } // Predicates = [Is64Bit]
  
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td

index 8474c7abffb3568cf851ba121f8437bdc1caf608..73dbdc4f443e2ba488d32ccd9fc3f9b6fe1bdd21 100644 (file)
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -516,9 +516,9 @@ let DecoderMethod = "DecodeLoadQFP" in
    defm LDQF  : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>,
                 Requires<[HasV9, HasHardQuad]>;
  
-let DecoderMethod = "DecodeLoadCP" in 
-  defm LDC   : Load<"ld", 0b110000, load, CoprocRegs, i32>; 
-let DecoderMethod = "DecodeLoadCPPair" in 
+let DecoderMethod = "DecodeLoadCP" in
+  defm LDC   : Load<"ld", 0b110000, load, CoprocRegs, i32>;
+let DecoderMethod = "DecodeLoadCPPair" in
    defm LDDC   : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>;
  
  let DecoderMethod = "DecodeLoadCP", Defs = [CPSR] in {
@@ -1508,7 +1508,7 @@ let rs1 = 0 in
    def POPCrr : F3_1<2, 0b101110,
                      (outs IntRegs:$rd), (ins IntRegs:$rs2),
                      "popc $rs2, $rd", []>, Requires<[HasV9]>;
-def : Pat<(ctpop i32:$src),
+def : Pat<(i32 (ctpop i32:$src)),
            (POPCrr (SRLri $src, 0))>;
  
  let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td

index 9b6cbf7f1bc989000a57e069a62e2ea5d13ac2da..8b334756611a46b32f7636987f77e35e05a0bf44 100644 (file)
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -2082,7 +2082,7 @@ let Predicates = [FeatureProcessorAssist] in {
  // cleared.  We only use the first result here.
  let Defs = [CC] in
    def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
-def : Pat<(ctlz GR64:$src),
+def : Pat<(i64 (ctlz GR64:$src)),
            (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
  
  // Population count.  Counts bits set per byte or doubleword.
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td

index 22e6353d3c08691a2c8f75873f849e47f9f7d092..b8c936feac12505b991f183804076f3a8e70b823 100644 (file)
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -10640,13 +10640,13 @@ multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1), OpcodeStr,
                      "$src1", "$src1",
-                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
+                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                      Sched<[sched]>;
  
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1), OpcodeStr,
                    "$src1", "$src1",
-                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
+                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded]>;
    }
@@ -10659,8 +10659,8 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                    (ins _.ScalarMemOp:$src1), OpcodeStr,
                    "${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr,
-                  (_.VT (OpNode (X86VBroadcast
-                                    (_.ScalarLdFrag addr:$src1))))>,
+                  (_.VT (OpNode (_.VT (X86VBroadcast
+                                       (_.ScalarLdFrag addr:$src1)))))>,
               EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[sched.Folded]>;
  }
@@ -10744,7 +10744,7 @@ let Predicates = [HasAVX512, NoVLX] in {
  multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                   AVX512VLVectorVTInfo _, Predicate prd> {
    let Predicates = [prd, NoVLX] in {
-    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
+    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
                (EXTRACT_SUBREG
                  (!cast<Instruction>(InstrStr # "Zrr")
                    (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
@@ -10752,7 +10752,7 @@ multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                   _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;
  
-    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
+    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
                (EXTRACT_SUBREG
                  (!cast<Instruction>(InstrStr # "Zrr")
                    (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 13 Sep 2019 00:11:14 +0000 (00:11 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 13 Sep 2019 00:11:14 +0000 (00:11 +0000)
include/llvm/Target/TargetSelectionDAG.td		patch | blob | history
lib/Target/AMDGPU/SIInstructions.td		patch | blob | history
lib/Target/Hexagon/HexagonPatterns.td		patch | blob | history
lib/Target/NVPTX/NVPTXInstrInfo.td		patch | blob | history
lib/Target/Sparc/SparcInstr64Bit.td		patch | blob | history
lib/Target/Sparc/SparcInstrInfo.td		patch | blob | history
lib/Target/SystemZ/SystemZInstrInfo.td		patch | blob | history
lib/Target/X86/X86InstrAVX512.td		patch | blob | history