From 69d8d82e3c637522a905fd47e242aca891b253a6 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 23 Feb 2017 15:02:09 +0000 Subject: [PATCH] [Hexagon] Patterns for CTPOP, BSWAP and BITREVERSE git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295981 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 22 ++-- lib/Target/Hexagon/HexagonISelLowering.h | 2 - lib/Target/Hexagon/HexagonPatterns.td | 15 ++- test/CodeGen/Hexagon/bitmanip.ll | 135 +++++++++++++++++++++ test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll | 36 ------ 5 files changed, 151 insertions(+), 59 deletions(-) create mode 100644 test/CodeGen/Hexagon/bitmanip.ll delete mode 100644 test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index be44dfb8da3..889885c3fa8 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1292,17 +1292,6 @@ static bool isSExtFree(SDValue N) { return false; } -SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue InpVal = Op.getOperand(0); - if (isa(InpVal)) { - uint64_t V = cast(InpVal)->getZExtValue(); - return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64); - } - SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal); - return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut); -} - SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -1911,7 +1900,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::i8, Promote); setOperationAction(ISD::CTPOP, MVT::i16, Promote); setOperationAction(ISD::CTPOP, MVT::i32, Promote); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BSWAP, MVT::i64, Legal); // We custom lower i64 to i64 mul, so that it is not considered as a legal // operation. There is a pattern that will match i64 mul and transform it @@ -1921,7 +1915,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, for (unsigned IntExpOp : { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR, - ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, + ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI }) { setOperationAction(IntExpOp, MVT::i32, Expand); setOperationAction(IntExpOp, MVT::i64, Expand); @@ -2288,7 +2282,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; case HexagonISD::JT: return "HexagonISD::JT"; case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; - case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; @@ -2989,7 +2982,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VSELECT: return LowerVSELECT(Op, DAG); - case ISD::CTPOP: return LowerCTPOP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 80add5df5b6..aa0f00cd5bb 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -50,7 +50,6 @@ namespace HexagonISD { JT, // Jump table. CP, // Constant pool. - POPCOUNT, COMBINE, PACKHL, VSPLATB, @@ -181,7 +180,6 @@ namespace HexagonISD { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index 8ce1c6db3b5..bbd29a789f4 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -1591,6 +1591,15 @@ def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; +def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>; +def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; + +def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; +def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; + +def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; +def: Pat<(bswap I64:$Rss), (A2_combinew (A2_swiz (LoReg $Rss)), + (A2_swiz (HiReg $Rss)))>; let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), @@ -2239,12 +2248,6 @@ def ftoi : SDNodeXForm; -def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i64>]>; -def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>; - -def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>; - let AddedComplexity = 20 in { defm: Loadx_pat; defm: Loadx_pat; diff --git a/test/CodeGen/Hexagon/bitmanip.ll b/test/CodeGen/Hexagon/bitmanip.ll new file mode 100644 index 00000000000..2044a2fdd08 --- /dev/null +++ b/test/CodeGen/Hexagon/bitmanip.ll @@ -0,0 +1,135 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: popcount_16 +; CHECK: zxth +; CHECK: popcount +define i16 @popcount_16(i16 %p) #0 { + %t = call i16 @llvm.ctpop.i16(i16 %p) #0 + ret i16 %t +} + +; CHECK-LABEL: popcount_32 +; CHECK: popcount +define i32 @popcount_32(i32 %p) #0 { + %t = call i32 @llvm.ctpop.i32(i32 %p) #0 + ret i32 %t +} + +; CHECK-LABEL: popcount_64 +; CHECK: popcount +define i64 @popcount_64(i64 %p) #0 { + %t = call i64 @llvm.ctpop.i64(i64 %p) #0 + ret i64 %t +} + +; CHECK-LABEL: ctlz_16 +; CHECK: [[REG0:r[0-9]+]] = zxth +; CHECK: [[REG1:r[0-9]+]] = cl0([[REG0]]) +; CHECK: add([[REG1]],#-16) +define i16 @ctlz_16(i16 %p) #0 { + %t = call i16 @llvm.ctlz.i16(i16 %p, i1 true) #0 + ret i16 %t +} + +; CHECK-LABEL: ctlz_32 +; CHECK: cl0 +define i32 @ctlz_32(i32 %p) #0 { + %t = call i32 @llvm.ctlz.i32(i32 %p, i1 true) #0 + ret i32 %t +} + +; CHECK-LABEL: ctlz_64 +; CHECK: cl0 +define i64 @ctlz_64(i64 %p) #0 { + %t = call i64 @llvm.ctlz.i64(i64 %p, i1 true) #0 + ret i64 %t +} + +; CHECK-LABEL: cttz_16 +; CHECK: ct0 +define i16 @cttz_16(i16 %p) #0 { + %t = call i16 @llvm.cttz.i16(i16 %p, i1 true) #0 + ret i16 %t +} + +; CHECK-LABEL: cttz_32 +; CHECK: ct0 +define i32 @cttz_32(i32 %p) #0 { + %t = call i32 @llvm.cttz.i32(i32 %p, i1 true) #0 + ret i32 %t +} + +; CHECK-LABEL: cttz_64 +; CHECK: ct0 +define i64 @cttz_64(i64 %p) #0 { + %t = call i64 @llvm.cttz.i64(i64 %p, i1 true) #0 + ret i64 %t +} + +; CHECK-LABEL: brev_16 +; CHECK: [[REG:r[0-9]+]] = brev +; CHECK: lsr([[REG]],#16) +define i16 @brev_16(i16 %p) #0 { + %t = call i16 @llvm.bitreverse.i16(i16 %p) #0 + ret i16 %t +} + +; CHECK-LABEL: brev_32 +; CHECK: brev +define i32 @brev_32(i32 %p) #0 { + %t = call i32 @llvm.bitreverse.i32(i32 %p) #0 + ret i32 %t +} + +; CHECK-LABEL: brev_64 +; CHECK: brev +define i64 @brev_64(i64 %p) #0 { + %t = call i64 @llvm.bitreverse.i64(i64 %p) #0 + ret i64 %t +} + +; CHECK-LABEL: bswap_16 +; CHECK: [[REG:r[0-9]+]] = swiz +; CHECK: lsr([[REG]],#16) +define i16 @bswap_16(i16 %p) #0 { + %t = call i16 @llvm.bswap.i16(i16 %p) #0 + ret i16 %t +} + +; CHECK-LABEL: bswap_32 +; CHECK: swiz +define i32 @bswap_32(i32 %p) #0 { + %t = call i32 @llvm.bswap.i32(i32 %p) #0 + ret i32 %t +} + +; CHECK-LABEL: bswap_64 +; CHECK: swiz +; CHECK: swiz +; CHECK: combine +define i64 @bswap_64(i64 %p) #0 { + %t = call i64 @llvm.bswap.i64(i64 %p) #0 + ret i64 %t +} + +declare i16 @llvm.ctpop.i16(i16) #0 +declare i32 @llvm.ctpop.i32(i32) #0 +declare i64 @llvm.ctpop.i64(i64) #0 + +declare i16 @llvm.ctlz.i16(i16, i1) #0 +declare i32 @llvm.ctlz.i32(i32, i1) #0 +declare i64 @llvm.ctlz.i64(i64, i1) #0 + +declare i16 @llvm.cttz.i16(i16, i1) #0 +declare i32 @llvm.cttz.i32(i32, i1) #0 +declare i64 @llvm.cttz.i64(i64, i1) #0 + +declare i16 @llvm.bitreverse.i16(i16) #0 +declare i32 @llvm.bitreverse.i32(i32) #0 +declare i64 @llvm.bitreverse.i64(i64) #0 + +declare i16 @llvm.bswap.i16(i16) #0 +declare i32 @llvm.bswap.i32(i32) #0 +declare i64 @llvm.bswap.i64(i64) #0 + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll deleted file mode 100644 index c8b1b0a2ca0..00000000000 --- a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s - -; CHECK-DAG: ct0({{r[0-9]*:[0-9]*}}) -; CHECK-DAG: cl0({{r[0-9]*:[0-9]*}}) -; CHECK-DAG: ct0({{r[0-9]*}}) -; CHECK-DAG: cl0({{r[0-9]*}}) -; CHECK-DAG: r{{[0-9]+}} += lsr(r{{[0-9]+}},#4) - -define i32 @foo(i64 %a, i32 %b) nounwind { -entry: - %tmp0 = tail call i64 @llvm.ctlz.i64( i64 %a, i1 true ) - %tmp1 = tail call i64 @llvm.cttz.i64( i64 %a, i1 true ) - %tmp2 = tail call i32 @llvm.ctlz.i32( i32 %b, i1 true ) - %tmp3 = tail call i32 @llvm.cttz.i32( i32 %b, i1 true ) - %tmp4 = tail call i64 @llvm.ctpop.i64( i64 %a ) - %tmp5 = tail call i32 @llvm.ctpop.i32( i32 %b ) - - - %tmp6 = trunc i64 %tmp0 to i32 - %tmp7 = trunc i64 %tmp1 to i32 - %tmp8 = trunc i64 %tmp4 to i32 - %tmp9 = add i32 %tmp6, %tmp7 - %tmp10 = add i32 %tmp9, %tmp8 - %tmp11 = add i32 %tmp10, %tmp2 - %tmp12 = add i32 %tmp11, %tmp3 - %tmp13 = add i32 %tmp12, %tmp5 - - ret i32 %tmp13 -} - -declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone -declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone -declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone -declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone -declare i64 @llvm.ctpop.i64(i64) nounwind readnone -declare i32 @llvm.ctpop.i32(i32) nounwind readnone -- 2.40.0