From 7a0ca1142cca2ef4e2f85dc43f945b2f976ba314 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 17 Jul 2017 15:45:45 +0000 Subject: [PATCH] [Hexagon] Remove custom lowering of loads of v4i16 The target-independent lowering works fine, except concatenating 32-bit words. Add a pattern to generate A2_combinew instead of 64-bit asl/or. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308186 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 83 +------------------ lib/Target/Hexagon/HexagonISelLowering.h | 1 - lib/Target/Hexagon/HexagonPatterns.td | 6 ++ test/CodeGen/Hexagon/vect/vect-load-v4i16.ll | 23 +++++ .../vect/{vect-loadv4i16.ll => vect-v4i16.ll} | 0 5 files changed, 30 insertions(+), 83 deletions(-) create mode 100644 test/CodeGen/Hexagon/vect/vect-load-v4i16.ll rename test/CodeGen/Hexagon/vect/{vect-loadv4i16.ll => vect-v4i16.ll} (100%) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 15931d5d1c5..3997702bc96 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1364,79 +1364,6 @@ HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// Handle only specific vector loads. -SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - LoadSDNode *LoadNode = cast(Op); - SDValue Chain = LoadNode->getChain(); - SDValue Ptr = Op.getOperand(1); - SDValue LoweredLoad; - SDValue Result; - SDValue Base = LoadNode->getBasePtr(); - ISD::LoadExtType Ext = LoadNode->getExtensionType(); - unsigned Alignment = LoadNode->getAlignment(); - SDValue LoadChain; - - if(Ext == ISD::NON_EXTLOAD) - Ext = ISD::ZEXTLOAD; - - if (VT == MVT::v4i16) { - if (Alignment == 2) { - SDValue Loads[4]; - // Base load. - Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // Base+2 load. - SDValue Increment = DAG.getConstant(2, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // SHL 16, then OR base and base+2. - SDValue ShiftAmount = DAG.getConstant(16, DL, MVT::i32); - SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount); - SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]); - // Base + 4. - Increment = DAG.getConstant(4, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // Base + 6. - Increment = DAG.getConstant(6, DL, MVT::i32); - Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment); - Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr, - LoadNode->getPointerInfo(), MVT::i16, Alignment, - LoadNode->getMemOperand()->getFlags()); - // SHL 16, then OR base+4 and base+6. - Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount); - SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]); - // Combine to i64. This could be optimised out later if we can - // affect reg allocation of this code. - Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2); - LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - Loads[0].getValue(1), Loads[1].getValue(1), - Loads[2].getValue(1), Loads[3].getValue(1)); - } else { - // Perform default type expansion. - Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(), - LoadNode->getAlignment(), - LoadNode->getMemOperand()->getFlags()); - LoadChain = Result.getValue(1); - } - } else - llvm_unreachable("Custom lowering unsupported load"); - - Result = DAG.getNode(ISD::BITCAST, DL, VT, Result); - // Since we pretend to lower a load, we need the original chain - // info attached to the result. - SDValue Ops[] = { Result, LoadChain }; - - return DAG.getMergeValues(Ops, DL); -} - SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); @@ -1961,18 +1888,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Handling of vector operations. // - // Custom lower v4i16 load only. Let v4i16 store to be - // promoted for now. promoteLdStType(MVT::v4i8, MVT::i32); promoteLdStType(MVT::v2i16, MVT::i32); promoteLdStType(MVT::v8i8, MVT::i64); + promoteLdStType(MVT::v4i16, MVT::i64); promoteLdStType(MVT::v2i32, MVT::i64); - setOperationAction(ISD::LOAD, MVT::v4i16, Custom); - setOperationAction(ISD::STORE, MVT::v4i16, Promote); - AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64); - AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64); - // Set the action for vector operations to "expand", then override it with // either "custom" or "legal" for specific cases. static const unsigned VectExpOps[] = { @@ -2970,8 +2891,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); - // Custom lower some vector loads. - case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VSELECT: return LowerVSELECT(Op, DAG); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index d480af71b58..d66cbc95e91 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -165,7 +165,6 @@ namespace HexagonISD { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index 3c1893406b5..804a547d5b3 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -2250,6 +2250,12 @@ def: Storea_pat, I32, addrgp, PS_storerhabs>; def: Storea_pat, I32, addrgp, PS_storeriabs>; def: Storea_pat, I64, addrgp, PS_storerdabs>; +// Prefer this pattern to S2_asl_i_p_or for the special case of joining +// two 32-bit words into a 64-bit word. +let AddedComplexity = 200 in +def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), + (A2_combinew I32:$a, I32:$b)>; + def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)), (i64 (zext (i32 (and I32:$a, (i32 65535)))))), (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))), diff --git a/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll b/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll new file mode 100644 index 00000000000..32abb75f20f --- /dev/null +++ b/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hexagon -O0 < %s | FileCheck %s + +; CHECK-LABEL: danny: +; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0) +; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2) +; CHECK: [[T0]] |= asl([[T1]],#16) +; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4) +; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6) +; CHECK: [[T2]] |= asl([[T3]],#16) +; CHECK: combine([[T2]],[[T0]]) +define <4 x i16> @danny(<4 x i16>* %p) { + %t0 = load <4 x i16>, <4 x i16>* %p, align 2 + ret <4 x i16> %t0 +} + +; CHECK-LABEL: sammy: +; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0) +; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4) +; CHECK: combine([[T1]],[[T0]]) +define <4 x i16> @sammy(<4 x i16>* %p) { + %t0 = load <4 x i16>, <4 x i16>* %p, align 4 + ret <4 x i16> %t0 +} diff --git a/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll b/test/CodeGen/Hexagon/vect/vect-v4i16.ll similarity index 100% rename from test/CodeGen/Hexagon/vect/vect-loadv4i16.ll rename to test/CodeGen/Hexagon/vect/vect-v4i16.ll -- 2.40.0