From bf2724cd222ff90249f7f430b8c5f3704dd9ca1e Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 15 Aug 2019 16:13:17 +0000 Subject: [PATCH] [Hexagon] Generate vector min/max for HVX git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369014 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonISelLowering.cpp | 11 + lib/Target/Hexagon/HexagonISelLowering.h | 7 +- lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 24 ++ lib/Target/Hexagon/HexagonPatternsHVX.td | 25 ++ test/CodeGen/Hexagon/autohvx/minmax-128b.ll | 344 ++++++++++++++++++ test/CodeGen/Hexagon/autohvx/minmax-64b.ll | 344 ++++++++++++++++++ .../Hexagon/autohvx/vector-compare-128b.ll | 180 ++++----- .../Hexagon/autohvx/vector-compare-64b.ll | 180 ++++----- 8 files changed, 934 insertions(+), 181 deletions(-) create mode 100644 test/CodeGen/Hexagon/autohvx/minmax-128b.ll create mode 100644 test/CodeGen/Hexagon/autohvx/minmax-64b.ll diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 7ddc33e3eed..eaae3f23216 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -2877,6 +2877,17 @@ HexagonTargetLowering::ReplaceNodeResults(SDNode *N, } } +SDValue +HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + const { + SDValue Op(N, 0); + if (isHvxOperation(Op)) { + if (SDValue V = PerformHvxDAGCombine(N, DCI)) + return V; + } + return SDValue(); +} + /// Returns relocation base for the given PIC jumptable. SDValue HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index e9222406925..4bc49dd508c 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -223,6 +223,8 @@ namespace HexagonISD { const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; unsigned getRegisterByName(const char* RegName, EVT VT, @@ -301,7 +303,8 @@ namespace HexagonISD { const AttributeList &FuncAttributes) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override; + unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) + const override; /// Returns relocation base for the given PIC jumptable. SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) @@ -458,6 +461,8 @@ namespace HexagonISD { bool isHvxOperation(SDValue Op) const; SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; + + SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 345c657787a..bc8a9959c91 100644 --- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -193,6 +193,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::OR, BoolV, Legal); setOperationAction(ISD::XOR, BoolV, Legal); } + + setTargetDAGCombine(ISD::VSELECT); } SDValue @@ -1580,6 +1582,28 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Unhandled HVX operation"); } +SDValue +HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + const { + const SDLoc &dl(N); + SDValue Op(N, 0); + + unsigned Opc = Op.getOpcode(); + if (Opc == ISD::VSELECT) { + // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) + SDValue Cond = Op.getOperand(0); + if (Cond->getOpcode() == ISD::XOR) { + SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); + if (C1->getOpcode() == HexagonISD::QTRUE) { + SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, + Op.getOperand(2), Op.getOperand(1)); + return VSel; + } + } + } + return SDValue(); +} + bool HexagonTargetLowering::isHvxOperation(SDValue Op) const { // If the type of the result, or any operand type are HVX vector types, diff --git a/lib/Target/Hexagon/HexagonPatternsHVX.td b/lib/Target/Hexagon/HexagonPatternsHVX.td index a4cfca9ac7d..4f3d87263b5 100644 --- a/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -259,6 +259,31 @@ class Vneg1 class Vnot : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1)>; +multiclass VMinMax_pat { + def: Pat<(vselect (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), + CmpPred:$Vt, CmpPred:$Vs), + (MinInst CmpPred:$Vs, CmpPred:$Vt)>; + def: Pat<(vselect (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), + CmpPred:$Vs, CmpPred:$Vt), + (MaxInst CmpPred:$Vs, CmpPred:$Vt)>; +} + +let Predicates = [UseHVX] in { + let AddedComplexity = 220 in { + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + defm: VMinMax_pat; + } +} + let Predicates = [UseHVX] in { let AddedComplexity = 200 in { def: Pat<(Vnot HVI8:$Vs), (V6_vnot HvxVR:$Vs)>; diff --git a/test/CodeGen/Hexagon/autohvx/minmax-128b.ll b/test/CodeGen/Hexagon/autohvx/minmax-128b.ll new file mode 100644 index 00000000000..bd5ca331e75 --- /dev/null +++ b/test/CodeGen/Hexagon/autohvx/minmax-128b.ll @@ -0,0 +1,344 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; minb + +; CHECK: test_00: +; CHECK: v0.b = vmin(v1.b,v0.b) +define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp slt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK: test_01: +; CHECK: v0.b = vmin(v0.b,v1.b) +define <128 x i8> @test_01(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sle <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK: test_02: +; CHECK: v0.b = vmin(v0.b,v1.b) +define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sgt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; CHECK: test_03: +; CHECK: v0.b = vmin(v1.b,v0.b) +define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sge <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; maxb + +; CHECK: test_04: +; CHECK: v0.b = vmax(v1.b,v0.b) +define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp slt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; CHECK: test_05: +; CHECK: v0.b = vmax(v0.b,v1.b) +define <128 x i8> @test_05(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sle <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; CHECK: test_06: +; CHECK: v0.b = vmax(v0.b,v1.b) +define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sgt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK: test_07: +; CHECK: v0.b = vmax(v1.b,v0.b) +define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp sge <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; minub + +; CHECK: test_08: +; CHECK: v0.ub = vmin(v1.ub,v0.ub) +define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ult <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK: test_09: +; CHECK: v0.ub = vmin(v0.ub,v1.ub) +define <128 x i8> @test_09(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ule <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK: test_0a: +; CHECK: v0.ub = vmin(v0.ub,v1.ub) +define <128 x i8> @test_0a(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ugt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; CHECK: test_0b: +; CHECK: v0.ub = vmin(v1.ub,v0.ub) +define <128 x i8> @test_0b(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp uge <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; maxub + +; CHECK: test_0c: +; CHECK: v0.ub = vmax(v1.ub,v0.ub) +define <128 x i8> @test_0c(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ult <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; CHECK: test_0d: +; CHECK: v0.ub = vmax(v0.ub,v1.ub) +define <128 x i8> @test_0d(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ule <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v0 + ret <128 x i8> %t1 +} + +; CHECK: test_0e: +; CHECK: v0.ub = vmax(v0.ub,v1.ub) +define <128 x i8> @test_0e(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp ugt <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; CHECK: test_0f: +; CHECK: v0.ub = vmax(v1.ub,v0.ub) +define <128 x i8> @test_0f(<128 x i8> %v0, <128 x i8> %v1) #0 { + %t0 = icmp uge <128 x i8> %v0, %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + ret <128 x i8> %t1 +} + +; minh + +; CHECK: test_10: +; CHECK: v0.h = vmin(v1.h,v0.h) +define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp slt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK: test_11: +; CHECK: v0.h = vmin(v0.h,v1.h) +define <64 x i16> @test_11(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sle <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK: test_12: +; CHECK: v0.h = vmin(v0.h,v1.h) +define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sgt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; CHECK: test_13: +; CHECK: v0.h = vmin(v1.h,v0.h) +define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sge <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; maxh + +; CHECK: test_14: +; CHECK: v0.h = vmax(v1.h,v0.h) +define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp slt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; CHECK: test_15: +; CHECK: v0.h = vmax(v0.h,v1.h) +define <64 x i16> @test_15(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sle <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; CHECK: test_16: +; CHECK: v0.h = vmax(v0.h,v1.h) +define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sgt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK: test_17: +; CHECK: v0.h = vmax(v1.h,v0.h) +define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp sge <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; minuh + +; CHECK: test_18: +; CHECK: v0.uh = vmin(v1.uh,v0.uh) +define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ult <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK: test_19: +; CHECK: v0.uh = vmin(v0.uh,v1.uh) +define <64 x i16> @test_19(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ule <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK: test_1a: +; CHECK: v0.uh = vmin(v0.uh,v1.uh) +define <64 x i16> @test_1a(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ugt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; CHECK: test_1b: +; CHECK: v0.uh = vmin(v1.uh,v0.uh) +define <64 x i16> @test_1b(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp uge <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; maxuh + +; CHECK: test_1c: +; CHECK: v0.uh = vmax(v1.uh,v0.uh) +define <64 x i16> @test_1c(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ult <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; CHECK: test_1d: +; CHECK: v0.uh = vmax(v0.uh,v1.uh) +define <64 x i16> @test_1d(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ule <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v0 + ret <64 x i16> %t1 +} + +; CHECK: test_1e: +; CHECK: v0.uh = vmax(v0.uh,v1.uh) +define <64 x i16> @test_1e(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp ugt <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; CHECK: test_1f: +; CHECK: v0.uh = vmax(v1.uh,v0.uh) +define <64 x i16> @test_1f(<64 x i16> %v0, <64 x i16> %v1) #0 { + %t0 = icmp uge <64 x i16> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + ret <64 x i16> %t1 +} + +; minw + +; CHECK: test_20: +; CHECK: v0.w = vmin(v1.w,v0.w) +define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp slt <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK: test_21: +; CHECK: v0.w = vmin(v0.w,v1.w) +define <32 x i32> @test_21(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sle <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK: test_22: +; CHECK: v0.w = vmin(v0.w,v1.w) +define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sgt <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v0 + ret <32 x i32> %t1 +} + +; CHECK: test_23: +; CHECK: v0.w = vmin(v1.w,v0.w) +define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sge <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v0 + ret <32 x i32> %t1 +} + +; maxw + +; CHECK: test_24: +; CHECK: v0.w = vmax(v1.w,v0.w) +define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp slt <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v0 + ret <32 x i32> %t1 +} + +; CHECK: test_25: +; CHECK: v0.w = vmax(v0.w,v1.w) +define <32 x i32> @test_25(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sle <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v0 + ret <32 x i32> %t1 +} + +; CHECK: test_26: +; CHECK: v0.w = vmax(v0.w,v1.w) +define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sgt <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +; CHECK: test_27: +; CHECK: v0.w = vmax(v1.w,v0.w) +define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1) #0 { + %t0 = icmp sge <32 x i32> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + ret <32 x i32> %t1 +} + +attributes #0 = { readnone nounwind "target-cpu"="hexagonv62" "target-features"="+hvx,+hvx-length128b" } + diff --git a/test/CodeGen/Hexagon/autohvx/minmax-64b.ll b/test/CodeGen/Hexagon/autohvx/minmax-64b.ll new file mode 100644 index 00000000000..15e33e32296 --- /dev/null +++ b/test/CodeGen/Hexagon/autohvx/minmax-64b.ll @@ -0,0 +1,344 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; minb + +; CHECK: test_00: +; CHECK: v0.b = vmin(v1.b,v0.b) +define <64 x i8> @test_00(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp slt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK: test_01: +; CHECK: v0.b = vmin(v0.b,v1.b) +define <64 x i8> @test_01(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sle <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK: test_02: +; CHECK: v0.b = vmin(v0.b,v1.b) +define <64 x i8> @test_02(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sgt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; CHECK: test_03: +; CHECK: v0.b = vmin(v1.b,v0.b) +define <64 x i8> @test_03(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sge <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; maxb + +; CHECK: test_04: +; CHECK: v0.b = vmax(v1.b,v0.b) +define <64 x i8> @test_04(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp slt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; CHECK: test_05: +; CHECK: v0.b = vmax(v0.b,v1.b) +define <64 x i8> @test_05(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sle <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; CHECK: test_06: +; CHECK: v0.b = vmax(v0.b,v1.b) +define <64 x i8> @test_06(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sgt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK: test_07: +; CHECK: v0.b = vmax(v1.b,v0.b) +define <64 x i8> @test_07(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp sge <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; minub + +; CHECK: test_08: +; CHECK: v0.ub = vmin(v1.ub,v0.ub) +define <64 x i8> @test_08(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ult <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK: test_09: +; CHECK: v0.ub = vmin(v0.ub,v1.ub) +define <64 x i8> @test_09(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ule <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK: test_0a: +; CHECK: v0.ub = vmin(v0.ub,v1.ub) +define <64 x i8> @test_0a(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ugt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; CHECK: test_0b: +; CHECK: v0.ub = vmin(v1.ub,v0.ub) +define <64 x i8> @test_0b(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp uge <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; maxub + +; CHECK: test_0c: +; CHECK: v0.ub = vmax(v1.ub,v0.ub) +define <64 x i8> @test_0c(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ult <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; CHECK: test_0d: +; CHECK: v0.ub = vmax(v0.ub,v1.ub) +define <64 x i8> @test_0d(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ule <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v0 + ret <64 x i8> %t1 +} + +; CHECK: test_0e: +; CHECK: v0.ub = vmax(v0.ub,v1.ub) +define <64 x i8> @test_0e(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp ugt <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; CHECK: test_0f: +; CHECK: v0.ub = vmax(v1.ub,v0.ub) +define <64 x i8> @test_0f(<64 x i8> %v0, <64 x i8> %v1) #0 { + %t0 = icmp uge <64 x i8> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + ret <64 x i8> %t1 +} + +; minh + +; CHECK: test_10: +; CHECK: v0.h = vmin(v1.h,v0.h) +define <32 x i16> @test_10(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp slt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK: test_11: +; CHECK: v0.h = vmin(v0.h,v1.h) +define <32 x i16> @test_11(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sle <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK: test_12: +; CHECK: v0.h = vmin(v0.h,v1.h) +define <32 x i16> @test_12(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sgt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; CHECK: test_13: +; CHECK: v0.h = vmin(v1.h,v0.h) +define <32 x i16> @test_13(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sge <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; maxh + +; CHECK: test_14: +; CHECK: v0.h = vmax(v1.h,v0.h) +define <32 x i16> @test_14(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp slt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; CHECK: test_15: +; CHECK: v0.h = vmax(v0.h,v1.h) +define <32 x i16> @test_15(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sle <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; CHECK: test_16: +; CHECK: v0.h = vmax(v0.h,v1.h) +define <32 x i16> @test_16(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sgt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK: test_17: +; CHECK: v0.h = vmax(v1.h,v0.h) +define <32 x i16> @test_17(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp sge <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; minuh + +; CHECK: test_18: +; CHECK: v0.uh = vmin(v1.uh,v0.uh) +define <32 x i16> @test_18(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ult <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK: test_19: +; CHECK: v0.uh = vmin(v0.uh,v1.uh) +define <32 x i16> @test_19(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ule <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK: test_1a: +; CHECK: v0.uh = vmin(v0.uh,v1.uh) +define <32 x i16> @test_1a(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ugt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; CHECK: test_1b: +; CHECK: v0.uh = vmin(v1.uh,v0.uh) +define <32 x i16> @test_1b(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp uge <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; maxuh + +; CHECK: test_1c: +; CHECK: v0.uh = vmax(v1.uh,v0.uh) +define <32 x i16> @test_1c(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ult <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; CHECK: test_1d: +; CHECK: v0.uh = vmax(v0.uh,v1.uh) +define <32 x i16> @test_1d(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ule <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v0 + ret <32 x i16> %t1 +} + +; CHECK: test_1e: +; CHECK: v0.uh = vmax(v0.uh,v1.uh) +define <32 x i16> @test_1e(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp ugt <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; CHECK: test_1f: +; CHECK: v0.uh = vmax(v1.uh,v0.uh) +define <32 x i16> @test_1f(<32 x i16> %v0, <32 x i16> %v1) #0 { + %t0 = icmp uge <32 x i16> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + ret <32 x i16> %t1 +} + +; minw + +; CHECK: test_20: +; CHECK: v0.w = vmin(v1.w,v0.w) +define <16 x i32> @test_20(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp slt <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK: test_21: +; CHECK: v0.w = vmin(v0.w,v1.w) +define <16 x i32> @test_21(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sle <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK: test_22: +; CHECK: v0.w = vmin(v0.w,v1.w) +define <16 x i32> @test_22(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sgt <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v0 + ret <16 x i32> %t1 +} + +; CHECK: test_23: +; CHECK: v0.w = vmin(v1.w,v0.w) +define <16 x i32> @test_23(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sge <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v0 + ret <16 x i32> %t1 +} + +; maxw + +; CHECK: test_24: +; CHECK: v0.w = vmax(v1.w,v0.w) +define <16 x i32> @test_24(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp slt <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v0 + ret <16 x i32> %t1 +} + +; CHECK: test_25: +; CHECK: v0.w = vmax(v0.w,v1.w) +define <16 x i32> @test_25(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sle <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v0 + ret <16 x i32> %t1 +} + +; CHECK: test_26: +; CHECK: v0.w = vmax(v0.w,v1.w) +define <16 x i32> @test_26(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sgt <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +; CHECK: test_27: +; CHECK: v0.w = vmax(v1.w,v0.w) +define <16 x i32> @test_27(<16 x i32> %v0, <16 x i32> %v1) #0 { + %t0 = icmp sge <16 x i32> %v0, %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + ret <16 x i32> %t1 +} + +attributes #0 = { readnone nounwind "target-cpu"="hexagonv62" "target-features"="+hvx,+hvx-length64b" } + diff --git a/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll b/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll index 8205c06d996..2252d95979c 100644 --- a/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll +++ b/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll @@ -4,91 +4,91 @@ ; CHECK-LABEL: test_00: ; CHECK: q[[Q000:[0-3]]] = vcmp.eq(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q000]],v0,v1) -define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q000]],v1,v2) +define <128 x i8> @test_00(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp eq <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_01: ; CHECK: q[[Q010:[0-3]]] = vcmp.eq(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q010]],v1,v0) -define <128 x i8> @test_01(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q010]],v2,v1) +define <128 x i8> @test_01(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp ne <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_02: ; CHECK: q[[Q020:[0-3]]] = vcmp.gt(v1.b,v0.b) -; CHECK: v0 = vmux(q[[Q020]],v0,v1) -define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q020]],v1,v2) +define <128 x i8> @test_02(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp slt <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_03: ; CHECK: q[[Q030:[0-3]]] = vcmp.gt(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q030]],v1,v0) -define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q030]],v2,v1) +define <128 x i8> @test_03(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp sle <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_04: ; CHECK: q[[Q040:[0-3]]] = vcmp.gt(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q040]],v0,v1) -define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q040]],v1,v2) +define <128 x i8> @test_04(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp sgt <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_05: ; CHECK: q[[Q050:[0-3]]] = vcmp.gt(v1.b,v0.b) -; CHECK: v0 = vmux(q[[Q050]],v1,v0) -define <128 x i8> @test_05(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q050]],v2,v1) +define <128 x i8> @test_05(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp sge <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_06: ; CHECK: q[[Q060:[0-3]]] = vcmp.gt(v1.ub,v0.ub) -; CHECK: v0 = vmux(q[[Q060]],v0,v1) -define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q060]],v1,v2) +define <128 x i8> @test_06(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp ult <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_07: ; CHECK: q[[Q070:[0-3]]] = vcmp.gt(v0.ub,v1.ub) -; CHECK: v0 = vmux(q[[Q070]],v1,v0) -define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q070]],v2,v1) +define <128 x i8> @test_07(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp ule <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_08: ; CHECK: q[[Q080:[0-3]]] = vcmp.gt(v0.ub,v1.ub) -; CHECK: v0 = vmux(q[[Q080]],v0,v1) -define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q080]],v1,v2) +define <128 x i8> @test_08(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp ugt <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } ; CHECK-LABEL: test_09: ; CHECK: q[[Q090:[0-3]]] = vcmp.gt(v1.ub,v0.ub) -; CHECK: v0 = vmux(q[[Q090]],v1,v0) -define <128 x i8> @test_09(<128 x i8> %v0, <128 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q090]],v2,v1) +define <128 x i8> @test_09(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { %t0 = icmp uge <128 x i8> %v0, %v1 - %t1 = select <128 x i1> %t0, <128 x i8> %v0, <128 x i8> %v1 + %t1 = select <128 x i1> %t0, <128 x i8> %v1, <128 x i8> %v2 ret <128 x i8> %t1 } @@ -195,91 +195,91 @@ define <128 x i8> @test_0i(<128 x i8> %v0, <128 x i8> %v1, <128 x i8> %v2) #0 { ; CHECK-LABEL: test_10: ; CHECK: q[[Q100:[0-3]]] = vcmp.eq(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q100]],v0,v1) -define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q100]],v1,v2) +define <64 x i16> @test_10(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp eq <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_11: ; CHECK: q[[Q110:[0-3]]] = vcmp.eq(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q110]],v1,v0) -define <64 x i16> @test_11(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q110]],v2,v1) +define <64 x i16> @test_11(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp ne <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_12: ; CHECK: q[[Q120:[0-3]]] = vcmp.gt(v1.h,v0.h) -; CHECK: v0 = vmux(q[[Q120]],v0,v1) -define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q120]],v1,v2) +define <64 x i16> @test_12(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp slt <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_13: ; CHECK: q[[Q130:[0-3]]] = vcmp.gt(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q130]],v1,v0) -define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q130]],v2,v1) +define <64 x i16> @test_13(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp sle <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_14: ; CHECK: q[[Q140:[0-3]]] = vcmp.gt(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q140]],v0,v1) -define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q140]],v1,v2) +define <64 x i16> @test_14(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp sgt <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_15: ; CHECK: q[[Q150:[0-3]]] = vcmp.gt(v1.h,v0.h) -; CHECK: v0 = vmux(q[[Q150]],v1,v0) -define <64 x i16> @test_15(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q150]],v2,v1) +define <64 x i16> @test_15(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp sge <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_16: ; CHECK: q[[Q160:[0-3]]] = vcmp.gt(v1.uh,v0.uh) -; CHECK: v0 = vmux(q[[Q160]],v0,v1) -define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q160]],v1,v2) +define <64 x i16> @test_16(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp ult <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_17: ; CHECK: q[[Q170:[0-3]]] = vcmp.gt(v0.uh,v1.uh) -; CHECK: v0 = vmux(q[[Q170]],v1,v0) -define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q170]],v2,v1) +define <64 x i16> @test_17(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp ule <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_18: ; CHECK: q[[Q180:[0-3]]] = vcmp.gt(v0.uh,v1.uh) -; CHECK: v0 = vmux(q[[Q180]],v0,v1) -define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q180]],v1,v2) +define <64 x i16> @test_18(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp ugt <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } ; CHECK-LABEL: test_19: ; CHECK: q[[Q190:[0-3]]] = vcmp.gt(v1.uh,v0.uh) -; CHECK: v0 = vmux(q[[Q190]],v1,v0) -define <64 x i16> @test_19(<64 x i16> %v0, <64 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q190]],v2,v1) +define <64 x i16> @test_19(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { %t0 = icmp uge <64 x i16> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i16> %v0, <64 x i16> %v1 + %t1 = select <64 x i1> %t0, <64 x i16> %v1, <64 x i16> %v2 ret <64 x i16> %t1 } @@ -386,91 +386,91 @@ define <64 x i16> @test_1i(<64 x i16> %v0, <64 x i16> %v1, <64 x i16> %v2) #0 { ; CHECK-LABEL: test_20: ; CHECK: q[[Q200:[0-3]]] = vcmp.eq(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q200]],v0,v1) -define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q200]],v1,v2) +define <32 x i32> @test_20(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp eq <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_21: ; CHECK: q[[Q210:[0-3]]] = vcmp.eq(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q210]],v1,v0) -define <32 x i32> @test_21(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q210]],v2,v1) +define <32 x i32> @test_21(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp ne <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_22: ; CHECK: q[[Q220:[0-3]]] = vcmp.gt(v1.w,v0.w) -; CHECK: v0 = vmux(q[[Q220]],v0,v1) -define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q220]],v1,v2) +define <32 x i32> @test_22(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp slt <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_23: ; CHECK: q[[Q230:[0-3]]] = vcmp.gt(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q230]],v1,v0) -define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q230]],v2,v1) +define <32 x i32> @test_23(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp sle <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_24: ; CHECK: q[[Q240:[0-3]]] = vcmp.gt(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q240]],v0,v1) -define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q240]],v1,v2) +define <32 x i32> @test_24(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp sgt <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_25: ; CHECK: q[[Q250:[0-3]]] = vcmp.gt(v1.w,v0.w) -; CHECK: v0 = vmux(q[[Q250]],v1,v0) -define <32 x i32> @test_25(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q250]],v2,v1) +define <32 x i32> @test_25(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp sge <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_26: ; CHECK: q[[Q260:[0-3]]] = vcmp.gt(v1.uw,v0.uw) -; CHECK: v0 = vmux(q[[Q260]],v0,v1) -define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q260]],v1,v2) +define <32 x i32> @test_26(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp ult <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_27: ; CHECK: q[[Q270:[0-3]]] = vcmp.gt(v0.uw,v1.uw) -; CHECK: v0 = vmux(q[[Q270]],v1,v0) -define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q270]],v2,v1) +define <32 x i32> @test_27(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp ule <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_28: ; CHECK: q[[Q280:[0-3]]] = vcmp.gt(v0.uw,v1.uw) -; CHECK: v0 = vmux(q[[Q280]],v0,v1) -define <32 x i32> @test_28(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q280]],v1,v2) +define <32 x i32> @test_28(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp ugt <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } ; CHECK-LABEL: test_29: ; CHECK: q[[Q290:[0-3]]] = vcmp.gt(v1.uw,v0.uw) -; CHECK: v0 = vmux(q[[Q290]],v1,v0) -define <32 x i32> @test_29(<32 x i32> %v0, <32 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q290]],v2,v1) +define <32 x i32> @test_29(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { %t0 = icmp uge <32 x i32> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i32> %v0, <32 x i32> %v1 + %t1 = select <32 x i1> %t0, <32 x i32> %v1, <32 x i32> %v2 ret <32 x i32> %t1 } diff --git a/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll b/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll index 9475eb6a880..55ac8981a1b 100644 --- a/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll +++ b/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll @@ -4,91 +4,91 @@ ; CHECK-LABEL: test_00: ; CHECK: q[[Q000:[0-3]]] = vcmp.eq(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q000]],v0,v1) -define <64 x i8> @test_00(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q000]],v1,v2) +define <64 x i8> @test_00(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp eq <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_01: ; CHECK: q[[Q010:[0-3]]] = vcmp.eq(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q010]],v1,v0) -define <64 x i8> @test_01(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q010]],v2,v1) +define <64 x i8> @test_01(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp ne <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_02: ; CHECK: q[[Q020:[0-3]]] = vcmp.gt(v1.b,v0.b) -; CHECK: v0 = vmux(q[[Q020]],v0,v1) -define <64 x i8> @test_02(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q020]],v1,v2) +define <64 x i8> @test_02(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp slt <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_03: ; CHECK: q[[Q030:[0-3]]] = vcmp.gt(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q030]],v1,v0) -define <64 x i8> @test_03(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q030]],v2,v1) +define <64 x i8> @test_03(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp sle <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_04: ; CHECK: q[[Q040:[0-3]]] = vcmp.gt(v0.b,v1.b) -; CHECK: v0 = vmux(q[[Q040]],v0,v1) -define <64 x i8> @test_04(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q040]],v1,v2) +define <64 x i8> @test_04(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp sgt <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_05: ; CHECK: q[[Q050:[0-3]]] = vcmp.gt(v1.b,v0.b) -; CHECK: v0 = vmux(q[[Q050]],v1,v0) -define <64 x i8> @test_05(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q050]],v2,v1) +define <64 x i8> @test_05(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp sge <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_06: ; CHECK: q[[Q060:[0-3]]] = vcmp.gt(v1.ub,v0.ub) -; CHECK: v0 = vmux(q[[Q060]],v0,v1) -define <64 x i8> @test_06(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q060]],v1,v2) +define <64 x i8> @test_06(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp ult <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_07: ; CHECK: q[[Q070:[0-3]]] = vcmp.gt(v0.ub,v1.ub) -; CHECK: v0 = vmux(q[[Q070]],v1,v0) -define <64 x i8> @test_07(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q070]],v2,v1) +define <64 x i8> @test_07(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp ule <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_08: ; CHECK: q[[Q080:[0-3]]] = vcmp.gt(v0.ub,v1.ub) -; CHECK: v0 = vmux(q[[Q080]],v0,v1) -define <64 x i8> @test_08(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q080]],v1,v2) +define <64 x i8> @test_08(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp ugt <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } ; CHECK-LABEL: test_09: ; CHECK: q[[Q090:[0-3]]] = vcmp.gt(v1.ub,v0.ub) -; CHECK: v0 = vmux(q[[Q090]],v1,v0) -define <64 x i8> @test_09(<64 x i8> %v0, <64 x i8> %v1) #0 { +; CHECK: v0 = vmux(q[[Q090]],v2,v1) +define <64 x i8> @test_09(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { %t0 = icmp uge <64 x i8> %v0, %v1 - %t1 = select <64 x i1> %t0, <64 x i8> %v0, <64 x i8> %v1 + %t1 = select <64 x i1> %t0, <64 x i8> %v1, <64 x i8> %v2 ret <64 x i8> %t1 } @@ -196,91 +196,91 @@ define <64 x i8> @test_0i(<64 x i8> %v0, <64 x i8> %v1, <64 x i8> %v2) #0 { ; CHECK-LABEL: test_10: ; CHECK: q[[Q100:[0-3]]] = vcmp.eq(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q100]],v0,v1) -define <32 x i16> @test_10(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q100]],v1,v2) +define <32 x i16> @test_10(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp eq <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_11: ; CHECK: q[[Q110:[0-3]]] = vcmp.eq(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q110]],v1,v0) -define <32 x i16> @test_11(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q110]],v2,v1) +define <32 x i16> @test_11(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp ne <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_12: ; CHECK: q[[Q120:[0-3]]] = vcmp.gt(v1.h,v0.h) -; CHECK: v0 = vmux(q[[Q120]],v0,v1) -define <32 x i16> @test_12(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q120]],v1,v2) +define <32 x i16> @test_12(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp slt <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_13: ; CHECK: q[[Q130:[0-3]]] = vcmp.gt(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q130]],v1,v0) -define <32 x i16> @test_13(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q130]],v2,v1) +define <32 x i16> @test_13(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp sle <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_14: ; CHECK: q[[Q140:[0-3]]] = vcmp.gt(v0.h,v1.h) -; CHECK: v0 = vmux(q[[Q140]],v0,v1) -define <32 x i16> @test_14(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q140]],v1,v2) +define <32 x i16> @test_14(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp sgt <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_15: ; CHECK: q[[Q150:[0-3]]] = vcmp.gt(v1.h,v0.h) -; CHECK: v0 = vmux(q[[Q150]],v1,v0) -define <32 x i16> @test_15(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q150]],v2,v1) +define <32 x i16> @test_15(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp sge <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_16: ; CHECK: q[[Q160:[0-3]]] = vcmp.gt(v1.uh,v0.uh) -; CHECK: v0 = vmux(q[[Q160]],v0,v1) -define <32 x i16> @test_16(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q160]],v1,v2) +define <32 x i16> @test_16(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp ult <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_17: ; CHECK: q[[Q170:[0-3]]] = vcmp.gt(v0.uh,v1.uh) -; CHECK: v0 = vmux(q[[Q170]],v1,v0) -define <32 x i16> @test_17(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q170]],v2,v1) +define <32 x i16> @test_17(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp ule <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_18: ; CHECK: q[[Q180:[0-3]]] = vcmp.gt(v0.uh,v1.uh) -; CHECK: v0 = vmux(q[[Q180]],v0,v1) -define <32 x i16> @test_18(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q180]],v1,v2) +define <32 x i16> @test_18(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp ugt <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } ; CHECK-LABEL: test_19: ; CHECK: q[[Q190:[0-3]]] = vcmp.gt(v1.uh,v0.uh) -; CHECK: v0 = vmux(q[[Q190]],v1,v0) -define <32 x i16> @test_19(<32 x i16> %v0, <32 x i16> %v1) #0 { +; CHECK: v0 = vmux(q[[Q190]],v2,v1) +define <32 x i16> @test_19(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { %t0 = icmp uge <32 x i16> %v0, %v1 - %t1 = select <32 x i1> %t0, <32 x i16> %v0, <32 x i16> %v1 + %t1 = select <32 x i1> %t0, <32 x i16> %v1, <32 x i16> %v2 ret <32 x i16> %t1 } @@ -387,91 +387,91 @@ define <32 x i16> @test_1i(<32 x i16> %v0, <32 x i16> %v1, <32 x i16> %v2) #0 { ; CHECK-LABEL: test_20: ; CHECK: q[[Q200:[0-3]]] = vcmp.eq(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q200]],v0,v1) -define <16 x i32> @test_20(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q200]],v1,v2) +define <16 x i32> @test_20(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp eq <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_21: ; CHECK: q[[Q210:[0-3]]] = vcmp.eq(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q210]],v1,v0) -define <16 x i32> @test_21(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q210]],v2,v1) +define <16 x i32> @test_21(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp ne <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_22: ; CHECK: q[[Q220:[0-3]]] = vcmp.gt(v1.w,v0.w) -; CHECK: v0 = vmux(q[[Q220]],v0,v1) -define <16 x i32> @test_22(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q220]],v1,v2) +define <16 x i32> @test_22(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp slt <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_23: ; CHECK: q[[Q230:[0-3]]] = vcmp.gt(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q230]],v1,v0) -define <16 x i32> @test_23(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q230]],v2,v1) +define <16 x i32> @test_23(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp sle <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_24: ; CHECK: q[[Q240:[0-3]]] = vcmp.gt(v0.w,v1.w) -; CHECK: v0 = vmux(q[[Q240]],v0,v1) -define <16 x i32> @test_24(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q240]],v1,v2) +define <16 x i32> @test_24(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp sgt <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_25: ; CHECK: q[[Q250:[0-3]]] = vcmp.gt(v1.w,v0.w) -; CHECK: v0 = vmux(q[[Q250]],v1,v0) -define <16 x i32> @test_25(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q250]],v2,v1) +define <16 x i32> @test_25(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp sge <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_26: ; CHECK: q[[Q260:[0-3]]] = vcmp.gt(v1.uw,v0.uw) -; CHECK: v0 = vmux(q[[Q260]],v0,v1) -define <16 x i32> @test_26(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q260]],v1,v2) +define <16 x i32> @test_26(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp ult <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_27: ; CHECK: q[[Q270:[0-3]]] = vcmp.gt(v0.uw,v1.uw) -; CHECK: v0 = vmux(q[[Q270]],v1,v0) -define <16 x i32> @test_27(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q270]],v2,v1) +define <16 x i32> @test_27(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp ule <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_28: ; CHECK: q[[Q280:[0-3]]] = vcmp.gt(v0.uw,v1.uw) -; CHECK: v0 = vmux(q[[Q280]],v0,v1) -define <16 x i32> @test_28(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q280]],v1,v2) +define <16 x i32> @test_28(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp ugt <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } ; CHECK-LABEL: test_29: ; CHECK: q[[Q290:[0-3]]] = vcmp.gt(v1.uw,v0.uw) -; CHECK: v0 = vmux(q[[Q290]],v1,v0) -define <16 x i32> @test_29(<16 x i32> %v0, <16 x i32> %v1) #0 { +; CHECK: v0 = vmux(q[[Q290]],v2,v1) +define <16 x i32> @test_29(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { %t0 = icmp uge <16 x i32> %v0, %v1 - %t1 = select <16 x i1> %t0, <16 x i32> %v0, <16 x i32> %v1 + %t1 = select <16 x i1> %t0, <16 x i32> %v1, <16 x i32> %v2 ret <16 x i32> %t1 } -- 2.40.0