return;
break;
- case ISD::EXTRACT_VECTOR_ELT: {
- // Extracting lane zero is a special case where we can just use a plain
- // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
- // the rest of the compiler, especially the register allocator and copy
- // propagation, to reason about, so is preferred when it's possible to
- // use it.
- ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
- // Bail and use the default Select() for non-zero lanes.
- if (LaneNode->getZExtValue() != 0)
- break;
- // If the element type is not the same as the result type, likewise
- // bail and use the default Select(), as there's more to do than just
- // a cross-class COPY. This catches extracts of i8 and i16 elements
- // since they will need an explicit zext.
- if (VT != Node->getOperand(0).getValueType().getVectorElementType())
- break;
- unsigned SubReg;
- switch (Node->getOperand(0)
- .getValueType()
- .getVectorElementType()
- .getSizeInBits()) {
- default:
- llvm_unreachable("Unexpected vector element type!");
- case 64:
- SubReg = AArch64::dsub;
- break;
- case 32:
- SubReg = AArch64::ssub;
- break;
- case 16:
- SubReg = AArch64::hsub;
- break;
- case 8:
- llvm_unreachable("unexpected zext-requiring extract element!");
- }
- SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
- Node->getOperand(0));
- LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
- LLVM_DEBUG(Extract->dumpr(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
- ReplaceNode(Node, Extract.getNode());
- return;
- }
case ISD::Constant: {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.
// Pseudo instruction that materializes an MC symbol's value into a 64-bit GPR.
// It carries no selection pattern of its own ([]), so it is only produced via
// the Pat below; presumably expanded to a real MOV sequence in a later pass
// (e.g. pseudo expansion) — confirm in the AArch64 pseudo-expansion code.
def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
// Select an i64 AArch64LocalRecover of an MC symbol to the MOVMCSym pseudo.
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
+// Extracting lane zero is a special case where we can just use a plain
+// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
+// rest of the compiler, especially the register allocator and copy propagation,
+// to reason about, so is preferred when it's possible to use it.
+// Note: only i32/i64 element types are covered here; i8/i16 lane-zero extracts
+// are deliberately excluded since they would additionally require a zext and
+// cannot be expressed as a bare subregister copy.
+// AddedComplexity raises these patterns' priority so they win over the generic
+// extract-element lowering when both match.
+let AddedComplexity = 10 in {
+  // v2i64 lane 0: read the low 64 bits of the 128-bit register via dsub.
+  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
+  // v4i32 / v2i32 lane 0: read the low 32 bits via ssub.
+  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
+  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
+}
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"