}
/// Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
-/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
+/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
/// Then bitcast to their original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, const SDLoc &dl) {
APInt Ones = APInt::getAllOnesValue(32);
unsigned NumElts = VT.getSizeInBits() / 32;
- SDValue Vec;
- if (!Subtarget.hasInt256() && NumElts == 8) {
- Vec = DAG.getConstant(Ones, dl, MVT::v4i32);
- Vec = concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
- } else {
- Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
- }
+ SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
}
N01 = peekThroughBitcasts(N01);
- // Either match a direct AllOnes for 128, 256, and 512-bit vectors, or an
- // insert_subvector building a 256-bit AllOnes vector.
- if (!ISD::isBuildVectorAllOnes(N01.getNode())) {
- if (!VT.is256BitVector() || N01->getOpcode() != ISD::INSERT_SUBVECTOR)
- return SDValue();
+ if (!ISD::isBuildVectorAllOnes(N01.getNode()))
+ return SDValue();
- SDValue V1 = N01->getOperand(0);
- SDValue V2 = N01->getOperand(1);
- if (V1.getOpcode() != ISD::INSERT_SUBVECTOR ||
- !V1.getOperand(0).isUndef() ||
- !ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) ||
- !ISD::isBuildVectorAllOnes(V2.getNode()))
- return SDValue();
- }
return DAG.getNode(X86ISD::ANDNP, DL, VT, N00, N1);
}
[(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
+
+// Without AVX2 we need to concat two v4i32 V_SETALLONES to create a 256-bit
+// all ones value.
+let Predicates = [HasAVX1Only] in
+def : Pat<(v8i32 immAllOnesV),
+ (VINSERTF128rr
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), (V_SETALLONES), sub_xmm),
+ (V_SETALLONES), 1)>;
+
multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
PatFrag memop_frag> {
def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),