RecursionLimit);
}
+/// For the given destination element of a shuffle, peek through shuffles to
+/// match a root vector source operand that contains that element in the same
+/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
+static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
+ Constant *Mask, Value *RootVec, int RootElt,
+ unsigned MaxRecurse) {
+ if (!MaxRecurse--)
+ return nullptr;
+
+ // Bail out if any mask value is undefined. That kind of shuffle may be
+ // simplified further based on demanded bits or other folds.
+ int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt);
+ if (MaskVal == -1)
+ return nullptr;
+
+ // The mask value chooses which source operand we need to look at next.
+ Value *SourceOp;
+ int InVecNumElts = Op0->getType()->getVectorNumElements();
+ if (MaskVal < InVecNumElts) {
+ RootElt = MaskVal;
+ SourceOp = Op0;
+ } else {
+ RootElt = MaskVal - InVecNumElts;
+ SourceOp = Op1;
+ }
+
+ // If the source operand is a shuffle itself, look through it to find the
+ // matching root vector.
+ if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
+ return foldIdentityShuffles(
+ DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
+ SourceShuf->getMask(), RootVec, RootElt, MaxRecurse);
+ }
+
+ // TODO: Look through bitcasts? What if the bitcast changes the vector element
+ // size?
+
+ // The source operand is not a shuffle. Initialize the root vector value for
+ // this shuffle if that has not been done yet.
+ if (!RootVec)
+ RootVec = SourceOp;
+
+ // Give up as soon as a source operand does not match the existing root value.
+ if (RootVec != SourceOp)
+ return nullptr;
+
+ // The element must be coming from the same lane in the source vector
+ // (although it may have crossed lanes in intermediate shuffles).
+ if (RootElt != DestElt)
+ return nullptr;
+
+ return RootVec;
+}
+
static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
Type *RetTy, const Query &Q,
unsigned MaxRecurse) {
OpShuf->getMask()->getSplatValue())
return Op1;
- return nullptr;
+ // Don't fold a shuffle with undef mask elements. This may get folded in a
+ // better way using demanded bits or other analysis.
+ // TODO: Should we allow this?
+ for (unsigned i = 0; i != MaskNumElts; ++i)
+ if (ShuffleVectorInst::getMaskValue(Mask, i) == -1)
+ return nullptr;
+
+ // Check if every element of this shuffle can be mapped back to the
+ // corresponding element of a single root vector. If so, we don't need this
+ // shuffle. This handles simple identity shuffles as well as chains of
+ // shuffles that may widen/narrow and/or move elements across lanes and back.
+ Value *RootVec = nullptr;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ // Note that recursion is limited for each vector element, so if any element
+ // exceeds the limit, this will fail to simplify.
+ RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse);
+
+ // We can't replace a widening/narrowing shuffle with one of its operands.
+ if (!RootVec || RootVec->getType() != RetTy)
+ return nullptr;
+ }
+ return RootVec;
}
/// Given operands for a ShuffleVectorInst, fold the result or return null.
define <4 x i32> @identity_mask_0(<4 x i32> %x) {
; CHECK-LABEL: @identity_mask_0(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %shuf
define <4 x i32> @identity_mask_1(<4 x i32> %x) {
; CHECK-LABEL: @identity_mask_1(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> undef, <4 x i32> [[X:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %shuf
define <4 x i32> @pseudo_identity_mask(<4 x i32> %x) {
; CHECK-LABEL: @pseudo_identity_mask(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i32> %shuf
}
+define <4 x i32> @not_identity_mask(<4 x i32> %x) {
+; CHECK-LABEL: @not_identity_mask(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+;
+ %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+ ret <4 x i32> %shuf
+}
+
+; TODO: Should we simplify if the mask has an undef element?
+
+define <4 x i32> @possible_identity_mask(<4 x i32> %x) {
+; CHECK-LABEL: @possible_identity_mask(
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: ret <4 x i32> [[SHUF]]
+;
+ %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x i32> %shuf
+}
+
define <4 x i32> @const_operand(<4 x i32> %x) {
; CHECK-LABEL: @const_operand(
; CHECK-NEXT: ret <4 x i32> <i32 42, i32 45, i32 44, i32 43>
define <4 x i32> @merge(<4 x i32> %x) {
; CHECK-LABEL: @merge(
-; CHECK-NEXT: [[LOWER:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[UPPER:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[UPPER]], <2 x i32> [[LOWER]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
-; CHECK-NEXT: ret <4 x i32> [[MERGED]]
+; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%lower = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 1, i32 0>
%upper = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
ret <4 x i32> %merged
}
+; This crosses lanes from the source op.
+
+define <4 x i32> @not_merge(<4 x i32> %x) {
+; CHECK-LABEL: @not_merge(
+; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[U:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[U]], <2 x i32> [[L]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+; CHECK-NEXT: ret <4 x i32> [[MERGED]]
+;
+ %l = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %u = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %merged = shufflevector <2 x i32> %u, <2 x i32> %l, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+ ret <4 x i32> %merged
+}
+
define <8 x double> @extract_and_concat(<8 x double> %x) {
; CHECK-LABEL: @extract_and_concat(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x double> [[X:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
-; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x double> [[S1]], <2 x double> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x double> [[S3]], <2 x double> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x double> [[S5]], <4 x double> [[S6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x double> [[S7]]
+; CHECK-NEXT: ret <8 x double> [[X:%.*]]
;
%s1 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%s2 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 2, i32 3>
define <8 x i64> @PR30630(<8 x i64> %x) {
; CHECK-LABEL: @PR30630(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i64> [[X:%.*]], <8 x i64> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x i64> [[S1]], <2 x i64> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x i64> [[S3]], <2 x i64> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x i64> [[S5]], <4 x i64> [[S6]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: ret <8 x i64> [[S7]]
+; CHECK-NEXT: ret <8 x i64> [[X:%.*]]
;
%s1 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
%s2 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 1, i32 5>