}
}
+ SDValue NewV1 = V1; // Save operand in case early exit happens.
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT) &&
+ NewV1, DL, DAG, Subtarget, Shuffle,
+ ShuffleSrcVT, ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- Res = DAG.getBitcast(ShuffleSrcVT, V1);
+ Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());
}
}
+ SDValue NewV1 = V1; // Save operands in case early exit happens.
+ SDValue NewV2 = V2;
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- V1, V2, DL, DAG, Subtarget, Shuffle,
+ NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(ShuffleSrcVT, V1);
- DCI.AddToWorklist(V1.getNode());
- V2 = DAG.getBitcast(ShuffleSrcVT, V2);
- DCI.AddToWorklist(V2.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
+ NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
+ DCI.AddToWorklist(NewV1.getNode());
+ NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
+ DCI.AddToWorklist(NewV2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, V1, V2, DL, DAG,
- Subtarget, Shuffle, ShuffleVT,
- PermuteImm) &&
+ NewV1 = V1; // Save operands in case early exit happens.
+ NewV2 = V2;
+ if (matchBinaryPermuteVectorShuffle(
+ MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
+ NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(ShuffleVT, V1);
- DCI.AddToWorklist(V1.getNode());
- V2 = DAG.getBitcast(ShuffleVT, V2);
- DCI.AddToWorklist(V2.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
+ NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
+ DCI.AddToWorklist(NewV1.getNode());
+ NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
+ DCI.AddToWorklist(NewV2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);
ret <2 x double> %res
}
+; PR35977
+; Test case for PR35977: an <8 x i8> vector is loaded from %arg, widened to
+; <8 x i16> one lane at a time (extractelement + zext + insertelement), every
+; lane is shifted left by 8, and the result is stored to %arg1.
+; The CHECK lines expect this to lower to a single vpmovzxbw from memory,
+; followed by vpsllw $8 and a vmovdqa store.
+define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
+; CHECK-LABEL: test_zext_v8i8_to_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0
+; CHECK-NEXT: vmovdqa %xmm0, (%rsi)
+; CHECK-NEXT: retq
+ %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
+ %tmp2 = load <8 x i8>, <8 x i8>* %tmp
+ ; Lanes 0-7: extract each i8, zero-extend it to i16, and insert it at the
+ ; same index of the <8 x i16> result being built up from undef.
+ %tmp3 = extractelement <8 x i8> %tmp2, i32 0
+ %tmp4 = zext i8 %tmp3 to i16
+ %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0
+ %tmp6 = extractelement <8 x i8> %tmp2, i32 1
+ %tmp7 = zext i8 %tmp6 to i16
+ %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1
+ %tmp9 = extractelement <8 x i8> %tmp2, i32 2
+ %tmp10 = zext i8 %tmp9 to i16
+ %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2
+ %tmp12 = extractelement <8 x i8> %tmp2, i32 3
+ %tmp13 = zext i8 %tmp12 to i16
+ %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3
+ %tmp15 = extractelement <8 x i8> %tmp2, i32 4
+ %tmp16 = zext i8 %tmp15 to i16
+ %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4
+ %tmp18 = extractelement <8 x i8> %tmp2, i32 5
+ %tmp19 = zext i8 %tmp18 to i16
+ %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5
+ %tmp21 = extractelement <8 x i8> %tmp2, i32 6
+ %tmp22 = zext i8 %tmp21 to i16
+ %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6
+ %tmp24 = extractelement <8 x i8> %tmp2, i32 7
+ %tmp25 = zext i8 %tmp24 to i16
+ %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7
+ ; Shift every i16 lane left by 8 bits, then store the vector through %arg1.
+ %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0
+ store <8 x i16> %tmp27, <8 x i16>* %tmp28
+ ret void
+}