SubVec.getOperand(1),
DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
}
+
+ // If we're inserting a bitcast into zeros, rewrite the insert and move the
+ // bitcast to the other side. This helps with detecting zero extending
+ // during isel.
+ // TODO: Is this useful for other indices than 0?
+ if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
+ MVT CastVT = SubVec.getOperand(0).getSimpleValueType();
+ unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits();
+ MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems);
+ SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
+ DAG.getBitcast(NewVT, Vec),
+ SubVec.getOperand(0), N->getOperand(2));
+ return DAG.getBitcast(OpVT, Insert);
+ }
}
// If this is an insert of an extract, combine to a shuffle. Don't do this
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
; AVX2-NEXT: vmovdqu a+1024(%rax), %xmm2
; AVX2-NEXT: vpsadbw b+1024(%rax), %xmm2, %xmm2
-; AVX2-NEXT: vmovdqa %xmm2, %xmm2
; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: addq $4, %rax
; AVX2-NEXT: jne .LBB0_1
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512F-NEXT: vmovdqu a+1024(%rax), %xmm1
; AVX512F-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
-; AVX512F-NEXT: vmovdqa %xmm1, %xmm1
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB0_1
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512BW-NEXT: vmovdqu a+1024(%rax), %xmm1
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %xmm1, %xmm1
-; AVX512BW-NEXT: vmovdqa %xmm1, %xmm1
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB0_1
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512F-NEXT: vmovdqa a+1024(%rax), %ymm2
; AVX512F-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
-; AVX512F-NEXT: vmovdqa %ymm2, %ymm2
; AVX512F-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; AVX512F-NEXT: addq $4, %rax
; AVX512F-NEXT: jne .LBB1_1
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512BW-NEXT: vmovdqa a+1024(%rax), %ymm2
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %ymm2, %ymm2
-; AVX512BW-NEXT: vmovdqa %ymm2, %ymm2
; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: addq $4, %rax
; AVX512BW-NEXT: jne .LBB1_1