/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
+/// (or (add v v) (shrl v bitwidth-1)):
+/// expands (add v v) -> (shl v 1)
+///
/// (or (mul v c0) (shrl (mul v c1) c2)):
/// expands (mul v c0) -> (shl (mul v c1) c3)
///
"Existing shift must be valid as a rotate half");
ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+ // The value being shifted by the existing shift (its LHS operand), and that
+ // value's type.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
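+ // (add v v) -> (shl v 1)
+ // v + v is v << 1, so when the opposing shift is (srl v, bitwidth-1) the
+ // add can be re-expressed as the matching shl half of the pair.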
+ if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+ ExtractFrom.getOpcode() == ISD::ADD &&
+ ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+ ExtractFrom.getOperand(0) == OppShiftLHS &&
+ OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+ DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
// Preconditions:
// (or (op0 v c0) (shiftl/r (op0 v c1) c2))
//
// op0 must be the same opcode on both sides, have the same LHS argument,
// and produce the same value type.
- SDValue OppShiftLHS = OppShift.getOperand(0);
- EVT ShiftedVT = OppShiftLHS.getValueType();
if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
ShiftedVT != ExtractFrom.getValueType())
return SDValue();
- // Amount of the existing shift.
- ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
define <4 x i32> @extract_add_1(<4 x i32> %i) nounwind {
; CHECK-LABEL: extract_add_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm1
-; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpor %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; CHECK-NEXT: vprold $1, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%ii = add <4 x i32> %i, %i
%rhs = lshr <4 x i32> %i, <i32 31, i32 31, i32 31, i32 31>
define <4 x i32> @extract_add_1_comut(<4 x i32> %i) nounwind {
; CHECK-LABEL: extract_add_1_comut:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm1
-; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; CHECK-NEXT: vprold $1, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%ii = add <4 x i32> %i, %i
%lhs = lshr <4 x i32> %i, <i32 31, i32 31, i32 31, i32 31>
define i32 @extract_add_1(i32 %i) nounwind {
; X86-LABEL: extract_add_1:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal (%ecx,%ecx), %eax
-; X86-NEXT: shrl $31, %ecx
-; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: roll %eax
; X86-NEXT: retl
;
; X64-LABEL: extract_add_1:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (%rdi,%rdi), %eax
-; X64-NEXT: shrl $31, %edi
-; X64-NEXT: orl %edi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: roll %eax
; X64-NEXT: retq
%ii = add i32 %i, %i
%rhs = lshr i32 %i, 31
; X86-LABEL: extract_add_1_comut:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax), %ecx
-; X86-NEXT: shrl $31, %eax
-; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: roll %eax
; X86-NEXT: retl
;
; X64-LABEL: extract_add_1_comut:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (%rdi,%rdi), %eax
-; X64-NEXT: shrl $31, %edi
-; X64-NEXT: orl %edi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: roll %eax
; X64-NEXT: retq
%ii = add i32 %i, %i
%lhs = lshr i32 %i, 31