     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
     SDValue XformToShuffleWithZero(SDNode *N);
-    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+                                      SDValue N1);
+    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                            SDValue N1, SDNodeFlags Flags);
     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
-SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
-                                    SDValue N1, SDNodeFlags Flags) {
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
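+// The caller, DAGCombiner::reassociateOps, invokes this helper with both
+// operand orders, so only the case where the inner operation is \p N0 needs
+// to be handled here.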
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+                                               SDValue N0, SDValue N1) {
+  EVT VT = N0.getValueType();
+
+  if (N0.getOpcode() != Opc)
+    return SDValue();
+
   // Don't reassociate reductions.
-  if (Flags.hasVectorReduction())
+  if (N0->getFlags().hasVectorReduction())
     return SDValue();
-  EVT VT = N0.getValueType();
-  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
-    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
-      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
-        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
-        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
-          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
-        return SDValue();
-      }
-      if (N0.hasOneUse()) {
-        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
-        // use
-        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
-        if (!OpNode.getNode())
-          return SDValue();
-        AddToWorklist(OpNode.getNode());
-        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
-      }
+  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
+      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+      return SDValue();
+    }
     }
-  }
-
-  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
-    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
-      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
-        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
-        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
-          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+    if (N0.hasOneUse()) {
+      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+      //              iff (op x, c1) has one use
+      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+      if (!OpNode.getNode())
         return SDValue();
-      }
-      if (N1.hasOneUse()) {
-        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
-        // use
-        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
-        if (!OpNode.getNode())
-          return SDValue();
-        AddToWorklist(OpNode.getNode());
-        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
-      }
+      AddToWorklist(OpNode.getNode());
+      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
     }
   }
+  return SDValue();
+}
+
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+                                    SDValue N1, SDNodeFlags Flags) {
+  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+  // Don't reassociate reductions.
+  if (Flags.hasVectorReduction())
+    return SDValue();
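+  // Opc is commutative, so try the reassociation with both operand orders.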
+  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+    return Combined;
+  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+    return Combined;
   return SDValue();
 }
return NewSel;
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+ if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// fold ((0-A) + B) -> B-A
N0.getOperand(1), N1));
// reassociate mul
- if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
return NewSel;
// reassociate and
- if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
return BSwap;
// reassociate or
- if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
return NewSel;
// reassociate xor
- if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+ if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
; CHECK-NEXT: b.ne
; Next BB
-; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
-; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
+; CHECK: add [[BLOCKBASE1:x[0-9]+]], [[I1]], [[BLOCKBASE]]
+; CHECK-NEXT: add [[BLOCKBASE2:x[0-9]+]], [[I2]], [[BLOCKBASE]]
; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]
; VI: s_and_b32 s1, s0, 0xffff0000
; VI: s_add_i32 s0, s0, 1
; VI: s_and_b32 s0, s0, 0xffff
-; VI: s_or_b32 s0, s0, s1
+; VI: s_or_b32 s0, s1, s0
; VI: s_add_i32 s0, s0, 0x10000
; VI: v_mov_b32_e32 v0, s0
; SI: s_lshl_b32 s1, s1, 16
; SI: s_add_i32 s0, s0, 1
; SI: s_and_b32 s0, s0, 0xffff
-; SI: s_or_b32 s0, s0, s1
+; SI: s_or_b32 s0, s1, s0
; SI: s_add_i32 s0, s0, 0x10000
define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
%add = add <2 x i16> %arg0, <i16 1, i16 1>
; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v1
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v1
; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v6
-; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4
; SI-NEXT: v_and_b32_e32 v7, s12, v7
+; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_or_b32_e32 v1, v7, v6
+; SI-NEXT: v_or_b32_e32 v0, v6, v7
; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
-; SI-NEXT: v_and_b32_e32 v0, s12, v4
-; SI-NEXT: v_or_b32_e32 v0, v0, v5
-; SI-NEXT: v_add_i32_e32 v1, vcc, 0x900, v1
-; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; SI-NEXT: v_and_b32_e32 v1, s12, v4
+; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0
+; SI-NEXT: v_or_b32_e32 v1, v5, v1
+; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: v_add_u16_e32 v9, 9, v5
; VI-NEXT: v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v7
-; VI-NEXT: v_or_b32_sdwa v0, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; VI-NEXT: v_add_u16_e32 v0, s8, v0
; VI-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3
-; SI: s_add_i32 [[TMP:s[0-9]+]], s[[Y]], [[SHL3]]
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], s[[Y]]
; SI: s_addk_i32 [[TMP]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
; SI: buffer_store_dword [[VRESULT]]
; SI-NEXT: s_add_i32 s0, s0, 12
; SI-NEXT: s_or_b32 s0, s0, 4
; SI-NEXT: s_and_b32 s0, s0, 0xff
-; SI-NEXT: s_or_b32 s0, s0, s1
+; SI-NEXT: s_or_b32 s0, s1, s0
; SI-NEXT: s_addk_i32 s0, 0x2c00
; SI-NEXT: s_or_b32 s0, s0, 0x300
; SI-NEXT: v_mov_b32_e32 v0, s0
define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) {
; ARM-LABEL: cmp_and8_short_int:
; ARM: @ %bb.0: @ %entry
-; ARM-NEXT: ldrb r0, [r0]
; ARM-NEXT: ldrb r1, [r1]
-; ARM-NEXT: and r0, r1, r0
+; ARM-NEXT: ldrb r0, [r0]
+; ARM-NEXT: and r0, r0, r1
; ARM-NEXT: clz r0, r0
; ARM-NEXT: lsr r0, r0, #5
; ARM-NEXT: bx lr
;
; ARMEB-LABEL: cmp_and8_short_int:
; ARMEB: @ %bb.0: @ %entry
-; ARMEB-NEXT: ldrb r0, [r0, #1]
; ARMEB-NEXT: ldrb r1, [r1, #3]
-; ARMEB-NEXT: and r0, r1, r0
+; ARMEB-NEXT: ldrb r0, [r0, #1]
+; ARMEB-NEXT: and r0, r0, r1
; ARMEB-NEXT: clz r0, r0
; ARMEB-NEXT: lsr r0, r0, #5
; ARMEB-NEXT: bx lr
;
; THUMB1-LABEL: cmp_and8_short_int:
; THUMB1: @ %bb.0: @ %entry
-; THUMB1-NEXT: ldrb r0, [r0]
; THUMB1-NEXT: ldrb r1, [r1]
-; THUMB1-NEXT: ands r1, r0
-; THUMB1-NEXT: rsbs r0, r1, #0
-; THUMB1-NEXT: adcs r0, r1
+; THUMB1-NEXT: ldrb r2, [r0]
+; THUMB1-NEXT: ands r2, r1
+; THUMB1-NEXT: rsbs r0, r2, #0
+; THUMB1-NEXT: adcs r0, r2
; THUMB1-NEXT: bx lr
;
; THUMB2-LABEL: cmp_and8_short_int:
; THUMB2: @ %bb.0: @ %entry
-; THUMB2-NEXT: ldrb r0, [r0]
; THUMB2-NEXT: ldrb r1, [r1]
+; THUMB2-NEXT: ldrb r0, [r0]
; THUMB2-NEXT: ands r0, r1
; THUMB2-NEXT: clz r0, r0
; THUMB2-NEXT: lsrs r0, r0, #5
; ARM-LABEL: test6:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
-; ARM-NEXT: and r0, r0, r1
+; ARM-NEXT: and r0, r1, r0
; ARM-NEXT: uxtb r1, r2
; ARM-NEXT: sub r0, r0, r1
; ARM-NEXT: clz r0, r0
; ARMEB-LABEL: test6:
; ARMEB: @ %bb.0: @ %entry
; ARMEB-NEXT: ldrb r0, [r0]
-; ARMEB-NEXT: and r0, r0, r1
+; ARMEB-NEXT: and r0, r1, r0
; ARMEB-NEXT: uxtb r1, r2
; ARMEB-NEXT: sub r0, r0, r1
; ARMEB-NEXT: clz r0, r0
; ARM-LABEL: test7:
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
-; ARM-NEXT: and r0, r0, r1
+; ARM-NEXT: and r0, r1, r0
; ARM-NEXT: uxtb r1, r2
; ARM-NEXT: sub r0, r0, r1
; ARM-NEXT: clz r0, r0
; ARMEB-LABEL: test7:
; ARMEB: @ %bb.0: @ %entry
; ARMEB-NEXT: ldrb r0, [r0, #1]
-; ARMEB-NEXT: and r0, r0, r1
+; ARMEB-NEXT: and r0, r1, r0
; ARMEB-NEXT: uxtb r1, r2
; ARMEB-NEXT: sub r0, r0, r1
; ARMEB-NEXT: clz r0, r0
ret i64 %and
}
+define void @test27(i32* nocapture %ptr) {
; ARM-LABEL: test27:
-; ARM: @ %bb.0:
+; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r1, [r0, #1]
; ARM-NEXT: lsl r1, r1, #16
; ARM-NEXT: str r1, [r0]
; ARM-NEXT: bx lr
;
; ARMEB-LABEL: test27:
-; ARMEB: @ %bb.0:
-; ARMEB-NEXT: ldrb r1, [r0, #2]
-; ARMEB-NEXT: lsl r1, r1, #16
-; ARMEB-NEXT: str r1, [r0]
-; ARMEB-NEXT: bx lr
+; ARMEB: @ %bb.0: @ %entry
+; ARMEB-NEXT: ldrb r1, [r0, #2]
+; ARMEB-NEXT: lsl r1, r1, #16
+; ARMEB-NEXT: str r1, [r0]
+; ARMEB-NEXT: bx lr
;
; THUMB1-LABEL: test27:
-; THUMB1: @ %bb.0:
-; THUMB1-NEXT: ldrb r1, [r0, #1]
-; THUMB1-NEXT: lsls r1, r1, #16
-; THUMB1-NEXT: str r1, [r0]
-; THUMB1-NEXT: bx lr
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: ldrb r1, [r0, #1]
+; THUMB1-NEXT: lsls r1, r1, #16
+; THUMB1-NEXT: str r1, [r0]
+; THUMB1-NEXT: bx lr
;
; THUMB2-LABEL: test27:
-; THUMB2: @ %bb.0:
+; THUMB2: @ %bb.0: @ %entry
; THUMB2-NEXT: ldrb r1, [r0, #1]
; THUMB2-NEXT: lsls r1, r1, #16
; THUMB2-NEXT: str r1, [r0]
; THUMB2-NEXT: bx lr
-define void @test27(i32* nocapture %ptr) {
entry:
%0 = load i32, i32* %ptr, align 4
%and = and i32 %0, 65280
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK: add r0, r0, r1
+; CHECK: add r0, r1, r0
; CHECK-NEXT: mov r1, #65280
; CHECK-NEXT: mov r2, #16711680
; CHECK-NEXT: ldr r0, [r0, #13]
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6: add r0, r1, r0
; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
; CHECK-ARMv6-NEXT: rev r0, r0
; CHECK-ARMv6-NEXT: bx lr
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK: add r0, r0, r1
+; CHECK: add r0, r1, r0
; CHECK-NEXT: ldr r0, [r0, #13]
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6: add r0, r1, r0
; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
; CHECK-ARMv6-NEXT: bx lr
%tmp = add nuw nsw i32 %i, 4
; CHECK-NEXT: vn %v0, %v0, %v0
; CHECK-NEXT: vno %v2, %v2, %v2
; CHECK-NEXT: vceqg %v0, %v0, %v1
-; CHECK-NEXT: vx %v0, %v2, %v0
+; CHECK-NEXT: vx %v0, %v0, %v2
; CHECK-NEXT: vnc %v0, %v2, %v0
; CHECK-NEXT: vlgvf %r0, %v0, 1
; CHECK-NEXT: tmll %r0, 1
; CHECK-V6M-NEXT: adds r0, r1, r0
; CHECK-V6M-NEXT: bx lr
; CHECK-V6M-NEXT: .LBB0_5:
-; CHECK-V6M-NEXT: adds r0, r1, r0
+; CHECK-V6M-NEXT: adds r0, r0, r1
; CHECK-V6M-NEXT: adds r0, r0, #4
; CHECK-V6M-NEXT: .LBB0_6:
; CHECK-V6M-NEXT: bx lr
; CHECK-V6M-NEXT: .LBB0_7:
-; CHECK-V6M-NEXT: adds r0, r1, r0
+; CHECK-V6M-NEXT: adds r0, r0, r1
; CHECK-V6M-NEXT: adds r0, r0, #1
; CHECK-V6M-NEXT: bx lr
; CHECK-V6M-NEXT: .LBB0_8:
-; CHECK-V6M-NEXT: adds r0, r1, r0
+; CHECK-V6M-NEXT: adds r0, r0, r1
; CHECK-V6M-NEXT: adds r0, r0, #2
; CHECK-V6M-NEXT: bx lr
; CHECK-V6M-NEXT: .p2align 2
; NON-PIC-NEXT: i32.load $push4=, 0($pop3){{$}}
; NON-PIC-NEXT: return $pop4{{$}}
-; PIC-NEXT: global.get $push2=, g@GOT{{$}}
; PIC-NEXT: i32.const $push0=, 2{{$}}
; PIC-NEXT: i32.shl $push1=, $0, $pop0{{$}}
-; PIC-NEXT: i32.add $push3=, $pop2, $pop1{{$}}
+; PIC-NEXT: global.get $push2=, g@GOT{{$}}
+; PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}}
; PIC-NEXT: i32.const $push4=, -40{{$}}
; PIC-NEXT: i32.add $push5=, $pop3, $pop4{{$}}
; PIC-NEXT: i32.load $push6=, 0($pop5){{$}}
; CHECK-NEXT: .functype load_test12 (i32, i32) -> (i32){{$}}
; CHECK-NEXT: i32.const $push0=, 2{{$}}
; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
; CHECK-NEXT: i32.const $push3=, 40{{$}}
; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
; CHECK-NEXT: .functype load_test13 (i32, i32) -> (i32){{$}}
; CHECK-NEXT: i32.const $push0=, 2{{$}}
; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
; CHECK-NEXT: i32.const $push3=, 40{{$}}
; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
; CHECK-NEXT: .functype load_test17 (i32, i32) -> (i32){{$}}
; CHECK-NEXT: i32.const $push0=, 2{{$}}
; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
; CHECK-NEXT: i32.const $push3=, 40{{$}}
; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
; CHECK-NEXT: .functype load_test19 (i32, i32) -> (i32){{$}}
; CHECK-NEXT: i32.const $push0=, 2{{$}}
; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
; CHECK-NEXT: i32.const $push3=, 40{{$}}
; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
; CHECK-NEXT: .functype load_test21 (i32, i32) -> (i32){{$}}
; CHECK-NEXT: i32.const $push0=, 2{{$}}
; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
; CHECK-NEXT: i32.const $push3=, -40{{$}}
; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
; NON-PIC-NEXT: i32.const $push2=, g-40{{$}}
; NON-PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}}
; NON-PIC-NEXT: i32.store 0($pop3), $1{{$}}
-; PIC-NEXT: global.get $push2=, g@GOT{{$}}
; PIC-NEXT: i32.const $push0=, 2{{$}}
; PIC-NEXT: i32.shl $push1=, $0, $pop0{{$}}
-; PIC-NEXT: i32.add $push3=, $pop2, $pop1{{$}}
+; PIC-NEXT: global.get $push2=, g@GOT{{$}}
+; PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}}
; PIC-NEXT: i32.const $push4=, -40{{$}}
; PIC-NEXT: i32.add $push5=, $pop3, $pop4{{$}}
; PIC-NEXT: i32.store 0($pop5), $1{{$}}
; CHECK-NEXT: .functype store_test12 (i32, i32, i32) -> (){{$}}
; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
; CHECK-NEXT: .functype store_test13 (i32, i32, i32) -> (){{$}}
; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
; CHECK-NEXT: .functype store_test17 (i32, i32, i32) -> (){{$}}
; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
; CHECK-NEXT: .functype store_test19 (i32, i32, i32) -> (){{$}}
; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
; CHECK-NEXT: .functype store_test21 (i32, i32, i32) -> (){{$}}
; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
; NON-PIC-NEXT: i32.const $push3=, -40{{$}}
; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
; CHECK-LABEL: add_nsw_sext_add:
; CHECK: # %bb.0:
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: leaq 5(%rsi,%rax), %rax
+; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
; CHECK-LABEL: gep8:
; CHECK: # %bb.0:
; CHECK-NEXT: movslq %edi, %rax
-; CHECK-NEXT: leaq 5(%rsi,%rax), %rax
+; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
; CHECK: # %bb.0:
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: shlq $4, %rax
-; CHECK-NEXT: leaq 80(%rsi,%rax), %rax
+; CHECK-NEXT: leaq 80(%rax,%rsi), %rax
; CHECK-NEXT: retq
%add = add nsw i32 %i, 5
; The same as @PR20134 but sign extension is replaced with zero extension
define void @PR20134_zext(i32* %a, i32 %i) {
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
-; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
-; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
-; CHECK-NEXT: retq
+; CHECK-LABEL: PR20134_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
+; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
+; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
+; CHECK-NEXT: retq
%add1 = add nuw i32 %i, 1
%idx1 = zext i32 %add1 to i64
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: imull $400, %ecx, %edx # imm = 0x190
-; CHECK-NEXT: leal (%eax,%edx), %esi
+; CHECK-NEXT: leal (%edx,%eax), %esi
; CHECK-NEXT: movl $11, 2020(%esi,%ecx,4)
-; CHECK-NEXT: movl $22, 2080(%eax,%edx)
-; CHECK-NEXT: movl $33, 10080(%eax,%edx)
+; CHECK-NEXT: movl $22, 2080(%edx,%eax)
+; CHECK-NEXT: movl $33, 10080(%edx,%eax)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
entry:
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
-; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
+; CHECK64-NEXT: movl 13(%rax,%rdi), %eax
; CHECK64-NEXT: retq
%tmp = add nuw nsw i32 %i, 4
%tmp2 = add nuw nsw i32 %i, 3
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%eax,%ecx), %eax
+; CHECK-NEXT: movl 12(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%eax,%ecx), %eax
+; CHECK-NEXT: movl 12(%ecx,%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; GENERIC-NEXT: movzbl 2(%r8,%rbx,4), %ebx
; GENERIC-NEXT: shll $16, %ebx
; GENERIC-NEXT: orl %eax, %ebx
-; GENERIC-NEXT: xorl 16(%rdx,%rcx), %ebx
+; GENERIC-NEXT: xorl 16(%rcx,%rdx), %ebx
; GENERIC-NEXT: shrl $8, %edi
; GENERIC-NEXT: movzbl 3(%r9,%rdi,4), %eax
; GENERIC-NEXT: shll $24, %eax
; GENERIC-NEXT: movzbl 2(%r8,%rdi,4), %edi
; GENERIC-NEXT: shll $16, %edi
; GENERIC-NEXT: orl %eax, %edi
-; GENERIC-NEXT: xorl 20(%rdx,%rcx), %edi
+; GENERIC-NEXT: xorl 20(%rcx,%rdx), %edi
; GENERIC-NEXT: movl %ebx, %eax
; GENERIC-NEXT: shrl $24, %eax
; GENERIC-NEXT: movb %al, (%rsi)
; ATOM-NEXT: shll $16, %eax
; ATOM-NEXT: orl %edi, %ebp
; ATOM-NEXT: orl %r15d, %eax
-; ATOM-NEXT: xorl 20(%rdx,%rcx), %ebp
-; ATOM-NEXT: xorl 16(%rdx,%rcx), %eax
+; ATOM-NEXT: xorl 20(%rcx,%rdx), %ebp
+; ATOM-NEXT: xorl 16(%rcx,%rdx), %eax
; ATOM-NEXT: movl %eax, %edi
; ATOM-NEXT: shrl $16, %eax
; ATOM-NEXT: shrl $24, %edi
define void @indexed_store_merge(i64 %p, i8* %v) {
; CHECK-LABEL: indexed_store_merge:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl $0, 2(%rsi,%rdi)
+; CHECK-NEXT: movl $0, 2(%rdi,%rsi)
; CHECK-NEXT: movb $0, (%rsi)
; CHECK-NEXT: retq
entry:
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: movdqu (%rsi), %xmm1
; SSE2-NEXT: psadbw %xmm0, %xmm1
+; SSE2-NEXT: movdqu (%rdx), %xmm0
+; SSE2-NEXT: movdqu (%rcx), %xmm2
+; SSE2-NEXT: psadbw %xmm0, %xmm2
; SSE2-NEXT: movl $1, %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: paddd %xmm1, %xmm0
-; SSE2-NEXT: movdqu (%rdx), %xmm1
-; SSE2-NEXT: movdqu (%rcx), %xmm2
-; SSE2-NEXT: psadbw %xmm1, %xmm2
-; SSE2-NEXT: paddd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT: paddd %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE2-NEXT: paddd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: paddd %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: paddd %xmm1, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: sad_unroll_nonzero_initial:
; AVX1-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
; AVX1-NEXT: movl $1, %eax
; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2: # %bb.0: # %bb
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
; AVX2-NEXT: vpsadbw (%rsi), %xmm0, %xmm0
-; AVX2-NEXT: movl $1, %eax
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqu (%rdx), %xmm1
; AVX2-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
-; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: movl $1, %eax
+; AVX2-NEXT: vmovd %eax, %xmm2
+; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512: # %bb.0: # %bb
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
; AVX512-NEXT: vpsadbw (%rsi), %xmm0, %xmm0
-; AVX512-NEXT: movl $1, %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovdqu (%rdx), %xmm1
; AVX512-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
-; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: movl $1, %eax
+; AVX512-NEXT: vmovd %eax, %xmm2
+; AVX512-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vandps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
%xz = zext <8 x i8> %x to <8 x i16>