SmallSet<LoadSDNode *, 8> Loads;
LoadSDNode *FirstLoad = nullptr;
+ int64_t FirstOffset = INT64_MAX;
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto ByteAt = IsBigEndianTarget ? BigEndianByteAt : LittleEndianByteAt;
ByteOffsets[i] = ByteOffsetFromBase;
- // Remember the first byte load
+ // Remember the load with the lowest byte offset
- if (ByteOffsetFromBase == 0)
+ if (ByteOffsetFromBase < FirstOffset) {
FirstLoad = L;
+ FirstOffset = ByteOffsetFromBase;
+ }
Loads.insert(L);
}
assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
"memory, so there must be at least one load which produces the value");
assert(Base && "Base address of the accessed memory location must be set");
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < ByteWidth; i++) {
- LittleEndian &= ByteOffsets[i] == LittleEndianByteAt(ByteWidth, i);
- BigEndian &= ByteOffsets[i] == BigEndianByteAt(ByteWidth, i);
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
if (!BigEndian && !LittleEndian)
return SDValue();
}
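
As a concrete illustration of the normalization above: for the pattern p[1] | (p[2] << 8) | (p[3] << 16) | (p[4] << 24) the collected byte offsets are {1, 2, 3, 4}; subtracting FirstOffset = 1 gives {0, 1, 2, 3}, which matches the little-endian layout, so the whole OR folds into a single (possibly unaligned) i32 load at Base + FirstOffset, plus a byte swap when the target's endianness disagrees (as the big-endian AArch64 tests below show). The following standalone C++ sketch mirrors that check; matchByteOrder, Endian, and the std::vector interface are illustrative only and are not part of DAGCombiner.

// Standalone sketch of the offset-normalized endianness check; not the
// DAGCombiner code itself.
#include <algorithm>
#include <cstdint>
#include <optional>
#include <vector>

enum class Endian { Little, Big };

// ByteOffsets[i] is the memory offset, relative to a common base, of the
// byte that ends up in bit position 8*i of the OR'ed value.
std::optional<Endian> matchByteOrder(const std::vector<int64_t> &ByteOffsets) {
  int64_t FirstOffset =
      *std::min_element(ByteOffsets.begin(), ByteOffsets.end());
  const unsigned ByteWidth = ByteOffsets.size();
  bool LittleEndian = true, BigEndian = true;
  for (unsigned i = 0; i < ByteWidth; i++) {
    // Normalize against the lowest offset so that non-zero based patterns
    // such as {1, 2, 3, 4} are handled like {0, 1, 2, 3}.
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == int64_t(i);              // LittleEndianByteAt
    BigEndian &= CurrentByteOffset == int64_t(ByteWidth - i - 1); // BigEndianByteAt
    if (!LittleEndian && !BigEndian)
      return std::nullopt; // bytes match neither order; no single load fits
  }
  return LittleEndian ? Endian::Little : Endian::Big;
}
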
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
-; CHECK: ldrb w8, [x0, #1]
-; CHECK-NEXT: ldrb w9, [x0, #2]
-; CHECK-NEXT: ldrb w10, [x0, #3]
-; CHECK-NEXT: ldrb w11, [x0, #4]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w8, [x0, #1]
+; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 4
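
At the source level the test above corresponds to the following C++ (illustrative only; the IR in the test, partially elided here, is the authoritative input). With offset support in the combine, the whole expression lowers to the single unaligned load plus rev shown in the updated CHECK lines above.

#include <cstdint>

// C++ equivalent of load_i32_by_i8_nonzero_offset: a little-endian-order OR
// of the four bytes at offsets 1..4 from the base pointer.
uint32_t load_i32_by_i8_nonzero_offset(const uint8_t *p) {
  return (uint32_t)p[1] | ((uint32_t)p[2] << 8) |
         ((uint32_t)p[3] << 16) | ((uint32_t)p[4] << 24);
}
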
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
-; CHECK: ldurb w8, [x0, #-4]
-; CHECK-NEXT: ldurb w9, [x0, #-3]
-; CHECK-NEXT: ldurb w10, [x0, #-2]
-; CHECK-NEXT: ldurb w11, [x0, #-1]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w8, [x0, #-4]
+; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
%tmp2 = load i8, i8* %tmp1, align 4
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
-; CHECK: ldrb w8, [x0, #4]
-; CHECK-NEXT: ldrb w9, [x0, #3]
-; CHECK-NEXT: ldrb w10, [x0, #2]
-; CHECK-NEXT: ldrb w11, [x0, #1]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w0, [x0, #1]
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
%tmp2 = load i8, i8* %tmp1, align 1
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
-; CHECK: ldurb w8, [x0, #-1]
-; CHECK-NEXT: ldurb w9, [x0, #-2]
-; CHECK-NEXT: ldurb w10, [x0, #-3]
-; CHECK-NEXT: ldurb w11, [x0, #-4]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w0, [x0, #-4]
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
%tmp2 = load i8, i8* %tmp1, align 1
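
The bswap-flavoured tests OR the bytes in big-endian order, so on this big-endian target the memory order already matches and no rev is emitted, as the CHECK lines above show. A C++ equivalent of load_i32_by_i8_neg_offset_bswap (illustrative only, mirroring the test comment):

#include <cstdint>

// Big-endian-order OR of the four bytes at offsets -4..-1 from the base
// pointer; on a big-endian target this is just an unaligned load at p - 4.
uint32_t load_i32_by_i8_neg_offset_bswap(const uint8_t *p) {
  return (uint32_t)p[-1] | ((uint32_t)p[-2] << 8) |
         ((uint32_t)p[-3] << 16) | ((uint32_t)p[-4] << 24);
}
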
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: add x8, x0, w1, uxtw
-; CHECK-NEXT: ldrb w0, [x8, #13]
-; CHECK-NEXT: ldrb w9, [x8, #14]
-; CHECK-NEXT: ldrb w10, [x8, #15]
-; CHECK-NEXT: ldrb w8, [x8, #16]
-; CHECK-NEXT: bfi w0, w9, #8, #8
-; CHECK-NEXT: bfi w0, w10, #16, #8
-; CHECK-NEXT: bfi w0, w8, #24, #8
+; CHECK-NEXT: ldur w8, [x8, #13]
+; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
%tmp = add nuw nsw i32 %i, 4
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
-; CHECK: ldrb w8, [x0, #1]
-; CHECK-NEXT: ldrb w9, [x0, #2]
-; CHECK-NEXT: ldrb w10, [x0, #3]
-; CHECK-NEXT: ldrb w11, [x0, #4]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w0, [x0, #1]
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
%tmp2 = load i8, i8* %tmp1, align 4
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
-; CHECK: ldurb w8, [x0, #-4]
-; CHECK-NEXT: ldurb w9, [x0, #-3]
-; CHECK-NEXT: ldurb w10, [x0, #-2]
-; CHECK-NEXT: ldurb w11, [x0, #-1]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w0, [x0, #-4]
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
%tmp2 = load i8, i8* %tmp1, align 4
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
-; CHECK: ldrb w8, [x0, #4]
-; CHECK-NEXT: ldrb w9, [x0, #3]
-; CHECK-NEXT: ldrb w10, [x0, #2]
-; CHECK-NEXT: ldrb w11, [x0, #1]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w8, [x0, #1]
+; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
%tmp2 = load i8, i8* %tmp1, align 1
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
-; CHECK: ldurb w8, [x0, #-1]
-; CHECK-NEXT: ldurb w9, [x0, #-2]
-; CHECK-NEXT: ldurb w10, [x0, #-3]
-; CHECK-NEXT: ldurb w11, [x0, #-4]
-; CHECK-NEXT: bfi w8, w9, #8, #8
-; CHECK-NEXT: bfi w8, w10, #16, #8
-; CHECK-NEXT: bfi w8, w11, #24, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK: ldur w8, [x0, #-4]
+; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
+
%tmp = bitcast i32* %arg to i8*
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
%tmp2 = load i8, i8* %tmp1, align 1
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: add x8, x0, w1, uxtw
-; CHECK-NEXT: ldrb w0, [x8, #13]
-; CHECK-NEXT: ldrb w9, [x8, #14]
-; CHECK-NEXT: ldrb w10, [x8, #15]
-; CHECK-NEXT: ldrb w8, [x8, #16]
-; CHECK-NEXT: bfi w0, w9, #8, #8
-; CHECK-NEXT: bfi w0, w10, #16, #8
-; CHECK-NEXT: bfi w0, w8, #24, #8
+; CHECK-NEXT: ldur w0, [x8, #13]
; CHECK-NEXT: ret
%tmp = add nuw nsw i32 %i, 4
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
-; CHECK: ldrb r1, [r0, #1]
-; CHECK-NEXT: ldrb r2, [r0, #2]
-; CHECK-NEXT: ldrb r3, [r0, #3]
-; CHECK-NEXT: ldrb r0, [r0, #4]
-; CHECK-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK: ldr r0, [r0, #1]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
-; CHECK-ARMv6: ldrb r1, [r0, #1]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
-; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #1]
+; CHECK-ARMv6-NEXT: rev r0, r0
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
-; CHECK: ldrb r1, [r0, #-4]
-; CHECK-NEXT: ldrb r2, [r0, #-3]
-; CHECK-NEXT: ldrb r3, [r0, #-2]
-; CHECK-NEXT: ldrb r0, [r0, #-1]
-; CHECK-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK: ldr r0, [r0, #-4]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
-; CHECK-ARMv6: ldrb r1, [r0, #-4]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
-; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #-4]
+; CHECK-ARMv6-NEXT: rev r0, r0
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
-; CHECK: ldrb r1, [r0, #1]
-; CHECK-NEXT: ldrb r2, [r0, #2]
-; CHECK-NEXT: ldrb r3, [r0, #3]
-; CHECK-NEXT: ldrb r0, [r0, #4]
-; CHECK-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK: ldr r0, [r0, #1]
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
-; CHECK-ARMv6: ldrb r1, [r0, #1]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
-; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #1]
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
-; CHECK: ldrb r1, [r0, #-4]
-; CHECK-NEXT: ldrb r2, [r0, #-3]
-; CHECK-NEXT: ldrb r3, [r0, #-2]
-; CHECK-NEXT: ldrb r0, [r0, #-1]
-; CHECK-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK: ldr r0, [r0, #-4]
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
-; CHECK-ARMv6: ldrb r1, [r0, #-4]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
-; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #-4]
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: add r0, r0, r1
-; CHECK-NEXT: ldrb r1, [r0, #13]
-; CHECK-NEXT: ldrb r2, [r0, #14]
-; CHECK-NEXT: ldrb r3, [r0, #15]
-; CHECK-NEXT: ldrb r0, [r0, #16]
-; CHECK-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: ldr r0, [r0, #13]
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK-ARMv6: add r0, r0, r1
-; CHECK-ARMv6-NEXT: ldrb r1, [r0, #13]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #14]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #15]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #16]
-; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
+; CHECK-ARMv6-NEXT: rev r0, r0
; CHECK-ARMv6-NEXT: bx lr
%tmp = add nuw nsw i32 %i, 4
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
-; CHECK: ldrb r1, [r0, #1]
-; CHECK-NEXT: ldrb r2, [r0, #2]
-; CHECK-NEXT: ldrb r3, [r0, #3]
-; CHECK-NEXT: ldrb r0, [r0, #4]
-; CHECK-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK: ldr r0, [r0, #1]
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
-; CHECK-ARMv6: ldrb r1, [r0, #1]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
-; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #1]
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
-; CHECK: ldrb r1, [r0, #-4]
-; CHECK-NEXT: ldrb r2, [r0, #-3]
-; CHECK-NEXT: ldrb r3, [r0, #-2]
-; CHECK-NEXT: ldrb r0, [r0, #-1]
-; CHECK-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK: ldr r0, [r0, #-4]
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
-; CHECK-ARMv6: ldrb r1, [r0, #-4]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
-; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #-4]
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
-; CHECK: ldrb r1, [r0, #1]
-; CHECK-NEXT: ldrb r2, [r0, #2]
-; CHECK-NEXT: ldrb r3, [r0, #3]
-; CHECK-NEXT: ldrb r0, [r0, #4]
-; CHECK-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK: ldr r0, [r0, #1]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
-; CHECK-ARMv6: ldrb r1, [r0, #1]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
-; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #1]
+; CHECK-ARMv6-NEXT: rev r0, r0
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
-; CHECK: ldrb r1, [r0, #-4]
-; CHECK-NEXT: ldrb r2, [r0, #-3]
-; CHECK-NEXT: ldrb r3, [r0, #-2]
-; CHECK-NEXT: ldrb r0, [r0, #-1]
-; CHECK-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-NEXT: orr r0, r0, r1, lsl #24
+; CHECK: ldr r0, [r0, #-4]
+; CHECK-NEXT: mov r1, #65280
+; CHECK-NEXT: mov r2, #16711680
+; CHECK-NEXT: and r1, r1, r0, lsr #8
+; CHECK-NEXT: and r2, r2, r0, lsl #8
+; CHECK-NEXT: orr r1, r1, r0, lsr #24
+; CHECK-NEXT: orr r0, r2, r0, lsl #24
+; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: mov pc, lr
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
-; CHECK-ARMv6: ldrb r1, [r0, #-4]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
-; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
-; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
+; CHECK-ARMv6: ldr r0, [r0, #-4]
+; CHECK-ARMv6-NEXT: rev r0, r0
; CHECK-ARMv6-NEXT: bx lr
%tmp = bitcast i32* %arg to i8*
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: add r0, r0, r1
-; CHECK-NEXT: ldrb r1, [r0, #13]
-; CHECK-NEXT: ldrb r2, [r0, #14]
-; CHECK-NEXT: ldrb r3, [r0, #15]
-; CHECK-NEXT: ldrb r0, [r0, #16]
-; CHECK-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-NEXT: ldr r0, [r0, #13]
; CHECK-NEXT: mov pc, lr
;
; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK-ARMv6: add r0, r0, r1
-; CHECK-ARMv6-NEXT: ldrb r1, [r0, #13]
-; CHECK-ARMv6-NEXT: ldrb r2, [r0, #14]
-; CHECK-ARMv6-NEXT: ldrb r3, [r0, #15]
-; CHECK-ARMv6-NEXT: ldrb r0, [r0, #16]
-; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
-; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
-; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
+; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
; CHECK-ARMv6-NEXT: bx lr
%tmp = add nuw nsw i32 %i, 4
ret i32 %tmp19
}
-; Non-zero offsets are not supported for now
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl 1(%eax), %ecx
-; CHECK-NEXT: movzbl 2(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 3(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl 4(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl 1(%rdi), %eax
-; CHECK64-NEXT: movzbl 2(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 3(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 4(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl -4(%eax), %ecx
-; CHECK-NEXT: movzbl -3(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl -2(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl -1(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl -4(%rdi), %eax
-; CHECK64-NEXT: movzbl -3(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl -2(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl -1(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl 4(%eax), %ecx
-; CHECK-NEXT: movzbl 3(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl 2(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl 1(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl 1(%eax), %eax
+; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl 4(%rdi), %eax
-; CHECK64-NEXT: movzbl 3(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl 2(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 1(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl 1(%rdi), %eax
+; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movzbl -1(%eax), %ecx
-; CHECK-NEXT: movzbl -2(%eax), %edx
-; CHECK-NEXT: shll $8, %edx
-; CHECK-NEXT: orl %ecx, %edx
-; CHECK-NEXT: movzbl -3(%eax), %ecx
-; CHECK-NEXT: shll $16, %ecx
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movzbl -4(%eax), %eax
-; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %ecx, %eax
+; CHECK-NEXT: movl -4(%eax), %eax
+; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK64: # BB#0:
-; CHECK64-NEXT: movzbl -1(%rdi), %eax
-; CHECK64-NEXT: movzbl -2(%rdi), %ecx
-; CHECK64-NEXT: shll $8, %ecx
-; CHECK64-NEXT: orl %eax, %ecx
-; CHECK64-NEXT: movzbl -3(%rdi), %edx
-; CHECK64-NEXT: shll $16, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl -4(%rdi), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %edx, %eax
+; CHECK64-NEXT: movl -4(%rdi), %eax
+; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
%tmp = bitcast i32* %arg to i8*
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl %esi, %eax
-; CHECK64-NEXT: movzbl 13(%rdi,%rax), %ecx
-; CHECK64-NEXT: movzbl 14(%rdi,%rax), %edx
-; CHECK64-NEXT: shll $8, %edx
-; CHECK64-NEXT: orl %ecx, %edx
-; CHECK64-NEXT: movzbl 15(%rdi,%rax), %ecx
-; CHECK64-NEXT: shll $16, %ecx
-; CHECK64-NEXT: orl %edx, %ecx
-; CHECK64-NEXT: movzbl 16(%rdi,%rax), %eax
-; CHECK64-NEXT: shll $24, %eax
-; CHECK64-NEXT: orl %ecx, %eax
+; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
; CHECK64-NEXT: retq
%tmp = add nuw nsw i32 %i, 4
%tmp2 = add nuw nsw i32 %i, 3