From: Artur Pilipenko Date: Thu, 9 Feb 2017 15:13:40 +0000 (+0000) Subject: Add DAGCombiner load combine tests for partially available values X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2105006c6e66e05becaa325932d4eab21488c51c;p=llvm Add DAGCombiner load combine tests for partially available values If some of the trailing or leading bytes of a load combine pattern are zeroes we can combine the pattern to a load + zext and shift. Currently we don't support it, so the tests check the current codegen without load combine. This change will make the follow-up patch that adds support for this kind of combine a bit clearer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294591 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/AArch64/load-combine-big-endian.ll b/test/CodeGen/AArch64/load-combine-big-endian.ll index 53110b7e578..8e533b1fbea 100644 --- a/test/CodeGen/AArch64/load-combine-big-endian.ll +++ b/test/CodeGen/AArch64/load-combine-big-endian.ll @@ -429,3 +429,137 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { %tmp48 = or i32 %tmp42, %tmp47 ret i32 %tmp48 } +; i8* p; // p is 2 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) +define i32 @zext_load_i32_by_i8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8: +; CHECK: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x0, #1] +; CHECK-NEXT: bfi w8, w9, #8, #8 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 8) | ((i32) p[1] << 16) +define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, 
[x0, #1] +; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 16) | ((i32) p[1] << 24) +define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x0, #1] +; CHECK-NEXT: lsl w0, w8, #16 +; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} +; i8* p; // p is 2 byte aligned +; (i32) p[1] | ((i32) p[0] << 8) +define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap: +; CHECK: ldrb w8, [x0, #1] +; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: bfi w8, w9, #8, #8 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 8) | ((i32) p[0] << 16) +define i32 
@zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK: ldrb w8, [x0, #1] +; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 16) | ((i32) p[0] << 24) +define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK: ldrb w8, [x0, #1] +; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: lsl w0, w8, #16 +; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} diff --git a/test/CodeGen/AArch64/load-combine.ll b/test/CodeGen/AArch64/load-combine.ll index 04de21f3a17..59622fc3e0a 100644 --- a/test/CodeGen/AArch64/load-combine.ll +++ b/test/CodeGen/AArch64/load-combine.ll @@ -415,4 +415,139 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { %tmp47 = shl nuw i32 %tmp46, 24 %tmp48 = or i32 %tmp42, %tmp47 ret i32 %tmp48 -} \ No newline at end of file +} + +; i8* p; // p is 2 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) +define i32 @zext_load_i32_by_i8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8: +; CHECK: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x0, #1] +; 
CHECK-NEXT: bfi w8, w9, #8, #8 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 8) | ((i32) p[1] << 16) +define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x0, #1] +; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 16) | ((i32) p[1] << 24) +define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x0, #1] +; CHECK-NEXT: lsl w0, w8, #16 +; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} +; i8* p; // p is 2 byte aligned +; (i32) p[1] | ((i32) p[0] << 8) +define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { +; 
CHECK-LABEL: zext_load_i32_by_i8_bswap: +; CHECK: ldrb w8, [x0, #1] +; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: bfi w8, w9, #8, #8 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 8) | ((i32) p[0] << 16) +define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK: ldrb w8, [x0, #1] +; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: lsl w0, w8, #8 +; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 16) | ((i32) p[0] << 24) +define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK: ldrb w8, [x0, #1] +; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: lsl w0, w8, #16 +; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: ret + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} 
diff --git a/test/CodeGen/ARM/load-combine-big-endian.ll b/test/CodeGen/ARM/load-combine-big-endian.ll index baf565757e8..047c732183e 100644 --- a/test/CodeGen/ARM/load-combine-big-endian.ll +++ b/test/CodeGen/ARM/load-combine-big-endian.ll @@ -578,4 +578,178 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { %tmp47 = shl nuw i32 %tmp46, 24 %tmp48 = or i32 %tmp42, %tmp47 ret i32 %tmp48 -} \ No newline at end of file +} + +; i8* p; // p is 2 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) +define i32 @zext_load_i32_by_i8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 8) | ((i32) p[1] << 16) +define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #16 +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #16 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw 
i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 16) | ((i32) p[1] << 24) +define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #24 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; (i32) p[1] | ((i32) p[0] << 8) +define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, 
%tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 8) | ((i32) p[0] << 16) +define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #16 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 16) | ((i32) p[0] << 24) +define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #24 +; CHECK-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #24 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} diff --git 
a/test/CodeGen/ARM/load-combine.ll b/test/CodeGen/ARM/load-combine.ll index c6d94660039..f19911a8e66 100644 --- a/test/CodeGen/ARM/load-combine.ll +++ b/test/CodeGen/ARM/load-combine.ll @@ -522,3 +522,177 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) { %tmp48 = or i32 %tmp42, %tmp47 ret i32 %tmp48 } + +; i8* p; // p is 2 byte aligned +; (i32) p[0] | ((i32) p[1] << 8) +define i32 @zext_load_i32_by_i8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 8) | ((i32) p[1] << 16) +define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #16 +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #16 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext 
i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[0] << 16) | ((i32) p[1] << 24) +define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r0, r0, #24 +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 2 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; (i32) p[1] | ((i32) p[0] << 8) +define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 8) | ((i32) p[0] << 16) +define i32 
@zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #16 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; // p is 2 byte aligned +; ((i32) p[1] << 16) | ((i32) p[0] << 24) +define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK: ldrb r1, [r0] +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: lsl r1, r1, #24 +; CHECK-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK-ARMv6: ldrb r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1] +; CHECK-ARMv6-NEXT: lsl r1, r1, #24 +; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16 +; CHECK-ARMv6-NEXT: bx lr + + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 2 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} diff --git a/test/CodeGen/X86/load-combine.ll b/test/CodeGen/X86/load-combine.ll index a3e604f1da0..c2966f026fc 100644 --- 
a/test/CodeGen/X86/load-combine.ll +++ b/test/CodeGen/X86/load-combine.ll @@ -1142,3 +1142,201 @@ define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) { %tmp52 = or i32 %tmp46, %tmp51 ret i32 %tmp52 } + +; i8* p; +; (i32) p[0] | ((i32) p[1] << 8) +define i32 @zext_load_i32_by_i8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl (%eax), %ecx +; CHECK-NEXT: movzbl 1(%eax), %eax +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: retl +; +; CHECK64-LABEL: zext_load_i32_by_i8: +; CHECK64: # BB#0: +; CHECK64-NEXT: movzbl (%rdi), %ecx +; CHECK64-NEXT: movzbl 1(%rdi), %eax +; CHECK64-NEXT: shll $8, %eax +; CHECK64-NEXT: orl %ecx, %eax +; CHECK64-NEXT: retq + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; +; ((i32) p[0] << 8) | ((i32) p[1] << 16) +define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl (%eax), %ecx +; CHECK-NEXT: shll $8, %ecx +; CHECK-NEXT: movzbl 1(%eax), %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: retl +; +; CHECK64-LABEL: zext_load_i32_by_i8_shl_8: +; CHECK64: # BB#0: +; CHECK64-NEXT: movzbl (%rdi), %ecx +; CHECK64-NEXT: shll $8, %ecx +; CHECK64-NEXT: movzbl 1(%rdi), %eax +; CHECK64-NEXT: shll $16, %eax +; CHECK64-NEXT: orl %ecx, %eax +; CHECK64-NEXT: retq + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr 
inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; +; ((i32) p[0] << 16) | ((i32) p[1] << 24) +define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl (%eax), %ecx +; CHECK-NEXT: shll $16, %ecx +; CHECK-NEXT: movzbl 1(%eax), %eax +; CHECK-NEXT: shll $24, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: retl +; +; CHECK64-LABEL: zext_load_i32_by_i8_shl_16: +; CHECK64: # BB#0: +; CHECK64-NEXT: movzbl (%rdi), %ecx +; CHECK64-NEXT: shll $16, %ecx +; CHECK64-NEXT: movzbl 1(%rdi), %eax +; CHECK64-NEXT: shll $24, %eax +; CHECK64-NEXT: orl %ecx, %eax +; CHECK64-NEXT: retq + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; +; (i32) p[1] | ((i32) p[0] << 8) +define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl 1(%eax), %ecx +; CHECK-NEXT: movzbl (%eax), %eax +; CHECK-NEXT: shll $8, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: retl +; +; CHECK64-LABEL: zext_load_i32_by_i8_bswap: +; CHECK64: # BB#0: +; CHECK64-NEXT: movzbl 1(%rdi), %ecx +; CHECK64-NEXT: movzbl (%rdi), %eax +; CHECK64-NEXT: shll $8, %eax +; CHECK64-NEXT: orl %ecx, %eax +; CHECK64-NEXT: retq + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp4 = 
getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 8 + %tmp8 = or i32 %tmp7, %tmp3 + ret i32 %tmp8 +} + +; i8* p; +; ((i32) p[1] << 8) | ((i32) p[0] << 16) +define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl 1(%eax), %ecx +; CHECK-NEXT: shll $8, %ecx +; CHECK-NEXT: movzbl (%eax), %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: retl +; +; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8: +; CHECK64: # BB#0: +; CHECK64-NEXT: movzbl 1(%rdi), %ecx +; CHECK64-NEXT: shll $8, %ecx +; CHECK64-NEXT: movzbl (%rdi), %eax +; CHECK64-NEXT: shll $16, %eax +; CHECK64-NEXT: orl %ecx, %eax +; CHECK64-NEXT: retq + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 8 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 16 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +} + +; i8* p; +; ((i32) p[1] << 16) | ((i32) p[0] << 24) +define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { +; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl 1(%eax), %ecx +; CHECK-NEXT: shll $16, %ecx +; CHECK-NEXT: movzbl (%eax), %eax +; CHECK-NEXT: shll $24, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: retl +; +; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16: +; CHECK64: # BB#0: +; CHECK64-NEXT: movzbl 1(%rdi), %ecx +; CHECK64-NEXT: shll $16, %ecx +; CHECK64-NEXT: movzbl (%rdi), %eax +; CHECK64-NEXT: shll $24, %eax +; CHECK64-NEXT: orl %ecx, %eax +; CHECK64-NEXT: retq + %tmp = bitcast i32* %arg to i8* + %tmp1 = getelementptr 
inbounds i8, i8* %tmp, i32 1 + %tmp2 = load i8, i8* %tmp1, align 1 + %tmp3 = zext i8 %tmp2 to i32 + %tmp30 = shl nuw nsw i32 %tmp3, 16 + %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 + %tmp5 = load i8, i8* %tmp4, align 1 + %tmp6 = zext i8 %tmp5 to i32 + %tmp7 = shl nuw nsw i32 %tmp6, 24 + %tmp8 = or i32 %tmp7, %tmp30 + ret i32 %tmp8 +}