granicus.if.org Git - llvm/commitdiff
[CGP, PowerPC] try to constant fold before creating loads for memcmp expansion
author: Sanjay Patel <spatel@rotateright.com>
Mon, 19 Jun 2017 19:48:35 +0000 (19:48 +0000)
committer: Sanjay Patel <spatel@rotateright.com>
Mon, 19 Jun 2017 19:48:35 +0000 (19:48 +0000)
This is the last step needed to avoid regressions for x86 before we flip the switch to allow
expansion of the smallest set of memcmp() calls via CGP. The DAG version checks for constant
strings, so we need to do that here too.

FWIW, the 2-constant test is not handled by LibCallSimplifier::optimizeMemCmp() because that
code is limited to 8-bit constant arrays. LibCallSimplifier will also fail to optimize some
1-constant tests because its alignment requirements are too strict (it shouldn't require
alignment for a constant operand).

Differential Revision: https://reviews.llvm.org/D34071

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305734 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/CodeGenPrepare.cpp
test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

index 37e176099ea7acaac35a8970fdd4a55fc25df24d..06076d06852bdec95d29c188a6efe0c97a56910f 100644 (file)
@@ -1851,9 +1851,19 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
                                   ConstantInt::get(LoadSizeType, GEPIndex));
     }
 
-    // Load LoadSizeType from the base address.
-    Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-    Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+    // Get a constant or load a value for each source address.
+    Value *LoadSrc1 = nullptr;
+    if (auto *Source1C = dyn_cast<Constant>(Source1))
+      LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
+    if (!LoadSrc1)
+      LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+
+    Value *LoadSrc2 = nullptr;
+    if (auto *Source2C = dyn_cast<Constant>(Source2))
+      LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
+    if (!LoadSrc2)
+      LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
     if (NumLoads != 1) {
       if (LoadSizeType != MaxLoadType) {
         LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
index ad9078c8206634d23319532465e017b28340990a..d398dfe7fc922f23cbcb17a2707b02f3087deaa7 100644 (file)
@@ -168,26 +168,8 @@ define signext i32 @zeroEqualityTest05() {
 ; Validate with memcmp()?:
 define signext i32 @equalityFoldTwoConstants() {
 ; CHECK-LABEL: equalityFoldTwoConstants:
-; CHECK:       # BB#0: # %loadbb
-; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha
-; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l(4)
-; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    bne 0, .LBB5_2
-; CHECK-NEXT:  # BB#1: # %loadbb1
-; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8
-; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4)
-; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    beq 0, .LBB5_3
-; CHECK-NEXT:  .LBB5_2: # %res_block
+; CHECK:       # BB#0: # %endblock
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB5_3: # %endblock
-; CHECK-NEXT:    cntlzw 3, 3
-; CHECK-NEXT:    srwi 3, 3, 5
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
   %not.tobool = icmp eq i32 %call, 0
@@ -198,16 +180,17 @@ define signext i32 @equalityFoldTwoConstants() {
 define signext i32 @equalityFoldOneConstant(i8* %X) {
 ; CHECK-LABEL: equalityFoldOneConstant:
 ; CHECK:       # BB#0: # %loadbb
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha
+; CHECK-NEXT:    li 4, 1
 ; CHECK-NEXT:    ld 5, 0(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer1@toc@l(4)
-; CHECK-NEXT:    cmpld 4, 5
+; CHECK-NEXT:    sldi 4, 4, 32
+; CHECK-NEXT:    cmpld 5, 4
 ; CHECK-NEXT:    bne 0, .LBB6_2
 ; CHECK-NEXT:  # BB#1: # %loadbb1
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
+; CHECK-NEXT:    li 4, 3
 ; CHECK-NEXT:    ld 3, 8(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer1@toc@l+8(4)
-; CHECK-NEXT:    cmpld 4, 3
+; CHECK-NEXT:    sldi 4, 4, 32
+; CHECK-NEXT:    ori 4, 4, 2
+; CHECK-NEXT:    cmpld 3, 4
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    beq 0, .LBB6_3
 ; CHECK-NEXT:  .LBB6_2: # %res_block