From: Sanjay Patel Date: Mon, 19 Jun 2017 19:48:35 +0000 (+0000) Subject: [CGP, PowerPC] try to constant fold before creating loads for memcmp expansion X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7a0e66cc56651377d3288a18b37071ae03a0ba2c;p=llvm [CGP, PowerPC] try to constant fold before creating loads for memcmp expansion This is the last step needed to avoid regressions for x86 before we flip the switch to allow expansion of the smallest set of memcmp() via CGP. The DAG version checks for constant strings, so we need to do that here too. FWIW, the 2 constant test is not handled by LibCallSimplifier::optimizeMemCmp() because that code is limited to 8-bit constant arrays. LibCallSimplifier will also fail to optimize some 1 constant tests because its alignment requirements are too strict (shouldn't require alignment for a constant operand). Differential Revision: https://reviews.llvm.org/D34071 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305734 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 37e176099ea..06076d06852 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1851,9 +1851,19 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, ConstantInt::get(LoadSizeType, GEPIndex)); } - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + // Get a constant or load a value for each source address. 
+ Value *LoadSrc1 = nullptr; + if (auto *Source1C = dyn_cast<Constant>(Source1)) + LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); + if (!LoadSrc1) + LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + + Value *LoadSrc2 = nullptr; + if (auto *Source2C = dyn_cast<Constant>(Source2)) + LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); + if (!LoadSrc2) + LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + if (NumLoads != 1) { if (LoadSizeType != MaxLoadType) { LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); diff --git a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll index ad9078c8206..d398dfe7fc9 100644 --- a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -168,26 +168,8 @@ define signext i32 @zeroEqualityTest05() { ; Validate with memcmp()?: define signext i32 @equalityFoldTwoConstants() { ; CHECK-LABEL: equalityFoldTwoConstants: -; CHECK: # BB#0: # %loadbb -; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha -; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha -; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l(3) -; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l(4) -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: bne 0, .LBB5_2 -; CHECK-NEXT: # BB#1: # %loadbb1 -; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8 -; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8 -; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3) -; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4) -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: beq 0, .LBB5_3 -; CHECK-NEXT: .LBB5_2: # %res_block +; CHECK: # BB#0: # %endblock ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB5_3: # %endblock -; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* 
@zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16) %not.tobool = icmp eq i32 %call, 0 @@ -198,16 +180,17 @@ define signext i32 @equalityFoldTwoConstants() { define signext i32 @equalityFoldOneConstant(i8* %X) { ; CHECK-LABEL: equalityFoldOneConstant: ; CHECK: # BB#0: # %loadbb -; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha +; CHECK-NEXT: li 4, 1 ; CHECK-NEXT: ld 5, 0(3) -; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer1@toc@l(4) -; CHECK-NEXT: cmpld 4, 5 +; CHECK-NEXT: sldi 4, 4, 32 +; CHECK-NEXT: cmpld 5, 4 ; CHECK-NEXT: bne 0, .LBB6_2 ; CHECK-NEXT: # BB#1: # %loadbb1 -; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha+8 +; CHECK-NEXT: li 4, 3 ; CHECK-NEXT: ld 3, 8(3) -; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer1@toc@l+8(4) -; CHECK-NEXT: cmpld 4, 3 +; CHECK-NEXT: sldi 4, 4, 32 +; CHECK-NEXT: ori 4, 4, 2 +; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: beq 0, .LBB6_3 ; CHECK-NEXT: .LBB6_2: # %res_block