From 5aca549a9a89a96c7aed118197e450277c66d348 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Mon, 21 Aug 2017 19:13:14 +0000
Subject: [PATCH] [LibCallSimplifier] try harder to fold memcmp with constant
 arguments (2nd try)

The 1st try was reverted because it could inf-loop by creating a dead
instruction. Fixed that to not happen and added a test case to verify.

Original commit message:

Try to fold:
memcmp(X, C, ConstantLength) == 0 --> load X == *C

Without this change, we're unnecessarily checking the alignment of the
constant data, so we miss the transform in the first 2 tests in the patch.

I noted this shortcoming of LibCallSimplifier in one of the recent CGP
memcmp expansion patches. This doesn't help the example in:
https://bugs.llvm.org/show_bug.cgi?id=34032#c13
...directly, but it's worth short-circuiting more of these simple cases
since we're already trying to do that.

The benefit of transforming to load+cmp is that existing IR analysis/transforms
may further simplify that code. For example, if the load of the variable is
common to multiple memcmp calls, CSE can remove the duplicate instructions.

Differential Revision: https://reviews.llvm.org/D36922

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311366 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyLibCalls.cpp        | 44 +++++++++++------
 .../InstCombine/memcmp-constant-fold.ll          | 49 +++++++++++++++----
 2 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8257dbcf858..37cfc2cbbc1 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -751,29 +752,44 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
   }
 
   // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+  // TODO: The case where both inputs are constants does not need to be limited
+  // to legal integers or equality comparison. See block below this.
   if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
-
     IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
     unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
 
-    if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
-        getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
-
-      Type *LHSPtrTy =
-          IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
-      Type *RHSPtrTy =
-          IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
-
-      Value *LHSV =
-          B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
-      Value *RHSV =
-          B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+    // First, see if we can fold either argument to a constant.
+    Value *LHSV = nullptr;
+    if (auto *LHSC = dyn_cast<Constant>(LHS)) {
+      LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());
+      LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
+    }
+    Value *RHSV = nullptr;
+    if (auto *RHSC = dyn_cast<Constant>(RHS)) {
+      RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());
+      RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
+    }
 
+    // Don't generate unaligned loads. If either source is constant data,
+    // alignment doesn't matter for that source because there is no load.
+    if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
+        (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
+      if (!LHSV) {
+        Type *LHSPtrTy =
+            IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+        LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
+      }
+      if (!RHSV) {
+        Type *RHSPtrTy =
+            IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+        RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
+      }
       return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
     }
   }
 
-  // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+  // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const).
+  // TODO: This is limited to i8 arrays.
   StringRef LHSStr, RHSStr;
   if (getConstantStringInfo(LHS, LHSStr) &&
       getConstantStringInfo(RHS, RHSStr)) {
diff --git a/test/Transforms/InstCombine/memcmp-constant-fold.ll b/test/Transforms/InstCombine/memcmp-constant-fold.ll
index b19f17c818d..211b3b5ab2c 100644
--- a/test/Transforms/InstCombine/memcmp-constant-fold.ll
+++ b/test/Transforms/InstCombine/memcmp-constant-fold.ll
@@ -3,31 +3,45 @@
 
 declare i32 @memcmp(i8*, i8*, i64)
 
-; TODO: The alignment of this constant does not matter. We constant fold the load.
+; The alignment of this constant does not matter. We constant fold the load.
 
 @charbuf = private unnamed_addr constant [4 x i8] [i8 0, i8 0, i8 0, i8 1], align 1
 
 define i1 @memcmp_4bytes_unaligned_constant_i8(i8* align 4 %x) {
-; ALL-LABEL: @memcmp_4bytes_unaligned_constant_i8(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
-; ALL-NEXT:    [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT:    ret i1 [[CMPEQ0]]
+; LE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i32*
+; LE-NEXT:    [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 16777216
+; LE-NEXT:    ret i1 [[TMP2]]
+;
+; BE-LABEL: @memcmp_4bytes_unaligned_constant_i8(
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i32*
+; BE-NEXT:    [[LHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; BE-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[LHSV]], 1
+; BE-NEXT:    ret i1 [[TMP2]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @charbuf, i64 0, i64 0), i64 4)
   %cmpeq0 = icmp eq i32 %call, 0
   ret i1 %cmpeq0
 }
 
-; TODO: We still don't care about alignment of the constant. We are not limited to constant folding only i8 arrays.
+; We still don't care about alignment of the constant. We are not limited to constant folding only i8 arrays.
 ; It doesn't matter if the constant operand is the first operand to the memcmp.
 
 @intbuf_unaligned = private unnamed_addr constant [4 x i16] [i16 1, i16 2, i16 3, i16 4], align 1
 
 define i1 @memcmp_4bytes_unaligned_constant_i16(i8* align 4 %x) {
-; ALL-LABEL: @memcmp_4bytes_unaligned_constant_i16(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast ([4 x i16]* @intbuf_unaligned to i8*), i8* %x, i64 4)
-; ALL-NEXT:    [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT:    ret i1 [[CMPEQ0]]
+; LE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i32*
+; LE-NEXT:    [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 131073
+; LE-NEXT:    ret i1 [[TMP2]]
+;
+; BE-LABEL: @memcmp_4bytes_unaligned_constant_i16(
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i8* %x to i32*
+; BE-NEXT:    [[RHSV:%.*]] = load i32, i32* [[TMP1]], align 4
+; BE-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[RHSV]], 65538
+; BE-NEXT:    ret i1 [[TMP2]]
 ;
   %call = tail call i32 @memcmp(i8* bitcast (i16* getelementptr inbounds ([4 x i16], [4 x i16]* @intbuf_unaligned, i64 0, i64 0) to i8*), i8* %x, i64 4)
   %cmpeq0 = icmp eq i32 %call, 0
@@ -49,3 +63,18 @@ define i1 @memcmp_3bytes_aligned_constant_i32(i8* align 4 %x) {
   ret i1 %cmpeq0
 }
 
+; A sloppy implementation would infinite loop by recreating the unused instructions.
+
+define i1 @memcmp_4bytes_one_unaligned_i8(i8* align 4 %x, i8* align 1 %y) {
+; ALL-LABEL: @memcmp_4bytes_one_unaligned_i8(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+; ALL-NEXT:    [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    ret i1 [[CMPEQ0]]
+;
+  %bc = bitcast i8* %x to i32*
+  %lhsv = load i32, i32* %bc
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+  %cmpeq0 = icmp eq i32 %call, 0
+  ret i1 %cmpeq0
+}
+
-- 
2.50.1
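
A rough C-level sketch of the source pattern this fold targets, mirroring the
@charbuf test above; the file name, function name, and buffer contents here
are only for illustration and are not part of the patch. Assuming a target
where a 32-bit integer is a legal type, the memcmp below can simplify to a
single 4-byte load of 'x' compared against the folded constant, regardless of
how the constant array is aligned:

    /* memcmp_fold_example.c - hypothetical illustration, not from the patch */
    #include <string.h>

    /* Constant data: its alignment no longer blocks the transform, because
       the constant side is folded to an integer instead of being loaded. */
    static const char kPattern[4] = {0, 0, 0, 1};

    int matches_pattern(const char *x) {
      /* memcmp(X, C, 4) == 0 --> (4-byte load from X) == folded constant */
      return memcmp(x, kPattern, 4) == 0;
    }

If several such calls read from the same variable pointer, the resulting loads
are ordinary IR that later passes (CSE, for example) can deduplicate, which is
the benefit described in the commit message.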