From: Amara Emerson Date: Sat, 28 Sep 2019 07:55:42 +0000 (+0000) Subject: [GlobalISel Enable memcpy inlining with optsize. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0134b97df57b96941387aeb26338139b5ce9de58;p=llvm [GlobalISel Enable memcpy inlining with optsize. We should be disabling inline for minsize, not optsize. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373143 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 16529cc1297..ad645a46bbe 100644 --- a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -31,7 +31,7 @@ public: bool MinSize) : IllegalOpsAllowed(AllowIllegalOps), LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo), - EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(OptSize) { + EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(MinSize) { assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) && "Expecting legalizerInfo when illegalops not allowed"); } diff --git a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp index 108970c64e3..6df3f944f8c 100644 --- a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp +++ b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp @@ -73,7 +73,7 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, // heuristics decide. unsigned MaxLen = EnableOpt ? 0 : 32; // Try to inline memcpy type calls if optimizations are enabled. - return (!EnableOptSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen) + return (!EnableMinSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen) : false; } default: diff --git a/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir index 02d52973e10..ddb9d3507d9 100644 --- a/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir +++ b/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir @@ -22,6 +22,22 @@ ret void } + define void @test_memcpy2_const_optsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #2 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + ret void + } + + define void @test_memcpy2_const_minsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #3 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + ret void + } + define void @test_memcpy3_const_arrays_unaligned(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 { entry: %0 = bitcast i32* %dst to i8* @@ -32,6 +48,8 @@ attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } + attributes #2 = { optsize } + attributes #3 = { minsize } ... --- @@ -107,6 +125,80 @@ body: | G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR +... +--- +name: test_memcpy2_const_optsize +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy2_const_optsize + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 72 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... +--- +name: test_memcpy2_const_minsize +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy2_const_minsize + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72 + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 72 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + ... --- name: test_memcpy3_const_arrays_unaligned