return true;
}
-bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI) {
+bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
// This combine is fairly complex so it's not written with a separate
// matcher function.
assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
return true;
}
+ if (MaxLen && KnownLen > MaxLen)
+ return false;
+
if (ID == Intrinsic::memcpy)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
if (ID == Intrinsic::memmove)
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -O0 -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+ target triple = "arm64-apple-darwin"
+
+ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1
+
+ define void @test_small_memcpy(i32* nocapture %dst, i32* nocapture readonly %src) {
+ entry:
+ %0 = bitcast i32* %dst to i8*
+ %1 = bitcast i32* %src to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 32, i1 false)
+ ret void
+ }
+
+ define void @test_large_memcpy(i32* nocapture %dst, i32* nocapture readonly %src) {
+ entry:
+ %0 = bitcast i32* %dst to i8*
+ %1 = bitcast i32* %src to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 36, i1 false)
+ ret void
+ }
+
+ attributes #1 = { argmemonly nounwind }
+
+...
+---
+name: test_small_memcpy
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: test_small_memcpy
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
+ ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C]](s64)
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
+ ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64)
+ ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+ ; CHECK: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 32
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+ RET_ReallyLR
+
+...
+---
+name: test_large_memcpy
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+ - { id: 2, class: _ }
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: test_large_memcpy
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+ ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[COPY]](p0), [[COPY1]](p0), [[C]](s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+ ; CHECK: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 36
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+ RET_ReallyLR
+
+...