From db1fed765e27764c9f948322e61a0a8221df0607 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Mon, 6 May 2019 22:25:59 +0000
Subject: [PATCH] [Tests] Add tests for optimized lowerings of element.unordered.atomic memset/memmove/memcpy

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360093 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../element-wise-atomic-memory-intrinsics.ll | 459 ++++++++++++++++++
 1 file changed, 459 insertions(+)

diff --git a/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll b/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
index f84977fa403..d4089cd0e61 100644
--- a/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
+++ b/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
@@ -341,6 +341,465 @@ define void @test_memset_args(i8** %Storage, i8* %V) {
 ret void
 }
+;; The next batch of tests covers cases where we could profitably lower to
+;; atomic loads and stores directly, just as we already do for the
+;; non-atomic (non-element.unordered.atomic) variants.
+
+define i8* @test_memcpy1_64(i8* %P, i8* %Q) {
+; CHECK-LABEL: test_memcpy1_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: movl $64, %edx
+; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_1
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 1 %Q, i32 64, i32 1)
+ ret i8* %P
+}
+
+; Ensure align 16 generates vector load/stores even with small element size
+define i8* @test_memcpy1_64_align4(i8* %P, i8* %Q) {
+; CHECK-LABEL: test_memcpy1_64_align4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: movl $64, %edx
+; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_1
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 1)
+ ret i8* %P
+}
+
+define i8* @test_memcpy1_64_align8(i8* %P, i8* %Q) {
+; CHECK-LABEL: test_memcpy1_64_align8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: movl $64, %edx
+; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_1
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 1)
+ ret i8* %P
+}
+
+
+define i8* @test_memcpy1_64_align16(i8* %P, i8* %Q) {
+; CHECK-LABEL: test_memcpy1_64_align16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: movl $64, %edx
+; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_1
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 1)
+ ret i8* %P
+}
+
+
+; Make sure that different source & dest alignments are handled correctly.
+define i8* @test_memcpy1_64_diff_aligns(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memcpy1_64_diff_aligns: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_1 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 16 %Q, i32 64, i32 1) + ret i8* %P +} + +define i8* @test_memcpy2_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memcpy2_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_2 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %P, i8* align 2 %Q, i32 64, i32 2) + ret i8* %P +} + +define i8* @test_memcpy4_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memcpy4_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_4 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 4) + ret i8* %P +} + +define i8* @test_memcpy8_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memcpy8_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_8 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 8) + ret i8* %P +} + +define i8* @test_memcpy16_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memcpy16_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memcpy_element_unordered_atomic_16 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 16) + ret i8* %P +} + +; ================================== + +define i8* @test_memmove1_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memmove1_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memmove_element_unordered_atomic_1 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 1 %Q, i32 64, i32 1) + ret i8* %P +} + +; Ensure align 16 generates vector load/stores even with small element size +define i8* 
@test_memmove1_64_align16(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memmove1_64_align16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memmove_element_unordered_atomic_1 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 1) + ret i8* %P +} + +; Make sure that different source & dest alignments are handled correctly. +define i8* @test_memmove1_64_diff_aligns(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memmove1_64_diff_aligns: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memmove_element_unordered_atomic_1 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 16 %Q, i32 64, i32 1) + ret i8* %P +} + +define i8* @test_memmove2_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memmove2_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memmove_element_unordered_atomic_2 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %P, i8* align 2 %Q, i32 64, i32 2) + ret i8* %P +} + +define i8* @test_memmove4_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memmove4_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memmove_element_unordered_atomic_4 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 4) + ret i8* %P +} + +define i8* @test_memmove8_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memmove8_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memmove_element_unordered_atomic_8 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 8) + ret i8* %P +} + +define i8* @test_memmove16_64(i8* %P, i8* %Q) { +; CHECK-LABEL: test_memmove16_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memmove_element_unordered_atomic_16 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 16) + ret i8* %P +} + +; 
============================== + + +define i8* @test_memset1_64(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset1_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_1 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 64, i32 1) + ret i8* %P +} + +define i8* @test_memset1_64_align16(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset1_64_align16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_1 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 1) + ret i8* %P +} + +define i8* @test_memset2_64(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset2_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_2 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %P, i8 %V, i32 64, i32 2) + ret i8* %P +} + +define i8* @test_memset4_64(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset4_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_4 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 64, i32 4) + ret i8* %P +} + +define i8* @test_memset4_12(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset4_12: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $12, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_4 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 12, i32 4) + ret i8* %P +} + +define i8* @test_memset4_16(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset4_16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $16, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_4 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 16, i32 4) + ret i8* %P +} + +define i8* @test_memset4_60(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset4_60: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; 
CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $60, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_4 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 60, i32 4) + ret i8* %P +} + +define i8* @test_memset8_64(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset8_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_8 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 64, i32 8) + ret i8* %P +} + +define i8* @test_memset16_64(i8* %P, i8 %V) { +; CHECK-LABEL: test_memset16_64: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_16 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 16) + ret i8* %P +} + +define i8* @test_memset16_64_zero(i8* %P) { +; CHECK-LABEL: test_memset16_64_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: movl $64, %edx +; CHECK-NEXT: callq __llvm_memset_element_unordered_atomic_16 +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 0, i32 64, i32 16) + ret i8* %P +} + + declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind -- 2.40.0