; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
-; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512-ymm
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512-ymm
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512dq -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512-ymm
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
; https://llvm.org/bugs/show_bug.cgi?id=27100
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
-; AVX-LABEL: memset_64_nonzero_bytes:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
-; AVX-NEXT: vmovups %ymm0, 32(%rdi)
-; AVX-NEXT: vmovups %ymm0, (%rdi)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: memset_64_nonzero_bytes:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_64_nonzero_bytes:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-ymm-LABEL: memset_64_nonzero_bytes:
+; AVX512-ymm: # %bb.0:
+; AVX512-ymm-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX512-ymm-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, (%rdi)
+; AVX512-ymm-NEXT: vzeroupper
+; AVX512-ymm-NEXT: retq
+;
+; AVX512F-LABEL: memset_64_nonzero_bytes:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
+; AVX512F-NEXT: vmovups %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: memset_64_nonzero_bytes:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
ret void
}
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
-; AVX-LABEL: memset_128_nonzero_bytes:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
-; AVX-NEXT: vmovups %ymm0, 96(%rdi)
-; AVX-NEXT: vmovups %ymm0, 64(%rdi)
-; AVX-NEXT: vmovups %ymm0, 32(%rdi)
-; AVX-NEXT: vmovups %ymm0, (%rdi)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: memset_128_nonzero_bytes:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_128_nonzero_bytes:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-ymm-LABEL: memset_128_nonzero_bytes:
+; AVX512-ymm: # %bb.0:
+; AVX512-ymm-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX512-ymm-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, (%rdi)
+; AVX512-ymm-NEXT: vzeroupper
+; AVX512-ymm-NEXT: retq
+;
+; AVX512F-LABEL: memset_128_nonzero_bytes:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
+; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
+; AVX512F-NEXT: vmovups %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: memset_128_nonzero_bytes:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
+; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
ret void
}
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
-; AVX-LABEL: memset_256_nonzero_bytes:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
-; AVX-NEXT: vmovups %ymm0, 224(%rdi)
-; AVX-NEXT: vmovups %ymm0, 192(%rdi)
-; AVX-NEXT: vmovups %ymm0, 160(%rdi)
-; AVX-NEXT: vmovups %ymm0, 128(%rdi)
-; AVX-NEXT: vmovups %ymm0, 96(%rdi)
-; AVX-NEXT: vmovups %ymm0, 64(%rdi)
-; AVX-NEXT: vmovups %ymm0, 32(%rdi)
-; AVX-NEXT: vmovups %ymm0, (%rdi)
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; AVX1-LABEL: memset_256_nonzero_bytes:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: memset_256_nonzero_bytes:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX2-NEXT: vmovups %ymm0, 224(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 192(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 160(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 128(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-ymm-LABEL: memset_256_nonzero_bytes:
+; AVX512-ymm: # %bb.0:
+; AVX512-ymm-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX512-ymm-NEXT: vmovups %ymm0, 224(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 192(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 160(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 128(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 64(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX512-ymm-NEXT: vmovups %ymm0, (%rdi)
+; AVX512-ymm-NEXT: vzeroupper
+; AVX512-ymm-NEXT: retq
+;
+; AVX512F-LABEL: memset_256_nonzero_bytes:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
+; AVX512F-NEXT: vmovups %zmm0, 192(%rdi)
+; AVX512F-NEXT: vmovups %zmm0, 128(%rdi)
+; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
+; AVX512F-NEXT: vmovups %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: memset_256_nonzero_bytes:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+; AVX512BW-NEXT: vmovups %zmm0, 192(%rdi)
+; AVX512BW-NEXT: vmovups %zmm0, 128(%rdi)
+; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
+; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
ret void
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: memset_64_nonconst_bytes:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovd %esi, %xmm0
-; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512-NEXT: vmovdqu %ymm0, 32(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512-ymm-LABEL: memset_64_nonconst_bytes:
+; AVX512-ymm: # %bb.0:
+; AVX512-ymm-NEXT: vmovd %esi, %xmm0
+; AVX512-ymm-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 32(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX512-ymm-NEXT: vzeroupper
+; AVX512-ymm-NEXT: retq
+;
+; AVX512F-LABEL: memset_64_nonconst_bytes:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movzbl %sil, %eax
+; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
+; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: memset_64_nonconst_bytes:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i1 false)
ret void
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: memset_128_nonconst_bytes:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovd %esi, %xmm0
-; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512-NEXT: vmovdqu %ymm0, 96(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 64(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 32(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512-ymm-LABEL: memset_128_nonconst_bytes:
+; AVX512-ymm: # %bb.0:
+; AVX512-ymm-NEXT: vmovd %esi, %xmm0
+; AVX512-ymm-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 96(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 64(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 32(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX512-ymm-NEXT: vzeroupper
+; AVX512-ymm-NEXT: retq
+;
+; AVX512F-LABEL: memset_128_nonconst_bytes:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movzbl %sil, %eax
+; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
+; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
+; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: memset_128_nonconst_bytes:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
+; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i1 false)
ret void
}
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: memset_256_nonconst_bytes:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovd %esi, %xmm0
-; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512-NEXT: vmovdqu %ymm0, 224(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 192(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 160(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 128(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 96(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 64(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, 32(%rdi)
-; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512-ymm-LABEL: memset_256_nonconst_bytes:
+; AVX512-ymm: # %bb.0:
+; AVX512-ymm-NEXT: vmovd %esi, %xmm0
+; AVX512-ymm-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 224(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 192(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 160(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 128(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 96(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 64(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, 32(%rdi)
+; AVX512-ymm-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX512-ymm-NEXT: vzeroupper
+; AVX512-ymm-NEXT: retq
+;
+; AVX512F-LABEL: memset_256_nonconst_bytes:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: movzbl %sil, %eax
+; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
+; AVX512F-NEXT: vmovdqu64 %zmm0, 192(%rdi)
+; AVX512F-NEXT: vmovdqu64 %zmm0, 128(%rdi)
+; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
+; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: memset_256_nonconst_bytes:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
+; AVX512BW-NEXT: vmovdqu64 %zmm0, 192(%rdi)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, 128(%rdi)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i1 false)
ret void
}