; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512BW
; This tests codegen-time inlining/optimization of memcmp
; rdar://6480398
; X64-AVX512F-NEXT: setae %al
; X64-AVX512F-NEXT: vzeroupper
; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length64_eq:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k0
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setae %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
%cmp = icmp ne i32 %call, 0
ret i1 %cmp
; X64-AVX512F-NEXT: setb %al
; X64-AVX512F-NEXT: vzeroupper
; X64-AVX512F-NEXT: retq
+;
+; X64-AVX512BW-LABEL: length64_eq_const:
+; X64-AVX512BW: # %bb.0:
+; X64-AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; X64-AVX512BW-NEXT: vpcmpeqb {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512BW-NEXT: kortestq %k0, %k0
+; X64-AVX512BW-NEXT: setb %al
+; X64-AVX512BW-NEXT: vzeroupper
+; X64-AVX512BW-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
%c = icmp eq i32 %m, 0
ret i1 %c
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: ne_i512:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setae %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: ne_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setae %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: ne_i512:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setae %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%bcx = bitcast <8 x i64> %x to i512
%bcy = bitcast <8 x i64> %y to i512
%cmp = icmp ne i512 %bcx, %bcy
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: eq_i512:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: eq_i512:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setb %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: eq_i512:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%bcx = bitcast <8 x i64> %x to i512
%bcy = bitcast <8 x i64> %y to i512
%cmp = icmp eq i512 %bcx, %bcy
; NO512-NEXT: setne %al
; NO512-NEXT: retq
;
-; AVX512-LABEL: ne_i512_pair:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
-; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
-; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setae %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: ne_i512_pair:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
+; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setae %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: ne_i512_pair:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
+; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setae %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%a0 = load i512, i512* %a
%b0 = load i512, i512* %b
%xor1 = xor i512 %a0, %b0
; NO512-NEXT: sete %al
; NO512-NEXT: retq
;
-; AVX512-LABEL: eq_i512_pair:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
-; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1
-; AVX512-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
-; AVX512-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: eq_i512_pair:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512F-NEXT: vpcmpeqd (%rsi), %zmm0, %k1
+; AVX512F-NEXT: vpcmpeqd 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: kortestw %k0, %k0
+; AVX512F-NEXT: setb %al
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: eq_i512_pair:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm1
+; AVX512BW-NEXT: vpcmpeqb (%rsi), %zmm0, %k1
+; AVX512BW-NEXT: vpcmpeqb 64(%rsi), %zmm1, %k0 {%k1}
+; AVX512BW-NEXT: xorl %eax, %eax
+; AVX512BW-NEXT: kortestq %k0, %k0
+; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
%a0 = load i512, i512* %a
%b0 = load i512, i512* %b
%xor1 = xor i512 %a0, %b0