From 85ad85b52ea35056887e642ecf13ff7b3a406d25 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 13 Mar 2017 21:58:54 +0000
Subject: [PATCH] [AVX-512] Fix another case where we are copying from a mask
 register using AH/BH/CH/DH with fastisel.

Fixes PR32256. Still planning to do an audit for other possible cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297678 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp |  3 +-
 test/CodeGen/X86/pr32256.ll    | 67 ++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/X86/pr32256.ll

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 6ffff041929..4eec15779a1 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1751,7 +1751,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
   // In case OpReg is a K register, COPY to a GPR
   if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
     unsigned KOpReg = OpReg;
-    OpReg = createResultReg(&X86::GR8RegClass);
+    OpReg = createResultReg(Subtarget->is64Bit() ? &X86::GR8RegClass
+                                                 : &X86::GR8_ABCD_LRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY), OpReg)
       .addReg(KOpReg);
diff --git a/test/CodeGen/X86/pr32256.ll b/test/CodeGen/X86/pr32256.ll
new file mode 100644
index 00000000000..a6ec02d8069
--- /dev/null
+++ b/test/CodeGen/X86/pr32256.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -O0 -mcpu=skx | FileCheck %s
+
+@c = external global i8, align 1
+
+; Function Attrs: noinline nounwind
+define void @_Z1av() {
+; CHECK-LABEL: _Z1av:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    pushl %ebx
+; CHECK-NEXT:  .Lcfi0:
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    subl $8, %esp
+; CHECK-NEXT:  .Lcfi1:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:  .Lcfi2:
+; CHECK-NEXT:    .cfi_offset %ebx, -8
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    movb %al, %cl
+; CHECK-NEXT:    movb $1, %dl
+; CHECK-NEXT:    movb c, %ch
+; CHECK-NEXT:    # implicit-def: %EAX
+; CHECK-NEXT:    movb %ch, %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovq %k0, %k1
+; CHECK-NEXT:    kxnorw %k0, %k0, %k2
+; CHECK-NEXT:    kshiftrw $15, %k2, %k2
+; CHECK-NEXT:    kxorw %k2, %k0, %k0
+; CHECK-NEXT:    kmovb %k0, %ebx
+; CHECK-NEXT:    testb $1, %bl
+; CHECK-NEXT:    kmovb %ecx, %k0
+; CHECK-NEXT:    kmovw %k1, {{[0-9]+}}(%esp) # 2-byte Spill
+; CHECK-NEXT:    movb %dl, {{[0-9]+}}(%esp) # 1-byte Spill
+; CHECK-NEXT:    kmovw %k0, (%esp) # 2-byte Spill
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:  .LBB0_1: # %land.rhs
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    movb %al, %cl
+; CHECK-NEXT:    kmovb %ecx, %k0
+; CHECK-NEXT:    kmovw %k0, (%esp) # 2-byte Spill
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:  .LBB0_2: # %land.end
+; CHECK-NEXT:    kmovw (%esp), %k0 # 2-byte Reload
+; CHECK-NEXT:    kmovb %k0, %eax
+; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    addl $8, %esp
+; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    retl
+entry:
+  %b = alloca i8, align 1
+  %0 = load i8, i8* @c, align 1
+  %tobool = trunc i8 %0 to i1
+  %lnot = xor i1 %tobool, true
+  br i1 %lnot, label %land.rhs, label %land.end
+
+land.rhs:                                         ; preds = %entry
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %entry
+  %1 = phi i1 [ false, %entry ], [ false, %land.rhs ]
+  %conv = zext i1 %1 to i8
+  store i8 %conv, i8* %b, align 1
+  ret void
+}
-- 
2.50.1