From b3da570deaab8e1e94f328962ba4fe32715bdaca Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 5 Mar 2019 18:37:33 +0000 Subject: [PATCH] [X86] Enable 8-bit OR with disjoint bits to convert to LEA We already support 8-bit adds in convertToThreeAddress. But we can also support 8-bit OR if the bits are disjoint. We already do this for 16/32/64. Differential Revision: https://reviews.llvm.org/D58863 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355423 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrCompiler.td | 7 +++ lib/Target/X86/X86InstrFoldTables.cpp | 3 ++ lib/Target/X86/X86InstrInfo.cpp | 26 ++++++---- lib/Target/X86/X86InstrInfo.h | 3 +- lib/Target/X86/X86MCInstLower.cpp | 2 + lib/Target/X86/X86MacroFusion.cpp | 2 + test/CodeGen/X86/bitreverse.ll | 52 +++++++++---------- test/CodeGen/X86/bool-math.ll | 6 +-- test/CodeGen/X86/fshl.ll | 7 +-- test/CodeGen/X86/select.ll | 27 +++++++--- test/CodeGen/X86/select_const.ll | 6 +-- ...asked-merge-scalar-constmask-innerouter.ll | 14 ++--- ...-merge-scalar-constmask-interleavedbits.ll | 14 ++--- ...-scalar-constmask-interleavedbytehalves.ll | 14 ++--- ...d-masked-merge-scalar-constmask-lowhigh.ll | 14 ++--- test/CodeGen/X86/vector-bitreverse.ll | 52 +++++++++---------- 16 files changed, 146 insertions(+), 103 deletions(-) diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index fe826c4fa13..ea7453d5b4b 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1381,6 +1381,9 @@ let SchedRW = [WriteALU] in { let isConvertibleToThreeAddress = 1, Constraints = "$src1 = $dst", Defs = [EFLAGS] in { let isCommutable = 1 in { +def ADD8rr_DB : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "", // orb/addb REG, REG [(set GR8:$dst, (or_is_add GR8:$src1, GR8:$src2))]>; def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "", // orw/addw REG, REG [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>; @@ -1395,6 +1398,10 @@ def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. 
+def ADD8ri_DB : I<0, Pseudo, + (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "", // orb/addb REG, imm8 + [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>; def ADD16ri8_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "", // orw/addw REG, imm8 diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp index 17ef1d67af9..5cd327ff23b 100644 --- a/lib/Target/X86/X86InstrFoldTables.cpp +++ b/lib/Target/X86/X86InstrFoldTables.cpp @@ -65,7 +65,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, { X86::ADD8ri, X86::ADD8mi, 0 }, { X86::ADD8ri8, X86::ADD8mi8, 0 }, + { X86::ADD8ri_DB, X86::ADD8mi, TB_NO_REVERSE }, { X86::ADD8rr, X86::ADD8mr, 0 }, + { X86::ADD8rr_DB, X86::ADD8mr, TB_NO_REVERSE }, { X86::AND16ri, X86::AND16mi, 0 }, { X86::AND16ri8, X86::AND16mi8, 0 }, { X86::AND16rr, X86::AND16mr, 0 }, @@ -1218,6 +1220,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { { X86::ADD64rr, X86::ADD64rm, 0 }, { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE }, { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADD8rr_DB, X86::ADD8rm, TB_NO_REVERSE }, { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 }, { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 }, { X86::ADDSDrr, X86::ADDSDrm, 0 }, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index acc790c68c8..f17d8812a8a 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -710,11 +710,10 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI, - LiveVariables *LV) const { + LiveVariables *LV, bool Is8BitOp) const { // We handle 8-bit adds and various 16-bit opcodes in the switch below. - bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri); MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); - assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits( + assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits( *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) && "Unexpected type for LEA transform"); @@ -744,7 +743,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( unsigned Src = MI.getOperand(1).getReg(); bool IsDead = MI.getOperand(0).isDead(); bool IsKill = MI.getOperand(1).isKill(); - unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit; + unsigned SubReg = Is8BitOp ? 
X86::sub_8bit : X86::sub_16bit; assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization"); BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA); MachineInstr *InsMI = @@ -769,6 +768,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( addRegOffset(MIB, InRegLEA, true, -1); break; case X86::ADD8ri: + case X86::ADD8ri_DB: case X86::ADD16ri: case X86::ADD16ri8: case X86::ADD16ri_DB: @@ -776,6 +776,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm()); break; case X86::ADD8rr: + case X86::ADD8rr_DB: case X86::ADD16rr: case X86::ADD16rr_DB: { unsigned Src2 = MI.getOperand(2).getReg(); @@ -862,6 +863,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *NewMI = nullptr; bool Is64Bit = Subtarget.is64Bit(); + bool Is8BitOp = false; unsigned MIOpc = MI.getOpcode(); switch (MIOpc) { default: return nullptr; @@ -919,7 +921,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = getTruncatedShiftCount(MI, 2); if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; - return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV); + return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); } case X86::INC64r: case X86::INC32r: { @@ -944,7 +946,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::INC16r: - return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV); + return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); case X86::DEC64r: case X86::DEC32r: { assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!"); @@ -969,7 +971,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::DEC16r: - return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV); + return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); case X86::ADD64rr: case X86::ADD64rr_DB: case X86::ADD32rr: @@ -1008,9 +1010,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::ADD8rr: + case X86::ADD8rr_DB: + Is8BitOp = true; + LLVM_FALLTHROUGH; case X86::ADD16rr: case X86::ADD16rr_DB: - return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV); + return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD64ri32_DB: @@ -1044,11 +1049,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::ADD8ri: + case X86::ADD8ri_DB: + Is8BitOp = true; + LLVM_FALLTHROUGH; case X86::ADD16ri: case X86::ADD16ri8: case X86::ADD16ri_DB: case X86::ADD16ri8_DB: - return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV); + return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); case X86::VMOVDQU8Z128rmk: case X86::VMOVDQU8Z256rmk: case X86::VMOVDQU8Zrmk: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index d18e93ad39a..f95681b14e2 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -592,7 +592,8 @@ private: MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI, - LiveVariables *LV) const; + LiveVariables *LV, + bool Is8BitOp) const; /// Handles memory folding for special case instructions, for instance those /// requiring custom manipulation of the address. 
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index cc2a1a43b67..ce6bdafbc27 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -586,9 +586,11 @@ ReSimplify: // These are pseudo-ops for OR to help with the OR->ADD transformation. We do // this with an ugly goto in case the resultant OR uses EAX and needs the // short form. + case X86::ADD8rr_DB: OutMI.setOpcode(X86::OR8rr); goto ReSimplify; case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify; case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify; case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify; + case X86::ADD8ri_DB: OutMI.setOpcode(X86::OR8ri); goto ReSimplify; case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify; case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify; case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify; diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp index 8c2d0fe6690..e5e80a2339a 100644 --- a/lib/Target/X86/X86MacroFusion.cpp +++ b/lib/Target/X86/X86MacroFusion.cpp @@ -140,8 +140,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::ADD64rr: case X86::ADD64rr_DB: case X86::ADD8ri: + case X86::ADD8ri_DB: case X86::ADD8rm: case X86::ADD8rr: + case X86::ADD8rr_DB: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB16rm: diff --git a/test/CodeGen/X86/bitreverse.ll b/test/CodeGen/X86/bitreverse.ll index ed3fdefce7a..5da95c574eb 100644 --- a/test/CodeGen/X86/bitreverse.ll +++ b/test/CodeGen/X86/bitreverse.ll @@ -340,20 +340,20 @@ define i8 @test_bitreverse_i8(i8 %a) { ; ; X64-LABEL: test_bitreverse_i8: ; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: rolb $4, %dil ; X64-NEXT: movl %edi, %eax -; X64-NEXT: rolb $4, %al -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andb $51, %cl -; X64-NEXT: shlb $2, %cl -; X64-NEXT: andb $-52, %al -; X64-NEXT: shrb $2, %al -; X64-NEXT: orb %cl, %al -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andb $85, %cl -; X64-NEXT: addb %cl, %cl -; X64-NEXT: andb $-86, %al -; X64-NEXT: shrb %al -; X64-NEXT: orb %cl, %al +; X64-NEXT: andb $51, %al +; X64-NEXT: shlb $2, %al +; X64-NEXT: andb $-52, %dil +; X64-NEXT: shrb $2, %dil +; X64-NEXT: orb %al, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $85, %al +; X64-NEXT: addb %al, %al +; X64-NEXT: andb $-86, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: leal (%rdi,%rax), %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) @@ -384,20 +384,20 @@ define i4 @test_bitreverse_i4(i4 %a) { ; ; X64-LABEL: test_bitreverse_i4: ; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: rolb $4, %dil ; X64-NEXT: movl %edi, %eax -; X64-NEXT: rolb $4, %al -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andb $51, %cl -; X64-NEXT: shlb $2, %cl -; X64-NEXT: andb $-52, %al -; X64-NEXT: shrb $2, %al -; X64-NEXT: orb %cl, %al -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andb $80, %cl -; X64-NEXT: addb %cl, %cl -; X64-NEXT: andb $-96, %al -; X64-NEXT: shrb %al -; X64-NEXT: orb %cl, %al +; X64-NEXT: andb $51, %al +; X64-NEXT: shlb $2, %al +; X64-NEXT: andb $-52, %dil +; X64-NEXT: shrb $2, %dil +; X64-NEXT: orb %al, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb $80, %al +; X64-NEXT: addb %al, %al +; X64-NEXT: andb $-96, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: leal (%rdi,%rax), %eax ; X64-NEXT: shrb $4, %al ; X64-NEXT: # kill: def $al killed $al killed 
$eax ; X64-NEXT: retq diff --git a/test/CodeGen/X86/bool-math.ll b/test/CodeGen/X86/bool-math.ll index 3a7193bd631..c0a7a5bd4fb 100644 --- a/test/CodeGen/X86/bool-math.ll +++ b/test/CodeGen/X86/bool-math.ll @@ -47,9 +47,9 @@ define i32 @sub_zext_cmp_mask_wider_result(i8 %x) { define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) { ; X64-LABEL: sub_zext_cmp_mask_narrower_result: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $1, %al -; X64-NEXT: orb $46, %al +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: andb $1, %dil +; X64-NEXT: leal 46(%rdi), %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; diff --git a/test/CodeGen/X86/fshl.ll b/test/CodeGen/X86/fshl.ll index ccf451e0451..0e1bcb2e26d 100644 --- a/test/CodeGen/X86/fshl.ll +++ b/test/CodeGen/X86/fshl.ll @@ -381,10 +381,11 @@ define i8 @const_shift_i8(i8 %x, i8 %y) nounwind { ; ; X64-LABEL: const_shift_i8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: shrb %sil -; X64-NEXT: shlb $7, %al -; X64-NEXT: orb %sil, %al +; X64-NEXT: shlb $7, %dil +; X64-NEXT: leal (%rdi,%rsi), %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 7) diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index 99c4a99b6ed..558dc7db42d 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -1088,14 +1088,25 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { } define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) { -; CHECK-LABEL: trunc_select_miscompile: -; CHECK: ## %bb.0: -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: orb $2, %cl -; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: retq +; GENERIC-LABEL: trunc_select_miscompile: +; GENERIC: ## %bb.0: +; GENERIC-NEXT: ## kill: def $esi killed $esi def $rsi +; GENERIC-NEXT: movl %edi, %eax +; GENERIC-NEXT: leal 2(%rsi), %ecx +; GENERIC-NEXT: ## kill: def $cl killed $cl killed $ecx +; GENERIC-NEXT: shll %cl, %eax +; GENERIC-NEXT: retq +; +; ATOM-LABEL: trunc_select_miscompile: +; ATOM: ## %bb.0: +; ATOM-NEXT: ## kill: def $esi killed $esi def $rsi +; ATOM-NEXT: leal 2(%rsi), %ecx +; ATOM-NEXT: movl %edi, %eax +; ATOM-NEXT: ## kill: def $cl killed $cl killed $ecx +; ATOM-NEXT: shll %cl, %eax +; ATOM-NEXT: nop +; ATOM-NEXT: nop +; ATOM-NEXT: retq ; ; ATHLON-LABEL: trunc_select_miscompile: ; ATHLON: ## %bb.0: diff --git a/test/CodeGen/X86/select_const.ll b/test/CodeGen/X86/select_const.ll index 8ab603d396f..8f7989e2bed 100644 --- a/test/CodeGen/X86/select_const.ll +++ b/test/CodeGen/X86/select_const.ll @@ -328,9 +328,9 @@ define i32 @sel_neg1_1_32(i32 %x) { define i8 @select_pow2_diff(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shlb $4, %al -; CHECK-NEXT: orb $3, %al +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: shlb $4, %dil +; CHECK-NEXT: leal 3(%rdi), %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %sel = select i1 %cond, i8 19, i8 3 diff --git a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll index ac554781653..8daa0a6e969 100644 --- a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll +++ 
b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -10,19 +10,21 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NOBMI-NEXT: andb $60, %dil -; CHECK-NOBMI-NEXT: andb $-61, %al -; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: andb $-61, %sil +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-BMI-NEXT: andb $60, %dil -; CHECK-BMI-NEXT: andb $-61, %al -; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: andb $-61, %sil +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 60 diff --git a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll index 95de3bb2e03..33b6b66b66d 100644 --- a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll +++ b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -10,19 +10,21 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NOBMI-NEXT: andb $85, %dil -; CHECK-NOBMI-NEXT: andb $-86, %al -; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: andb $-86, %sil +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-BMI-NEXT: andb $85, %dil -; CHECK-BMI-NEXT: andb $-86, %al -; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: andb $-86, %sil +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 85 diff --git a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll index c7579e2aa9d..bbc987d4474 100644 --- a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll +++ b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -10,19 +10,21 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NOBMI-NEXT: andb $15, %dil -; CHECK-NOBMI-NEXT: andb $-16, %al -; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: andb $-16, %sil +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $esi 
killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-BMI-NEXT: andb $15, %dil -; CHECK-BMI-NEXT: andb $-16, %al -; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: andb $-16, %sil +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 15 diff --git a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll index 4a63eba04d8..78faa3ca717 100644 --- a/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ b/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -10,19 +10,21 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-NOBMI-LABEL: out8_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NOBMI-NEXT: andb $15, %dil -; CHECK-NOBMI-NEXT: andb $-16, %al -; CHECK-NOBMI-NEXT: orb %dil, %al +; CHECK-NOBMI-NEXT: andb $-16, %sil +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out8_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-BMI-NEXT: andb $15, %dil -; CHECK-BMI-NEXT: andb $-16, %al -; CHECK-BMI-NEXT: orb %dil, %al +; CHECK-BMI-NEXT: andb $-16, %sil +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax ; CHECK-BMI-NEXT: retq %mx = and i8 %x, 15 diff --git a/test/CodeGen/X86/vector-bitreverse.ll b/test/CodeGen/X86/vector-bitreverse.ll index df1b5041caf..a564bbc1743 100644 --- a/test/CodeGen/X86/vector-bitreverse.ll +++ b/test/CodeGen/X86/vector-bitreverse.ll @@ -14,39 +14,39 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; SSE-LABEL: test_bitreverse_i8: ; SSE: # %bb.0: +; SSE-NEXT: # kill: def $edi killed $edi def $rdi +; SSE-NEXT: rolb $4, %dil ; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: rolb $4, %al -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: andb $51, %cl -; SSE-NEXT: shlb $2, %cl -; SSE-NEXT: andb $-52, %al -; SSE-NEXT: shrb $2, %al -; SSE-NEXT: orb %cl, %al -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: andb $85, %cl -; SSE-NEXT: addb %cl, %cl -; SSE-NEXT: andb $-86, %al -; SSE-NEXT: shrb %al -; SSE-NEXT: orb %cl, %al +; SSE-NEXT: andb $51, %al +; SSE-NEXT: shlb $2, %al +; SSE-NEXT: andb $-52, %dil +; SSE-NEXT: shrb $2, %dil +; SSE-NEXT: orb %al, %dil +; SSE-NEXT: movl %edi, %eax +; SSE-NEXT: andb $85, %al +; SSE-NEXT: addb %al, %al +; SSE-NEXT: andb $-86, %dil +; SSE-NEXT: shrb %dil +; SSE-NEXT: leal (%rdi,%rax), %eax ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq ; ; AVX-LABEL: test_bitreverse_i8: ; AVX: # %bb.0: +; AVX-NEXT: # kill: def $edi killed $edi def $rdi +; AVX-NEXT: rolb $4, %dil ; AVX-NEXT: movl %edi, %eax -; AVX-NEXT: rolb $4, %al -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: andb $51, %cl -; AVX-NEXT: shlb $2, %cl -; AVX-NEXT: andb $-52, %al -; AVX-NEXT: shrb $2, %al -; AVX-NEXT: orb %cl, %al -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: andb $85, %cl -; AVX-NEXT: addb %cl, %cl -; AVX-NEXT: andb $-86, %al -; AVX-NEXT: shrb %al -; AVX-NEXT: orb %cl, %al +; AVX-NEXT: andb $51, %al +; AVX-NEXT: shlb $2, %al +; AVX-NEXT: andb $-52, %dil +; AVX-NEXT: shrb $2, %dil +; AVX-NEXT: orb %al, %dil +; AVX-NEXT: movl %edi, %eax +; 
AVX-NEXT: andb $85, %al +; AVX-NEXT: addb %al, %al +; AVX-NEXT: andb $-86, %dil +; AVX-NEXT: shrb %dil +; AVX-NEXT: leal (%rdi,%rax), %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; -- 2.50.1
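
Why the transformation is legal: OR and ADD compute the same result whenever the two operands have no set bits in common, because with disjoint bits the addition generates no carries. That is the property the *_DB ("disjoint bits") pseudo-instructions encode, and it is what lets the byte-sized `orb` in the tests above be re-emitted as a three-address `leal (%rdi,%rax), %eax`, so the destination no longer has to be tied to a source register. Below is a minimal standalone C++ sketch, not part of the patch, that exhaustively checks the identity for all 8-bit operand pairs:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // For every pair of bytes with disjoint set bits, OR and ADD agree:
  // no bit position is set in both operands, so the add never carries.
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      if ((A & B) == 0)
        assert(static_cast<std::uint8_t>(A | B) ==
               static_cast<std::uint8_t>(A + B));
  std::puts("OK: a | b == a + b for all disjoint 8-bit pairs");
  return 0;
}

This is the same identity the existing 16/32/64-bit ADD*_DB pseudos already exploit; the patch adds the 8-bit variants and routes them through convertToThreeAddressWithLEA, which widens the byte operands into LEA-capable 32-bit registers (hence the extra `# kill: def $edi killed $edi def $rdi` lines in the updated tests).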