// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
case ISD::ADD:
- // Due to an isel shortcoming, be conservative if this add is likely to be
- // selected as part of a load-modify-store instruction. When the root node
- // in a match is a store, isel doesn't know how to remap non-chain non-flag
- // uses of other nodes in the match, such as the ADD in this case. This
- // leads to the ADD being left around and reselected, with the result being
- // two adds in the output. Alas, even if none our users are stores, that
- // doesn't prove we're O.K. Ergo, if we have any parents that aren't
- // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
- // climbing the DAG back to the root, and it doesn't seem to be worth the
- // effort.
+ // We only want to rewrite this as a target-specific node with attached
+ // flags if there is a reasonable chance of either using that to do custom
+ // instruction selection that can fold some of the memory operands, or if
+ // only the flags are used. If there are other uses, leave the node alone
+ // and emit a test instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
- // Due to the ISEL shortcoming noted above, be conservative if this op is
- // likely to be selected as part of a load-modify-store instruction.
+ // Similar to ISD::ADD above, check if the uses will preclude useful
+ // lowering of the target-specific node.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() == ISD::STORE)
+ if (UI->getOpcode() != ISD::CopyToReg &&
+ UI->getOpcode() != ISD::SETCC &&
+ UI->getOpcode() != ISD::STORE)
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
; LINUX-NEXT: .p2align 4, 0x90
; LINUX-NEXT: .LBB0_1: # %atomicrmw.start
; LINUX-NEXT: # =>This Inner Loop Header: Depth=1
-; LINUX-NEXT: xorl %ecx, %ecx
; LINUX-NEXT: cmpl %eax, %esi
+; LINUX-NEXT: movl $0, %ecx
; LINUX-NEXT: sbbl %edx, %ecx
-; LINUX-NEXT: setl %cl
-; LINUX-NEXT: andb $1, %cl
-; LINUX-NEXT: movl %eax, %ebx
-; LINUX-NEXT: jne .LBB0_3
-; LINUX-NEXT: # BB#2: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1
+; LINUX-NEXT: movl $0, %ecx
+; LINUX-NEXT: cmovll %edx, %ecx
; LINUX-NEXT: movl $5, %ebx
-; LINUX-NEXT: .LBB0_3: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LINUX-NEXT: testb %cl, %cl
-; LINUX-NEXT: movl %edx, %ecx
-; LINUX-NEXT: jne .LBB0_5
-; LINUX-NEXT: # BB#4: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1
-; LINUX-NEXT: xorl %ecx, %ecx
-; LINUX-NEXT: .LBB0_5: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1
+; LINUX-NEXT: cmovll %eax, %ebx
; LINUX-NEXT: lock cmpxchg8b sc64
; LINUX-NEXT: jne .LBB0_1
-; LINUX-NEXT: # BB#6: # %atomicrmw.end
+; LINUX-NEXT: # BB#2: # %atomicrmw.end
; LINUX-NEXT: popl %esi
; LINUX-NEXT: popl %ebx
; LINUX-NEXT: retl
; PIC-NEXT: .p2align 4, 0x90
; PIC-NEXT: LBB0_1: ## %atomicrmw.start
; PIC-NEXT: ## =>This Inner Loop Header: Depth=1
-; PIC-NEXT: xorl %ecx, %ecx
; PIC-NEXT: cmpl %eax, %edi
+; PIC-NEXT: movl $0, %ecx
; PIC-NEXT: sbbl %edx, %ecx
-; PIC-NEXT: setl %cl
-; PIC-NEXT: andb $1, %cl
-; PIC-NEXT: movl %eax, %ebx
-; PIC-NEXT: jne LBB0_3
-; PIC-NEXT: ## BB#2: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
+; PIC-NEXT: movl $0, %ecx
+; PIC-NEXT: cmovll %edx, %ecx
; PIC-NEXT: movl $5, %ebx
-; PIC-NEXT: LBB0_3: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
-; PIC-NEXT: testb %cl, %cl
-; PIC-NEXT: movl %edx, %ecx
-; PIC-NEXT: jne LBB0_5
-; PIC-NEXT: ## BB#4: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
-; PIC-NEXT: xorl %ecx, %ecx
-; PIC-NEXT: LBB0_5: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
+; PIC-NEXT: cmovll %eax, %ebx
; PIC-NEXT: lock cmpxchg8b (%esi)
; PIC-NEXT: jne LBB0_1
-; PIC-NEXT: ## BB#6: ## %atomicrmw.end
+; PIC-NEXT: ## BB#2: ## %atomicrmw.end
; PIC-NEXT: popl %esi
; PIC-NEXT: popl %edi
; PIC-NEXT: popl %ebx
; LINUX-NEXT: cmpl $7, %eax
; LINUX-NEXT: movl %edx, %ecx
; LINUX-NEXT: sbbl $0, %ecx
-; LINUX-NEXT: setl %cl
-; LINUX-NEXT: andb $1, %cl
-; LINUX-NEXT: movl %eax, %ebx
-; LINUX-NEXT: jne .LBB1_3
-; LINUX-NEXT: # BB#2: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1
+; LINUX-NEXT: movl $0, %ecx
+; LINUX-NEXT: cmovll %edx, %ecx
; LINUX-NEXT: movl $6, %ebx
-; LINUX-NEXT: .LBB1_3: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LINUX-NEXT: testb %cl, %cl
-; LINUX-NEXT: movl %edx, %ecx
-; LINUX-NEXT: jne .LBB1_5
-; LINUX-NEXT: # BB#4: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1
-; LINUX-NEXT: xorl %ecx, %ecx
-; LINUX-NEXT: .LBB1_5: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1
+; LINUX-NEXT: cmovll %eax, %ebx
; LINUX-NEXT: lock cmpxchg8b sc64
; LINUX-NEXT: jne .LBB1_1
-; LINUX-NEXT: # BB#6: # %atomicrmw.end
+; LINUX-NEXT: # BB#2: # %atomicrmw.end
; LINUX-NEXT: popl %ebx
; LINUX-NEXT: retl
;
; PIC-NEXT: cmpl $7, %eax
; PIC-NEXT: movl %edx, %ecx
; PIC-NEXT: sbbl $0, %ecx
-; PIC-NEXT: setl %cl
-; PIC-NEXT: andb $1, %cl
-; PIC-NEXT: movl %eax, %ebx
-; PIC-NEXT: jne LBB1_3
-; PIC-NEXT: ## BB#2: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1
+; PIC-NEXT: movl $0, %ecx
+; PIC-NEXT: cmovll %edx, %ecx
; PIC-NEXT: movl $6, %ebx
-; PIC-NEXT: LBB1_3: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1
-; PIC-NEXT: testb %cl, %cl
-; PIC-NEXT: movl %edx, %ecx
-; PIC-NEXT: jne LBB1_5
-; PIC-NEXT: ## BB#4: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1
-; PIC-NEXT: xorl %ecx, %ecx
-; PIC-NEXT: LBB1_5: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1
+; PIC-NEXT: cmovll %eax, %ebx
; PIC-NEXT: lock cmpxchg8b (%esi)
; PIC-NEXT: jne LBB1_1
-; PIC-NEXT: ## BB#6: ## %atomicrmw.end
+; PIC-NEXT: ## BB#2: ## %atomicrmw.end
; PIC-NEXT: popl %esi
; PIC-NEXT: popl %ebx
; PIC-NEXT: retl
; LINUX-NEXT: .p2align 4, 0x90
; LINUX-NEXT: .LBB2_1: # %atomicrmw.start
; LINUX-NEXT: # =>This Inner Loop Header: Depth=1
-; LINUX-NEXT: xorl %ecx, %ecx
; LINUX-NEXT: cmpl %eax, %esi
+; LINUX-NEXT: movl $0, %ecx
; LINUX-NEXT: sbbl %edx, %ecx
-; LINUX-NEXT: setb %cl
-; LINUX-NEXT: andb $1, %cl
-; LINUX-NEXT: movl %eax, %ebx
-; LINUX-NEXT: jne .LBB2_3
-; LINUX-NEXT: # BB#2: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1
+; LINUX-NEXT: movl $0, %ecx
+; LINUX-NEXT: cmovbl %edx, %ecx
; LINUX-NEXT: movl $7, %ebx
-; LINUX-NEXT: .LBB2_3: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1
-; LINUX-NEXT: testb %cl, %cl
-; LINUX-NEXT: movl %edx, %ecx
-; LINUX-NEXT: jne .LBB2_5
-; LINUX-NEXT: # BB#4: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1
-; LINUX-NEXT: xorl %ecx, %ecx
-; LINUX-NEXT: .LBB2_5: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1
+; LINUX-NEXT: cmovbl %eax, %ebx
; LINUX-NEXT: lock cmpxchg8b sc64
; LINUX-NEXT: jne .LBB2_1
-; LINUX-NEXT: # BB#6: # %atomicrmw.end
+; LINUX-NEXT: # BB#2: # %atomicrmw.end
; LINUX-NEXT: popl %esi
; LINUX-NEXT: popl %ebx
; LINUX-NEXT: retl
; PIC-NEXT: .p2align 4, 0x90
; PIC-NEXT: LBB2_1: ## %atomicrmw.start
; PIC-NEXT: ## =>This Inner Loop Header: Depth=1
-; PIC-NEXT: xorl %ecx, %ecx
; PIC-NEXT: cmpl %eax, %edi
+; PIC-NEXT: movl $0, %ecx
; PIC-NEXT: sbbl %edx, %ecx
-; PIC-NEXT: setb %cl
-; PIC-NEXT: andb $1, %cl
-; PIC-NEXT: movl %eax, %ebx
-; PIC-NEXT: jne LBB2_3
-; PIC-NEXT: ## BB#2: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1
+; PIC-NEXT: movl $0, %ecx
+; PIC-NEXT: cmovbl %edx, %ecx
; PIC-NEXT: movl $7, %ebx
-; PIC-NEXT: LBB2_3: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1
-; PIC-NEXT: testb %cl, %cl
-; PIC-NEXT: movl %edx, %ecx
-; PIC-NEXT: jne LBB2_5
-; PIC-NEXT: ## BB#4: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1
-; PIC-NEXT: xorl %ecx, %ecx
-; PIC-NEXT: LBB2_5: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1
+; PIC-NEXT: cmovbl %eax, %ebx
; PIC-NEXT: lock cmpxchg8b (%esi)
; PIC-NEXT: jne LBB2_1
-; PIC-NEXT: ## BB#6: ## %atomicrmw.end
+; PIC-NEXT: ## BB#2: ## %atomicrmw.end
; PIC-NEXT: popl %esi
; PIC-NEXT: popl %edi
; PIC-NEXT: popl %ebx
; LINUX-NEXT: cmpl $9, %eax
; LINUX-NEXT: movl %edx, %ecx
; LINUX-NEXT: sbbl $0, %ecx
-; LINUX-NEXT: setb %cl
-; LINUX-NEXT: andb $1, %cl
-; LINUX-NEXT: movl %eax, %ebx
-; LINUX-NEXT: jne .LBB3_3
-; LINUX-NEXT: # BB#2: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1
+; LINUX-NEXT: movl $0, %ecx
+; LINUX-NEXT: cmovbl %edx, %ecx
; LINUX-NEXT: movl $8, %ebx
-; LINUX-NEXT: .LBB3_3: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1
-; LINUX-NEXT: testb %cl, %cl
-; LINUX-NEXT: movl %edx, %ecx
-; LINUX-NEXT: jne .LBB3_5
-; LINUX-NEXT: # BB#4: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1
-; LINUX-NEXT: xorl %ecx, %ecx
-; LINUX-NEXT: .LBB3_5: # %atomicrmw.start
-; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1
+; LINUX-NEXT: cmovbl %eax, %ebx
; LINUX-NEXT: lock cmpxchg8b sc64
; LINUX-NEXT: jne .LBB3_1
-; LINUX-NEXT: # BB#6: # %atomicrmw.end
+; LINUX-NEXT: # BB#2: # %atomicrmw.end
; LINUX-NEXT: popl %ebx
; LINUX-NEXT: retl
;
; PIC-NEXT: cmpl $9, %eax
; PIC-NEXT: movl %edx, %ecx
; PIC-NEXT: sbbl $0, %ecx
-; PIC-NEXT: setb %cl
-; PIC-NEXT: andb $1, %cl
-; PIC-NEXT: movl %eax, %ebx
-; PIC-NEXT: jne LBB3_3
-; PIC-NEXT: ## BB#2: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1
+; PIC-NEXT: movl $0, %ecx
+; PIC-NEXT: cmovbl %edx, %ecx
; PIC-NEXT: movl $8, %ebx
-; PIC-NEXT: LBB3_3: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1
-; PIC-NEXT: testb %cl, %cl
-; PIC-NEXT: movl %edx, %ecx
-; PIC-NEXT: jne LBB3_5
-; PIC-NEXT: ## BB#4: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1
-; PIC-NEXT: xorl %ecx, %ecx
-; PIC-NEXT: LBB3_5: ## %atomicrmw.start
-; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1
+; PIC-NEXT: cmovbl %eax, %ebx
; PIC-NEXT: lock cmpxchg8b (%esi)
; PIC-NEXT: jne LBB3_1
-; PIC-NEXT: ## BB#6: ## %atomicrmw.end
+; PIC-NEXT: ## BB#2: ## %atomicrmw.end
; PIC-NEXT: popl %esi
; PIC-NEXT: popl %ebx
; PIC-NEXT: retl
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
-; CHECK-NEXT: setge %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movq %rax, %rbx
-; CHECK-NEXT: jne LBB5_3
-; CHECK-NEXT: ## BB#2: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1
-; CHECK-NEXT: movq %rsi, %rbx
-; CHECK-NEXT: LBB5_3: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1
-; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: movq %rdx, %rcx
-; CHECK-NEXT: jne LBB5_5
-; CHECK-NEXT: ## BB#4: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1
; CHECK-NEXT: movq %r8, %rcx
-; CHECK-NEXT: LBB5_5: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB5_1 Depth=1
+; CHECK-NEXT: cmovgeq %rdx, %rcx
+; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: cmovgeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB5_1
-; CHECK-NEXT: ## BB#6: ## %atomicrmw.end
+; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: cmpq %rsi, %rax
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: sbbq %r8, %rcx
-; CHECK-NEXT: setge %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movq %rax, %rbx
-; CHECK-NEXT: jne LBB6_3
-; CHECK-NEXT: ## BB#2: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1
-; CHECK-NEXT: movq %rsi, %rbx
-; CHECK-NEXT: LBB6_3: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1
-; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: movq %rdx, %rcx
-; CHECK-NEXT: jne LBB6_5
-; CHECK-NEXT: ## BB#4: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1
; CHECK-NEXT: movq %r8, %rcx
-; CHECK-NEXT: LBB6_5: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB6_1 Depth=1
+; CHECK-NEXT: cmovgeq %rdx, %rcx
+; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: cmovgeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB6_1
-; CHECK-NEXT: ## BB#6: ## %atomicrmw.end
+; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
-; CHECK-NEXT: setae %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movq %rax, %rbx
-; CHECK-NEXT: jne LBB7_3
-; CHECK-NEXT: ## BB#2: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1
-; CHECK-NEXT: movq %rsi, %rbx
-; CHECK-NEXT: LBB7_3: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1
-; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: movq %rdx, %rcx
-; CHECK-NEXT: jne LBB7_5
-; CHECK-NEXT: ## BB#4: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1
; CHECK-NEXT: movq %r8, %rcx
-; CHECK-NEXT: LBB7_5: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB7_1 Depth=1
+; CHECK-NEXT: cmovaeq %rdx, %rcx
+; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: cmovaeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB7_1
-; CHECK-NEXT: ## BB#6: ## %atomicrmw.end
+; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
-; CHECK-NEXT: setb %cl
-; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movq %rax, %rbx
-; CHECK-NEXT: jne LBB8_3
-; CHECK-NEXT: ## BB#2: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1
-; CHECK-NEXT: movq %rsi, %rbx
-; CHECK-NEXT: LBB8_3: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1
-; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: movq %rdx, %rcx
-; CHECK-NEXT: jne LBB8_5
-; CHECK-NEXT: ## BB#4: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1
; CHECK-NEXT: movq %r8, %rcx
-; CHECK-NEXT: LBB8_5: ## %atomicrmw.start
-; CHECK-NEXT: ## in Loop: Header=BB8_1 Depth=1
+; CHECK-NEXT: cmovbq %rdx, %rcx
+; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: cmovbq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB8_1
-; CHECK-NEXT: ## BB#6: ## %atomicrmw.end
+; CHECK-NEXT: ## BB#2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, {{.*}}(%rip)
; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip)
; CHECK-NEXT: popq %rbx
define i32 @smin(i32 %x) {
; CHECK-LABEL: smin:
; CHECK: # BB#0:
-; CHECK-NEXT: xorl $-1, %edi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovsl %edi, %eax
; CHECK-NEXT: retq
; CHECK-LABEL: test20:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; CHECK-NEXT: andl $16777215, %edi ## encoding: [0x81,0xe7,0xff,0xff,0xff,0x00]
+; CHECK-NEXT: testl $16777215, %edi ## encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00]
; CHECK-NEXT: ## imm = 0xFFFFFF
; CHECK-NEXT: setne %al ## encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: movzbl %sil, %ecx ## encoding: [0x40,0x0f,0xb6,0xce]
; CHECK-NEXT: addl %eax, %ecx ## encoding: [0x01,0xc1]
; CHECK-NEXT: setne (%rdx) ## encoding: [0x0f,0x95,0x02]
-; CHECK-NEXT: testl %edi, %edi ## encoding: [0x85,0xff]
+; CHECK-NEXT: testl $16777215, %edi ## encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00]
+; CHECK-NEXT: ## imm = 0xFFFFFF
; CHECK-NEXT: setne {{.*}}(%rip) ## encoding: [0x0f,0x95,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 3, value: _d-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq ## encoding: [0xc3]
; MCU-LABEL: test5:
; MCU: # BB#0:
; MCU-NEXT: pushl %esi
-; MCU-NEXT: andb $1, %al
+; MCU-NEXT: movl {{[0-9]+}}(%esp), %esi
+; MCU-NEXT: testb $1, %al
; MCU-NEXT: jne .LBB4_2
; MCU-NEXT: # BB#1:
+; MCU-NEXT: movw {{[0-9]+}}(%esp), %cx
; MCU-NEXT: movw {{[0-9]+}}(%esp), %dx
; MCU-NEXT: .LBB4_2:
-; MCU-NEXT: movl {{[0-9]+}}(%esp), %esi
-; MCU-NEXT: testb %al, %al
-; MCU-NEXT: jne .LBB4_4
-; MCU-NEXT: # BB#3:
-; MCU-NEXT: movw {{[0-9]+}}(%esp), %cx
-; MCU-NEXT: .LBB4_4:
-; MCU-NEXT: movw %dx, (%esi)
; MCU-NEXT: movw %cx, 2(%esi)
+; MCU-NEXT: movw %dx, (%esi)
; MCU-NEXT: popl %esi
; MCU-NEXT: retl
%x = select i1 %c, <2 x i16> %a, <2 x i16> %b
define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwind {
; GENERIC-LABEL: test8:
; GENERIC: ## BB#0:
-; GENERIC-NEXT: andb $1, %dil
+; GENERIC-NEXT: testb $1, %dil
; GENERIC-NEXT: jne LBB7_1
; GENERIC-NEXT: ## BB#2:
-; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; GENERIC-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; GENERIC-NEXT: jmp LBB7_3
-; GENERIC-NEXT: LBB7_1:
; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; GENERIC-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; GENERIC-NEXT: LBB7_3:
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; GENERIC-NEXT: testb %dil, %dil
-; GENERIC-NEXT: jne LBB7_4
-; GENERIC-NEXT: ## BB#5:
+; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; GENERIC-NEXT: jmp LBB7_3
+; GENERIC-NEXT: LBB7_1:
+; GENERIC-NEXT: movd %r9d, %xmm0
+; GENERIC-NEXT: movd %r8d, %xmm1
+; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; GENERIC-NEXT: movd %ecx, %xmm2
+; GENERIC-NEXT: movd %edx, %xmm0
+; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; GENERIC-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; GENERIC-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; GENERIC-NEXT: jmp LBB7_6
-; GENERIC-NEXT: LBB7_4:
-; GENERIC-NEXT: movd %r9d, %xmm1
-; GENERIC-NEXT: movd %r8d, %xmm2
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; GENERIC-NEXT: movd %ecx, %xmm3
-; GENERIC-NEXT: movd %edx, %xmm1
-; GENERIC-NEXT: LBB7_6:
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; GENERIC-NEXT: LBB7_3:
+; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; GENERIC-NEXT: pcmpeqd %xmm2, %xmm2
-; GENERIC-NEXT: paddd %xmm2, %xmm1
; GENERIC-NEXT: paddd %xmm2, %xmm0
-; GENERIC-NEXT: movq %xmm0, 16(%rsi)
-; GENERIC-NEXT: movdqa %xmm1, (%rsi)
+; GENERIC-NEXT: paddd %xmm2, %xmm1
+; GENERIC-NEXT: movq %xmm1, 16(%rsi)
+; GENERIC-NEXT: movdqa %xmm0, (%rsi)
; GENERIC-NEXT: retq
; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test8:
; ATOM: ## BB#0:
-; ATOM-NEXT: andb $1, %dil
+; ATOM-NEXT: testb $1, %dil
; ATOM-NEXT: jne LBB7_1
; ATOM-NEXT: ## BB#2:
-; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; ATOM-NEXT: jmp LBB7_3
-; ATOM-NEXT: LBB7_1:
-; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; ATOM-NEXT: LBB7_3:
-; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; ATOM-NEXT: testb %dil, %dil
-; ATOM-NEXT: jne LBB7_4
-; ATOM-NEXT: ## BB#5:
; ATOM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; ATOM-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; ATOM-NEXT: jmp LBB7_6
-; ATOM-NEXT: LBB7_4:
-; ATOM-NEXT: movd %r9d, %xmm1
+; ATOM-NEXT: jmp LBB7_3
+; ATOM-NEXT: LBB7_1:
+; ATOM-NEXT: movd %r9d, %xmm0
; ATOM-NEXT: movd %r8d, %xmm2
-; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; ATOM-NEXT: movd %ecx, %xmm3
-; ATOM-NEXT: movd %edx, %xmm1
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; ATOM-NEXT: LBB7_6:
+; ATOM-NEXT: movd %edx, %xmm0
+; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; ATOM-NEXT: LBB7_3:
+; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; ATOM-NEXT: pcmpeqd %xmm2, %xmm2
+; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
; ATOM-NEXT: paddd %xmm2, %xmm0
; ATOM-NEXT: paddd %xmm2, %xmm1
-; ATOM-NEXT: movq %xmm0, 16(%rsi)
-; ATOM-NEXT: movdqa %xmm1, (%rsi)
+; ATOM-NEXT: movq %xmm1, 16(%rsi)
+; ATOM-NEXT: movdqa %xmm0, (%rsi)
; ATOM-NEXT: retq
; ATOM-NEXT: ## -- End function
;
; MCU-NEXT: pushl %ebx
; MCU-NEXT: pushl %edi
; MCU-NEXT: pushl %esi
-; MCU-NEXT: andb $1, %al
+; MCU-NEXT: testb $1, %al
; MCU-NEXT: jne .LBB7_1
; MCU-NEXT: # BB#2:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; MCU-NEXT: movl (%ecx), %ecx
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax
+; MCU-NEXT: movl (%eax), %eax
; MCU-NEXT: je .LBB7_5
; MCU-NEXT: .LBB7_4:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi
-; MCU-NEXT: movl (%esi), %esi
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; MCU-NEXT: movl (%ecx), %ecx
; MCU-NEXT: je .LBB7_8
; MCU-NEXT: .LBB7_7:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
-; MCU-NEXT: movl (%edi), %edi
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi
+; MCU-NEXT: movl (%esi), %esi
; MCU-NEXT: je .LBB7_11
; MCU-NEXT: .LBB7_10:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx
-; MCU-NEXT: movl (%ebx), %ebx
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
+; MCU-NEXT: movl (%edi), %edi
; MCU-NEXT: je .LBB7_14
; MCU-NEXT: .LBB7_13:
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx
+; MCU-NEXT: movl (%ebx), %ebx
+; MCU-NEXT: je .LBB7_17
+; MCU-NEXT: .LBB7_16:
; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
-; MCU-NEXT: jmp .LBB7_15
+; MCU-NEXT: jmp .LBB7_18
; MCU-NEXT: .LBB7_1:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; MCU-NEXT: movl (%ecx), %ecx
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax
+; MCU-NEXT: movl (%eax), %eax
; MCU-NEXT: jne .LBB7_4
; MCU-NEXT: .LBB7_5:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi
-; MCU-NEXT: movl (%esi), %esi
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; MCU-NEXT: movl (%ecx), %ecx
; MCU-NEXT: jne .LBB7_7
; MCU-NEXT: .LBB7_8:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
-; MCU-NEXT: movl (%edi), %edi
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %esi
+; MCU-NEXT: movl (%esi), %esi
; MCU-NEXT: jne .LBB7_10
; MCU-NEXT: .LBB7_11:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx
-; MCU-NEXT: movl (%ebx), %ebx
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %edi
+; MCU-NEXT: movl (%edi), %edi
; MCU-NEXT: jne .LBB7_13
; MCU-NEXT: .LBB7_14:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
-; MCU-NEXT: .LBB7_15:
-; MCU-NEXT: movl (%ebp), %ebp
-; MCU-NEXT: testb %al, %al
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebx
+; MCU-NEXT: movl (%ebx), %ebx
; MCU-NEXT: jne .LBB7_16
-; MCU-NEXT: # BB#17:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax
-; MCU-NEXT: jmp .LBB7_18
-; MCU-NEXT: .LBB7_16:
-; MCU-NEXT: leal {{[0-9]+}}(%esp), %eax
+; MCU-NEXT: .LBB7_17:
+; MCU-NEXT: leal {{[0-9]+}}(%esp), %ebp
; MCU-NEXT: .LBB7_18:
-; MCU-NEXT: movl (%eax), %eax
-; MCU-NEXT: decl %eax
+; MCU-NEXT: movl (%ebp), %ebp
; MCU-NEXT: decl %ebp
; MCU-NEXT: decl %ebx
; MCU-NEXT: decl %edi
; MCU-NEXT: decl %esi
; MCU-NEXT: decl %ecx
-; MCU-NEXT: movl %ecx, 20(%edx)
-; MCU-NEXT: movl %esi, 16(%edx)
-; MCU-NEXT: movl %edi, 12(%edx)
-; MCU-NEXT: movl %ebx, 8(%edx)
-; MCU-NEXT: movl %ebp, 4(%edx)
-; MCU-NEXT: movl %eax, (%edx)
+; MCU-NEXT: decl %eax
+; MCU-NEXT: movl %eax, 20(%edx)
+; MCU-NEXT: movl %ecx, 16(%edx)
+; MCU-NEXT: movl %esi, 12(%edx)
+; MCU-NEXT: movl %edi, 8(%edx)
+; MCU-NEXT: movl %ebx, 4(%edx)
+; MCU-NEXT: movl %ebp, (%edx)
; MCU-NEXT: popl %esi
; MCU-NEXT: popl %edi
; MCU-NEXT: popl %ebx
define i32 @test_x86_tbm_bextri_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: shrl $4, %edi
-; CHECK-NEXT: andl $4095, %edi # imm = 0xFFF
-; CHECK-NEXT: cmovel %esi, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: bextr $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = lshr i32 %a, 4
%t1 = and i32 %t0, 4095
define i64 @test_x86_tbm_bextri_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: shrl $4, %edi
-; CHECK-NEXT: andl $4095, %edi # imm = 0xFFF
-; CHECK-NEXT: cmoveq %rsi, %rdi
-; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: bextr $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: cmovneq %rax, %rsi
+; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: retq
%t0 = lshr i64 %a, 4
%t1 = and i64 %t0, 4095
define i32 @test_x86_tbm_blcfill_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcfill_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK-NEXT: leal 1(%rdi), %eax
-; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: blcfill %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
define i64 @test_x86_tbm_blcfill_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcfill_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: leaq 1(%rdi), %rax
-; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: blcfill %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
define i32 @test_x86_tbm_blci_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blci_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK-NEXT: leal 1(%rdi), %eax
-; CHECK-NEXT: notl %eax
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: blci %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = add i32 1, %a
define i64 @test_x86_tbm_blci_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blci_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: leaq 1(%rdi), %rax
-; CHECK-NEXT: notq %rax
-; CHECK-NEXT: orq %rdi, %rax
+; CHECK-NEXT: blci %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = add i64 1, %a
define i32 @test_x86_tbm_blcic_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcic_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK-NEXT: leal 1(%rdi), %eax
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: notl %ecx
-; CHECK-NEXT: andl %ecx, %eax
+; CHECK-NEXT: blcic %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
define i64 @test_x86_tbm_blcic_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcic_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: leaq 1(%rdi), %rax
-; CHECK-NEXT: notq %rdi
-; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: blcic %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
define i32 @test_x86_tbm_blcmsk_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK-NEXT: leal 1(%rdi), %eax
-; CHECK-NEXT: xorl %edi, %eax
+; CHECK-NEXT: blcmsk %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
define i64 @test_x86_tbm_blcmsk_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: leaq 1(%rdi), %rax
-; CHECK-NEXT: xorq %rdi, %rax
+; CHECK-NEXT: blcmsk %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
define i32 @test_x86_tbm_blcs_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcs_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK-NEXT: leal 1(%rdi), %eax
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: blcs %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
define i64 @test_x86_tbm_blcs_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blcs_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: leaq 1(%rdi), %rax
-; CHECK-NEXT: orq %rdi, %rax
+; CHECK-NEXT: blcs %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
define i32 @test_x86_tbm_blsfill_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blsfill_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; CHECK-NEXT: leal -1(%rdi), %eax
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: blsfill %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, -1
define i64 @test_x86_tbm_blsfill_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blsfill_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: leaq -1(%rdi), %rax
-; CHECK-NEXT: orq %rdi, %rax
+; CHECK-NEXT: blsfill %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, -1
define i32 @test_x86_tbm_blsic_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blsic_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: notl %eax
-; CHECK-NEXT: decl %edi
-; CHECK-NEXT: orl %eax, %edi
-; CHECK-NEXT: cmovel %esi, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: blsic %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
%t1 = add i32 %a, -1
define i64 @test_x86_tbm_blsic_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_blsic_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: notq %rax
-; CHECK-NEXT: decq %rdi
-; CHECK-NEXT: orq %rax, %rdi
-; CHECK-NEXT: cmoveq %rsi, %rdi
-; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: blsic %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
%t1 = add i64 %a, -1
define i32 @test_x86_tbm_t1mskc_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: notl %eax
-; CHECK-NEXT: incl %edi
-; CHECK-NEXT: orl %eax, %edi
-; CHECK-NEXT: cmovel %esi, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: t1mskc %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
%t1 = add i32 %a, 1
define i64 @test_x86_tbm_t1mskc_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: notq %rax
-; CHECK-NEXT: incq %rdi
-; CHECK-NEXT: orq %rax, %rdi
-; CHECK-NEXT: cmoveq %rsi, %rdi
-; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: t1mskc %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
%t1 = add i64 %a, 1
define i32 @test_x86_tbm_tzmsk_u32_z(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z:
; CHECK: # BB#0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: notl %eax
-; CHECK-NEXT: decl %edi
-; CHECK-NEXT: andl %eax, %edi
-; CHECK-NEXT: cmovel %esi, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: tzmsk %edi, %eax
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: cmovel %esi, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
%t1 = add i32 %a, -1
define i64 @test_x86_tbm_tzmsk_u64_z(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z:
; CHECK: # BB#0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: notq %rax
-; CHECK-NEXT: decq %rdi
-; CHECK-NEXT: andq %rax, %rdi
-; CHECK-NEXT: cmoveq %rsi, %rdi
-; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: tzmsk %rdi, %rax
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmoveq %rsi, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
%t1 = add i64 %a, -1