}
bool foldLoadStoreIntoMemOperand(SDNode *Node);
+
+ bool matchBEXTRFromAnd(SDNode *Node);
};
}
return true;
}
+// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
+bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
+ MVT NVT = Node->getSimpleValueType(0);
+ SDLoc dl(Node);
+
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
+ return false;
+
+ // Must have a shift right.
+ if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
+ return false;
+
+ // Shift can't have additional users.
+ if (!N0->hasOneUse())
+ return false;
+
+ // Only supported for 32 and 64 bits.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ return false;
+
+  // The shift amount and the AND mask must both be constants.
+ ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!MaskCst || !ShiftCst)
+ return false;
+
+  // The AND mask must consist of contiguous low bits.
+ uint64_t Mask = MaskCst->getZExtValue();
+ if (!isMask_64(Mask))
+ return false;
+
+ uint64_t Shift = ShiftCst->getZExtValue();
+ uint64_t MaskSize = countPopulation(Mask);
+
+ // Don't interfere with something that can be handled by extracting AH.
+ // TODO: If we are able to fold a load, BEXTR might still be better than AH.
+ if (Shift == 8 && MaskSize == 8)
+ return false;
+
+ // Make sure we are only using bits that were in the original value, not
+ // shifted in.
+ if (Shift + MaskSize > NVT.getSizeInBits())
+ return false;
+
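+  // BEXTR's control operand encodes the starting bit position in bits 7:0 and
+  // the number of bits to extract in bits 15:8.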
+ SDValue New = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
+ unsigned ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
+ unsigned MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
+
+  // BMI requires the immediate to be placed in a register. The control value
+  // always fits in 32 bits, so a 32-bit move is sufficient.
+  if (!Subtarget->hasTBM()) {
+    ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
+    MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+    unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
+    New = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, New), 0);
+  }
+
+ MachineSDNode *NewNode;
+ SDValue Input = N0->getOperand(0);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
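+  // If the shifted value is a load, fold it into the memory form of the
+  // instruction; tryFoldLoad fills in the addressing-mode operands.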
+ if (tryFoldLoad(Node, Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+ NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ // Update the chain.
+    ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
+    // Record the mem-refs from the original load.
+    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+    MemOp[0] = cast<LoadSDNode>(Input)->getMemOperand();
+    NewNode->setMemRefs(MemOp, MemOp + 1);
+ } else {
+ NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
+ }
+
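+  // Replace all uses of the original AND with the BEXTR result and delete the
+  // now-dead node.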
+ ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return true;
+}
+
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opc, MOpc;
}
case ISD::AND:
+ // Try to match BEXTR/BEXTRI instruction.
+ if (matchBEXTRFromAnd(Node))
+ return;
+
+ LLVM_FALLTHROUGH;
case ISD::OR:
case ISD::XOR: {
// For operations of the form (x << C1) op C2, check if we can use a smaller
// encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
SDValue N0 = Node->getOperand(0);
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
- case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
return ShiftRight;
EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDLoc DL(N);
// Attempt to recursively combine a bitmask AND with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
return SDValue(); // This routine will use CombineTo to replace N.
}
- // Create BEXTR instructions
- // BEXTR is ((X >> imm) & (2**size-1))
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- if (!Subtarget.hasBMI() && !Subtarget.hasTBM())
- return SDValue();
- if (N0.getOpcode() != ISD::SRA && N0.getOpcode() != ISD::SRL)
- return SDValue();
-
- ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (MaskNode && ShiftNode) {
- uint64_t Mask = MaskNode->getZExtValue();
- uint64_t Shift = ShiftNode->getZExtValue();
- if (isMask_64(Mask)) {
- uint64_t MaskSize = countPopulation(Mask);
- if (Shift + MaskSize <= VT.getSizeInBits())
- return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
- DAG.getConstant(Shift | (MaskSize << 8), DL,
- VT));
- }
- }
return SDValue();
}
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- // Bit field extract.
- BEXTR,
-
// LOW, HI, FLAGS = umul LHS, RHS.
UMUL,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
-def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
-
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
} // HasBMI2
-let Predicates = [HasBMI] in {
- def : Pat<(X86bextr GR32:$src1, GR32:$src2),
- (BEXTR32rr GR32:$src1, GR32:$src2)>;
- def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2),
- (BEXTR32rm addr:$src1, GR32:$src2)>;
- def : Pat<(X86bextr GR64:$src1, GR64:$src2),
- (BEXTR64rr GR64:$src1, GR64:$src2)>;
- def : Pat<(X86bextr (loadi64 addr:$src1), GR64:$src2),
- (BEXTR64rm addr:$src1, GR64:$src2)>;
-} // HasBMI
-
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
//===----------------------------------------------------------------------===//
let Predicates = [HasTBM] in {
- def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
- (BEXTRI32ri GR32:$src1, imm:$src2)>;
- def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
- (BEXTRI32mi addr:$src1, imm:$src2)>;
- def : Pat<(X86bextr GR64:$src1, i64immSExt32:$src2),
- (BEXTRI64ri GR64:$src1, i64immSExt32:$src2)>;
- def : Pat<(X86bextr (loadi64 addr:$src1), i64immSExt32:$src2),
- (BEXTRI64mi addr:$src1, i64immSExt32:$src2)>;
-
// FIXME: patterns for the load versions are not implemented
def : Pat<(and GR32:$src, (add GR32:$src, 1)),
(BLCFILL32rr GR32:$src)>;
ret i32 %2
}
+; Make sure we still use the AH subreg trick to extract bits 15:8
+define i32 @bextr32_subreg(i32 %x) uwtable ssp {
+; CHECK-LABEL: bextr32_subreg:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: movzbl %ah, %eax # NOREX
+; CHECK-NEXT: retq
+ %1 = lshr i32 %x, 8
+ %2 = and i32 %1, 255
+ ret i32 %2
+}
+
define i32 @bextr32b_load(i32* %x) uwtable ssp {
; CHECK-LABEL: bextr32b_load:
; CHECK: # BB#0:
ret i64 %2
}
+; Make sure we still use the AH subreg trick to extract bits 15:8
+define i64 @bextr64_subreg(i64 %x) uwtable ssp {
+; CHECK-LABEL: bextr64_subreg:
+; CHECK: # BB#0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movzbl %ah, %eax # NOREX
+; CHECK-NEXT: retq
+ %1 = lshr i64 %x, 8
+ %2 = and i64 %1, 255
+ ret i64 %2
+}
+
define i64 @bextr64b_load(i64* %x) {
; CHECK-LABEL: bextr64b_load:
; CHECK: # BB#0:
ret i32 %t1
}
+; Make sure we still use the AH subreg trick to extract bits 15:8
+define i32 @test_x86_tbm_bextri_u32_subreg(i32 %a) nounwind {
+; CHECK-LABEL: test_x86_tbm_bextri_u32_subreg:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: movzbl %ah, %eax # NOREX
+; CHECK-NEXT: retq
+ %t0 = lshr i32 %a, 8
+ %t1 = and i32 %t0, 255
+ ret i32 %t1
+}
+
define i32 @test_x86_tbm_bextri_u32_m(i32* nocapture %a) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
; CHECK: # BB#0:
define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
; CHECK: # BB#0:
-; CHECK-NEXT: bextr $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: shrl $4, %edi
+; CHECK-NEXT: testw $4095, %di # imm = 0xFFF
; CHECK-NEXT: cmovnel %edx, %esi
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
ret i64 %t1
}
+; Make sure we still use the AH subreg trick to extract bits 15:8
+define i64 @test_x86_tbm_bextri_u64_subreg(i64 %a) nounwind {
+; CHECK-LABEL: test_x86_tbm_bextri_u64_subreg:
+; CHECK: # BB#0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movzbl %ah, %eax # NOREX
+; CHECK-NEXT: retq
+ %t0 = lshr i64 %a, 8
+ %t1 = and i64 %t0, 255
+ ret i64 %t1
+}
+
define i64 @test_x86_tbm_bextri_u64_m(i64* nocapture %a) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
; CHECK: # BB#0:
define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
; CHECK: # BB#0:
-; CHECK-NEXT: bextr $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: shrl $4, %edi
+; CHECK-NEXT: testw $4095, %di # imm = 0xFFF
; CHECK-NEXT: cmovneq %rdx, %rsi
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: retq