typedef SmallDenseMap<int, BlockISELList> ISELInstructionList;
// A map of MBB numbers to their lists of contained ISEL instructions.
+ // Note: when we traverse this list and expand ISELs, we only remove each
+ // ISEL from its MBB, not from this list.
ISELInstructionList ISELInstructions;
/// Initialize the object.
#endif
bool runOnMachineFunction(MachineFunction &MF) override {
- if (!isExpandISELEnabled(MF))
- return false;
-
DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
initialize(MF);
}
void PPCExpandISEL::expandAndMergeISELs() {
- for (auto &BlockList : ISELInstructions) {
+ bool ExpandISELEnabled = isExpandISELEnabled(*MF);
- DEBUG(dbgs() << printMBBReference(*MF->getBlockNumbered(BlockList.first))
- << ":\n");
+ for (auto &BlockList : ISELInstructions) {
DEBUG(dbgs() << "Expanding ISEL instructions in "
<< printMBBReference(*MF->getBlockNumbered(BlockList.first))
<< "\n");
-
BlockISELList &CurrentISELList = BlockList.second;
auto I = CurrentISELList.begin();
auto E = CurrentISELList.end();
while (I != E) {
- BlockISELList SubISELList;
-
- SubISELList.push_back(*I++);
-
- // Collect the ISELs that can be merged together.
- while (I != E && canMerge(SubISELList.back(), *I))
+ assert(isISEL(**I) && "Expecting an ISEL instruction");
+ MachineOperand &Dest = (*I)->getOperand(0);
+ MachineOperand &TrueValue = (*I)->getOperand(1);
+ MachineOperand &FalseValue = (*I)->getOperand(2);
+
+ // Special case 1, all registers used by ISEL are the same one.
+ // The non-redundant isel 0, 0, 0, N would not satisfy these conditions
+ // as it would be ISEL %R0, %ZERO, %R0, %CRN.
+ if (useSameRegister(Dest, TrueValue) &&
+ useSameRegister(Dest, FalseValue)) {
+ DEBUG(dbgs() << "Remove redudant ISEL instruction: " << **I << "\n");
+ // FIXME: if the CR field used has no other uses, we could eliminate the
+ // instruction that defines it. This would have to be done manually
+ // since this pass runs too late to run DCE after it.
+ NumRemoved++;
+ (*I)->eraseFromParent();
+ I++;
+ } else if (useSameRegister(TrueValue, FalseValue)) {
+ // Special case 2, the two input registers used by ISEL are the same.
+ // Note: the non-foldable isel RX, 0, 0, N would not satisfy this
+ // condition as it would be ISEL %RX, %ZERO, %R0, %CRN, which makes it
+ // safe to fold ISEL to MR(OR) instead of ADDI.
+ MachineBasicBlock *MBB = (*I)->getParent();
+ DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy:\n");
+ DEBUG(dbgs() << "ISEL: " << **I << "\n");
+ NumFolded++;
+ // Note: we're using both the TrueValue and FalseValue operands so as
+ // not to lose the kill flag if it is set on either of them.
+ BuildMI(*MBB, (*I), dl, TII->get(isISEL8(**I) ? PPC::OR8 : PPC::OR))
+ .add(Dest)
+ .add(TrueValue)
+ .add(FalseValue);
+ (*I)->eraseFromParent();
+ I++;
+ } else if (ExpandISELEnabled) { // Normal cases expansion enabled
+ DEBUG(dbgs() << "Expand ISEL instructions:\n");
+ DEBUG(dbgs() << "ISEL: " << **I << "\n");
+ BlockISELList SubISELList;
SubISELList.push_back(*I++);
-
- expandMergeableISELs(SubISELList);
- }
- }
+ // Collect the ISELs that can be merged together.
+ // This will eat up ISEL instructions without considering whether they
+ // may be redundant or foldable to a register copy. So we still keep
+ // the handleSpecialCases() downstream to handle them.
+ while (I != E && canMerge(SubISELList.back(), *I)) {
+ DEBUG(dbgs() << "ISEL: " << **I << "\n");
+ SubISELList.push_back(*I++);
+ }
+
+ expandMergeableISELs(SubISELList);
+ } else { // Normal cases expansion disabled
+ I++; // leave the ISEL as it is
+ }
+ } // end while
+ } // end for
}
void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL,
// Similarly, if at least one of the ISEL instructions satisfy the
// following condition, we need the False Block:
// The Dest Register and False Value Register are not the same.
-
bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
// Special case 1, all registers used by ISEL are the same one.
if (!IsADDIInstRequired && !IsORIInstRequired) {
DEBUG(dbgs() << "Remove redudant ISEL instruction.");
+ // FIXME: if the CR field used has no other uses, we could eliminate the
+ // instruction that defines it. This would have to be done manually
+ // since this pass runs too late to run DCE after it.
NumRemoved++;
(*MI)->eraseFromParent();
// Setting MI to the erase result keeps the iterator valid and increased.
// PPC::ZERO8 will be used for the first operand if the value is meant to
// be zero. In this case, the useSameRegister method will return false,
// thereby preventing this ISEL from being folded.
-
if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) {
DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy.");
NumFolded++;
- BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI))
+ // Note: we're using both the TrueValue and FalseValue operands so as
+ // not to lose the kill flag if it is set on either of them.
+ BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::OR8 : PPC::OR))
.add(Dest)
.add(TrueValue)
- .add(MachineOperand::CreateImm(0));
+ .add(FalseValue);
(*MI)->eraseFromParent();
// Setting MI to the erase result keeps the iterator valid and increased.
MI = BIL.erase(MI);
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; This file mainly tests the case where one of the ISEL instructions in the group uses the same register for operands RT, RA, and RB.
+; This redundant ISEL is introduced during the simple register coalescing stage.
+
+; Simple register coalescing first creates the foldable ISEL instruction, as seen in expand-foldable-isel.ll:
+; %vreg85<def> = ISEL8 %vreg83, %vreg83, %vreg33:sub_eq
+
+; Later the register coalescer figures out it could further coalesce %vreg85 with %vreg83:
+; merge %vreg85:1@2288r into %vreg83:5@400B --> @400B
+; erased: 2288r %vreg85<def> = COPY %vreg83
+
+; After that we have:
+; updated: 1504B %vreg83<def> = ISEL8 %vreg83, %vreg83, %vreg33:sub_eq
+
; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE
; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel
-; Function Attrs: norecurse nounwind readnone
+
@.str = private unnamed_addr constant [3 x i8] c"]]\00", align 1
@.str.1 = private unnamed_addr constant [35 x i8] c"Index < Length && \22Invalid index!\22\00", align 1
@.str.2 = private unnamed_addr constant [50 x i8] c"/home/jtony/src/llvm/include/llvm/ADT/StringRef.h\00", align 1
@__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm = private unnamed_addr constant [47 x i8] c"char llvm::StringRef::operator[](size_t) const\00", align 1
@.str.3 = private unnamed_addr constant [95 x i8] c"(data || length == 0) && \22StringRef cannot be built from a NULL argument with non-null length\22\00", align 1
@__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm = private unnamed_addr constant [49 x i8] c"llvm::StringRef::StringRef(const char *, size_t)\00", align 1
-; Function Attrs: nounwind
-define i64 @_Z3fn1N4llvm9StringRefE([2 x i64] %Str.coerce) local_unnamed_addr #0 {
+define i64 @_Z3fn1N4llvm9StringRefE([2 x i64] %Str.coerce) {
entry:
%Str.coerce.fca.0.extract = extractvalue [2 x i64] %Str.coerce, 0
%Str.coerce.fca.1.extract = extractvalue [2 x i64] %Str.coerce, 1
br label %while.cond.outer
-while.cond.outer: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit, %entry
+while.cond.outer:
%Str.sroa.0.0.ph = phi i64 [ %8, %_ZNK4llvm9StringRef6substrEmm.exit ], [ %Str.coerce.fca.0.extract, %entry ]
%.sink.ph = phi i64 [ %sub.i, %_ZNK4llvm9StringRef6substrEmm.exit ], [ %Str.coerce.fca.1.extract, %entry ]
%BracketDepth.0.ph = phi i64 [ %BracketDepth.1, %_ZNK4llvm9StringRef6substrEmm.exit ], [ undef, %entry ]
%cmp65 = icmp eq i64 %BracketDepth.0.ph, 0
br i1 %cmp65, label %while.cond.us.preheader, label %while.cond.preheader
-while.cond.us.preheader: ; preds = %while.cond.outer
+while.cond.us.preheader:
br label %while.cond.us
-while.cond.preheader: ; preds = %while.cond.outer
+while.cond.preheader:
%cmp.i34129 = icmp eq i64 %.sink.ph, 0
br i1 %cmp.i34129, label %cond.false.i.loopexit135, label %_ZNK4llvm9StringRefixEm.exit.preheader
-_ZNK4llvm9StringRefixEm.exit.preheader: ; preds = %while.cond.preheader
+_ZNK4llvm9StringRefixEm.exit.preheader:
br label %_ZNK4llvm9StringRefixEm.exit
-while.cond.us: ; preds = %while.cond.us.preheader, %_ZNK4llvm9StringRef6substrEmm.exit50.us
+while.cond.us:
%Str.sroa.0.0.us = phi i64 [ %3, %_ZNK4llvm9StringRef6substrEmm.exit50.us ], [ %Str.sroa.0.0.ph, %while.cond.us.preheader ]
%.sink.us = phi i64 [ %sub.i41.us, %_ZNK4llvm9StringRef6substrEmm.exit50.us ], [ %.sink.ph, %while.cond.us.preheader ]
%cmp.i30.us = icmp ult i64 %.sink.us, 2
br i1 %cmp.i30.us, label %if.end.us, label %if.end.i.i.us
-if.end.i.i.us: ; preds = %while.cond.us
+if.end.i.i.us:
%0 = inttoptr i64 %Str.sroa.0.0.us to i8*
- %call.i.i.us = tail call signext i32 @memcmp(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i64 2) #3
+ %call.i.i.us = tail call signext i32 @memcmp(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i64 2)
%phitmp.i.us = icmp eq i32 %call.i.i.us, 0
br i1 %phitmp.i.us, label %if.then, label %_ZNK4llvm9StringRefixEm.exit.us
-if.end.us: ; preds = %while.cond.us
+if.end.us:
%cmp.i34.us = icmp eq i64 %.sink.us, 0
br i1 %cmp.i34.us, label %cond.false.i.loopexit, label %_ZNK4llvm9StringRefixEm.exit.us
-_ZNK4llvm9StringRefixEm.exit.us: ; preds = %if.end.i.i.us, %if.end.us
+_ZNK4llvm9StringRefixEm.exit.us:
%1 = inttoptr i64 %Str.sroa.0.0.us to i8*
- %2 = load i8, i8* %1, align 1, !tbaa !2
+ %2 = load i8, i8* %1, align 1
switch i8 %2, label %_ZNK4llvm9StringRef6substrEmm.exit.loopexit [
i8 92, label %if.then4.us
i8 93, label %if.then9
]
-if.then4.us: ; preds = %_ZNK4llvm9StringRefixEm.exit.us
+if.then4.us:
%.sroa.speculated12.i38.us = select i1 %cmp.i30.us, i64 %.sink.us, i64 2
%add.ptr.i40.us = getelementptr inbounds i8, i8* %1, i64 %.sroa.speculated12.i38.us
%sub.i41.us = sub i64 %.sink.us, %.sroa.speculated12.i38.us
%cmp.i4.i45.us = icmp eq i64 %sub.i41.us, 0
%or.cond.i.i46.us = or i1 %tobool.i.i44.us, %cmp.i4.i45.us
br i1 %or.cond.i.i46.us, label %_ZNK4llvm9StringRef6substrEmm.exit50.us, label %cond.false.i.i47.loopexit
-_ZNK4llvm9StringRef6substrEmm.exit50.us: ; preds = %if.then4.us
+_ZNK4llvm9StringRef6substrEmm.exit50.us:
%3 = ptrtoint i8* %add.ptr.i40.us to i64
br label %while.cond.us
-if.then: ; preds = %if.end.i.i.us
+if.then:
ret i64 undef
-cond.false.i.loopexit: ; preds = %if.end.us
+cond.false.i.loopexit:
br label %cond.false.i
-cond.false.i.loopexit134: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit50
+cond.false.i.loopexit134:
br label %cond.false.i
-cond.false.i.loopexit135: ; preds = %while.cond.preheader
+cond.false.i.loopexit135:
br label %cond.false.i
-cond.false.i: ; preds = %cond.false.i.loopexit135, %cond.false.i.loopexit134, %cond.false.i.loopexit
- tail call void @__assert_fail(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.1, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 225, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm, i64 0, i64 0)) #4
+cond.false.i:
+ tail call void @__assert_fail(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.1, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 225, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm, i64 0, i64 0))
unreachable
-_ZNK4llvm9StringRefixEm.exit: ; preds = %_ZNK4llvm9StringRefixEm.exit.preheader, %_ZNK4llvm9StringRef6substrEmm.exit50
+_ZNK4llvm9StringRefixEm.exit:
%.sink131 = phi i64 [ %sub.i41, %_ZNK4llvm9StringRef6substrEmm.exit50 ], [ %.sink.ph, %_ZNK4llvm9StringRefixEm.exit.preheader ]
%Str.sroa.0.0130 = phi i64 [ %6, %_ZNK4llvm9StringRef6substrEmm.exit50 ], [ %Str.sroa.0.0.ph, %_ZNK4llvm9StringRefixEm.exit.preheader ]
%4 = inttoptr i64 %Str.sroa.0.0130 to i8*
- %5 = load i8, i8* %4, align 1, !tbaa !2
+ %5 = load i8, i8* %4, align 1
switch i8 %5, label %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 [
i8 92, label %if.then4
i8 93, label %if.end10
]
-if.then4: ; preds = %_ZNK4llvm9StringRefixEm.exit
+if.then4:
%cmp.i.i37 = icmp ult i64 %.sink131, 2
%.sroa.speculated12.i38 = select i1 %cmp.i.i37, i64 %.sink131, i64 2
%add.ptr.i40 = getelementptr inbounds i8, i8* %4, i64 %.sroa.speculated12.i38
%cmp.i4.i45 = icmp eq i64 %sub.i41, 0
%or.cond.i.i46 = or i1 %tobool.i.i44, %cmp.i4.i45
br i1 %or.cond.i.i46, label %_ZNK4llvm9StringRef6substrEmm.exit50, label %cond.false.i.i47.loopexit133
-cond.false.i.i47.loopexit: ; preds = %if.then4.us
+cond.false.i.i47.loopexit:
br label %cond.false.i.i47
-cond.false.i.i47.loopexit133: ; preds = %if.then4
+cond.false.i.i47.loopexit133:
br label %cond.false.i.i47
-cond.false.i.i47: ; preds = %cond.false.i.i47.loopexit133, %cond.false.i.i47.loopexit
- tail call void @__assert_fail(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 90, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm, i64 0, i64 0)) #4
+cond.false.i.i47:
+ tail call void @__assert_fail(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 90, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm, i64 0, i64 0))
unreachable
-_ZNK4llvm9StringRef6substrEmm.exit50: ; preds = %if.then4
+_ZNK4llvm9StringRef6substrEmm.exit50:
%6 = ptrtoint i8* %add.ptr.i40 to i64
%cmp.i34 = icmp eq i64 %sub.i41, 0
br i1 %cmp.i34, label %cond.false.i.loopexit134, label %_ZNK4llvm9StringRefixEm.exit
-if.then9: ; preds = %_ZNK4llvm9StringRefixEm.exit.us
- tail call void @exit(i32 signext 1) #4
+if.then9:
+ tail call void @exit(i32 signext 1)
unreachable
-if.end10: ; preds = %_ZNK4llvm9StringRefixEm.exit
+if.end10:
%dec = add i64 %BracketDepth.0.ph, -1
br label %_ZNK4llvm9StringRef6substrEmm.exit
-_ZNK4llvm9StringRef6substrEmm.exit.loopexit: ; preds = %_ZNK4llvm9StringRefixEm.exit.us
+_ZNK4llvm9StringRef6substrEmm.exit.loopexit:
br label %_ZNK4llvm9StringRef6substrEmm.exit
-_ZNK4llvm9StringRef6substrEmm.exit.loopexit132: ; preds = %_ZNK4llvm9StringRefixEm.exit
+_ZNK4llvm9StringRef6substrEmm.exit.loopexit132:
br label %_ZNK4llvm9StringRef6substrEmm.exit
-_ZNK4llvm9StringRef6substrEmm.exit: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit, %if.end10
+_ZNK4llvm9StringRef6substrEmm.exit:
%.sink76 = phi i64 [ %.sink131, %if.end10 ], [ %.sink.us, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %.sink131, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ]
%7 = phi i8* [ %4, %if.end10 ], [ %1, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %4, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ]
%BracketDepth.1 = phi i64 [ %dec, %if.end10 ], [ 0, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %BracketDepth.0.ph, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ]
br label %while.cond.outer
; CHECK-LABEL: @_Z3fn1N4llvm9StringRefE
-; CHECK-GEN-ISEL-TRUE: isel [[SAME:r[0-9]+]], [[SAME]], [[SAME]]
+; The unnecessary ISEL (all the registers are the same) is always removed.
+; CHECK-GEN-ISEL-TRUE-NOT: isel [[SAME:r[0-9]+]], [[SAME]], [[SAME]]
; CHECK-GEN-ISEL-TRUE: isel [[SAME:r[0-9]+]], {{r[0-9]+}}, [[SAME]]
; CHECK: bc 12, eq, [[TRUE:.LBB[0-9]+]]
; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]]
-; Function Attrs: noreturn nounwind
-declare void @exit(i32 signext) local_unnamed_addr #1
-; Function Attrs: nounwind readonly
-declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #2
-; Function Attrs: noreturn nounwind
-declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*) local_unnamed_addr #1
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readonly }
-attributes #4 = { noreturn nounwind }
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 4.0.0 (trunk 286863) (llvm/trunk 286967)"}
-!2 = !{!3, !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C++ TBAA"}
+declare void @exit(i32 signext)
+declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64)
+declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)
--- /dev/null
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+; This file mainly tests the case where the two input registers of the ISEL instruction are the same register.
+; The foldable ISEL in this test case is introduced at the simple register coalescing stage.
+
+; Before that stage we have:
+; %vreg18<def> = ISEL8 %vreg5, %vreg2, %vreg15<undef>;
+
+; At the simple register coalescing stage, the register coalescer figures out it could remove the copy
+; from %vreg2 to %vreg5 and put the original value %X3 into %vreg5 directly:
+; erased: 336r %vreg5<def> = COPY %vreg2
+; updated: 288B %vreg5<def> = COPY %X3;
+
+; After that we have:
+; updated: 416B %vreg18<def> = ISEL8 %vreg5, %vreg5, %vreg15<undef>;
+
+; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE
+; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel
+%"struct.pov::ot_block_struct" = type { %"struct.pov::ot_block_struct"*, [3 x double], [3 x double], float, float, float, float, float, float, float, float, float, [3 x float], float, float, [3 x double], i16 }
+%"struct.pov::ot_node_struct" = type { %"struct.pov::ot_id_struct", %"struct.pov::ot_block_struct"*, [8 x %"struct.pov::ot_node_struct"*] }
+%"struct.pov::ot_id_struct" = type { i32, i32, i32, i32 }
+
+define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE(%"struct.pov::ot_block_struct"* %new_block) {
+; CHECK-GEN-ISEL-TRUE-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE:
+; Note: the following line folds the original isel (isel r4, r3, r3)
+; CHECK-GEN-ISEL-TRUE: mr r4, r3
+; CHECK-GEN-ISEL-TRUE: isel r29, r5, r6, 4*cr5+lt
+; CHECK-GEN-ISEL-TRUE: blr
+;
+; CHECK-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE:
+; CHECK: mr r4, r3
+; CHECK: bc 12, 4*cr5+lt, .LBB0_3
+; CHECK: # %bb.2:
+; CHECK: ori r29, r6, 0
+; CHECK: b .LBB0_4
+; CHECK: .LBB0_3:
+; CHECK: addi r29, r5, 0
+; CHECK: .LBB0_4:
+; CHECK: blr
+entry:
+ br label %while.cond11
+
+while.cond11:
+ %this_node.0250 = phi %"struct.pov::ot_node_struct"* [ undef, %entry ], [ %1, %cond.false21.i156 ], [ %1, %cond.true18.i153 ]
+ %temp_id.sroa.21.1 = phi i32 [ undef, %entry ], [ %shr2039.i152, %cond.true18.i153 ], [ %div24.i155, %cond.false21.i156 ]
+ %0 = load i32, i32* undef, align 4
+ %cmp17 = icmp eq i32 0, %0
+ br i1 %cmp17, label %lor.rhs, label %while.body21
+
+lor.rhs:
+ %Values = getelementptr inbounds %"struct.pov::ot_node_struct", %"struct.pov::ot_node_struct"* %this_node.0250, i64 0, i32 1
+ store %"struct.pov::ot_block_struct"* %new_block, %"struct.pov::ot_block_struct"** %Values, align 8
+ ret void
+
+while.body21:
+ %call.i84 = tail call i8* @ZN3pov10pov_callocEmmPKciS1_pov()
+ store i8* %call.i84, i8** undef, align 8
+ %1 = bitcast i8* %call.i84 to %"struct.pov::ot_node_struct"*
+ br i1 undef, label %cond.true18.i153, label %cond.false21.i156
+
+cond.true18.i153:
+ %shr2039.i152 = lshr i32 %temp_id.sroa.21.1, 1
+ br label %while.cond11
+
+cond.false21.i156:
+ %add23.i154 = add nsw i32 %temp_id.sroa.21.1, 1
+ %div24.i155 = sdiv i32 %add23.i154, 2
+ br label %while.cond11
+}
+
+declare i8* @ZN3pov10pov_callocEmmPKciS1_pov()
--- /dev/null
+# This file tests the scenario: ISEL RX, RX, RX, CR (X != 0),
+# which is redundant and is removed.
+# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s
+
+--- |
+ target datalayout = "E-m:e-i64:64-n32:64"
+ target triple = "powerpc64-unknown-linux-gnu"
+ define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) {
+ entry:
+ %cmp = icmp sgt i32 %i, 0
+ %add = add nsw i32 %i, 1
+ %cond = select i1 %cmp, i32 %add, i32 %j
+ ret i32 %cond
+ }
+
+...
+---
+name: testExpandISEL
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%x3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %x3
+
+ %r5 = ADDI %r3, 1
+ %cr0 = CMPWI %r3, 0
+ %r3 = ISEL %r3, %r3, %cr0gt
+ %x3 = EXTSW_32_64 %r3
+ ; CHECK: %r5 = ADDI %r3, 1
+ ; CHECK: %cr0 = CMPWI %r3, 0
+ ; CHECK-NOT: %r3 = ISEL %r3, %r3, %cr0gt
+ ; CHECK: %x3 = EXTSW_32_64 %r3
+
+...
--- /dev/null
+# This file tests the scenario: ISEL RX, RY, RY, CR (X != 0 && Y != 0),
+# which is folded into a copy (%RX = OR %RY, %RY).
+# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s
+
+--- |
+ target datalayout = "E-m:e-i64:64-n32:64"
+ target triple = "powerpc64-unknown-linux-gnu"
+ define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) {
+ entry:
+ %cmp = icmp sgt i32 %i, 0
+ %add = add nsw i32 %i, 1
+ %cond = select i1 %cmp, i32 %add, i32 %j
+ ret i32 %cond
+ }
+
+...
+---
+name: testExpandISEL
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%x3' }
+ - { reg: '%x4' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %x3, %x4
+
+ %r5 = ADDI %r3, 1
+ %cr0 = CMPWI %r3, 0
+ %r3 = ISEL %r4, %r4, %cr0gt
+ ; Test fold ISEL to a copy
+ ; CHECK: %r3 = OR %r4, %r4
+
+ %x3 = EXTSW_32_64 %r3
+
+...
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel
-; Function Attrs: norecurse nounwind readnone
+
define signext i32 @testExpandISELToIfElse(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
}
-; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELToIf(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
; CHECK-NEXT: blr
}
-; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELToElse(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
; CHECK-NEXT: blr
}
-; Function Attrs: norecurse nounwind readnone
-define signext i32 @testReplaceISELWithCopy(i32 signext %i, i32 signext %j) {
-entry:
- %cmp = icmp sgt i32 %i, 0
- %cond = select i1 %cmp, i32 %j, i32 %j
- ret i32 %cond
-
-; CHECK-LABEL: @testReplaceISELWithCopy
-
-; Fix me should really check: addi r3, r4, 0
-; but for some reason it's optimized to mr r3, r4
-; CHECK: mr r3, r4
-; CHECK-NEXT: blr
-}
-; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELToNull(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
; CHECK: blr
}
-; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo2ORIs2ADDIs
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {
; CHECK-NEXT: blr
}
-; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo2ORIs1ADDI
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {
; CHECK-NEXT: blr
}
-; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo1ORI1ADDI
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {
; CHECK-NEXT: blr
}
-; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo0ORI2ADDIs
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {