// Minimum weight of an edge. Please note, that weight is NEVER 0.
static const uint32_t MIN_WEIGHT = 1;
-static uint32_t getMaxWeightFor(BasicBlock *BB) {
- return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
-}
-
-
/// \brief Calculate edge weights for successors lead to unreachable.
///
/// Predict that a successor which leads necessarily to an
if (!WeightsNode)
return false;
+ // Check that the number of successors is manageable.
+ assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
+
// Ensure there are weights for all of the successors. Note that the first
// operand to the metadata node is a name, not a weight.
if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
return false;
- // Build up the final weights that will be used in a temporary buffer, but
- // don't add them until all weights are present. Each weight value is clamped
- // to [1, getMaxWeightFor(BB)].
- uint32_t WeightLimit = getMaxWeightFor(BB);
+ // Build up the final weights that will be used in a temporary buffer.
+ // Compute the sum of all weights to later decide whether they need to
+ // be scaled to fit in 32 bits.
+ uint64_t WeightSum = 0;
SmallVector<uint32_t, 2> Weights;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
if (!Weight)
return false;
- Weights.push_back(Weight->getLimitedValue(WeightLimit));
+ assert(Weight->getValue().getActiveBits() <= 32 &&
+ "Too many bits for uint32_t");
+ Weights.push_back(Weight->getZExtValue());
+ WeightSum += Weights.back();
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- setEdgeWeight(BB, i, Weights[i]);
+
+ // If the sum of weights does not fit in 32 bits, scale every weight down
+ // accordingly.
+ uint64_t ScalingFactor =
+ (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
+
+ WeightSum = 0;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ uint32_t W = Weights[i] / ScalingFactor;
+ WeightSum += W;
+ setEdgeWeight(BB, i, W);
+ }
+ assert(WeightSum <= UINT32_MAX &&
+ "Expected weights to scale down to 32 bits");
return true;
}
--- /dev/null
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+; In this test, the else clause is taken about 90% of the time. This was not
+; reflected in the probability computation because the weight is larger than
+; the branch weight cap (about 2 billion).
+;
+; CHECK: edge for.body -> if.then probability is 216661881 / 2166666667 = 9.9
+; CHECK: edge for.body -> if.else probability is 1950004786 / 2166666667 = 90.0
+
+@y = common global i64 0, align 8
+@x = common global i64 0, align 8
+@.str = private unnamed_addr constant [17 x i8] c"x = %lu\0Ay = %lu\0A\00", align 1
+
+; Function Attrs: inlinehint nounwind uwtable
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i64, align 8
+ store i32 0, i32* %retval
+ store i64 0, i64* @y, align 8
+ store i64 0, i64* @x, align 8
+ call void @srand(i32 422304) #3
+ store i64 0, i64* %i, align 8
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i64, i64* %i, align 8
+ %cmp = icmp ult i64 %0, 13000000000
+ br i1 %cmp, label %for.body, label %for.end, !prof !1
+
+for.body: ; preds = %for.cond
+ %call = call i32 @rand() #3
+ %conv = sitofp i32 %call to double
+ %mul = fmul double %conv, 1.000000e+02
+ %div = fdiv double %mul, 0x41E0000000000000
+ %cmp1 = fcmp ogt double %div, 9.000000e+01
+ br i1 %cmp1, label %if.then, label %if.else, !prof !2
+
+if.then: ; preds = %for.body
+ %1 = load i64, i64* @x, align 8
+ %inc = add i64 %1, 1
+ store i64 %inc, i64* @x, align 8
+ br label %if.end
+
+if.else: ; preds = %for.body
+ %2 = load i64, i64* @y, align 8
+ %inc3 = add i64 %2, 1
+ store i64 %inc3, i64* @y, align 8
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %3 = load i64, i64* %i, align 8
+ %inc4 = add i64 %3, 1
+ store i64 %inc4, i64* %i, align 8
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %4 = load i64, i64* @x, align 8
+ %5 = load i64, i64* @y, align 8
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), i64 %4, i64 %5)
+ ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @srand(i32) #1
+
+; Function Attrs: nounwind
+declare i32 @rand() #1
+
+declare i32 @printf(i8*, ...) #2
+
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 236218) (llvm/trunk 236235)"}
+!1 = !{!"branch_weights", i32 -1044967295, i32 1}
+!2 = !{!"branch_weights", i32 433323762, i32 -394957723}