Skip to content

Commit b2ce508

Browse files
committed
[SelectionDAGBuilder] Fix non-determanism in shouldKeepJumpConditionsTogether
The issue was we where iteration on `SmallPtrSet` order which is based on runtime address and thus can vary with the same input. Add a `SmallVector` for iterating on `RhsDeps`. This has the benefit of also dramatically simplifying the pruning code (we no longer are removing from the same list we where iterating on).
1 parent 1f613bc commit b2ce508

File tree

1 file changed

+28
-33
lines changed

1 file changed

+28
-33
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

+28-33
Original file line numberDiff line numberDiff line change
@@ -2450,11 +2450,11 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
24502450

24512451
// Collect dependencies on V recursively. This is used for the cost analysis in
24522452
// `shouldKeepJumpConditionsTogether`.
2453-
static bool
2454-
collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
2455-
const Value *V,
2456-
SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
2457-
unsigned Depth = 0) {
2453+
static bool collectInstructionDeps(
2454+
SmallPtrSetImpl<const Instruction *> *Deps, const Value *V,
2455+
SmallPtrSetImpl<const Instruction *> *Necessary = nullptr,
2456+
SmallVectorImpl<const Instruction *> *ItOrder = nullptr,
2457+
unsigned Depth = 0) {
24582458
// Return false if we have an incomplete count.
24592459
if (Depth >= SelectionDAG::MaxRecursionDepth)
24602460
return false;
@@ -2474,8 +2474,11 @@ collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
24742474
if (!Deps->insert(I).second)
24752475
return true;
24762476

2477+
if (ItOrder != nullptr)
2478+
ItOrder->push_back(I);
2479+
24772480
for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
2478-
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
2481+
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, ItOrder,
24792482
Depth + 1))
24802483
return false;
24812484
return true;
@@ -2527,16 +2530,20 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
25272530

25282531
// Collect "all" instructions that lhs condition is dependent on.
25292532
SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
2533+
SmallVector<const Instruction *> RhsDepsItOrder;
25302534
collectInstructionDeps(&LhsDeps, Lhs);
25312535
// Collect "all" instructions that rhs condition is dependent on AND are
25322536
// dependencies of lhs. This gives us an estimate on which instructions we
25332537
// stand to save by splitting the condition.
2534-
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
2538+
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps, &RhsDepsItOrder))
25352539
return false;
25362540
// Add the compare instruction itself unless its a dependency on the LHS.
2537-
if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
2538-
if (!LhsDeps.contains(RhsI))
2541+
if (const auto *RhsI = dyn_cast<Instruction>(Rhs)) {
2542+
if (!LhsDeps.contains(RhsI)) {
25392543
RhsDeps.insert(RhsI);
2544+
RhsDepsItOrder.push_back(RhsI);
2545+
}
2546+
}
25402547

25412548
const auto &TLI = DAG.getTargetLoweringInfo();
25422549
const auto &TTI =
@@ -2555,31 +2562,19 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
25552562
return true;
25562563
};
25572564

2558-
// Prune instructions from RHS Deps that are dependencies of unrelated
2559-
// instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
2560-
// arbitrary and just meant to cap the how much time we spend in the pruning
2561-
// loop. Its highly unlikely to come into affect.
2562-
const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
2563-
// Stop after a certain point. No incorrectness from including too many
2564-
// instructions.
2565-
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
2566-
const Instruction *ToDrop = nullptr;
2567-
for (const auto *Ins : RhsDeps) {
2568-
if (!ShouldCountInsn(Ins)) {
2569-
ToDrop = Ins;
2570-
break;
2571-
}
2565+
// Finally accumulate latency that we can only attribute to computing the
2566+
// RHS condition. Use latency because we are essentially trying to calculate
2567+
// the cost of the dependency chain.
2568+
// Possible TODO: We could try to estimate ILP and make this more precise.
2569+
// NB: This loop is capped by the number of rhs dep instructions we added
2570+
// which in turn is roughly limitted by `MaxRecursiveDepth`
2571+
for (const auto *Ins : RhsDepsItOrder) {
2572+
// Skip instructions that are dependencies of unrelated
2573+
// instructions (will need to the computed anyways).
2574+
if (!ShouldCountInsn(Ins)) {
2575+
RhsDeps.erase(Ins);
2576+
continue;
25722577
}
2573-
if (ToDrop == nullptr)
2574-
break;
2575-
RhsDeps.erase(ToDrop);
2576-
}
2577-
2578-
for (const auto *Ins : RhsDeps) {
2579-
// Finally accumulate latency that we can only attribute to computing the
2580-
// RHS condition. Use latency because we are essentially trying to calculate
2581-
// the cost of the dependency chain.
2582-
// Possible TODO: We could try to estimate ILP and make this more precise.
25832578
CostOfIncluding +=
25842579
TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
25852580

0 commit comments

Comments
 (0)