@@ -2450,11 +2450,11 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
2450
2450
2451
2451
// Collect dependencies on V recursively. This is used for the cost analysis in
2452
2452
// `shouldKeepJumpConditionsTogether`.
2453
- static bool
2454
- collectInstructionDeps(SmallPtrSet <const Instruction *, 8 > *Deps,
2455
- const Value *V ,
2456
- SmallPtrSet <const Instruction *, 8 > *Necessary = nullptr,
2457
- unsigned Depth = 0) {
2453
+ static bool collectInstructionDeps(
2454
+ SmallPtrSetImpl <const Instruction *> *Deps, const Value *V ,
2455
+ SmallPtrSetImpl<const Instruction *> *Necessary = nullptr ,
2456
+ SmallVectorImpl <const Instruction *> *ItOrder = nullptr,
2457
+ unsigned Depth = 0) {
2458
2458
// Return false if we have an incomplete count.
2459
2459
if (Depth >= SelectionDAG::MaxRecursionDepth)
2460
2460
return false;
@@ -2474,8 +2474,11 @@ collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
2474
2474
if (!Deps->insert(I).second)
2475
2475
return true;
2476
2476
2477
+ if (ItOrder != nullptr)
2478
+ ItOrder->push_back(I);
2479
+
2477
2480
for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
2478
- if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
2481
+ if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, ItOrder,
2479
2482
Depth + 1))
2480
2483
return false;
2481
2484
return true;
@@ -2527,16 +2530,20 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
2527
2530
2528
2531
// Collect "all" instructions that lhs condition is dependent on.
2529
2532
SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
2533
+ SmallVector<const Instruction *> RhsDepsItOrder;
2530
2534
collectInstructionDeps(&LhsDeps, Lhs);
2531
2535
// Collect "all" instructions that rhs condition is dependent on AND are
2532
2536
// dependencies of lhs. This gives us an estimate on which instructions we
2533
2537
// stand to save by splitting the condition.
2534
- if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
2538
+ if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps, &RhsDepsItOrder ))
2535
2539
return false;
2536
2540
// Add the compare instruction itself unless it's a dependency on the LHS.
2537
- if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
2538
- if (!LhsDeps.contains(RhsI))
2541
+ if (const auto *RhsI = dyn_cast<Instruction>(Rhs)) {
2542
+ if (!LhsDeps.contains(RhsI)) {
2539
2543
RhsDeps.insert(RhsI);
2544
+ RhsDepsItOrder.push_back(RhsI);
2545
+ }
2546
+ }
2540
2547
2541
2548
const auto &TLI = DAG.getTargetLoweringInfo();
2542
2549
const auto &TTI =
@@ -2555,31 +2562,19 @@ bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
2555
2562
return true;
2556
2563
};
2557
2564
2558
- // Prune instructions from RHS Deps that are dependencies of unrelated
2559
- // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
2560
- // arbitrary and just meant to cap the how much time we spend in the pruning
2561
- // loop. Its highly unlikely to come into affect.
2562
- const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
2563
- // Stop after a certain point. No incorrectness from including too many
2564
- // instructions.
2565
- for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
2566
- const Instruction *ToDrop = nullptr;
2567
- for (const auto *Ins : RhsDeps) {
2568
- if (!ShouldCountInsn(Ins)) {
2569
- ToDrop = Ins;
2570
- break;
2571
- }
2565
+ // Finally accumulate latency that we can only attribute to computing the
2566
+ // RHS condition. Use latency because we are essentially trying to calculate
2567
+ // the cost of the dependency chain.
2568
+ // Possible TODO: We could try to estimate ILP and make this more precise.
2569
+ // NB: This loop is capped by the number of rhs dep instructions we added
2570
+ // which in turn is roughly limited by `SelectionDAG::MaxRecursionDepth`.
2571
+ for (const auto *Ins : RhsDepsItOrder) {
2572
+ // Skip instructions that are dependencies of unrelated
2573
+ // instructions (they will need to be computed anyway).
2574
+ if (!ShouldCountInsn(Ins)) {
2575
+ RhsDeps.erase(Ins);
2576
+ continue;
2572
2577
}
2573
- if (ToDrop == nullptr)
2574
- break;
2575
- RhsDeps.erase(ToDrop);
2576
- }
2577
-
2578
- for (const auto *Ins : RhsDeps) {
2579
- // Finally accumulate latency that we can only attribute to computing the
2580
- // RHS condition. Use latency because we are essentially trying to calculate
2581
- // the cost of the dependency chain.
2582
- // Possible TODO: We could try to estimate ILP and make this more precise.
2583
2578
CostOfIncluding +=
2584
2579
TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
2585
2580
0 commit comments