|
26 | 26 | #include "llvm/Analysis/Loads.h"
|
27 | 27 | #include "llvm/Analysis/MemoryLocation.h"
|
28 | 28 | #include "llvm/Analysis/TargetLibraryInfo.h"
|
| 29 | +#include "llvm/Analysis/TargetTransformInfo.h" |
29 | 30 | #include "llvm/Analysis/ValueTracking.h"
|
30 | 31 | #include "llvm/Analysis/VectorUtils.h"
|
31 | 32 | #include "llvm/CodeGen/Analysis.h"
|
|
93 | 94 | #include "llvm/Support/CommandLine.h"
|
94 | 95 | #include "llvm/Support/Compiler.h"
|
95 | 96 | #include "llvm/Support/Debug.h"
|
| 97 | +#include "llvm/Support/InstructionCost.h" |
96 | 98 | #include "llvm/Support/MathExtras.h"
|
97 | 99 | #include "llvm/Support/raw_ostream.h"
|
98 | 100 | #include "llvm/Target/TargetIntrinsicInfo.h"
|
@@ -2446,6 +2448,147 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
|
2446 | 2448 | SL->SwitchCases.push_back(CB);
|
2447 | 2449 | }
|
2448 | 2450 |
|
| 2451 | +// Collect dependencies on V recursively. This is used for the cost analysis in |
| 2452 | +// `shouldKeepJumpConditionsTogether`. |
| 2453 | +static bool |
| 2454 | +collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps, |
| 2455 | + const Value *V, |
| 2456 | + SmallPtrSet<const Instruction *, 8> *Necessary = nullptr, |
| 2457 | + unsigned Depth = 0) { |
| 2458 | + // Return false if we have an incomplete count. |
| 2459 | + if (Depth >= SelectionDAG::MaxRecursionDepth) |
| 2460 | + return false; |
| 2461 | + |
| 2462 | + auto *I = dyn_cast<Instruction>(V); |
| 2463 | + if (I == nullptr) |
| 2464 | + return true; |
| 2465 | + |
| 2466 | + if (Necessary != nullptr) { |
| 2467 | + // This instruction is necessary for the other side of the condition so |
| 2468 | + // don't count it. |
| 2469 | + if (Necessary->contains(I)) |
| 2470 | + return true; |
| 2471 | + } |
| 2472 | + |
| 2473 | + // Already added this dep. |
| 2474 | + if (!Deps->insert(I).second) |
| 2475 | + return true; |
| 2476 | + |
| 2477 | + for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx) |
| 2478 | + if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary, |
| 2479 | + Depth + 1)) |
| 2480 | + return false; |
| 2481 | + return true; |
| 2482 | +} |
| 2483 | + |
| 2484 | +bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( |
| 2485 | + const FunctionLoweringInfo &FuncInfo, const BranchInst &I, |
| 2486 | + Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, |
| 2487 | + TargetLoweringBase::CondMergingParams Params) const { |
| 2488 | + if (I.getNumSuccessors() != 2) |
| 2489 | + return false; |
| 2490 | + |
| 2491 | + if (Params.BaseCost < 0) |
| 2492 | + return false; |
| 2493 | + |
| 2494 | + // Baseline cost. |
| 2495 | + InstructionCost CostThresh = Params.BaseCost; |
| 2496 | + |
| 2497 | + BranchProbabilityInfo *BPI = nullptr; |
| 2498 | + if (Params.LikelyBias || Params.UnlikelyBias) |
| 2499 | + BPI = FuncInfo.BPI; |
| 2500 | + if (BPI != nullptr) { |
| 2501 | + // See if we are either likely to get an early out or compute both lhs/rhs |
| 2502 | + // of the condition. |
| 2503 | + BasicBlock *IfFalse = I.getSuccessor(0); |
| 2504 | + BasicBlock *IfTrue = I.getSuccessor(1); |
| 2505 | + |
| 2506 | + std::optional<bool> Likely; |
| 2507 | + if (BPI->isEdgeHot(I.getParent(), IfTrue)) |
| 2508 | + Likely = true; |
| 2509 | + else if (BPI->isEdgeHot(I.getParent(), IfFalse)) |
| 2510 | + Likely = false; |
| 2511 | + |
| 2512 | + if (Likely) { |
| 2513 | + if (Opc == (*Likely ? Instruction::And : Instruction::Or)) |
| 2514 | + // Its likely we will have to compute both lhs and rhs of condition |
| 2515 | + CostThresh += Params.LikelyBias; |
| 2516 | + else { |
| 2517 | + if (Params.UnlikelyBias < 0) |
| 2518 | + return false; |
| 2519 | + // Its likely we will get an early out. |
| 2520 | + CostThresh -= Params.UnlikelyBias; |
| 2521 | + } |
| 2522 | + } |
| 2523 | + } |
| 2524 | + |
| 2525 | + if (CostThresh <= 0) |
| 2526 | + return false; |
| 2527 | + |
| 2528 | + // Collect "all" instructions that lhs condition is dependent on. |
| 2529 | + SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps; |
| 2530 | + collectInstructionDeps(&LhsDeps, Lhs); |
| 2531 | + // Collect "all" instructions that rhs condition is dependent on AND are |
| 2532 | + // dependencies of lhs. This gives us an estimate on which instructions we |
| 2533 | + // stand to save by splitting the condition. |
| 2534 | + if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps)) |
| 2535 | + return false; |
| 2536 | + // Add the compare instruction itself unless its a dependency on the LHS. |
| 2537 | + if (const auto *RhsI = dyn_cast<Instruction>(Rhs)) |
| 2538 | + if (!LhsDeps.contains(RhsI)) |
| 2539 | + RhsDeps.insert(RhsI); |
| 2540 | + |
| 2541 | + const auto &TLI = DAG.getTargetLoweringInfo(); |
| 2542 | + const auto &TTI = |
| 2543 | + TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction()); |
| 2544 | + |
| 2545 | + InstructionCost CostOfIncluding = 0; |
| 2546 | + // See if this instruction will need to computed independently of whether RHS |
| 2547 | + // is. |
| 2548 | + auto ShouldCountInsn = [&RhsDeps](const Instruction *Ins) { |
| 2549 | + for (const auto *U : Ins->users()) { |
| 2550 | + // If user is independent of RHS calculation we don't need to count it. |
| 2551 | + if (auto *UIns = dyn_cast<Instruction>(U)) |
| 2552 | + if (!RhsDeps.contains(UIns)) |
| 2553 | + return false; |
| 2554 | + } |
| 2555 | + return true; |
| 2556 | + }; |
| 2557 | + |
| 2558 | + // Prune instructions from RHS Deps that are dependencies of unrelated |
| 2559 | + // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly |
| 2560 | + // arbitrary and just meant to cap the how much time we spend in the pruning |
| 2561 | + // loop. Its highly unlikely to come into affect. |
| 2562 | + const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth; |
| 2563 | + // Stop after a certain point. No incorrectness from including too many |
| 2564 | + // instructions. |
| 2565 | + for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) { |
| 2566 | + const Instruction *ToDrop = nullptr; |
| 2567 | + for (const auto *Ins : RhsDeps) { |
| 2568 | + if (!ShouldCountInsn(Ins)) { |
| 2569 | + ToDrop = Ins; |
| 2570 | + break; |
| 2571 | + } |
| 2572 | + } |
| 2573 | + if (ToDrop == nullptr) |
| 2574 | + break; |
| 2575 | + RhsDeps.erase(ToDrop); |
| 2576 | + } |
| 2577 | + |
| 2578 | + for (const auto *Ins : RhsDeps) { |
| 2579 | + // Finally accumulate latency that we can only attribute to computing the |
| 2580 | + // RHS condition. Use latency because we are essentially trying to calculate |
| 2581 | + // the cost of the dependency chain. |
| 2582 | + // Possible TODO: We could try to estimate ILP and make this more precise. |
| 2583 | + CostOfIncluding += |
| 2584 | + TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency); |
| 2585 | + |
| 2586 | + if (CostOfIncluding > CostThresh) |
| 2587 | + return false; |
| 2588 | + } |
| 2589 | + return true; |
| 2590 | +} |
| 2591 | + |
2449 | 2592 | void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
|
2450 | 2593 | MachineBasicBlock *TBB,
|
2451 | 2594 | MachineBasicBlock *FBB,
|
@@ -2660,8 +2803,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
|
2660 | 2803 | else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
|
2661 | 2804 | Opcode = Instruction::Or;
|
2662 | 2805 |
|
2663 |
| - if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && |
2664 |
| - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { |
| 2806 | + if (Opcode && |
| 2807 | + !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && |
| 2808 | + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) && |
| 2809 | + !shouldKeepJumpConditionsTogether( |
| 2810 | + FuncInfo, I, Opcode, BOp0, BOp1, |
| 2811 | + DAG.getTargetLoweringInfo().getJumpConditionMergingParams( |
| 2812 | + Opcode, BOp0, BOp1))) { |
2665 | 2813 | FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
|
2666 | 2814 | getEdgeProbability(BrMBB, Succ0MBB),
|
2667 | 2815 | getEdgeProbability(BrMBB, Succ1MBB),
|
|
0 commit comments