Skip to content

Commit 5b4759f

Browse files
committed
Revert "[X86] Don't always separate conditions in (br (and/or cond0, cond1)) into separate branches"
This has been buggy for a while. Reverts #81689 This reverts commit ae76dfb.
1 parent 732a5cb commit 5b4759f

28 files changed

+740
-920
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

-36
Original file line numberDiff line numberDiff line change
@@ -596,42 +596,6 @@ class TargetLoweringBase {
596596
/// avoided.
597597
bool isJumpExpensive() const { return JumpIsExpensive; }
598598

599-
// Costs parameters used by
600-
// SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
601-
// shouldKeepJumpConditionsTogether will use these parameter value to
602-
// determine if two conditions in the form `br (and/or cond1, cond2)` should
603-
// be split into two branches or left as one.
604-
//
605-
// BaseCost is the cost threshold (in latency). If the estimated latency of
606-
// computing both `cond1` and `cond2` is below the cost of just computing
607-
// `cond1` + BaseCost, the two conditions will be kept together. Otherwise
608-
// they will be split.
609-
//
610-
// LikelyBias increases BaseCost if branch probability info indicates that it
611-
// is likely that both `cond1` and `cond2` will be computed.
612-
//
613-
// UnlikelyBias decreases BaseCost if branch probability info indicates that
614-
// it is likely that both `cond1` and `cond2` will be computed.
615-
//
616-
// Set any field to -1 to make it ignored (setting BaseCost to -1 results in
617-
// `shouldKeepJumpConditionsTogether` always returning false).
618-
struct CondMergingParams {
619-
int BaseCost;
620-
int LikelyBias;
621-
int UnlikelyBias;
622-
};
623-
// Return params for deciding if we should keep two branch conditions merged
624-
// or split them into two separate branches.
625-
// Arg0: The binary op joining the two conditions (and/or).
626-
// Arg1: The first condition (cond1)
627-
// Arg2: The second condition (cond2)
628-
virtual CondMergingParams
629-
getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
630-
const Value *) const {
631-
// -1 will always result in splitting.
632-
return {-1, -1, -1};
633-
}
634-
635599
/// Return true if selects are only cheaper than branches if the branch is
636600
/// unlikely to be predicted right.
637601
bool isPredictableSelectExpensive() const {

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

+2-150
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#include "llvm/Analysis/Loads.h"
2727
#include "llvm/Analysis/MemoryLocation.h"
2828
#include "llvm/Analysis/TargetLibraryInfo.h"
29-
#include "llvm/Analysis/TargetTransformInfo.h"
3029
#include "llvm/Analysis/ValueTracking.h"
3130
#include "llvm/Analysis/VectorUtils.h"
3231
#include "llvm/CodeGen/Analysis.h"
@@ -94,7 +93,6 @@
9493
#include "llvm/Support/CommandLine.h"
9594
#include "llvm/Support/Compiler.h"
9695
#include "llvm/Support/Debug.h"
97-
#include "llvm/Support/InstructionCost.h"
9896
#include "llvm/Support/MathExtras.h"
9997
#include "llvm/Support/raw_ostream.h"
10098
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -2448,147 +2446,6 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
24482446
SL->SwitchCases.push_back(CB);
24492447
}
24502448

2451-
// Collect dependencies on V recursively. This is used for the cost analysis in
2452-
// `shouldKeepJumpConditionsTogether`.
2453-
static bool
2454-
collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
2455-
const Value *V,
2456-
SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
2457-
unsigned Depth = 0) {
2458-
// Return false if we have an incomplete count.
2459-
if (Depth >= SelectionDAG::MaxRecursionDepth)
2460-
return false;
2461-
2462-
auto *I = dyn_cast<Instruction>(V);
2463-
if (I == nullptr)
2464-
return true;
2465-
2466-
if (Necessary != nullptr) {
2467-
// This instruction is necessary for the other side of the condition so
2468-
// don't count it.
2469-
if (Necessary->contains(I))
2470-
return true;
2471-
}
2472-
2473-
// Already added this dep.
2474-
if (!Deps->insert(I).second)
2475-
return true;
2476-
2477-
for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
2478-
if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
2479-
Depth + 1))
2480-
return false;
2481-
return true;
2482-
}
2483-
2484-
bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
2485-
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
2486-
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
2487-
TargetLoweringBase::CondMergingParams Params) const {
2488-
if (I.getNumSuccessors() != 2)
2489-
return false;
2490-
2491-
if (Params.BaseCost < 0)
2492-
return false;
2493-
2494-
// Baseline cost.
2495-
InstructionCost CostThresh = Params.BaseCost;
2496-
2497-
BranchProbabilityInfo *BPI = nullptr;
2498-
if (Params.LikelyBias || Params.UnlikelyBias)
2499-
BPI = FuncInfo.BPI;
2500-
if (BPI != nullptr) {
2501-
// See if we are either likely to get an early out or compute both lhs/rhs
2502-
// of the condition.
2503-
BasicBlock *IfFalse = I.getSuccessor(0);
2504-
BasicBlock *IfTrue = I.getSuccessor(1);
2505-
2506-
std::optional<bool> Likely;
2507-
if (BPI->isEdgeHot(I.getParent(), IfTrue))
2508-
Likely = true;
2509-
else if (BPI->isEdgeHot(I.getParent(), IfFalse))
2510-
Likely = false;
2511-
2512-
if (Likely) {
2513-
if (Opc == (*Likely ? Instruction::And : Instruction::Or))
2514-
// Its likely we will have to compute both lhs and rhs of condition
2515-
CostThresh += Params.LikelyBias;
2516-
else {
2517-
if (Params.UnlikelyBias < 0)
2518-
return false;
2519-
// Its likely we will get an early out.
2520-
CostThresh -= Params.UnlikelyBias;
2521-
}
2522-
}
2523-
}
2524-
2525-
if (CostThresh <= 0)
2526-
return false;
2527-
2528-
// Collect "all" instructions that lhs condition is dependent on.
2529-
SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
2530-
collectInstructionDeps(&LhsDeps, Lhs);
2531-
// Collect "all" instructions that rhs condition is dependent on AND are
2532-
// dependencies of lhs. This gives us an estimate on which instructions we
2533-
// stand to save by splitting the condition.
2534-
if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
2535-
return false;
2536-
// Add the compare instruction itself unless its a dependency on the LHS.
2537-
if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
2538-
if (!LhsDeps.contains(RhsI))
2539-
RhsDeps.insert(RhsI);
2540-
2541-
const auto &TLI = DAG.getTargetLoweringInfo();
2542-
const auto &TTI =
2543-
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
2544-
2545-
InstructionCost CostOfIncluding = 0;
2546-
// See if this instruction will need to computed independently of whether RHS
2547-
// is.
2548-
auto ShouldCountInsn = [&RhsDeps](const Instruction *Ins) {
2549-
for (const auto *U : Ins->users()) {
2550-
// If user is independent of RHS calculation we don't need to count it.
2551-
if (auto *UIns = dyn_cast<Instruction>(U))
2552-
if (!RhsDeps.contains(UIns))
2553-
return false;
2554-
}
2555-
return true;
2556-
};
2557-
2558-
// Prune instructions from RHS Deps that are dependencies of unrelated
2559-
// instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
2560-
// arbitrary and just meant to cap the how much time we spend in the pruning
2561-
// loop. Its highly unlikely to come into affect.
2562-
const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
2563-
// Stop after a certain point. No incorrectness from including too many
2564-
// instructions.
2565-
for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
2566-
const Instruction *ToDrop = nullptr;
2567-
for (const auto *Ins : RhsDeps) {
2568-
if (!ShouldCountInsn(Ins)) {
2569-
ToDrop = Ins;
2570-
break;
2571-
}
2572-
}
2573-
if (ToDrop == nullptr)
2574-
break;
2575-
RhsDeps.erase(ToDrop);
2576-
}
2577-
2578-
for (const auto *Ins : RhsDeps) {
2579-
// Finally accumulate latency that we can only attribute to computing the
2580-
// RHS condition. Use latency because we are essentially trying to calculate
2581-
// the cost of the dependency chain.
2582-
// Possible TODO: We could try to estimate ILP and make this more precise.
2583-
CostOfIncluding +=
2584-
TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
2585-
2586-
if (CostOfIncluding > CostThresh)
2587-
return false;
2588-
}
2589-
return true;
2590-
}
2591-
25922449
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
25932450
MachineBasicBlock *TBB,
25942451
MachineBasicBlock *FBB,
@@ -2803,13 +2660,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
28032660
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
28042661
Opcode = Instruction::Or;
28052662

2806-
if (Opcode &&
2807-
!(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
2808-
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
2809-
!shouldKeepJumpConditionsTogether(
2810-
FuncInfo, I, Opcode, BOp0, BOp1,
2811-
DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
2812-
Opcode, BOp0, BOp1))) {
2663+
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
2664+
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
28132665
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
28142666
getEdgeProbability(BrMBB, Succ0MBB),
28152667
getEdgeProbability(BrMBB, Succ1MBB),

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

-5
Original file line numberDiff line numberDiff line change
@@ -385,11 +385,6 @@ class SelectionDAGBuilder {
385385
N = NewN;
386386
}
387387

388-
bool shouldKeepJumpConditionsTogether(
389-
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
390-
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
391-
TargetLoweringBase::CondMergingParams Params) const;
392-
393388
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
394389
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
395390
MachineBasicBlock *SwitchBB,

llvm/lib/Target/X86/X86ISelLowering.cpp

-49
Original file line numberDiff line numberDiff line change
@@ -77,37 +77,6 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
7777
"alignment set by x86-experimental-pref-loop-alignment."),
7878
cl::Hidden);
7979

80-
static cl::opt<int> BrMergingBaseCostThresh(
81-
"x86-br-merging-base-cost", cl::init(1),
82-
cl::desc(
83-
"Sets the cost threshold for when multiple conditionals will be merged "
84-
"into one branch versus be split in multiple branches. Merging "
85-
"conditionals saves branches at the cost of additional instructions. "
86-
"This value sets the instruction cost limit, below which conditionals "
87-
"will be merged, and above which conditionals will be split."),
88-
cl::Hidden);
89-
90-
static cl::opt<int> BrMergingLikelyBias(
91-
"x86-br-merging-likely-bias", cl::init(0),
92-
cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
93-
"that all conditionals will be executed. For example for merging "
94-
"the conditionals (a == b && c > d), if its known that a == b is "
95-
"likely, then it is likely that if the conditionals are split "
96-
"both sides will be executed, so it may be desirable to increase "
97-
"the instruction cost threshold."),
98-
cl::Hidden);
99-
100-
static cl::opt<int> BrMergingUnlikelyBias(
101-
"x86-br-merging-unlikely-bias", cl::init(1),
102-
cl::desc(
103-
"Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
104-
"that all conditionals will be executed. For example for merging "
105-
"the conditionals (a == b && c > d), if its known that a == b is "
106-
"unlikely, then it is unlikely that if the conditionals are split "
107-
"both sides will be executed, so it may be desirable to decrease "
108-
"the instruction cost threshold."),
109-
cl::Hidden);
110-
11180
static cl::opt<bool> MulConstantOptimization(
11281
"mul-constant-optimization", cl::init(true),
11382
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -3364,24 +3333,6 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
33643333
return ISD::SRL;
33653334
}
33663335

3367-
TargetLoweringBase::CondMergingParams
3368-
X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
3369-
const Value *Lhs,
3370-
const Value *Rhs) const {
3371-
using namespace llvm::PatternMatch;
3372-
int BaseCost = BrMergingBaseCostThresh.getValue();
3373-
// a == b && a == c is a fast pattern on x86.
3374-
ICmpInst::Predicate Pred;
3375-
if (BaseCost >= 0 && Opc == Instruction::And &&
3376-
match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3377-
Pred == ICmpInst::ICMP_EQ &&
3378-
match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3379-
Pred == ICmpInst::ICMP_EQ)
3380-
BaseCost += 1;
3381-
return {BaseCost, BrMergingLikelyBias.getValue(),
3382-
BrMergingUnlikelyBias.getValue()};
3383-
}
3384-
33853336
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
33863337
return N->getOpcode() != ISD::FP_EXTEND;
33873338
}

llvm/lib/Target/X86/X86ISelLowering.h

-4
Original file line numberDiff line numberDiff line change
@@ -1150,10 +1150,6 @@ namespace llvm {
11501150

11511151
bool preferScalarizeSplat(SDNode *N) const override;
11521152

1153-
CondMergingParams
1154-
getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1155-
const Value *Rhs) const override;
1156-
11571153
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
11581154
CombineLevel Level) const override;
11591155

llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll

+5-6
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,15 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp
1818
; CHECK-NEXT: movl _block, %esi
1919
; CHECK-NEXT: movb %al, 1(%esi,%edx)
2020
; CHECK-NEXT: cmpl %ecx, _last
21-
; CHECK-NEXT: setl %cl
21+
; CHECK-NEXT: jge LBB0_3
22+
; CHECK-NEXT: ## %bb.1: ## %label.0
2223
; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101
23-
; CHECK-NEXT: setne %al
24-
; CHECK-NEXT: testb %al, %cl
25-
; CHECK-NEXT: je LBB0_2
26-
; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub
24+
; CHECK-NEXT: je LBB0_3
25+
; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub
2726
; CHECK-NEXT: movb $1, %al
2827
; CHECK-NEXT: popl %esi
2928
; CHECK-NEXT: retl
30-
; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub
29+
; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub
3130
; CHECK-NEXT: xorl %eax, %eax
3231
; CHECK-NEXT: popl %esi
3332
; CHECK-NEXT: retl

0 commit comments

Comments
 (0)