27
27
#include "llvm/ADT/StringExtras.h"
28
28
#include "llvm/ADT/StringSwitch.h"
29
29
#include "llvm/Analysis/BlockFrequencyInfo.h"
30
+ #include "llvm/Analysis/BranchProbabilityInfo.h"
30
31
#include "llvm/Analysis/ObjCARCUtil.h"
31
32
#include "llvm/Analysis/ProfileSummaryInfo.h"
33
+ #include "llvm/Analysis/TargetTransformInfo.h"
32
34
#include "llvm/Analysis/VectorUtils.h"
35
+ #include "llvm/CodeGen/FunctionLoweringInfo.h"
33
36
#include "llvm/CodeGen/IntrinsicLowering.h"
34
37
#include "llvm/CodeGen/MachineFrameInfo.h"
35
38
#include "llvm/CodeGen/MachineFunction.h"
55
58
#include "llvm/MC/MCContext.h"
56
59
#include "llvm/MC/MCExpr.h"
57
60
#include "llvm/MC/MCSymbol.h"
61
+ #include "llvm/Support/BranchProbability.h"
58
62
#include "llvm/Support/CommandLine.h"
59
63
#include "llvm/Support/Debug.h"
60
64
#include "llvm/Support/ErrorHandling.h"
@@ -77,6 +81,24 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
77
81
"alignment set by x86-experimental-pref-loop-alignment."),
78
82
cl::Hidden);
79
83
84
+ static cl::opt<int> BrMergingBaseCostThresh(
85
+ "x86-cond-base", cl::init(1),
86
+ cl::desc(
87
+ "Base."),
88
+ cl::Hidden);
89
+
90
+ static cl::opt<int> BrMergingLikelyBias(
91
+ "x86-cond-likely-bias", cl::init(0),
92
+ cl::desc(
93
+ "Likely."),
94
+ cl::Hidden);
95
+
96
+ static cl::opt<int> BrMergingUnlikelyBias(
97
+ "x86-cond-unlikely-bias", cl::init(1),
98
+ cl::desc(
99
+ "Unlikely."),
100
+ cl::Hidden);
101
+
80
102
static cl::opt<bool> MulConstantOptimization(
81
103
"mul-constant-optimization", cl::init(true),
82
104
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -3339,6 +3361,143 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
3339
3361
return ISD::SRL;
3340
3362
}
3341
3363
3364
+ // Collect dependings on V recursively. This is used for the cost analysis in
3365
+ // `keepJumpConditionsTogether`.
3366
+ static bool
3367
+ collectDeps(SmallPtrSet<const Instruction *, 8> *Deps, const Value *V,
3368
+ SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
3369
+ unsigned Depth = 0) {
3370
+ // Return false if we have an incomplete count.
3371
+ if (Depth >= 6)
3372
+ return false;
3373
+
3374
+ auto *I = dyn_cast<Instruction>(V);
3375
+ if (I == nullptr)
3376
+ return true;
3377
+
3378
+ if (Necessary != nullptr) {
3379
+ // This instruction is necessary for the other side of the condition so
3380
+ // don't count it.
3381
+ if (Necessary->contains(I))
3382
+ return true;
3383
+ }
3384
+
3385
+ // Already added this dep.
3386
+ if (!Deps->insert(I).second)
3387
+ return true;
3388
+
3389
+ for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx)
3390
+ if (!collectDeps(Deps, I->getOperand(OpIdx), Necessary, Depth + 1))
3391
+ return false;
3392
+ return true;
3393
+ }
3394
+
3395
+ bool X86TargetLowering::keepJumpConditionsTogether(
3396
+ const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
3397
+ Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const {
3398
+ using namespace llvm::PatternMatch;
3399
+ if (I.getNumSuccessors() != 2)
3400
+ return false;
3401
+
3402
+ // Baseline cost. This is properly arbitrary.
3403
+ InstructionCost CostThresh = BrMergingBaseCostThresh.getValue();
3404
+ if (BrMergingBaseCostThresh.getValue() < 0)
3405
+ return false;
3406
+
3407
+ // a == b && c == d can be efficiently combined.
3408
+ ICmpInst::Predicate Pred;
3409
+ if (Opc == Instruction::And &&
3410
+ match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3411
+ Pred == ICmpInst::ICMP_EQ &&
3412
+ match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
3413
+ Pred == ICmpInst::ICMP_EQ)
3414
+ CostThresh += 1;
3415
+
3416
+ BranchProbabilityInfo *BPI = nullptr;
3417
+ if (BrMergingLikelyBias.getValue() || BrMergingUnlikelyBias.getValue())
3418
+ BPI = FuncInfo.BPI;
3419
+ if (BPI != nullptr) {
3420
+ BasicBlock *IfFalse = I.getSuccessor(0);
3421
+ BasicBlock *IfTrue = I.getSuccessor(1);
3422
+
3423
+ std::optional<bool> Likely;
3424
+ if (BPI->isEdgeHot(I.getParent(), IfTrue))
3425
+ Likely = true;
3426
+ else if (BPI->isEdgeHot(I.getParent(), IfFalse))
3427
+ Likely = false;
3428
+
3429
+ if (Likely) {
3430
+ if (Opc == (*Likely ? Instruction::And : Instruction::Or))
3431
+ // Its likely we will have to compute both lhs and rhs of condition
3432
+ CostThresh += BrMergingLikelyBias.getValue();
3433
+ else {
3434
+ // Its likely we will get an early out.
3435
+ CostThresh -= BrMergingUnlikelyBias.getValue();
3436
+ if (BrMergingUnlikelyBias.getValue() < 0) {
3437
+ return false;
3438
+ }
3439
+ }
3440
+ }
3441
+ }
3442
+
3443
+ if (CostThresh <= 0)
3444
+ return false;
3445
+
3446
+ // Collect "all" instructions that lhs condition is dependent on.
3447
+ SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
3448
+ collectDeps(&LhsDeps, Lhs);
3449
+ // Collect "all" instructions that rhs condition is dependent on AND are
3450
+ // dependencies of lhs. This gives us an estimate on which instructions we
3451
+ // stand to save by splitting the condition.
3452
+ if (!collectDeps(&RhsDeps, Rhs, &LhsDeps))
3453
+ return false;
3454
+ // Add the compare instruction itself unless its a dependency on the LHS.
3455
+ if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
3456
+ if (!LhsDeps.contains(RhsI))
3457
+ RhsDeps.insert(RhsI);
3458
+ const auto &TTI = getTargetMachine().getTargetTransformInfo(*I.getFunction());
3459
+
3460
+ InstructionCost CostOfIncluding = 0;
3461
+ // See if this instruction will need to computed independently of whether RHS
3462
+ // is.
3463
+ auto ShouldCountInsn = [&RhsDeps](const Instruction *Ins) {
3464
+ for (const auto *U : Ins->users()) {
3465
+ // If user is independent of RHS calculation we don't need to count it.
3466
+ if (auto *UIns = dyn_cast<Instruction>(U))
3467
+ if (!RhsDeps.contains(UIns))
3468
+ return false;
3469
+ }
3470
+ return true;
3471
+ };
3472
+
3473
+ // Prune instructions from RHS Deps that are dependencies of unrelated
3474
+ // instructions.
3475
+ const unsigned MaxPruneIters = 8;
3476
+ // Stop after a certain point. No incorrectness from including too many
3477
+ // instructions.
3478
+ for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
3479
+ const Instruction *ToDrop = nullptr;
3480
+ for (const auto *Ins : RhsDeps) {
3481
+ if (!ShouldCountInsn(Ins)) {
3482
+ ToDrop = Ins;
3483
+ break;
3484
+ }
3485
+ }
3486
+ if (ToDrop == nullptr)
3487
+ break;
3488
+ RhsDeps.erase(ToDrop);
3489
+ }
3490
+
3491
+ for (const auto *Ins : RhsDeps) {
3492
+ CostOfIncluding +=
3493
+ TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
3494
+
3495
+ if (CostOfIncluding > CostThresh)
3496
+ return false;
3497
+ }
3498
+ return true;
3499
+ }
3500
+
3342
3501
// Returns true for every node except one whose opcode is ISD::FP_EXTEND.
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
  const bool IsFPExtend = N->getOpcode() == ISD::FP_EXTEND;
  return !IsFPExtend;
}
0 commit comments