-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[llvm][CodeGen] Some bugfix for window scheduler #99429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
kaiyan96
commented
Jul 18, 2024
- Fixed max cycle calculation for zero-cost instructions.
- Added a new restriction for II by pragma; the window scheduler will not support loops if the Swing Scheduler's II pragma is set.
- Fixed a bug in stall cycle calculation. This bugfix will not affecting the window scheduler's result.
- Added missing initialization failure information.
@llvm/pr-subscribers-backend-hexagon Author: Kai Yan (kaiyan96) Changes
Full diff: https://github.com./llvm/llvm-project/pull/99429.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 497e282bb9768..f97f55d2d3faa 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -528,8 +528,12 @@ bool MachinePipeliner::useSwingModuloScheduler() {
}
bool MachinePipeliner::useWindowScheduler(bool Changed) {
- // WindowScheduler does not work when it is off or when SwingModuloScheduler
- // is successfully scheduled.
+ // WindowScheduler does not work for following cases:
+ // 1. when it is off.
+ // 2. when SwingModuloScheduler is successfully scheduled.
+ // 3. when pragma II is enabled.
+ if (II_setByPragma)
+ return false;
return WindowSchedulingOption == WindowSchedulingFlag::WS_Force ||
(WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed);
}
diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 0777480499e55..7801803f82c45 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -232,8 +232,11 @@ bool WindowScheduler::initialize() {
return false;
}
for (auto &Def : MI.all_defs())
- if (Def.isReg() && Def.getReg().isPhysical())
+ if (Def.isReg() && Def.getReg().isPhysical()) {
+ LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
+ "window scheduling!\n");
return false;
+ }
}
if (SchedInstrNum <= WindowRegionLimit) {
LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
@@ -437,14 +440,17 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
int PredCycle = getOriCycle(PredMI);
ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
}
- // ResourceManager can be used to detect resource conflicts between the
- // current MI and the previously inserted MIs.
- while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
- ++CurCycle;
- if (CurCycle == (int)WindowIILimit)
- return CurCycle;
+ // Zero cost instructions do not need to check resource.
+ if (!TII->isZeroCost(MI.getOpcode())) {
+ // ResourceManager can be used to detect resource conflicts between the
+ // current MI and the previously inserted MIs.
+ while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
+ ++CurCycle;
+ if (CurCycle == (int)WindowIILimit)
+ return CurCycle;
+ }
+ RM.reserveResources(*SU, CurCycle);
}
- RM.reserveResources(*SU, CurCycle);
OriToCycle[getOriMI(&MI)] = CurCycle;
LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
<< getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
@@ -485,6 +491,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
// ========================================
int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
int MaxStallCycle = 0;
+ int CurrentII = MaxCycle + 1;
auto Range = getScheduleRange(Offset, SchedInstrNum);
for (auto &MI : Range) {
auto *SU = TripleDAG->getSUnit(&MI);
@@ -492,8 +499,9 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
for (auto &Succ : SU->Succs) {
if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
continue;
- // If the expected cycle does not exceed MaxCycle, no check is needed.
- if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
+ // If the expected cycle does not exceed loop initiation interval, no
+ // check is needed.
+ if (DefCycle + (int)Succ.getLatency() <= CurrentII)
continue;
// If the cycle of the scheduled MI A is less than that of the scheduled
// MI B, the scheduling will fail because the lifetime of the
@@ -503,7 +511,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
if (DefCycle < UseCycle)
return WindowIILimit;
// Get the stall cycle introduced by the register between two trips.
- int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
+ int StallCycle =
+ DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
MaxStallCycle = std::max(MaxStallCycle, StallCycle);
}
}
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
index 601b98dca8e20..be75301b016ed 100644
--- a/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-fail-2.mir
@@ -3,6 +3,7 @@
# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
# RUN: | FileCheck %s
+# CHECK: Physical registers are not supported in window scheduling!
# CHECK: The WindowScheduler failed to initialize!
---
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir
new file mode 100644
index 0000000000000..71b653da940e8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-pragma-initiation-interval-fail.mir
@@ -0,0 +1,85 @@
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: | FileCheck %s
+# REQUIRES: asserts
+
+# Test that checks no window scheduler is performed if the II set by pragma was
+# enabled
+
+# CHECK-NOT: Start analyzing II
+# CHECK-NOT: Start scheduling Phis
+# CHECK-NOT: Current window Offset is {{[0-9]+}} and II is {{[0-9]+}}
+
+--- |
+ define void @test_pragma_ii_fail(ptr %a0, i32 %a1) {
+ b0:
+ %v0 = icmp sgt i32 %a1, 1
+ br i1 %v0, label %b1, label %b4
+
+ b1: ; preds = %b0
+ %v1 = load i32, ptr %a0, align 4
+ %v2 = add i32 %v1, 10
+ %v4 = add i32 %a1, -1
+ %cgep = getelementptr i32, ptr %a0, i32 1
+ br label %b2
+
+ b2: ; preds = %b2, %b1
+ %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
+ %v6 = phi ptr [ %cgep2, %b2 ], [ %cgep, %b1 ]
+ %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
+ store i32 %v7, ptr %v6, align 4
+ %v8 = add i32 %v7, 10
+ %cgep1 = getelementptr i32, ptr %v6, i32 -1
+ store i32 %v8, ptr %cgep1, align 4
+ %v10 = add i32 %v7, 10
+ %v12 = add i32 %v5, -1
+ %v13 = icmp eq i32 %v12, 0
+ %cgep2 = getelementptr i32, ptr %v6, i32 1
+ br i1 %v13, label %b4, label %b2, !llvm.loop !0
+
+ b4: ; preds = %b2, %b0
+ ret void
+ }
+
+ !0 = distinct !{!0, !1}
+ !1 = !{!"llvm.loop.pipeline.initiationinterval", i32 2}
+...
+---
+name: test_pragma_ii_fail
+tracksRegLiveness: true
+body: |
+ bb.0.b0:
+ successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ liveins: $r0, $r1
+
+ %10:intregs = COPY $r1
+ %9:intregs = COPY $r0
+ %11:predregs = C2_cmpgti %10, 1
+ J2_jumpf %11, %bb.3, implicit-def dead $pc
+ J2_jump %bb.1, implicit-def dead $pc
+
+ bb.1.b1:
+ successors: %bb.2(0x80000000)
+
+ %13:intregs, %2:intregs = L2_loadri_pi %9, 4
+ %0:intregs = A2_addi killed %13, 10
+ %1:intregs = A2_addi %10, -1
+ %16:intregs = COPY %1
+ J2_loop0r %bb.2, %16, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+
+ bb.2.b2 (machine-block-address-taken):
+ successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+
+ %4:intregs = PHI %2, %bb.1, %8, %bb.2
+ %5:intregs = PHI %0, %bb.1, %6, %bb.2
+ S2_storeri_io %4, 0, %5
+ %6:intregs = A2_addi %5, 10
+ S2_storeri_io %4, -4, %6
+ %8:intregs = A2_addi %4, 4
+ ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.3, implicit-def dead $pc
+
+ bb.3.b4:
+ PS_jmpret $r31, implicit-def dead $pc
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir
new file mode 100644
index 0000000000000..a93a03bd07dee
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-ws-zero-cost.mir
@@ -0,0 +1,47 @@
+# REQUIRES: asserts
+# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
+# RUN: -window-sched=force -filetype=null -verify-machineinstrs 2>&1 \
+# RUN: | FileCheck %s
+
+# CHECK-NOT: Can't find a valid II. Keep searching...
+
+---
+name: relu
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.2(0x30000000), %bb.1(0x50000000)
+ liveins: $r0, $r1, $r2
+
+ %0:intregs = COPY $r2
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r0
+ %3:predregs = C2_cmpeqi %2, 0
+ J2_jumpt killed %3, %bb.2, implicit-def dead $pc
+ J2_jump %bb.1, implicit-def dead $pc
+
+ bb.1:
+ successors: %bb.3(0x80000000)
+
+ %4:hvxvr = V6_vd0
+ %5:intregs = A2_addi %2, 31
+ %6:intregs = S2_lsr_i_r %5, 5
+ %7:intregs = COPY %6
+ J2_loop0r %bb.3, %7, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ J2_jump %bb.3, implicit-def dead $pc
+
+ bb.2:
+ PS_jmpret $r31, implicit-def dead $pc
+
+ bb.3 (machine-block-address-taken):
+ successors: %bb.3(0x7c000000), %bb.2(0x04000000)
+
+ %8:intregs = PHI %1, %bb.1, %9, %bb.3
+ %10:intregs = PHI %0, %bb.1, %14, %bb.3
+ %11:hvxvr, %9:intregs = V6_vL32b_pi %8, 128
+ %12:intregs = COPY %10
+ %13:hvxvr = V6_vmaxw killed %11, %4
+ %14:intregs = V6_vS32b_pi %12, 128, killed %13
+ ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.2, implicit-def dead $pc
+...
|
We have encountered some minor issues while maintaining our own DSA. In this PR, we will address and fix these issues collectively. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
1. Fixed max cycle calculation for zero-cost instructions. 2. Added a new restriction for II by pragma; the window scheduler will not support loops if the Swing Scheduler's II pragma is set. 3. Fixed a bug in stall cycle calculation. This bugfix will not affecting the window scheduler's result. 4. Added missing initialization failure information.
320fc48
to
d9b53d5
Compare
@arsenm, could you please take some time to review this patch? Thanks. |
Title should state what the bug fix is, not that there is a bug fix. Also, one fix per PR please |