Skip to content

Commit 396adab

Browse files
kaiyan96 authored and yuxuanchen1997 committed
[llvm][CodeGen] Fixed a bug in stall cycle calculation for window scheduler (#99451)
Summary: Fixed a bug in the stall cycle calculation. When a register defined by an instruction in the current iteration is used by an instruction in the next iteration, the number of stall cycles that need to be inserted is now computed relative to the current II (MaxCycle + 1) instead of MaxCycle. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250684
1 parent 9bb090e commit 396adab

File tree

2 files changed

+63
-3
lines changed

2 files changed

+63
-3
lines changed

llvm/lib/CodeGen/WindowScheduler.cpp

+4-3
Original file line number | Diff line number | Diff line change
@@ -492,15 +492,16 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
492492
// ========================================
493493
int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
494494
int MaxStallCycle = 0;
495+
int CurrentII = MaxCycle + 1;
495496
auto Range = getScheduleRange(Offset, SchedInstrNum);
496497
for (auto &MI : Range) {
497498
auto *SU = TripleDAG->getSUnit(&MI);
498499
int DefCycle = getOriCycle(&MI);
499500
for (auto &Succ : SU->Succs) {
500501
if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
501502
continue;
502-
// If the expected cycle does not exceed MaxCycle, no check is needed.
503-
if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
503+
// If the expected cycle does not exceed CurrentII, no check is needed.
504+
if (DefCycle + (int)Succ.getLatency() <= CurrentII)
504505
continue;
505506
// If the cycle of the scheduled MI A is less than that of the scheduled
506507
// MI B, the scheduling will fail because the lifetime of the
@@ -510,7 +511,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
510511
if (DefCycle < UseCycle)
511512
return WindowIILimit;
512513
// Get the stall cycle introduced by the register between two trips.
513-
int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
514+
int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
514515
MaxStallCycle = std::max(MaxStallCycle, StallCycle);
515516
}
516517
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,59 @@
1+
# REQUIRES: asserts
2+
# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \
3+
# RUN: -window-sched=force -filetype=null -verify-machineinstrs \
4+
# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \
5+
# RUN: 2>&1 | FileCheck %s
6+
7+
# CHECK-LABEL: Start analyzing II
8+
# CHECK: MaxStallCycle is 0
9+
# CHECK-LABEL: Start analyzing II
10+
# CHECK: MaxStallCycle is 0
11+
# CHECK-LABEL: Start analyzing II
12+
# CHECK: MaxStallCycle is 0
13+
14+
---
15+
name: test_window_stall_cycle
16+
tracksRegLiveness: true
17+
body: |
18+
bb.0:
19+
successors: %bb.3(0x40000000), %bb.1(0x40000000)
20+
liveins: $r0, $r1
21+
22+
%0:intregs = COPY $r1
23+
%1:intregs = COPY $r0
24+
%2:intregs = nsw A2_add %0, %1
25+
%3:intregs = S2_lsr_i_r_acc %2, %2, 31
26+
%4:intregs = S2_asr_i_r killed %3, 1
27+
%5:predregs = C2_cmpgt %1, %4
28+
%6:intregs = A2_tfrsi 0
29+
J2_jumpt killed %5, %bb.3, implicit-def dead $pc
30+
J2_jump %bb.1, implicit-def dead $pc
31+
32+
bb.1:
33+
successors: %bb.2(0x80000000)
34+
35+
%7:intregs = A2_addi %4, 2
36+
%8:intregs = A2_tfrsi 0
37+
%9:intregs = A2_sub %4, %1
38+
%10:intregs = A2_addi %9, 1
39+
%11:intregs = COPY %10
40+
J2_loop0r %bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
41+
42+
bb.2 (machine-block-address-taken):
43+
successors: %bb.3(0x04000000), %bb.2(0x7c000000)
44+
45+
%12:intregs = PHI %7, %bb.1, %13, %bb.2
46+
%14:intregs = PHI %8, %bb.1, %15, %bb.2
47+
%16:intregs = PHI %8, %bb.1, %17, %bb.2
48+
%18:intregs, %13:intregs = L2_loadri_pi %12, -4
49+
%17:intregs = nsw A2_add killed %18, %16
50+
%15:intregs = A2_max %17, %14
51+
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
52+
J2_jump %bb.3, implicit-def dead $pc
53+
54+
bb.3:
55+
%19:intregs = PHI %6, %bb.0, %15, %bb.2
56+
$r0 = COPY %19
57+
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
58+
59+
...

0 commit comments

Comments
 (0)