Skip to content

Commit 571d91d

Browse files
Kan Liang, Peter Zijlstra
Kan Liang
authored and
Peter Zijlstra
committed
perf: Add branch stack counters
Currently, the additional information of a branch entry is stored in a u64 space. With more and more information added, the space is running out. For example, the information of occurrences of events will be added for each branch. Two places were suggested to append the counters. https://lore.kernel.org/lkml/[email protected]/ One place is right after the flags of each branch entry. It changes the existing struct perf_branch_entry. Later ARCH-specific implementations have to be really careful to consistently pick the right struct. The other place is right after the entire struct perf_branch_stack. The disadvantage is that the pointer to the extra space has to be recorded. The common interface perf_sample_save_brstack() has to be updated. The latter is much more straightforward, and should be easily understood and maintained. It is implemented in this patch. Add a new branch sample type, PERF_SAMPLE_BRANCH_COUNTERS, to indicate the event which is recorded in the branch info. The "u64 counters" may store the occurrences of several events. The information regarding the number of events/counters and the width of each counter should be exposed via sysfs as a reference for the perf tool. Define the branch_counter_nr and branch_counter_width ABI here. The support will be implemented later in the Intel-specific patch. Suggested-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 744940f commit 571d91d

File tree

9 files changed

+46
-7
lines changed

9 files changed

+46
-7
lines changed

Documentation/ABI/testing/sysfs-bus-event_source-devices-caps

+6
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,9 @@ Description:
1616
Example output in powerpc:
1717
grep . /sys/bus/event_source/devices/cpu/caps/*
1818
/sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
19+
20+
The "branch_counter_nr" in the supported platform exposes the
21+
maximum number of counters which can be shown in the u64 counters
22+
of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
23+
exposes the width of each counter. Both of them can be used by
24+
the perf tool to parse the logged counters in each branch.

arch/powerpc/perf/core-book3s.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -2313,7 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
23132313
struct cpu_hw_events *cpuhw;
23142314
cpuhw = this_cpu_ptr(&cpu_hw_events);
23152315
power_pmu_bhrb_read(event, cpuhw);
2316-
perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
2316+
perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
23172317
}
23182318

23192319
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&

arch/x86/events/amd/core.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -940,7 +940,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
940940
continue;
941941

942942
if (has_branch_stack(event))
943-
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
943+
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
944944

945945
if (perf_event_overflow(event, &data, regs))
946946
x86_pmu_stop(event, 0);

arch/x86/events/core.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1702,7 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
17021702
perf_sample_data_init(&data, 0, event->hw.last_period);
17031703

17041704
if (has_branch_stack(event))
1705-
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
1705+
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
17061706

17071707
if (perf_event_overflow(event, &data, regs))
17081708
x86_pmu_stop(event, 0);

arch/x86/events/intel/core.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -3047,7 +3047,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
30473047
perf_sample_data_init(&data, 0, event->hw.last_period);
30483048

30493049
if (has_branch_stack(event))
3050-
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
3050+
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
30513051

30523052
if (perf_event_overflow(event, &data, regs))
30533053
x86_pmu_stop(event, 0);

arch/x86/events/intel/ds.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1755,7 +1755,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
17551755
setup_pebs_time(event, data, pebs->tsc);
17561756

17571757
if (has_branch_stack(event))
1758-
perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
1758+
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
17591759
}
17601760

17611761
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1912,7 +1912,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
19121912

19131913
if (has_branch_stack(event)) {
19141914
intel_pmu_store_pebs_lbrs(lbr);
1915-
perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
1915+
perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
19161916
}
19171917
}
19181918

include/linux/perf_event.h

+16-1
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,10 @@ static inline bool branch_sample_priv(const struct perf_event *event)
11391139
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
11401140
}
11411141

1142+
static inline bool branch_sample_counters(const struct perf_event *event)
1143+
{
1144+
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
1145+
}
11421146

11431147
struct perf_sample_data {
11441148
/*
@@ -1173,6 +1177,7 @@ struct perf_sample_data {
11731177
struct perf_callchain_entry *callchain;
11741178
struct perf_raw_record *raw;
11751179
struct perf_branch_stack *br_stack;
1180+
u64 *br_stack_cntr;
11761181
union perf_sample_weight weight;
11771182
union perf_mem_data_src data_src;
11781183
u64 txn;
@@ -1250,15 +1255,25 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
12501255

12511256
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
12521257
struct perf_event *event,
1253-
struct perf_branch_stack *brs)
1258+
struct perf_branch_stack *brs,
1259+
u64 *brs_cntr)
12541260
{
12551261
int size = sizeof(u64); /* nr */
12561262

12571263
if (branch_sample_hw_index(event))
12581264
size += sizeof(u64);
12591265
size += brs->nr * sizeof(struct perf_branch_entry);
12601266

1267+
/*
1268+
* The extension space for counters is appended after the
1269+
* struct perf_branch_stack. It is used to store the occurrences
1270+
* of events of each branch.
1271+
*/
1272+
if (brs_cntr)
1273+
size += brs->nr * sizeof(u64);
1274+
12611275
data->br_stack = brs;
1276+
data->br_stack_cntr = brs_cntr;
12621277
data->dyn_size += size;
12631278
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
12641279
}

include/uapi/linux/perf_event.h

+10
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
204204

205205
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
206206

207+
PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */
208+
207209
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
208210
};
209211

@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
235237

236238
PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
237239

240+
PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
241+
238242
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
239243
};
240244

@@ -982,6 +986,12 @@ enum perf_event_type {
982986
* { u64 nr;
983987
* { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
984988
* { u64 from, to, flags } lbr[nr];
989+
* #
990+
* # The format of the counters is decided by the
991+
* # "branch_counter_nr" and "branch_counter_width",
992+
* # which are defined in the ABI.
993+
* #
994+
* { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
985995
* } && PERF_SAMPLE_BRANCH_STACK
986996
*
987997
* { u64 abi; # enum perf_sample_regs_abi

kernel/events/core.c

+8
Original file line numberDiff line numberDiff line change
@@ -7341,6 +7341,14 @@ void perf_output_sample(struct perf_output_handle *handle,
73417341
if (branch_sample_hw_index(event))
73427342
perf_output_put(handle, data->br_stack->hw_idx);
73437343
perf_output_copy(handle, data->br_stack->entries, size);
7344+
/*
7345+
* Add the extension space which is appended
7346+
* right after the struct perf_branch_stack.
7347+
*/
7348+
if (data->br_stack_cntr) {
7349+
size = data->br_stack->nr * sizeof(u64);
7350+
perf_output_copy(handle, data->br_stack_cntr, size);
7351+
}
73447352
} else {
73457353
/*
73467354
* we always store at least the value of nr

0 commit comments

Comments
 (0)