/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
 */

#include <string.h>

#include <executorch/runtime/platform/assert.h>
#include <executorch/runtime/platform/platform.h>
#include <executorch/runtime/platform/profiler.h>

#include <inttypes.h>

namespace executorch {
namespace runtime {

namespace {
static uint8_t prof_buf[prof_buf_size * MAX_PROFILE_BLOCKS];
// Base pointer for header
static prof_header_t* prof_header =
(prof_header_t*)((uintptr_t)prof_buf + prof_header_offset);
// Base pointer for profiling entries
static prof_event_t* prof_arr =
(prof_event_t*)((uintptr_t)prof_buf + prof_events_offset);
// Base pointer for memory allocator info array
static prof_allocator_t* mem_allocator_arr =
(prof_allocator_t*)((uintptr_t)prof_buf + prof_mem_alloc_info_offset);
// Base pointer for memory profiling entries
static mem_prof_event_t* mem_prof_arr =
(mem_prof_event_t*)((uintptr_t)prof_buf + prof_mem_alloc_events_offset);
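
// Number of profiling blocks in use, and whether event-name strings have
// already been copied into the buffer by dump_profile_stats().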
static uint32_t num_blocks = 0;
static bool prof_stats_dumped = false;
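// Current {chain_idx, instruction_idx} profiling context; the default
// {-1, 0} means no instruction scope is active.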
prof_state_t profile_state_tls{-1, 0u};
} // namespace

const prof_state_t& get_profile_tls_state() {
  return profile_state_tls;
}

void set_profile_tls_state(const prof_state_t& state) {
  profile_state_tls = state;
}

ExecutorchProfilerInstructionScope::ExecutorchProfilerInstructionScope(
    const prof_state_t& state)
    : old_state_(get_profile_tls_state()) {
  set_profile_tls_state(state);
}

ExecutorchProfilerInstructionScope::~ExecutorchProfilerInstructionScope() {
  set_profile_tls_state(old_state_);
}
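
// Usage sketch for ExecutorchProfilerInstructionScope above: the executor can
// tag every profiling event issued while one instruction runs (the indices
// here are illustrative):
//
//   ExecutorchProfilerInstructionScope scope({/*chain_idx=*/0,
//                                             /*instruction_idx=*/3});
//   // ... execute the instruction; events recorded here carry {0, 3} ...
//   // The previous TLS state is restored when `scope` goes out of scope.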
uint32_t begin_profiling(const char* name) {
ET_CHECK_MSG(
prof_header->prof_entries < MAX_PROFILE_EVENTS,
"Out of profiling buffer space. Increase MAX_PROFILE_EVENTS and re-compile.");
uint32_t curr_counter = prof_header->prof_entries;
prof_header->prof_entries++;
prof_arr[curr_counter].end_time = 0;
prof_arr[curr_counter].name_str = name;
prof_state_t state = get_profile_tls_state();
prof_arr[curr_counter].chain_idx = state.chain_idx;
prof_arr[curr_counter].instruction_idx = state.instruction_idx;
  // Set the start time last so that the overhead of this function itself is
  // not captured in the measured interval.
prof_arr[curr_counter].start_time = et_pal_current_ticks();
return curr_counter;
}

void end_profiling(uint32_t token_id) {
ET_CHECK_MSG(token_id < MAX_PROFILE_EVENTS, "Invalid token id.");
prof_arr[token_id].end_time = et_pal_current_ticks();
}
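
// Usage sketch for begin_profiling()/end_profiling() above; the RAII wrapper
// ExecutorchProfiler at the bottom of this file packages the same pattern:
//
//   uint32_t tok = begin_profiling("my_kernel"); // name is illustrative
//   run_my_kernel();                             // hypothetical workload
//   end_profiling(tok);
//
// Note that the name pointer must stay valid until dump_profile_stats()
// copies the string into the buffer.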
void dump_profile_stats(prof_result_t* prof_result) {
prof_result->prof_data = (uint8_t*)prof_buf;
prof_result->num_bytes = num_blocks * prof_buf_size;
prof_result->num_blocks = num_blocks;
if (!prof_stats_dumped) {
for (size_t i = 0; i < num_blocks; i++) {
prof_header_t* prof_header_local =
(prof_header_t*)(prof_buf + prof_buf_size * i);
prof_event_t* prof_event_local =
(prof_event_t*)(prof_buf + prof_buf_size * i + prof_events_offset);
// Copy over the string names into the space allocated in prof_event_t. We
// avoided doing this earlier to keep the overhead in begin_profiling and
// end_profiling as low as possible.
for (size_t j = 0; j < prof_header_local->prof_entries; j++) {
size_t str_len = strlen(prof_event_local[j].name_str);
const char* str_ptr = prof_event_local[j].name_str;
memset(prof_event_local[j].name, 0, PROF_NAME_MAX_LEN);
if (str_len > PROF_NAME_MAX_LEN) {
memcpy(prof_event_local[j].name, str_ptr, PROF_NAME_MAX_LEN);
} else {
memcpy(prof_event_local[j].name, str_ptr, str_len);
}
}
}
}
prof_stats_dumped = true;
}
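
// Retrieval sketch for dump_profile_stats() above: a host-side harness might
// drain the buffer like this (write_to_host is a hypothetical transport, not
// part of this file):
//
//   prof_result_t result;
//   dump_profile_stats(&result);
//   write_to_host(result.prof_data, result.num_bytes);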
void reset_profile_stats() {
prof_stats_dumped = false;
prof_header->prof_entries = 0;
prof_header->allocator_entries = 0;
prof_header->mem_prof_entries = 0;
}

void track_allocation(int32_t id, uint32_t size) {
  if (id == -1) {
    return;
  }
ET_CHECK_MSG(
prof_header->mem_prof_entries < MAX_MEM_PROFILE_EVENTS,
"Out of memory profiling buffer space. Increase MAX_MEM_PROFILE_EVENTS\
to %" PRIu32 " and re-compile.",
prof_header->mem_prof_entries);
mem_prof_arr[prof_header->mem_prof_entries].allocator_id = id;
mem_prof_arr[prof_header->mem_prof_entries].allocation_size = size;
prof_header->mem_prof_entries++;
}

uint32_t track_allocator(const char* name) {
ET_CHECK_MSG(
prof_header->allocator_entries < MEM_PROFILE_MAX_ALLOCATORS,
"Out of allocator tracking space, %" PRIu32
" is needed. Increase MEM_PROFILE_MAX_ALLOCATORS and re-compile",
prof_header->allocator_entries);
size_t str_len = strlen(name);
size_t num_allocators = prof_header->allocator_entries;
memset(mem_allocator_arr[num_allocators].name, 0, PROF_NAME_MAX_LEN);
if (str_len > PROF_NAME_MAX_LEN) {
memcpy(mem_allocator_arr[num_allocators].name, name, PROF_NAME_MAX_LEN);
} else {
memcpy(mem_allocator_arr[num_allocators].name, name, str_len);
}
mem_allocator_arr[num_allocators].allocator_id = num_allocators;
return prof_header->allocator_entries++;
}
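
// Usage sketch for track_allocator()/track_allocation() above: register an
// allocator once, then tag each allocation with the returned id (names and
// sizes are illustrative):
//
//   uint32_t heap_id = track_allocator("activation_heap");
//   track_allocation(heap_id, /*size=*/1024);
//   track_allocation(-1, 512); // id -1: ignored by the profiler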
void profiling_create_block(const char* name) {
  // If the current profiling block is unused, keep using it; otherwise move
  // on to the next block.
if (prof_header->prof_entries != 0 || prof_header->mem_prof_entries != 0 ||
prof_header->allocator_entries != 0 || num_blocks == 0) {
num_blocks += 1;
ET_CHECK_MSG(
num_blocks <= MAX_PROFILE_BLOCKS,
"Only %d blocks are supported and they've all been used up but %" PRIu32
" is used. Increment MAX_PROFILE_BLOCKS and re-run",
MAX_PROFILE_BLOCKS,
num_blocks);
}
// Copy over the name of this profiling block.
size_t str_len =
strlen(name) >= PROF_NAME_MAX_LEN ? PROF_NAME_MAX_LEN : strlen(name);
uintptr_t base = (uintptr_t)prof_buf + (num_blocks - 1) * prof_buf_size;
prof_header = (prof_header_t*)(base + prof_header_offset);
memset(prof_header->name, 0, PROF_NAME_MAX_LEN);
memcpy(prof_header->name, name, str_len);
  // Set the profiler version for compatibility checks in the post-processing
  // tool.
prof_header->prof_ver = ET_PROF_VER;
// Set the maximum number of entries that this block can support.
prof_header->max_prof_entries = MAX_PROFILE_EVENTS;
prof_header->max_allocator_entries = MEM_PROFILE_MAX_ALLOCATORS;
prof_header->max_mem_prof_entries = MAX_MEM_PROFILE_EVENTS;
reset_profile_stats();
// Set the base addresses for the various profiling entries arrays.
prof_arr = (prof_event_t*)(base + prof_events_offset);
mem_allocator_arr = (prof_allocator_t*)(base + prof_mem_alloc_info_offset);
mem_prof_arr = (mem_prof_event_t*)(base + prof_mem_alloc_events_offset);
}
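
// Usage sketch for profiling_create_block() above: start a fresh block per
// phase so events are attributed separately (phase names are illustrative):
//
//   profiling_create_block("load_model");
//   // ... load the program ...
//   profiling_create_block("inference");
//   // ... execute; note that an unused current block is reused rather
//   // than advanced ...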
void profiler_init(void) {
profiling_create_block("default");
}

ExecutorchProfiler::ExecutorchProfiler(const char* name) {
prof_tok = begin_profiling(name);
}

ExecutorchProfiler::~ExecutorchProfiler() {
end_profiling(prof_tok);
}
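
// RAII usage sketch for ExecutorchProfiler above: the recorded event spans
// the enclosing scope (the event name is illustrative):
//
//   {
//     ExecutorchProfiler profiler("operator_add");
//     // ... work being measured ...
//   } // end_profiling(prof_tok) runs here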
} // namespace runtime
} // namespace executorch