4
4
* LICENSE file in the root directory of this source tree.
5
5
*/
6
6
7
+ #include < array>
7
8
#include < cinttypes>
8
- #include < vector>
9
9
10
10
#include " arm_perf_monitor.h"
11
11
14
14
#include < executorch/runtime/platform/log.h>
15
15
#include < pmu_ethosu.h>
16
16
17
- static uint32_t ethosu_inference_count = 0 ;
18
- static uint64_t ethosu_ArmCycleCountStart = 0 ;
19
- static uint64_t ethosu_ArmBackendExecuteCycleCountStart = 0 ;
20
- static uint64_t ethosu_ArmBackendExecuteCycleCount = 0 ;
21
- static uint64_t ethosu_ArmWhenNPURunCycleCountStart = 0 ;
22
- static uint64_t ethosu_ArmWhenNPURunCycleCount = 0 ;
23
- static uint64_t ethosu_pmuCycleCount = 0 ;
24
- static std::vector<uint64_t > ethosu_pmuEventCounts (
25
- ETHOSU_PMU_Get_NumEventCounters (),
26
- 0);
17
+ namespace {
27
18
28
19
#if defined(ETHOSU55) || defined(ETHOSU65)
29
- static const uint32_t ethosu_pmuCountersUsed = 4 ;
20
+ const uint32_t ethosu_pmuCountersUsed = 4 ;
30
21
#elif defined(ETHOSU85)
31
- static const uint32_t ethosu_pmuCountersUsed = 5 ;
22
+ const uint32_t ethosu_pmuCountersUsed = 5 ;
32
23
#else
33
24
#error No NPU target defined
34
25
#endif
35
26
27
+ uint32_t ethosu_delegation_count = 0 ;
28
+ uint64_t ethosu_ArmCycleCountStart = 0 ;
29
+ uint64_t ethosu_ArmBackendExecuteCycleCountStart = 0 ;
30
+ uint64_t ethosu_ArmBackendExecuteCycleCount = 0 ;
31
+ uint64_t ethosu_ArmWhenNPURunCycleCountStart = 0 ;
32
+ uint64_t ethosu_ArmWhenNPURunCycleCount = 0 ;
33
+ uint64_t ethosu_pmuCycleCount = 0 ;
34
+ std::array<uint64_t , ethosu_pmuCountersUsed> ethosu_pmuEventCounts = {0 };
35
+
36
36
// ethosu_pmuCountersUsed should match the number of counters set up in
37
37
// ethosu_inference_begin() and not be more than the HW supports
38
38
static_assert (ETHOSU_PMU_NCOUNTERS >= ethosu_pmuCountersUsed);
39
39
40
+ } // namespace
41
+
40
42
extern " C" {
41
43
42
44
// Callback invoked at start of NPU execution
@@ -85,7 +87,7 @@ void ethosu_inference_begin(struct ethosu_driver* drv, void*) {
85
87
86
88
// Callback invoked at end of NPU execution
87
89
void ethosu_inference_end (struct ethosu_driver * drv, void *) {
88
- ethosu_inference_count ++;
90
+ ethosu_delegation_count ++;
89
91
ethosu_pmuCycleCount += ETHOSU_PMU_Get_CCNTR (drv);
90
92
91
93
for (size_t i = 0 ; i < ethosu_pmuCountersUsed; i++) {
@@ -113,6 +115,7 @@ void EthosUBackend_execute_end() {
113
115
}
114
116
115
117
void StartMeasurements () {
118
+ ethosu_delegation_count = 0 ;
116
119
ethosu_ArmBackendExecuteCycleCount = 0 ;
117
120
ethosu_ArmWhenNPURunCycleCount = 0 ;
118
121
ethosu_pmuCycleCount = 0 ;
@@ -123,32 +126,43 @@ void StartMeasurements() {
123
126
ethosu_ArmCycleCountStart = ARM_PMU_Get_CCNTR ();
124
127
}
125
128
126
- void StopMeasurements () {
129
+ void StopMeasurements (int num_inferences ) {
127
130
ARM_PMU_CNTR_Disable (
128
131
PMU_CNTENCLR_CCNTR_ENABLE_Msk | PMU_CNTENCLR_CNT0_ENABLE_Msk |
129
132
PMU_CNTENCLR_CNT1_ENABLE_Msk);
130
133
uint32_t cycle_count = ARM_PMU_Get_CCNTR () - ethosu_ArmCycleCountStart;
131
134
132
135
// Number of command streams handled by the NPU
133
- ET_LOG (Info, " NPU Inferences : %d" , ethosu_inference_count);
136
+ ET_LOG (Info, " NPU Inferences : %d" , num_inferences);
137
+ ET_LOG (
138
+ Info,
139
+ " NPU delegations: %d (%.2f per inference)" ,
140
+ ethosu_delegation_count,
141
+ (double )ethosu_delegation_count / num_inferences);
134
142
ET_LOG (Info, " Profiler report, CPU cycles per operator:" );
135
143
// This is the number of CPU cycles for the ethos-u operator from start to finish
136
144
// in the framework. If there is more than one command stream the time is added
137
145
// together
138
146
ET_LOG (
139
147
Info,
140
- " ethos-u : cycle_cnt : %d cycles" ,
141
- ethosu_ArmBackendExecuteCycleCount);
148
+ " ethos-u : cycle_cnt : %d cycles (%.2f per inference)" ,
149
+ ethosu_ArmBackendExecuteCycleCount,
150
+ (double )ethosu_ArmBackendExecuteCycleCount / num_inferences);
142
151
// We could print a list of the cycles used by the other delegates here in the
143
152
// future but now we only print ethos-u: this means that "Operator(s) total:
144
153
// ..." will be the same number as ethos-u : cycle_cnt and not the sum of all
145
154
ET_LOG (
146
155
Info,
147
- " Operator(s) total: %d CPU cycles" ,
148
- ethosu_ArmBackendExecuteCycleCount);
156
+ " Operator(s) total: %d CPU cycles (%.2f per inference)" ,
157
+ ethosu_ArmBackendExecuteCycleCount,
158
+ (double )ethosu_ArmBackendExecuteCycleCount / num_inferences);
149
159
// Total CPU cycles used in the executorch method->execute()
150
160
// Other delegates and no delegates are counted in this
151
- ET_LOG (Info, " Inference runtime: %d CPU cycles total" , cycle_count);
161
+ ET_LOG (
162
+ Info,
163
+ " Inference runtime: %d CPU cycles total (%.2f per inference)" ,
164
+ cycle_count,
165
+ (double )cycle_count / num_inferences);
152
166
153
167
ET_LOG (
154
168
Info,
@@ -174,14 +188,24 @@ void StopMeasurements() {
174
188
// If there is more than one command stream the time is added together
175
189
ET_LOG (
176
190
Info,
177
- " cpu_wait_for_npu_cntr : %" PRIu64 " CPU cycles" ,
178
- ethosu_ArmWhenNPURunCycleCount);
191
+ " cpu_wait_for_npu_cntr : %" PRIu64 " CPU cycles (%.2f per inference)" ,
192
+ ethosu_ArmWhenNPURunCycleCount,
193
+ (double )ethosu_ArmWhenNPURunCycleCount / num_inferences);
179
194
180
195
ET_LOG (Info, " Ethos-U PMU report:" );
181
- ET_LOG (Info, " ethosu_pmu_cycle_cntr : %" PRIu64, ethosu_pmuCycleCount);
196
+ ET_LOG (
197
+ Info,
198
+ " ethosu_pmu_cycle_cntr : % " PRIu64 " (%.2f per inference)" ,
199
+ ethosu_pmuCycleCount,
200
+ (double )ethosu_pmuCycleCount / num_inferences);
182
201
183
202
for (size_t i = 0 ; i < ethosu_pmuCountersUsed; i++) {
184
- ET_LOG (Info, " ethosu_pmu_cntr%zd : %" PRIu64, i, ethosu_pmuEventCounts[i]);
203
+ ET_LOG (
204
+ Info,
205
+ " ethosu_pmu_cntr%zd : %" PRIu64 " (%.2f per inference)" ,
206
+ i,
207
+ ethosu_pmuEventCounts[i],
208
+ (double )ethosu_pmuEventCounts[i] / num_inferences);
185
209
}
186
210
#if defined(ETHOSU55) || defined(ETHOSU65)
187
211
ET_LOG (
@@ -199,6 +223,8 @@ void StopMeasurements() {
199
223
#else
200
224
// No-op variant of StartMeasurements() defined in the #else branch of the
// enclosing preprocessor conditional; the body is empty, so calling it does
// nothing.
void StartMeasurements () {}
201
225
202
- void StopMeasurements () {}
226
+ void StopMeasurements (int num_inferences) {
227
+ (void )num_inferences;
228
+ }
203
229
204
230
#endif
0 commit comments