Skip to content

Commit 2a807f0

Browse files
committed
Only write the changed parts of the kernel arguments to the kernel CRA
1 parent 29bf7b7 commit 2a807f0

File tree

2 files changed

+52
-3
lines changed

2 files changed

+52
-3
lines changed

include/acl_kernel_if.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ typedef struct {
8484

8585
// CRA address offset for backwards compatibility
8686
unsigned int cra_address_offset = 8;
87+
88+
char **kernel_arg_cache;
8789
} acl_kernel_if;
8890

8991
// *********************** Public functions **************************

src/acl_kernel_if.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,10 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
901901
(unsigned int *)acl_malloc(kern->num_accel * sizeof(unsigned int));
902902
assert(kern->accel_invoc_queue_depth);
903903

904+
kern->kernel_arg_cache =
905+
(char **)acl_malloc(kern->num_accel * sizeof(char *));
906+
assert(kern->kernel_arg_cache);
907+
904908
// Kernel IRQ is a separate thread. Need to use circular buffer to make this
905909
// multithread safe.
906910
kern->accel_queue_front = (int *)acl_malloc(kern->num_accel * sizeof(int));
@@ -920,6 +924,7 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
920924
for (unsigned b = 0; b < max_same_accel_launches; ++b) {
921925
kern->accel_job_ids[a][b] = -1;
922926
}
927+
kern->kernel_arg_cache[a] = nullptr;
923928
}
924929
}
925930

@@ -1182,9 +1187,42 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
11821187

11831188
if (kern->csr_version.has_value() &&
11841189
(kern->csr_version != CSR_VERSION_ID_18_1 && image->arg_value_size > 0)) {
1185-
acl_kernel_cra_write_block(
1186-
kern, accel_id, offset + (unsigned int)image_size_static,
1187-
(unsigned int *)image->arg_value, image->arg_value_size);
1190+
if (kern->kernel_arg_cache[accel_id] == nullptr) {
1191+
acl_kernel_cra_write_block(
1192+
kern, accel_id, offset + (unsigned int)image_size_static,
1193+
(unsigned int *)image->arg_value, image->arg_value_size);
1194+
kern->kernel_arg_cache[accel_id] =
1195+
(char *)acl_malloc(image->arg_value_size);
1196+
memcpy(kern->kernel_arg_cache[accel_id], (char *)image->arg_value,
1197+
image->arg_value_size);
1198+
} else {
1199+
for (size_t step = 0; step < image->arg_value_size;) {
1200+
size_t size_to_write = 0;
1201+
size_t cmp_size = (image->arg_value_size - step) > sizeof(int)
1202+
? sizeof(int)
1203+
: (image->arg_value_size - step);
1204+
while (cmp_size > 0 &&
1205+
memcmp(kern->kernel_arg_cache[accel_id] + step + size_to_write,
1206+
image->arg_value + step + size_to_write, cmp_size) != 0) {
1207+
size_to_write += cmp_size;
1208+
cmp_size =
1209+
(image->arg_value_size - step - size_to_write) > sizeof(int)
1210+
? sizeof(int)
1211+
: (image->arg_value_size - step - size_to_write);
1212+
}
1213+
if (size_to_write == 0) {
1214+
step += (unsigned)sizeof(int);
1215+
} else {
1216+
acl_kernel_cra_write_block(
1217+
kern, accel_id, offset + (unsigned int)(image_size_static + step),
1218+
(unsigned int *)(image->arg_value + step), size_to_write);
1219+
step += size_to_write;
1220+
}
1221+
}
1222+
// Cache was allocated at first launch, so image->arg_value_size must not change
1223+
memcpy(kern->kernel_arg_cache[accel_id], (char *)image->arg_value,
1224+
image->arg_value_size);
1225+
}
11881226
}
11891227

11901228
kern->accel_job_ids[accel_id][next_launch_index] = (int)activation_id;
@@ -1693,6 +1731,15 @@ void acl_kernel_if_close(acl_kernel_if *kern) {
16931731
acl_free(kern->accel_queue_front);
16941732
if (kern->accel_queue_back)
16951733
acl_free(kern->accel_queue_back);
1734+
if (kern->kernel_arg_cache) {
1735+
for (unsigned int a = 0; a < kern->num_accel; a++) {
1736+
if (kern->kernel_arg_cache[a]) {
1737+
acl_free((void *)kern->kernel_arg_cache[a]);
1738+
kern->kernel_arg_cache[a] = NULL;
1739+
}
1740+
}
1741+
acl_free((void *)kern->kernel_arg_cache);
1742+
}
16961743
kern->accel_csr = NULL;
16971744
kern->accel_perf_mon = NULL;
16981745
kern->accel_num_printfs = NULL;

0 commit comments

Comments
 (0)