Skip to content

Commit 11e307f

Browse files
committed
Set default values for kernel image static part and skip CSR write if no change
1 parent 425a29b commit 11e307f

File tree

3 files changed

+39
-3
lines changed

3 files changed

+39
-3
lines changed

include/acl_kernel_if.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ typedef struct {
8686
// CRA address offset for backwards compatibility
8787
unsigned int cra_address_offset = 8;
8888

89+
// Kernel static image cache for tracking changed work dimensions, etc.
90+
std::vector<std::unique_ptr<char[]>> static_img_cache;
8991
// Kernel argument cache for tracking changed arguments
9092
std::vector<std::unique_ptr<char[]>> accel_arg_cache;
9193
} acl_kernel_if;

include/acl_types.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ class acl_device_program_info_t {
442442
// don't expect it.
443443
#pragma pack(push, 4)
444444
// These are the bytes written to global memory for a kernel invocation.
445-
typedef struct {
445+
typedef struct acl_dev_kernel_invocation_image {
446446
// The activation_id is the index into the device op queue.
447447
// The value at acl_platform.device_op_queue[activation_id] will be
448448
// updated asynchronously by the HAL, so its address must remain stable.
@@ -486,6 +486,19 @@ typedef struct {
486486
char *arg_value;
487487
size_t arg_value_size;
488488

489+
// Define constructor to initialize the invocation image to default values
490+
// Hard code for now
491+
acl_dev_kernel_invocation_image()
492+
: activation_id(0), accel_id(0), work_dim(1), work_group_size(1),
493+
padding(0), arg_value(NULL), arg_value_size(0) {
494+
for (unsigned i = 0; i < 3; ++i) {
495+
global_work_size[i] = 1;
496+
num_groups[i] = 1;
497+
local_work_size[i] = 1;
498+
global_work_offset[i] = 0;
499+
}
500+
}
501+
489502
} acl_dev_kernel_invocation_image_t;
490503

491504
// Invocation image structure that matches the 18.1 CRA layout.

src/acl_kernel_if.cpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -879,14 +879,23 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
879879
if (kern->num_accel > 0) {
880880
kern->accel_job_ids.resize(kern->num_accel);
881881
kern->accel_invoc_queue_depth.resize(kern->num_accel);
882+
kern->static_img_cache.resize(kern->num_accel);
882883
kern->accel_arg_cache.resize(kern->num_accel);
883884

884885
// Kernel IRQ is a separate thread. Need to use circular buffer to make this
885886
// multithread safe.
886887
kern->accel_queue_front.resize(kern->num_accel);
887888
kern->accel_queue_back.resize(kern->num_accel);
888889

890+
acl_dev_kernel_invocation_image_t default_invocation;
891+
size_t image_size_static =
892+
(size_t)((uintptr_t) & (default_invocation.arg_value) - (uintptr_t) &
893+
(default_invocation.work_dim));
894+
889895
for (unsigned a = 0; a < kern->num_accel; ++a) {
896+
kern->static_img_cache[a] = std::make_unique<char[]>(image_size_static);
897+
memcpy(kern->static_img_cache[a].get(),
898+
(char *)(&(default_invocation.work_dim)), image_size_static);
890899
unsigned int max_same_accel_launches =
891900
devdef.accel[a].fast_launch_depth + 1;
892901
// +1, because fast launch depth does not account for the running kernel
@@ -1153,8 +1162,19 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
11531162
// it is in dynamic memory. Only write the static part of the invocation
11541163
// image if this kernel uses CRA control.
11551164
if (!kern->streaming_control_signal_names[accel_id]) {
1156-
acl_kernel_cra_write_block(kern, accel_id, offset, (unsigned int *)image_p,
1157-
image_size_static);
1165+
if (kern->csr_version == CSR_VERSION_ID_18_1) {
1166+
// Just write everything for older CSR version
1167+
acl_kernel_cra_write_block(kern, accel_id, offset,
1168+
(unsigned int *)image_p, image_size_static);
1169+
} else {
1170+
char *img_cache_ptr = kern->static_img_cache[accel_id].get();
1171+
assert(img_cache_ptr && "kernel image cache not initialized!");
1172+
if (memcmp(img_cache_ptr, (char *)image_p, image_size_static) != 0) {
1173+
acl_kernel_cra_write_block(kern, accel_id, offset,
1174+
(unsigned int *)image_p, image_size_static);
1175+
memcpy(img_cache_ptr, (char *)image_p, image_size_static);
1176+
}
1177+
}
11581178
}
11591179

11601180
bool accel_has_agent_args = false;
@@ -1692,6 +1712,7 @@ void acl_kernel_if_close(acl_kernel_if *kern) {
16921712
kern->accel_invoc_queue_depth.clear();
16931713
kern->accel_queue_front.clear();
16941714
kern->accel_queue_back.clear();
1715+
kern->static_img_cache.clear();
16951716
kern->accel_arg_cache.clear();
16961717
kern->autorun_profiling_kernel_id = -1;
16971718
}

0 commit comments

Comments
 (0)