Skip to content

Commit 7819fc4

Browse files
committed
Use c++ constructs to replace malloc calls in acl_kernel_if
1 parent 78dd354 commit 7819fc4

File tree

2 files changed

+34
-84
lines changed

2 files changed

+34
-84
lines changed

include/acl_kernel_if.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,13 @@ typedef struct {
3232

3333
// Accelerator details
3434
unsigned int num_accel;
35-
int volatile **accel_job_ids; //[num_accel][accel_invoc_queue_depth]
36-
int *accel_queue_front;
37-
int *accel_queue_back;
38-
acl_kernel_if_addr_range *accel_csr;
39-
acl_kernel_if_addr_range *accel_perf_mon;
40-
unsigned int *accel_num_printfs;
35+
std::vector<std::vector<int>>
36+
accel_job_ids; //[num_accel][accel_invoc_queue_depth]
37+
std::vector<int> accel_queue_front;
38+
std::vector<int> accel_queue_back;
39+
std::vector<acl_kernel_if_addr_range> accel_csr;
40+
std::vector<acl_kernel_if_addr_range> accel_perf_mon;
41+
std::vector<unsigned int> accel_num_printfs;
4142

4243
std::vector<std::optional<acl_streaming_kernel_control_info>>
4344
streaming_control_signal_names;
@@ -74,7 +75,7 @@ typedef struct {
7475
bool cra_ring_root_exist = false;
7576

7677
// Depth of hardware kernel invocation queue
77-
unsigned int *accel_invoc_queue_depth;
78+
std::vector<unsigned int> accel_invoc_queue_depth;
7879

7980
// Track which of the kernels is the autorun profiling kernel (-1 if none)
8081
int autorun_profiling_kernel_id;
@@ -85,7 +86,8 @@ typedef struct {
8586
// CRA address offset for backwards compatibility
8687
unsigned int cra_address_offset = 8;
8788

88-
char **accel_arg_cache;
89+
// Kernel argument cache for tracking changed arguments
90+
std::vector<std::unique_ptr<char[]>> accel_arg_cache;
8991
} acl_kernel_if;
9092

9193
// *********************** Public functions **************************

src/acl_kernel_if.cpp

Lines changed: 24 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -634,10 +634,6 @@ int acl_kernel_if_init(acl_kernel_if *kern, acl_bsp_io bsp_io,
634634
kern->num_accel = 0;
635635
kern->cur_segment = 0xffffffff;
636636

637-
kern->accel_csr = NULL;
638-
kern->accel_perf_mon = NULL;
639-
kern->accel_num_printfs = NULL;
640-
641637
kern->autorun_profiling_kernel_id = -1;
642638

643639
if (check_version_id(kern) != 0) {
@@ -802,15 +798,9 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
802798

803799
if (kern->num_accel > 0) {
804800
// Allocations for each kernel
805-
kern->accel_csr = (acl_kernel_if_addr_range *)acl_malloc(
806-
kern->num_accel * sizeof(acl_kernel_if_addr_range));
807-
assert(kern->accel_csr);
808-
kern->accel_perf_mon = (acl_kernel_if_addr_range *)acl_malloc(
809-
kern->num_accel * sizeof(acl_kernel_if_addr_range));
810-
assert(kern->accel_perf_mon);
811-
kern->accel_num_printfs =
812-
(unsigned int *)acl_malloc(kern->num_accel * sizeof(unsigned int));
813-
assert(kern->accel_num_printfs);
801+
kern->accel_csr.resize(kern->num_accel);
802+
kern->accel_perf_mon.resize(kern->num_accel);
803+
kern->accel_num_printfs.resize(kern->num_accel);
814804

815805
// The Kernel CSR registers
816806
// The new and improved config ROM gives us the address *offsets* from
@@ -893,38 +883,26 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
893883

894884
// Set up the structures to store state information about the device
895885
if (kern->num_accel > 0) {
896-
kern->accel_job_ids =
897-
(int volatile **)acl_malloc(kern->num_accel * sizeof(int *));
898-
assert(kern->accel_job_ids);
899-
900-
kern->accel_invoc_queue_depth =
901-
(unsigned int *)acl_malloc(kern->num_accel * sizeof(unsigned int));
902-
assert(kern->accel_invoc_queue_depth);
903-
904-
kern->accel_arg_cache =
905-
(char **)acl_malloc(kern->num_accel * sizeof(char *));
906-
assert(kern->accel_arg_cache);
886+
kern->accel_job_ids.resize(kern->num_accel);
887+
kern->accel_invoc_queue_depth.resize(kern->num_accel);
888+
kern->accel_arg_cache.resize(kern->num_accel);
907889

908890
// Kernel IRQ is a separate thread. Need to use a circular buffer to make this
909891
// multithread safe.
910-
kern->accel_queue_front = (int *)acl_malloc(kern->num_accel * sizeof(int));
911-
assert(kern->accel_queue_front);
912-
kern->accel_queue_back = (int *)acl_malloc(kern->num_accel * sizeof(int));
913-
assert(kern->accel_queue_back);
892+
kern->accel_queue_front.resize(kern->num_accel);
893+
kern->accel_queue_back.resize(kern->num_accel);
914894

915895
for (unsigned a = 0; a < kern->num_accel; ++a) {
916896
unsigned int max_same_accel_launches =
917897
devdef.accel[a].fast_launch_depth + 1;
918898
// +1, because fast launch depth does not account for the running kernel
919899
kern->accel_invoc_queue_depth[a] = max_same_accel_launches;
920-
kern->accel_job_ids[a] =
921-
(int *)acl_malloc(max_same_accel_launches * sizeof(int));
900+
kern->accel_job_ids[a].resize(max_same_accel_launches);
922901
kern->accel_queue_front[a] = -1;
923902
kern->accel_queue_back[a] = -1;
924903
for (unsigned b = 0; b < max_same_accel_launches; ++b) {
925904
kern->accel_job_ids[a][b] = -1;
926905
}
927-
kern->accel_arg_cache[a] = nullptr;
928906
}
929907
}
930908

@@ -1187,22 +1165,24 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
11871165

11881166
if (kern->csr_version.has_value() &&
11891167
(kern->csr_version != CSR_VERSION_ID_18_1 && image->arg_value_size > 0)) {
1190-
if (kern->accel_arg_cache[accel_id] == nullptr) {
1168+
if (!kern->accel_arg_cache[accel_id]) {
11911169
acl_kernel_cra_write_block(
11921170
kern, accel_id, offset + (unsigned int)image_size_static,
11931171
(unsigned int *)image->arg_value, image->arg_value_size);
11941172
kern->accel_arg_cache[accel_id] =
1195-
(char *)acl_malloc(image->arg_value_size);
1196-
memcpy(kern->accel_arg_cache[accel_id], (char *)image->arg_value,
1173+
std::make_unique<char[]>(image->arg_value_size);
1174+
memcpy(kern->accel_arg_cache[accel_id].get(), (char *)image->arg_value,
11971175
image->arg_value_size);
11981176
} else {
1177+
char *arg_cache_ptr = kern->accel_arg_cache[accel_id].get();
1178+
assert(arg_cache_ptr && "kernel argument cache not initialized!");
11991179
for (size_t step = 0; step < image->arg_value_size;) {
12001180
size_t size_to_write = 0;
12011181
size_t cmp_size = (image->arg_value_size - step) > sizeof(int)
12021182
? sizeof(int)
12031183
: (image->arg_value_size - step);
12041184
while (cmp_size > 0 &&
1205-
memcmp(kern->accel_arg_cache[accel_id] + step + size_to_write,
1185+
memcmp(arg_cache_ptr + step + size_to_write,
12061186
image->arg_value + step + size_to_write, cmp_size) != 0) {
12071187
size_to_write += cmp_size;
12081188
cmp_size =
@@ -1220,8 +1200,7 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
12201200
}
12211201
}
12221202
// image->arg_value_size should not change
1223-
memcpy(kern->accel_arg_cache[accel_id], (char *)image->arg_value,
1224-
image->arg_value_size);
1203+
memcpy(arg_cache_ptr, (char *)image->arg_value, image->arg_value_size);
12251204
}
12261205
}
12271206

@@ -1709,45 +1688,14 @@ void acl_kernel_if_unstall_kernel(acl_kernel_if *kern, int activation_id) {
17091688

17101689
void acl_kernel_if_close(acl_kernel_if *kern) {
17111690
acl_assert_locked();
1712-
// De-Allocations for each kernel
1713-
if (kern->accel_csr)
1714-
acl_free(kern->accel_csr);
1715-
if (kern->accel_perf_mon)
1716-
acl_free(kern->accel_perf_mon);
1717-
if (kern->accel_num_printfs)
1718-
acl_free(kern->accel_num_printfs);
1719-
if (kern->accel_job_ids) {
1720-
for (unsigned int a = 0; a < kern->num_accel; a++) {
1721-
if (kern->accel_job_ids[a]) {
1722-
acl_free((void *)kern->accel_job_ids[a]);
1723-
kern->accel_job_ids[a] = NULL;
1724-
}
1725-
}
1726-
acl_free((void *)kern->accel_job_ids);
1727-
}
1728-
if (kern->accel_invoc_queue_depth)
1729-
acl_free(kern->accel_invoc_queue_depth);
1730-
if (kern->accel_queue_front)
1731-
acl_free(kern->accel_queue_front);
1732-
if (kern->accel_queue_back)
1733-
acl_free(kern->accel_queue_back);
1734-
if (kern->accel_arg_cache) {
1735-
for (unsigned int a = 0; a < kern->num_accel; a++) {
1736-
if (kern->accel_arg_cache[a]) {
1737-
acl_free((void *)kern->accel_arg_cache[a]);
1738-
kern->accel_arg_cache[a] = NULL;
1739-
}
1740-
}
1741-
acl_free((void *)kern->accel_arg_cache);
1742-
}
1743-
kern->accel_csr = NULL;
1744-
kern->accel_perf_mon = NULL;
1745-
kern->accel_num_printfs = NULL;
1746-
kern->accel_job_ids = NULL;
1747-
kern->accel_invoc_queue_depth = NULL;
1748-
kern->accel_queue_front = NULL;
1749-
kern->accel_queue_back = NULL;
1750-
kern->accel_arg_cache = NULL;
1691+
kern->accel_csr.clear();
1692+
kern->accel_perf_mon.clear();
1693+
kern->accel_num_printfs.clear();
1694+
kern->accel_job_ids.clear();
1695+
kern->accel_invoc_queue_depth.clear();
1696+
kern->accel_queue_front.clear();
1697+
kern->accel_queue_back.clear();
1698+
kern->accel_arg_cache.clear();
17511699
kern->autorun_profiling_kernel_id = -1;
17521700
}
17531701

0 commit comments

Comments
 (0)