@@ -901,6 +901,10 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
901
901
(unsigned int *)acl_malloc (kern->num_accel * sizeof (unsigned int ));
902
902
assert (kern->accel_invoc_queue_depth );
903
903
904
+ kern->kernel_arg_cache =
905
+ (char **)acl_malloc (kern->num_accel * sizeof (char *));
906
+ assert (kern->kernel_arg_cache );
907
+
904
908
// Kernel IRQ is a separate thread. Need to use a circular buffer to make this
905
909
// multithread safe.
906
910
kern->accel_queue_front = (int *)acl_malloc (kern->num_accel * sizeof (int ));
@@ -920,6 +924,7 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef,
920
924
for (unsigned b = 0 ; b < max_same_accel_launches; ++b) {
921
925
kern->accel_job_ids [a][b] = -1 ;
922
926
}
927
+ kern->kernel_arg_cache [a] = nullptr ;
923
928
}
924
929
}
925
930
@@ -1182,9 +1187,42 @@ void acl_kernel_if_launch_kernel_on_custom_sof(
1182
1187
1183
1188
if (kern->csr_version .has_value () &&
1184
1189
(kern->csr_version != CSR_VERSION_ID_18_1 && image->arg_value_size > 0 )) {
1185
- acl_kernel_cra_write_block (
1186
- kern, accel_id, offset + (unsigned int )image_size_static,
1187
- (unsigned int *)image->arg_value , image->arg_value_size );
1190
+ if (kern->kernel_arg_cache [accel_id] == nullptr ) {
1191
+ acl_kernel_cra_write_block (
1192
+ kern, accel_id, offset + (unsigned int )image_size_static,
1193
+ (unsigned int *)image->arg_value , image->arg_value_size );
1194
+ kern->kernel_arg_cache [accel_id] =
1195
+ (char *)acl_malloc (image->arg_value_size );
1196
+ memcpy (kern->kernel_arg_cache [accel_id], (char *)image->arg_value ,
1197
+ image->arg_value_size );
1198
+ } else {
1199
+ for (size_t step = 0 ; step < image->arg_value_size ;) {
1200
+ size_t size_to_write = 0 ;
1201
+ size_t cmp_size = (image->arg_value_size - step) > sizeof (int )
1202
+ ? sizeof (int )
1203
+ : (image->arg_value_size - step);
1204
+ while (cmp_size > 0 &&
1205
+ memcmp (kern->kernel_arg_cache [accel_id] + step + size_to_write,
1206
+ image->arg_value + step + size_to_write, cmp_size) != 0 ) {
1207
+ size_to_write += cmp_size;
1208
+ cmp_size =
1209
+ (image->arg_value_size - step - size_to_write) > sizeof (int )
1210
+ ? sizeof (int )
1211
+ : (image->arg_value_size - step - size_to_write);
1212
+ }
1213
+ if (size_to_write == 0 ) {
1214
+ step += (unsigned )sizeof (int );
1215
+ } else {
1216
+ acl_kernel_cra_write_block (
1217
+ kern, accel_id, offset + (unsigned int )(image_size_static + step),
1218
+ (unsigned int *)(image->arg_value + step), size_to_write);
1219
+ step += size_to_write;
1220
+ }
1221
+ }
1222
+ // The cache buffer was sized from image->arg_value_size on the first launch, so this copy assumes that size has not changed since.
1223
+ memcpy (kern->kernel_arg_cache [accel_id], (char *)image->arg_value ,
1224
+ image->arg_value_size );
1225
+ }
1188
1226
}
1189
1227
1190
1228
kern->accel_job_ids [accel_id][next_launch_index] = (int )activation_id;
@@ -1693,6 +1731,15 @@ void acl_kernel_if_close(acl_kernel_if *kern) {
1693
1731
acl_free (kern->accel_queue_front );
1694
1732
if (kern->accel_queue_back )
1695
1733
acl_free (kern->accel_queue_back );
1734
+ if (kern->kernel_arg_cache ) {
1735
+ for (unsigned int a = 0 ; a < kern->num_accel ; a++) {
1736
+ if (kern->kernel_arg_cache [a]) {
1737
+ acl_free ((void *)kern->kernel_arg_cache [a]);
1738
+ kern->kernel_arg_cache [a] = NULL ;
1739
+ }
1740
+ }
1741
+ acl_free ((void *)kern->kernel_arg_cache );
1742
+ }
1696
1743
kern->accel_csr = NULL ;
1697
1744
kern->accel_perf_mon = NULL ;
1698
1745
kern->accel_num_printfs = NULL ;
0 commit comments