diff --git a/include/acl.h b/include/acl.h index 73215939..5f191ad9 100644 --- a/include/acl.h +++ b/include/acl.h @@ -136,6 +136,11 @@ typedef enum { } acl_kernel_arg_access_qualifier_t; // this is defaulted to none, for non-pipe // and non-image args. +struct acl_streaming_kernel_arg_info { + // name of the streaming interface at device image boundary + std::string interface_name; +}; + // This defines everything "interface" of a kernel argument. // Be sure to keep this consistent with l_kernel_interface_match() in // acl_kernel.cpp. This struct must remain trivially copyable. @@ -170,6 +175,9 @@ typedef struct { // allowed, e.g., "struct mystruct" std::string type_name; std::string name; + + bool streaming_arg_info_available; + acl_streaming_kernel_arg_info streaming_arg_info; } acl_kernel_arg_info_t; // This struct must remain trivially copyable. @@ -231,6 +239,8 @@ typedef struct { fast_launch_depth; /* How many kernels can be buffered on the device, 0 means no buffering just one can execute*/ unsigned int is_sycl_compile; /* [1] SYCL compile; [0] OpenCL compile*/ + + bool streaming_control_info_available; } acl_accel_def_t; /* An ACL system definition. diff --git a/include/acl_hal.h b/include/acl_hal.h index e0fb58e3..4e65cf36 100644 --- a/include/acl_hal.h +++ b/include/acl_hal.h @@ -238,6 +238,12 @@ typedef struct { /// Allocate USM shared memory void *(*shared_alloc)(cl_device_id device, size_t size, size_t alignment, mem_properties_t *properties, int *error); + + void (*simulation_streaming_kernel_start)(unsigned int physical_device_id, + const std::string &kernel_name); + void (*simulation_streaming_kernel_done)(unsigned int physical_device_id, + const std::string &kernel_name, + unsigned int &finish_counter); } acl_hal_t; /// Linked list of MMD library names to load. 
diff --git a/include/acl_hal_mmd.h b/include/acl_hal_mmd.h index a16d90f4..beadd7fd 100644 --- a/include/acl_hal_mmd.h +++ b/include/acl_hal_mmd.h @@ -33,6 +33,17 @@ typedef struct { unsigned long long size; /* size of this memory */ } aocl_mmd_memory_info_t; +// Interface to simulator to describe streaming kernel arguments that are +// excluded from the invocation image. Streaming arguments are passed to the +// simulator by calling aocl_mmd_simulation_streaming_kernel_args(), before +// writing the kernel invocation image containing non-streaming arguments. +struct aocl_mmd_streaming_kernel_arg_info_t { + // unique identifier for the bus-functional model (BFM) + std::string name; + // argument value + std::vector value; +}; + // MMD Version checking // Since MMD version changes only with major releases it is safe to assume // this is a float with at most one decimal @@ -120,6 +131,24 @@ typedef struct { int *error); double mmd_version; + + // Passes streaming kernel argument names and values to simulator. + void (*aocl_mmd_simulation_streaming_kernel_args)( + int handle, + const std::vector &streaming_args); + + // Submits streaming kernel control start signal to simulator. + void (*aocl_mmd_simulation_streaming_kernel_start)( + int handle, const std::string &kernel_name); + + // Queries streaming kernel control done signal from simulator. + // Returns non-negative number of finished kernel invocations. + // + // It is the responsibility of the simulator to ensure that any kernel + // invocations that finish *while* this function is invoked are properly + // accounted for and returned in a subsequent invocation of this function. 
+ void (*aocl_mmd_simulation_streaming_kernel_done)( + int handle, const std::string &kernel_name, unsigned int &finish_counter); } acl_mmd_dispatch_t; typedef struct { diff --git a/include/acl_kernel_if.h b/include/acl_kernel_if.h index 43e363e0..51ddcf8b 100644 --- a/include/acl_kernel_if.h +++ b/include/acl_kernel_if.h @@ -12,6 +12,10 @@ #include "acl_hal.h" #include "acl_types.h" +#include +#include +#include + #ifdef __cplusplus extern "C" { #endif @@ -35,6 +39,8 @@ typedef struct { acl_kernel_if_addr_range *accel_perf_mon; unsigned int *accel_num_printfs; + std::vector> streaming_control_kernel_names; + // Track potential hangs time_ns last_kern_update; diff --git a/include/acl_types.h b/include/acl_types.h index 0536838e..28685ae2 100644 --- a/include/acl_types.h +++ b/include/acl_types.h @@ -23,6 +23,7 @@ #include "acl.h" #include "acl_device_binary.h" #include "acl_hal.h" +#include "acl_hal_mmd.h" #include "acl_icd_dispatch.h" #if defined(__cplusplus) @@ -446,6 +447,7 @@ typedef struct acl_kernel_invocation_wrapper_t { acl_dev_kernel_invocation_image_t *image; acl_dev_kernel_invocation_image_t image_storage; // What image points to. 
+ std::vector streaming_args; } acl_kernel_invocation_wrapper_t; typedef struct { diff --git a/src/acl_auto_configure.cpp b/src/acl_auto_configure.cpp index 8102d405..a682b5f5 100644 --- a/src/acl_auto_configure.cpp +++ b/src/acl_auto_configure.cpp @@ -504,6 +504,24 @@ static bool read_device_global_mem_defs( return result; } +static bool read_streaming_kernel_arg_info( + const std::string &config_str, std::string::size_type &curr_pos, + bool &streaming_arg_info_available, + acl_streaming_kernel_arg_info &streaming_arg_info, + std::vector &counters) noexcept { + unsigned int value = 0; + bool result = read_uint_counters(config_str, curr_pos, value, counters); + streaming_arg_info_available = value; + + if (result && streaming_arg_info_available) { + streaming_arg_info = acl_streaming_kernel_arg_info{}; + result = read_string_counters(config_str, curr_pos, + streaming_arg_info.interface_name, counters); + } + + return result; +} + static bool read_kernel_args(const std::string &config_str, const bool kernel_arg_info_available, std::string::size_type &curr_pos, @@ -597,6 +615,14 @@ static bool read_kernel_args(const std::string &config_str, type_name = ""; } + bool streaming_arg_info_available = false; + acl_streaming_kernel_arg_info streaming_arg_info; + if (result && counters.back() > 0) { + result = read_streaming_kernel_arg_info(config_str, curr_pos, + streaming_arg_info_available, + streaming_arg_info, counters); + } + /***************************************************************** Since the introduction of autodiscovery forwards-compatibility, new entries for each kernel argument section start here. 
@@ -619,6 +645,8 @@ static bool read_kernel_args(const std::string &config_str, args[j].host_accessible = host_accessible; args[j].pipe_channel_id = pipe_channel_id; args[j].buffer_location = buffer_location; + args[j].streaming_arg_info_available = streaming_arg_info_available; + args[j].streaming_arg_info = streaming_arg_info; } // forward compatibility: bypassing remaining fields at the end of // arguments section @@ -635,6 +663,18 @@ static bool read_kernel_args(const std::string &config_str, return result; } +static bool +read_streaming_kernel_control_info(const std::string &config_str, + std::string::size_type &curr_pos, + bool &streaming_control_info_available, + std::vector &counters) noexcept { + unsigned int value = 0; + bool result = read_uint_counters(config_str, curr_pos, value, counters); + streaming_control_info_available = value; + + return result; +} + static bool read_accel_defs(const std::string &config_str, std::string::size_type &curr_pos, const bool kernel_arg_info_available, @@ -872,6 +912,12 @@ static bool read_accel_defs(const std::string &config_str, accel[i].is_sycl_compile, counters); } + if (result && counters.back() > 0) { + result = read_streaming_kernel_control_info( + config_str, curr_pos, accel[i].streaming_control_info_available, + counters); + } + // forward compatibility: bypassing remaining fields at the end of kernel // description section while (result && counters.size() > 0 && diff --git a/src/acl_hal_mmd.cpp b/src/acl_hal_mmd.cpp index 27fef0f6..dd4eaf06 100644 --- a/src/acl_hal_mmd.cpp +++ b/src/acl_hal_mmd.cpp @@ -153,6 +153,12 @@ int acl_hal_mmd_set_profile_start_count(unsigned int physical_device_id, int acl_hal_mmd_set_profile_stop_count(unsigned int physical_device_id, unsigned int accel_id, uint64_t value); +void acl_hal_mmd_simulation_streaming_kernel_start( + unsigned int physical_device_id, const std::string &kernel_name); +void acl_hal_mmd_simulation_streaming_kernel_done( + unsigned int physical_device_id, const 
std::string &kernel_name, + unsigned int &finish_counter); + static size_t acl_kernel_if_read(acl_bsp_io *io, dev_addr_t src, char *dest, size_t size); static size_t acl_kernel_if_write(acl_bsp_io *io, dev_addr_t dest, @@ -343,7 +349,9 @@ static acl_hal_t acl_hal_mmd = { acl_hal_mmd_close_devices, // close_devices acl_hal_mmd_host_alloc, // host_alloc acl_hal_mmd_free, // free - acl_hal_mmd_shared_alloc // shared_alloc + acl_hal_mmd_shared_alloc, // shared_alloc + acl_hal_mmd_simulation_streaming_kernel_start, // simulation_streaming_kernel_start + acl_hal_mmd_simulation_streaming_kernel_done, // simulation_streaming_kernel_done }; // This will contain the device physical id to tell us which device across all @@ -1876,6 +1884,14 @@ void acl_hal_mmd_copy_globalmem_to_globalmem(cl_event event, const void *src, void acl_hal_mmd_launch_kernel(unsigned int physical_device_id, acl_kernel_invocation_wrapper_t *wrapper) { acl_assert_locked(); + + const auto &streaming_args = wrapper->streaming_args; + if (!streaming_args.empty()) { + device_info[physical_device_id] + .mmd_dispatch->aocl_mmd_simulation_streaming_kernel_args( + device_info[physical_device_id].handle, streaming_args); + } + acl_kernel_if_launch_kernel(&kern[physical_device_id], wrapper); } @@ -2816,3 +2832,18 @@ unsigned acl_convert_mmd_capabilities(unsigned mmd_capabilities) { } return capability; } + +void acl_hal_mmd_simulation_streaming_kernel_start( + unsigned int physical_device_id, const std::string &kernel_name) { + device_info[physical_device_id] + .mmd_dispatch->aocl_mmd_simulation_streaming_kernel_start( + device_info[physical_device_id].handle, kernel_name); +} + +void acl_hal_mmd_simulation_streaming_kernel_done( + unsigned int physical_device_id, const std::string &kernel_name, + unsigned int &finish_counter) { + device_info[physical_device_id] + .mmd_dispatch->aocl_mmd_simulation_streaming_kernel_done( + device_info[physical_device_id].handle, kernel_name, finish_counter); +} diff --git 
a/src/acl_kernel.cpp b/src/acl_kernel.cpp index 70c562e8..43fc1657 100644 --- a/src/acl_kernel.cpp +++ b/src/acl_kernel.cpp @@ -103,10 +103,10 @@ static cl_int l_enqueue_kernel_with_type( const cl_event *event_wait_list, cl_event *event, cl_command_type type); static void l_get_arg_offset_and_size(cl_kernel kernel, cl_uint arg_index, size_t *start_idx_ret, size_t *size_ret); -static cl_int -l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device, - char *buf, cl_uint *num_bytes, - acl_mem_migrate_t *memory_migration); +static cl_int l_copy_and_adjust_arguments_for_device( + cl_kernel kernel, cl_device_id device, char *buf, cl_uint *num_bytes, + acl_mem_migrate_t *memory_migration, + std::vector &streaming_args); static void l_abort_use_of_wrapper(acl_kernel_invocation_wrapper_t *wrapper); @@ -2178,7 +2178,7 @@ static cl_int l_enqueue_kernel_with_type( kernel_arg_bytes = (cl_uint)l_copy_and_adjust_arguments_for_device( kernel, device, &(invocation->arg_value[0]), &kernel_arg_bytes, - &memory_migration); + &memory_migration, serialization_wrapper->streaming_args); assert(kernel_arg_bytes <= kernel->arg_value_size); @@ -2242,7 +2242,7 @@ static cl_int l_enqueue_kernel_with_type( status = l_copy_and_adjust_arguments_for_device( kernel, device, &(invocation->arg_value[0]), &kernel_arg_bytes, - &memory_migration); + &memory_migration, wrapper->streaming_args); if (status != CL_SUCCESS) { ERR_RET(status, context, "Argument error"); @@ -2738,10 +2738,10 @@ int acl_num_non_null_mem_args(cl_kernel kernel) { // // Returns number of bytes written to the device-side buffer in num_bytes. // Returns failure if memory could not be reserved on the device. 
-static cl_int -l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device, - char *buf, cl_uint *num_bytes, - acl_mem_migrate_t *memory_migration) { +static cl_int l_copy_and_adjust_arguments_for_device( + cl_kernel kernel, cl_device_id device, char *buf, cl_uint *num_bytes, + acl_mem_migrate_t *memory_migration, + std::vector &streaming_args) { // indices into the host and device arg value buffer arrays. size_t host_idx = 0; size_t device_idx = 0; @@ -2770,6 +2770,9 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device, next_local[aspace.aspace_id] += l_round_up_for_alignment(aspace.static_demand); } + + streaming_args.clear(); + #ifdef MEM_DEBUG_MSG printf("kernel args\n"); #endif @@ -2785,7 +2788,17 @@ l_copy_and_adjust_arguments_for_device(cl_kernel kernel, cl_device_id device, // Exclude kernel argument value from device-side buffer by default. cl_uint buf_incr = 0; - if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) { + if (arg_info->streaming_arg_info_available) { +#ifdef MEM_DEBUG_MSG + printf("streaming"); +#endif + // Copy argument value to a separate buffer since it may be modified with + // clSetKernelArg() after kernel is enqueued but before it is launched. 
+ const char *const arg_value = &kernel->arg_value[host_idx]; + streaming_args.emplace_back(aocl_mmd_streaming_kernel_arg_info_t{ + arg_info->streaming_arg_info.interface_name, + std::vector(arg_value, arg_value + arg_info->size)}); + } else if (arg_info->addr_space == ACL_ARG_ADDR_LOCAL) { #ifdef MEM_DEBUG_MSG printf("local"); #endif diff --git a/src/acl_kernel_if.cpp b/src/acl_kernel_if.cpp index 1d375a80..06fd80f8 100644 --- a/src/acl_kernel_if.cpp +++ b/src/acl_kernel_if.cpp @@ -948,6 +948,16 @@ int acl_kernel_if_update(const acl_device_def_autodiscovery_t &devdef, break; } } + + kern->streaming_control_kernel_names.clear(); + kern->streaming_control_kernel_names.reserve(devdef.accel.size()); + for (const auto &accel : devdef.accel) { + std::optional kernel_name; + if (accel.streaming_control_info_available) { + kernel_name = accel.iface.name; + } + kern->streaming_control_kernel_names.emplace_back(kernel_name); + } } // Do reset @@ -1256,6 +1266,13 @@ void acl_kernel_if_launch_kernel_on_custom_sof( } kern->accel_queue_front[accel_id] = next_launch_index; + if (kern->streaming_control_kernel_names[accel_id]) { + acl_get_hal()->simulation_streaming_kernel_start( + kern->physical_device_id, + *kern->streaming_control_kernel_names[accel_id]); + return; + } + unsigned int new_csr = 0; acl_kernel_cra_read(kern, accel_id, KERNEL_OFFSET_CSR, &new_csr); ACL_KERNEL_SET_BIT(new_csr, KERNEL_CSR_GO); @@ -1492,8 +1509,15 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) { unsigned int finish_counter = 0; unsigned int printf_size = 0; - acl_kernel_if_update_status_query(kern, accel_id, activation_id, - finish_counter, printf_size); + + if (kern->streaming_control_kernel_names[accel_id]) { + acl_get_hal()->simulation_streaming_kernel_done( + kern->physical_device_id, + *kern->streaming_control_kernel_names[accel_id], finish_counter); + } else { + acl_kernel_if_update_status_query(kern, accel_id, activation_id, + finish_counter, printf_size); + } if (!(finish_counter > 
0)) { continue; @@ -1507,8 +1531,10 @@ void acl_kernel_if_update_status(acl_kernel_if *kern) { // Tell the host library this job is done kern->accel_job_ids[accel_id][next_queue_back] = -1; - acl_kernel_if_update_status_finish(kern, accel_id, activation_id, - printf_size); + if (!kern->streaming_control_kernel_names[accel_id]) { + acl_kernel_if_update_status_finish(kern, accel_id, activation_id, + printf_size); + } // Executing the following update after reading from performance // and efficiency monitors will clobber the throughput reported by diff --git a/test/acl_auto_configure_test.cpp b/test/acl_auto_configure_test.cpp index 64bec97c..93668b54 100644 --- a/test/acl_auto_configure_test.cpp +++ b/test/acl_auto_configure_test.cpp @@ -488,174 +488,174 @@ TEST(auto_configure, many_ok_forward_compatibility) { "100 100 100 200 200 200 200 0 0 0 0 2 " "1 name1 1 name2 47 " "40 external_sort_stage_0 0 128 1 0 0 1 0 " - "1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 0 " - "0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 800 " + "1 0 1 10 0 0 4 1 0 0 0 500 500 500 0 0 " + "0 0 1 1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 external_sort_stage_1 256 128 1 0 0 1 " - "0 1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 " - "0 0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 " + "0 1 0 1 10 0 0 4 1 0 0 0 500 500 500 0 " + "0 0 0 1 1 1 3 1 1 1 3 1 0 0 800 800 " "800 " "40 external_sort_stage_2 512 128 1 0 0 1 " - "0 1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 " - "0 0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 " + "0 1 0 1 10 0 0 4 1 0 0 0 500 500 500 0 " + "0 0 0 1 1 1 3 1 1 1 3 1 0 0 800 800 " "800 " "40 external_sort_stage_3 768 128 1 0 0 1 " - "0 1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 " - "0 0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 " + "0 1 0 1 10 0 0 4 1 0 0 0 500 500 500 0 " + "0 0 0 1 1 1 3 1 1 1 3 1 0 0 800 800 " "800 " "40 external_sort_stage_4 1024 128 1 0 0 1 " - "0 1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 " - "0 0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 " + "0 1 0 1 10 0 0 4 1 0 0 0 500 500 500 0 " + "0 0 0 1 1 1 3 1 1 1 3 1 0 0 
800 800 " "800 " "40 external_sort_stage_5 1280 128 1 0 0 1 " - "0 1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 " - "0 0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 " + "0 1 0 1 10 0 0 4 1 0 0 0 500 500 500 0 " + "0 0 0 1 1 1 3 1 1 1 3 1 0 0 800 800 " "800 " "40 external_sort_stage_6 1536 128 1 0 0 1 " - "0 1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 " - "0 0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 " + "0 1 0 1 10 0 0 4 1 0 0 0 500 500 500 0 " + "0 0 0 1 1 1 3 1 1 1 3 1 0 0 800 800 " "800 " "38 external_stream_writer0 1792 256 1 0 0 " - "0 0 1 0 1 10 2 1 8 1024 0 0 500 500 500 " - "500 0 0 0 0 0 0 0 1 2147483647 3 1 800 " - "800 800 800 800 " + "0 0 1 0 1 10 2 1 8 1024 0 0 0 500 500 " + "500 0 0 0 0 0 0 0 1 2147483647 3 1 0 " + "0 800 800 800 " "38 external_stream_writer1 2048 256 1 0 0 " - "0 0 1 0 1 10 2 1 8 1024 0 0 500 500 500 " - "500 0 0 0 0 0 0 0 1 2147483647 3 1 800 " - "800 800 800 800 " + "0 0 1 0 1 10 2 1 8 1024 0 0 0 500 500 " + "500 0 0 0 0 0 0 0 1 2147483647 3 1 0 " + "0 800 800 800 " "38 external_stream_writer2 2304 256 1 0 0 " - "0 0 1 0 1 10 2 1 8 1024 0 0 500 500 500 " - "500 0 0 0 0 0 0 0 1 2147483647 3 1 800 " - "800 800 800 800 " + "0 0 1 0 1 10 2 1 8 1024 0 0 0 500 500 " + "500 0 0 0 0 0 0 0 1 2147483647 3 1 0 " + "0 800 800 800 " "38 external_stream_writer3 2560 256 1 0 0 " - "0 0 1 0 1 10 2 1 8 1024 0 0 500 500 500 " - "500 0 0 0 0 0 0 0 1 2147483647 3 1 800 " - "800 800 800 800 " + "0 0 1 0 1 10 2 1 8 1024 0 0 0 500 500 " + "500 0 0 0 0 0 0 0 1 2147483647 3 1 0 " + "0 800 800 800 " "38 external_stream_writer4 2816 256 1 0 0 " - "0 0 1 0 1 10 2 1 8 1024 0 0 500 500 500 " - "500 0 0 0 0 0 0 0 1 2147483647 3 1 800 " - "800 800 800 800 " + "0 0 1 0 1 10 2 1 8 1024 0 0 0 500 500 " + "500 0 0 0 0 0 0 0 1 2147483647 3 1 0 " + "0 800 800 800 " "38 external_stream_writer5 3072 256 1 0 0 " - "0 0 1 0 1 10 2 1 8 1024 0 0 500 500 500 " - "500 0 0 0 0 0 0 0 1 2147483647 3 1 800 " - "800 800 800 800 " + "0 0 1 0 1 10 2 1 8 1024 0 0 0 500 500 " + "500 0 0 0 0 0 0 0 1 2147483647 
3 1 0 " + "0 800 800 800 " "38 external_stream_writer6 3328 256 1 0 0 " - "0 0 1 0 1 10 2 1 8 1024 0 0 500 500 500 " - "500 0 0 0 0 0 0 0 1 2147483647 3 1 800 " - "800 800 800 800 " + "0 0 1 0 1 10 2 1 8 1024 0 0 0 500 500 " + "500 0 0 0 0 0 0 0 1 2147483647 3 1 0 " + "0 800 800 800 " "38 input_reader 3584 256 1 0 0 0 0 1 0 1 " - "10 2 1 8 1024 0 0 500 500 500 500 0 0 0 0 " - "0 0 0 1 2147483647 3 1 800 800 800 800 " + "10 2 1 8 1024 0 0 0 500 500 500 0 0 0 0 " + "0 0 0 1 2147483647 3 1 0 0 800 800 " "800 " "38 output_writer 3840 256 1 0 0 0 0 1 0 1 " - "10 2 1 8 1024 0 0 500 500 500 500 0 0 0 0 " - "0 0 0 1 2147483647 3 1 800 800 800 800 " + "10 2 1 8 1024 0 0 0 500 500 500 0 0 0 0 " + "0 0 0 1 2147483647 3 1 0 0 800 800 " "800 " "40 sort_stage_1 4096 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_10 4352 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_11 4608 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_12 4864 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_13 5120 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_14 5376 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 
0 0 800 800 800 " "40 sort_stage_15 5632 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_16 5888 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_17 6144 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_2 6400 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_3 6656 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_4 6912 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_5 7168 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_6 7424 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_7 7680 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_8 7936 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 
500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "40 sort_stage_9 8192 128 1 0 0 1 0 1 0 1 " - "10 0 0 4 1 0 0 500 500 500 500 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 " + "10 0 0 4 1 0 0 0 500 500 500 0 0 0 0 1 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 " "38 stream_reader_A0 8448 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_A1 8704 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_A2 8960 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_A3 9216 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_A4 9472 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_A5 9728 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_A6 9984 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_B0 10240 
256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_B1 10496 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_B2 10752 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_B3 11008 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_B4 11264 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_B5 11520 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 " "38 stream_reader_B6 11776 256 1 0 0 0 0 1 " - "0 1 10 2 1 8 1024 0 0 500 500 500 500 0 0 " - "0 0 0 0 0 1 2147483647 3 1 800 800 800 " + "0 1 10 2 1 8 1024 0 0 0 500 500 500 0 0 " + "0 0 0 0 0 1 2147483647 3 1 0 0 800 " "800 800 900 900 900 900 900"); std::vector device_defs(ACL_MAX_DEVICE); @@ -1202,7 +1202,7 @@ TEST(auto_configure, hostpipe) { "300 dev_to_host 0 1 32 32768 300 300 300 " "300 400 1 3 name3 400 0 " "1 29 foo 0 128 1 0 0 1 0 1 0 0 0 0 0 0 1 " - "1 1 3 1 1 1 3 1 800 800 800 800 800 900 " + "1 1 3 1 1 1 3 1 0 0 800 800 800 900 " "900" ); @@ -1227,3 +1227,171 @@ TEST(auto_configure, hostpipe) { CHECK_EQUAL(1, 
(int)device_def.autodiscovery_def.hal_info.size()); } } + +TEST(auto_configure, streaming) { + const std::string config_str{ + "23 26 " RANDOM_HASH + " pac_a10 0 1 13 DDR 2 2 24 1 2 0 4294967296 4294967296 8589934592 0 - 0 " + "0 0 0 1 3 device_global_name 256 128 1 103 _ZTS3CRCILi0EE 0 256 1 0 0 1 " + "0 1 0 9 8 0 0 8 1 0 0 1 k0_ZTS3CRCILi0EE_arg0 8 2 1 8 1024 0 3 1 " + "k0_ZTS3CRCILi0EE_arg1 8 0 0 8 1 0 0 1 k0_ZTS3CRCILi0EE_arg2 7 0 0 8 1 0 " + "0 0 7 0 0 8 1 0 0 0 7 2 1 8 1024 0 2 0 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 0 " + "7 0 0 8 1 0 0 0 0 0 1 2 64 4096 1 1 1 3 1 1 1 3 1 0 1"}; + + acl_device_def_autodiscovery_t devdef; + { + bool result; + std::string err_str; + ACL_LOCKED(result = + acl_load_device_def_from_str(config_str, devdef, err_str)); + std::cerr << err_str; + CHECK(result); + } + + CHECK_EQUAL(1, devdef.accel.size()); + + CHECK(!devdef.accel[0].is_sycl_compile); + CHECK(devdef.accel[0].streaming_control_info_available); + + const auto &args = devdef.accel[0].iface.args; + CHECK_EQUAL(9, args.size()); + + CHECK(args[0].streaming_arg_info_available); + CHECK("k0_ZTS3CRCILi0EE_arg0" == args[0].streaming_arg_info.interface_name); + + CHECK(args[1].streaming_arg_info_available); + CHECK("k0_ZTS3CRCILi0EE_arg1" == args[1].streaming_arg_info.interface_name); + + CHECK(args[2].streaming_arg_info_available); + CHECK("k0_ZTS3CRCILi0EE_arg2" == args[2].streaming_arg_info.interface_name); + + for (size_t i = 3; i < args.size(); ++i) { + CHECK(!args[i].streaming_arg_info_available); + } +} + +TEST(auto_configure, one_streaming_arg_and_streaming_kernel) { + const std::string config_str{ + "23 27 531091a097f0d7096b21f349b4b283f9e206ebc0 pac_s10 0 1 17 DDR 2 4 " + "24 1 2 0 8589934592 8589934592 17179869184 17179869184 25769803776 " + "25769803776 34359738368 0 - 0 0 0 0 0 0 1 123 _ZTS15binomial_kernel 0 " + "256 0 0 0 0 0 1 0 8 7 2 1 8 1024 0 2 0 8 0 0 8 1 0 0 1 " + "k0_ZTS15binomial_kernel_arg1 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 0 7 2 1 8 " + "1024 0 2 0 7 0 0 8 1 0 0 0 7 0 
0 8 1 0 0 0 7 0 0 8 1 0 0 0 0 0 16 2 64 " + "8196 65 8196 66 8196 67 8196 68 8196 69 8196 70 8196 71 8196 72 8196 73 " + "8196 74 8196 75 8196 76 8196 77 8196 78 8196 79 8196 1 1 1 3 1 1 1 3 1 " + "1 1"}; + + acl_device_def_autodiscovery_t devdef; + { + bool result; + std::string err_str; + ACL_LOCKED(result = + acl_load_device_def_from_str(config_str, devdef, err_str)); + std::cerr << err_str; + CHECK(result); + } + + CHECK_EQUAL(1, devdef.accel.size()); + + CHECK(devdef.accel[0].streaming_control_info_available); + + const auto &args = devdef.accel[0].iface.args; + CHECK_EQUAL(8, args.size()); + + CHECK(!args[0].streaming_arg_info_available); + + CHECK(args[1].streaming_arg_info_available); + CHECK("k0_ZTS15binomial_kernel_arg1" == + args[1].streaming_arg_info.interface_name); + + for (size_t i = 2; i < args.size(); ++i) { + CHECK(!args[i].streaming_arg_info_available); + } +} + +TEST(auto_configure, two_streaming_args_and_streaming_kernel) { + const std::string config_str{ + "23 27 531091a097f0d7096b21f349b4b283f9e206ebc0 pac_s10 0 1 17 DDR 2 4 " + "24 1 2 0 8589934592 8589934592 17179869184 17179869184 25769803776 " + "25769803776 34359738368 0 - 0 0 0 0 0 0 1 124 _ZTS15binomial_kernel 0 " + "256 0 0 0 0 0 1 0 8 8 2 1 8 1024 0 2 1 k0_ZTS15binomial_kernel_arg0 8 0 " + "0 8 1 0 0 1 k0_ZTS15binomial_kernel_arg1 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 " + "0 7 2 1 8 1024 0 2 0 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 0 0 " + "0 16 2 64 8196 65 8196 66 8196 67 8196 68 8196 69 8196 70 8196 71 8196 " + "72 8196 73 8196 74 8196 75 8196 76 8196 77 8196 78 8196 79 8196 1 1 1 3 " + "1 1 1 3 1 1 1"}; + + acl_device_def_autodiscovery_t devdef; + { + bool result; + std::string err_str; + ACL_LOCKED(result = + acl_load_device_def_from_str(config_str, devdef, err_str)); + std::cerr << err_str; + CHECK(result); + } + + CHECK_EQUAL(1, devdef.accel.size()); + + CHECK(devdef.accel[0].is_sycl_compile); + CHECK(devdef.accel[0].streaming_control_info_available); + + const auto &args = 
devdef.accel[0].iface.args; + CHECK_EQUAL(8, args.size()); + + CHECK(args[0].streaming_arg_info_available); + CHECK("k0_ZTS15binomial_kernel_arg0" == + args[0].streaming_arg_info.interface_name); + + CHECK(args[1].streaming_arg_info_available); + CHECK("k0_ZTS15binomial_kernel_arg1" == + args[1].streaming_arg_info.interface_name); + + for (size_t i = 2; i < args.size(); ++i) { + CHECK(!args[i].streaming_arg_info_available); + } +} + +TEST(auto_configure, two_streaming_args_and_non_streaming_kernel) { + const std::string config_str{ + "23 27 531091a097f0d7096b21f349b4b283f9e206ebc0 pac_s10 0 1 17 DDR 2 4 " + "24 1 2 0 8589934592 8589934592 17179869184 17179869184 25769803776 " + "25769803776 34359738368 0 - 0 0 0 0 0 0 1 124 _ZTS15binomial_kernel 0 " + "256 0 0 0 0 0 1 0 8 8 2 1 8 1024 0 2 1 k0_ZTS15binomial_kernel_arg0 8 0 " + "0 8 1 0 0 1 k0_ZTS15binomial_kernel_arg1 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 " + "0 7 2 1 8 1024 0 2 0 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 0 7 0 0 8 1 0 0 0 0 " + "0 16 2 64 8196 65 8196 66 8196 67 8196 68 8196 69 8196 70 8196 71 8196 " + "72 8196 73 8196 74 8196 75 8196 76 8196 77 8196 78 8196 79 8196 1 1 1 3 " + "1 1 1 3 1 1 0"}; + + acl_device_def_autodiscovery_t devdef; + { + bool result; + std::string err_str; + ACL_LOCKED(result = + acl_load_device_def_from_str(config_str, devdef, err_str)); + std::cerr << err_str; + CHECK(result); + } + + CHECK_EQUAL(1, devdef.accel.size()); + + CHECK(devdef.accel[0].is_sycl_compile); + CHECK(!devdef.accel[0].streaming_control_info_available); + + const auto &args = devdef.accel[0].iface.args; + CHECK_EQUAL(8, args.size()); + + CHECK(args[0].streaming_arg_info_available); + CHECK("k0_ZTS15binomial_kernel_arg0" == + args[0].streaming_arg_info.interface_name); + + CHECK(args[1].streaming_arg_info_available); + CHECK("k0_ZTS15binomial_kernel_arg1" == + args[1].streaming_arg_info.interface_name); + + for (size_t i = 2; i < args.size(); ++i) { + CHECK(!args[i].streaming_arg_info_available); + } +}