diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h index 5d4bcc20..8a317dce 100644 --- a/include/CL/cl_ext.h +++ b/include/CL/cl_ext.h @@ -2384,6 +2384,64 @@ clCreateBufferWithPropertiesINTEL_fn)( void * host_ptr, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; +/*********************************** +* cl_intel_program_scope_host_pipe * +***********************************/ +#define cl_intel_program_scope_host_pipe 1 +#define CL_INTEL_PROGRAM_SCOPE_HOST_PIPE_EXTENSION_NAME "cl_intel_program_scope_host_pipe" + +/* CL_COMMAND_*_HOST_PIPE_INTEL are new return values from clGetEventInfo when param_name is CL_EVENT_COMMAND_TYPE. NOTE(review): the CL_PROGRAM_*_INTEL values below are not command types; presumably they are clGetProgramInfo param_name values - confirm against the extension specification. */ +#define CL_COMMAND_READ_HOST_PIPE_INTEL 0x4214 +#define CL_COMMAND_WRITE_HOST_PIPE_INTEL 0x4215 +#define CL_PROGRAM_NUM_HOST_PIPES_INTEL 0x4216 +#define CL_PROGRAM_HOST_PIPE_NAMES_INTEL 0x4217 + +typedef cl_int (CL_API_CALL *clEnqueueReadHostPipeINTEL_fn )( + cl_command_queue command_queue, + cl_program program, + const char* pipe_symbol, + cl_bool blocking_read, + void* ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadHostPipeINTEL( + cl_command_queue command_queue, + cl_program program, + const char* pipe_symbol, + cl_bool blocking_read, + void* ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int (CL_API_CALL *clEnqueueWriteHostPipeINTEL_fn)( + cl_command_queue command_queue, + cl_program program, + const char* pipe_symbol, + cl_bool blocking_write, + const void* ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteHostPipeINTEL( + cl_command_queue command_queue, + cl_program program, + const char* pipe_symbol, + cl_bool blocking_write, + const void* ptr, + size_t size, + cl_uint 
num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_0; + /****************************************** * cl_intel_mem_channel_property extension * *******************************************/ diff --git a/include/acl.h b/include/acl.h index bc76c40b..9d8dbe80 100644 --- a/include/acl.h +++ b/include/acl.h @@ -524,6 +524,19 @@ struct acl_device_global_mem_def_t { bool reset_on_reuse; }; +// Mapping of logical to physical host pipes. +struct acl_hostpipe_mapping { + std::string logical_name; + std::string physical_name; // chan_id in the board_spec.xml + bool implement_in_csr; + std::string csr_address; // Store this as string as this value can be '-' for + // non-CSR pipe. + bool is_read; + bool is_write; + unsigned pipe_width; + unsigned pipe_depth; +}; + // Part of acl_device_def_t where members are populated from the information // in the autodiscovery string. This will get updated every time the device // is programmed with a new device binary as the new binary would contain a @@ -548,6 +561,8 @@ typedef struct acl_device_def_autodiscovery_t { device_global_mem_defs; bool cra_ring_root_exist = true; // Set the default value to true for backwards compatibility flows. 
+ + std::vector hostpipe_mappings; } acl_device_def_autodiscovery_t; typedef struct acl_device_def_t { diff --git a/include/acl_hal.h b/include/acl_hal.h index f3d4ad3c..483980a1 100644 --- a/include/acl_hal.h +++ b/include/acl_hal.h @@ -205,7 +205,7 @@ typedef struct { size_t read_size, int *status); /// Push write_size of data to the device from host_buffer size_t (*hostchannel_push)(unsigned int physical_device_id, - int channel_handle, void *host_buffer, + int channel_handle, const void *host_buffer, size_t write_size, int *status); /// Get a pointer to the mmd buffer for the host channel void *(*hostchannel_get_buffer)(unsigned int physical_device_id, @@ -246,6 +246,12 @@ typedef struct { void (*simulation_streaming_kernel_done)(unsigned int physical_device_id, const std::string &signal_name, unsigned int &finish_counter); + + size_t (*read_csr)(unsigned int physical_device_id, uintptr_t offset, + void *ptr, size_t size); + + size_t (*write_csr)(unsigned int physical_device_id, uintptr_t offset, + const void *ptr, size_t size); } acl_hal_t; /// Linked list of MMD library names to load. 
diff --git a/include/acl_hostch.h b/include/acl_hostch.h index eb7b095d..73c1402b 100644 --- a/include/acl_hostch.h +++ b/include/acl_hostch.h @@ -27,6 +27,19 @@ void acl_bind_and_process_all_pipes_transactions( cl_context context, cl_device_id device, const acl_device_def_autodiscovery_t &devdef); +// Submit a program hostpipe read device operation to the device op queue. +// Returns 1 if the op was committed, 0 otherwise; the queue later executes it through acl_read_program_hostpipe. +cl_int acl_submit_read_program_hostpipe_device_op(cl_event event); +// Submit a program hostpipe write device operation to the device op queue. Returns 1 if the op was committed, 0 otherwise; the queue later executes it through acl_write_program_hostpipe. +cl_int acl_submit_write_program_hostpipe_device_op(cl_event event); + +// Read from a program hostpipe (device op queue callback for ACL_DEVICE_OP_HOSTPIPE_READ) +void acl_read_program_hostpipe(void *user_data, acl_device_op_t *op); + +// Write into a program hostpipe +// (device op queue callback for ACL_DEVICE_OP_HOSTPIPE_WRITE) +void acl_write_program_hostpipe(void *user_data, acl_device_op_t *op); + #define HOST_TO_DEVICE 1 #define DEVICE_TO_HOST 0 diff --git a/include/acl_types.h b/include/acl_types.h index 5300ee5d..aeaf7ae4 100644 --- a/include/acl_types.h +++ b/include/acl_types.h @@ -285,6 +285,54 @@ typedef void(CL_CALLBACK *acl_event_notify_fn_t)( typedef void(CL_CALLBACK *acl_mem_destructor_notify_fn_t)(cl_mem memobj, void *user_data); +enum host_op_type { MAP, PACKET }; + +typedef struct host_op_struct { + enum host_op_type m_op; + void *m_mmd_buffer; + void *m_host_buffer; + size_t m_op_size; + size_t m_size_sent; +} host_op_t; + +typedef struct host_pipe_struct { + // The handle to the device needed for mmd call + unsigned int m_physical_device_id; + + // The channel handle returned by mmd create hostch call + int m_channel_handle; + + // Operations on the host pipe are queued here. 
map/packet ops + std::deque m_host_op_queue; + // host_op_t * m_host_op_queue; + + // The total size of the operations we're queuing on the pinned mmd buffer + size_t size_buffered; + + // The kernel that the host pipe is bound to + cl_kernel m_binded_kernel; + + // Is host pipe bound to kernel device, only done at kernel enqueue time + bool binded; + + // Channel ID name that the host pipe will use to do the binding + std::string host_pipe_channel_id; + + // Pipe specific lock. Obtained every time we do an operation on the pipe + acl_mutex_t m_lock; + + // The following are the new entries introduced by the program scoped + // hostpipes + + // Whether this program hostpipe is implemented in the CSR + bool implement_in_csr; + + // The CSR address of this hostpipe. Compiler passes a csr_address = '-' for + // non-CSR program hostpipe + std::string csr_address; + +} host_pipe_t; + // The device-specific information about a program. // // This object is owned by the cl_program that creates it during @@ -350,6 +398,9 @@ class acl_device_program_info_t { // Return all the names of the kernels in this device program. std::set get_all_kernel_names() const; + // Map logical hostpipe name to the hostpipe struct + std::unordered_map program_hostpipe_map; + private: // This map is only used when split_kernel == 1 in the enclosing // cl_context. This maps a hashed kernel name to the acl_device_binary_t that @@ -551,6 +602,17 @@ typedef struct { } ndrange_kernel; + struct { + // Used for program scoped hostpipe reads and writes: ptr is the destination of a read, write_ptr the source of a write + size_t size; + void *ptr; + const void *write_ptr; + bool blocking; + const char *logical_name; // A char* is used instead of a std::string because of a + // malloc related compilation error in the acl_command_info_t + // constructor. Not owned: it must stay valid until the op has executed. + } host_pipe_info; + // Reprogram the device, without an associated kernel enqueue. // This is used to hide the latency of device programming on host // program startup. 
@@ -684,43 +746,6 @@ typedef struct acl_mem_destructor_user_callback { mem_destructor_notify_fn; // The callback function, provided by the user. } acl_mem_destructor_user_callback; -enum host_op_type { MAP, PACKET }; - -typedef struct host_op_struct { - enum host_op_type m_op; - void *m_mmd_buffer; - void *m_host_buffer; - size_t m_op_size; - size_t m_size_sent; -} host_op_t; - -typedef struct host_pipe_struct { - // The handle to the device needed for mmd call - unsigned int m_physical_device_id; - - // The channel handle returned by mmd create hostch call - int m_channel_handle; - - // Operations on the host pipe are queued here. map/packet ops - std::deque m_host_op_queue; - // host_op_t * m_host_op_queue; - - // The total size of the operations we're queuing on the pinned mmd buffer - size_t size_buffered; - - // The kernel that the host pipe is binded to - cl_kernel m_binded_kernel; - - // Is host pipe binded to kernel device, only done at kernel enqueue time - bool binded; - - // Channel ID name that the host pipe will use to do the binding - std::string host_pipe_channel_id; - - // Pipe specific lock. Obtained every time we do an operation on the pipe - acl_mutex_t m_lock; -} host_pipe_t; - // The bookkeeping required to keep track of a block of allocated memory. // The storage for these structs is owned by the acl_platform object. // But these structs are only valid when attached to a valid context. @@ -1298,10 +1323,12 @@ typedef enum { // USM Memcpy that should call HAL's copy API without any extra work. // Corresponds to acl_command_info_t.info.ptr_xfer , - ACL_DEVICE_OP_USM_MEMCPY - - , + ACL_DEVICE_OP_USM_MEMCPY, + // Program scoped hostpipe read or write. Corresponds to acl_command_info_t.info.host_pipe_info + ACL_DEVICE_OP_HOSTPIPE_READ, + ACL_DEVICE_OP_HOSTPIPE_WRITE, ACL_NUM_DEVICE_OP_TYPES + } acl_device_op_type_t; // These are device operation conflict types. 
@@ -1327,6 +1354,11 @@ typedef enum { , ACL_CONFLICT_REPROGRAM // Acts like a device reprogram , + ACL_CONFLICT_HOSTPIPE_READ // Acts like a hostpipe read from the host channel + , + ACL_CONFLICT_HOSTPIPE_WRITE // Acts like a hostpipe write to the host + // channel + , ACL_NUM_CONFLICT_TYPES } acl_device_op_conflict_type_t; @@ -1541,6 +1573,8 @@ typedef struct acl_device_op_queue_t { void (*usm_memcpy)(void *, acl_device_op_t *); // For test purposes, log transition to CL_RUNNING, CL_COMPLETE void (*log_update)(void *, acl_device_op_t *, int new_status); + void (*hostpipe_read)(void *, acl_device_op_t *); // Executes ACL_DEVICE_OP_HOSTPIPE_READ ops + void (*hostpipe_write)(void *, acl_device_op_t *); // Executes ACL_DEVICE_OP_HOSTPIPE_WRITE ops void *user_data; // The first argument provided to the callbacks. } acl_device_op_queue_t; diff --git a/src/acl_auto_configure.cpp b/src/acl_auto_configure.cpp index c9a177fd..562a8141 100644 --- a/src/acl_auto_configure.cpp +++ b/src/acl_auto_configure.cpp @@ -198,6 +198,38 @@ static bool read_ulonglong_counters(const std::string &str, return true; } +// Reads the next word in str and converts it into a uintptr_t. +// Returns true if a valid integer that fits in a uintptr_t was read or false if an error occurred. +// pos is updated to the position immediately following the parsed word +// even if an error occurs. 
+static bool read_uintptr_counters(const std::string &str, + std::string::size_type &pos, uintptr_t &val, + std::vector &counters) noexcept { + std::string result; + pos = read_word(str, pos, result); + decrement_section_counters(counters); + + size_t end = 0; + unsigned long long parsed; + try { + parsed = std::stoull(result, &end); + } catch (const std::exception &) { + return false; + } + if (end != result.size()) { + return false; + } + + val = static_cast(parsed); + // Make sure the cast did not truncate the value, + // as `unsigned long long` might have a different size than `uintptr_t`. + if (val != parsed) { + return false; + } + + return true; +} + // Reads the next word in str and converts it into an unsigned or using its // default value. Returns true if a valid integer was read or false if an error // occurred. pos is updated to the position immediately following the parsed @@ -593,6 +625,51 @@ static bool read_streaming_kernel_arg_info( result = read_string_counters(config_str, curr_pos, streaming_arg_info.interface_name, counters); } + return result; +} + +static bool read_hostpipe_mappings( + const std::string &config_str, std::string::size_type &curr_pos, + std::vector &hostpipe_mappings, + std::vector &counters, std::string &err_str) noexcept { + unsigned int num_mappings = 0; + bool result = + read_uint_counters(config_str, curr_pos, num_mappings, counters); + + unsigned int num_fields_per_mapping = 0; + if (result) { + result = read_uint_counters(config_str, curr_pos, num_fields_per_mapping, + counters); + } + + for (unsigned int i = 0; result && (i < num_mappings); i++) { + counters.emplace_back(num_fields_per_mapping); + + acl_hostpipe_mapping mapping{}; + result = + read_string_counters(config_str, curr_pos, mapping.logical_name, + counters) && + read_string_counters(config_str, curr_pos, mapping.physical_name, + counters) && + read_bool_counters(config_str, curr_pos, mapping.implement_in_csr, + counters) && + read_string_counters(config_str, curr_pos, 
mapping.csr_address, + counters) && + read_bool_counters(config_str, curr_pos, mapping.is_read, counters) && + read_bool_counters(config_str, curr_pos, mapping.is_write, counters) && + read_uint_counters(config_str, curr_pos, mapping.pipe_width, + counters) && + read_uint_counters(config_str, curr_pos, mapping.pipe_depth, counters); + + hostpipe_mappings.emplace_back(mapping); + + while (result && counters.back() > 0) { + std::string tmp; + result = read_string_counters(config_str, curr_pos, tmp, counters); + } + check_section_counters(counters); + counters.pop_back(); + } return result; } @@ -771,7 +848,6 @@ static bool read_accel_defs(const std::string &config_str, accel = std::vector(num_accel); hal_info = std::vector(num_accel); } - // Setup the accelerators for (auto i = 0U; result && (i < num_accel); i++) { accel[i].id = i; @@ -863,6 +939,7 @@ static bool read_accel_defs(const std::string &config_str, result = read_int_counters(config_str, curr_pos, total_fields_printf, counters); } + for (auto j = 0U; result && (j < accel[i].printf_format_info.size()); j++) { counters.emplace_back(total_fields_printf); result = @@ -1131,6 +1208,12 @@ bool acl_load_device_def_from_str(const std::string &config_str, devdef.cra_ring_root_exist, counters); } + // Read program scoped hostpipes mappings + if (result && counters.back() > 0) { + result = read_hostpipe_mappings( + config_str, curr_pos, devdef.hostpipe_mappings, counters, err_str); + } + // forward compatibility: bypassing remaining fields at the end of device // description section while (result && counters.size() > 0 && diff --git a/src/acl_command.cpp b/src/acl_command.cpp index 23a8c128..c8e21fdf 100644 --- a/src/acl_command.cpp +++ b/src/acl_command.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -371,6 +372,14 @@ int acl_submit_command(cl_event event) { result = acl_submit_migrate_mem_device_op(event); break; + case CL_COMMAND_READ_HOST_PIPE_INTEL: + result = 
acl_submit_read_program_hostpipe_device_op(event); + break; + + case CL_COMMAND_WRITE_HOST_PIPE_INTEL: + result = acl_submit_write_program_hostpipe_device_op(event); + break; + default: acl_print_debug_msg(" acl_submit_command: unknown cmd type %d\n", event->cmd.type); diff --git a/src/acl_device_op.cpp b/src/acl_device_op.cpp index fe10af2d..6f2b4cce 100644 --- a/src/acl_device_op.cpp +++ b/src/acl_device_op.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -108,47 +109,55 @@ static unsigned char conflict_matrix_half_duplex [ACL_NUM_CONFLICT_TYPES][ACL_NUM_CONFLICT_TYPES] = { // NONE, MEM_READ, MEM_WRITE, MEM_RW, KERNEL, - // PROGRAM + // PROGRAM, HOSTPIPE_READ, HOSTPIPE_WRITE // NONE vs. - {0, 0, 0, 0, 0, 1} + {0, 0, 0, 0, 0, 1, 0, 0} // MEM_READ vs. , - {0, 1, 1, 1, 0, 1} + {0, 1, 1, 1, 0, 1, 1, 1} // MEM_WRITE vs. , - {0, 1, 1, 1, 0, 1} + {0, 1, 1, 1, 0, 1, 1, 1} // MEM_RW vs. , - {0, 1, 1, 1, 0, 1} + {0, 1, 1, 1, 0, 1, 1, 1} // KERNEL vs. , - {0, 0, 0, 0, 0, 1} + {0, 0, 0, 0, 0, 1, 0, 0} // PROGRAM vs. , - {1, 1, 1, 1, 1, 1}}; + {1, 1, 1, 1, 1, 1, 1, 1}, + // HOSTPIPE_READ vs. + {0, 1, 1, 1, 0, 1, 0, 0}, + // HOSTPIPE_WRITE vs. + {0, 1, 1, 1, 0, 1, 0, 0}}; static unsigned char conflict_matrix_full_duplex [ACL_NUM_CONFLICT_TYPES][ACL_NUM_CONFLICT_TYPES] = { // NONE, MEM_READ, MEM_WRITE, MEM_RW, KERNEL, - // PROGRAM + // PROGRAM, HOSTPIPE_READ, HOSTPIPE_WRITE // NONE vs. - {0, 0, 0, 0, 0, 1} + {0, 0, 0, 0, 0, 1, 0, 0} // MEM_READ vs. , - {0, 1, 0, 1, 0, 1} + {0, 1, 0, 1, 0, 1, 1, 1} // MEM_WRITE vs. , - {0, 0, 1, 1, 0, 1} + {0, 0, 1, 1, 0, 1, 1, 1} // MEM_RW vs. , - {0, 1, 1, 1, 0, 1} + {0, 1, 1, 1, 0, 1, 1, 1} // KERNEL vs. , - {0, 0, 0, 0, 0, 1} + {0, 0, 0, 0, 0, 1, 0, 0} // PROGRAM vs. , - {1, 1, 1, 1, 1, 1}}; + {1, 1, 1, 1, 1, 1, 1, 1}, + // HOSTPIPE_READ vs. + {0, 1, 1, 1, 0, 1, 0, 0}, + // HOSTPIPE_WRITE vs. 
+ {0, 1, 1, 1, 0, 1, 0, 0}}; static const char *l_type_name(int op_type) { switch (op_type) { @@ -176,6 +185,12 @@ static const char *l_type_name(int op_type) { case ACL_DEVICE_OP_USM_MEMCPY: return "USM_MEMCPY"; break; + case ACL_DEVICE_OP_HOSTPIPE_READ: + return "HOSTPIPE_READ"; + break; + case ACL_DEVICE_OP_HOSTPIPE_WRITE: + return "HOSTPIPE_WRITE"; + break; default: return ""; break; @@ -262,6 +277,8 @@ void acl_init_device_op_queue_limited(acl_device_op_queue_t *doq, doq->program_device = acl_program_device; doq->migrate_buffer = acl_mem_migrate_buffer; doq->usm_memcpy = acl_usm_memcpy; + doq->hostpipe_read = acl_read_program_hostpipe; + doq->hostpipe_write = acl_write_program_hostpipe; doq->log_update = 0; for (i = 0; i < ACL_MAX_DEVICE; i++) { @@ -311,6 +328,12 @@ acl_device_op_conflict_type_t acl_device_op_conflict_type(acl_device_op_t *op) { case ACL_DEVICE_OP_REPROGRAM: result = ACL_CONFLICT_REPROGRAM; break; + case ACL_DEVICE_OP_HOSTPIPE_READ: + result = ACL_CONFLICT_HOSTPIPE_READ; + break; + case ACL_DEVICE_OP_HOSTPIPE_WRITE: + result = ACL_CONFLICT_HOSTPIPE_WRITE; + break; case ACL_DEVICE_OP_NONE: case ACL_NUM_DEVICE_OP_TYPES: result = ACL_CONFLICT_NONE; @@ -598,6 +621,15 @@ l_get_devices_affected_for_op(acl_device_op_t *op, unsigned int physical_ids[], num_devices_affected = 1; } break; + case ACL_DEVICE_OP_HOSTPIPE_READ: + case ACL_DEVICE_OP_HOSTPIPE_WRITE: + if (acl_event_is_valid(event) && + acl_command_queue_is_valid(event->command_queue)) { + physical_ids[0] = event->command_queue->device->def.physical_device_id; + conflicts[0] = acl_device_op_conflict_type(op); + num_devices_affected = 1; + } + break; case ACL_DEVICE_OP_NONE: case ACL_NUM_DEVICE_OP_TYPES: break; @@ -606,6 +638,7 @@ l_get_devices_affected_for_op(acl_device_op_t *op, unsigned int physical_ids[], if (num_devices_affected == 0) { // This case is only valid for unit tests // Make assumptions on which devices are affected + // Possible TODO to add for Hostpipe read and write if (event 
&& event->context && op) { if (event->context->num_devices >= 2 && op->info.type != ACL_DEVICE_OP_KERNEL && @@ -960,7 +993,9 @@ unsigned l_update_device_op_queue_once(acl_device_op_queue_t *doq) { } else if (op->info.event && (op->info.type == ACL_DEVICE_OP_MEM_TRANSFER_READ || op->info.type == ACL_DEVICE_OP_MEM_TRANSFER_WRITE || - op->info.type == ACL_DEVICE_OP_MEM_TRANSFER_COPY)) { + op->info.type == ACL_DEVICE_OP_MEM_TRANSFER_COPY || + op->info.type == ACL_DEVICE_OP_HOSTPIPE_READ || + op->info.type == ACL_DEVICE_OP_HOSTPIPE_WRITE)) { if (!acl_mem_op_requires_transfer(op->info.event->cmd)) { is_conflicting = 0; } @@ -1306,6 +1341,12 @@ void acl_submit_device_op(acl_device_op_queue_t *doq, acl_device_op_t *op) { case ACL_DEVICE_OP_USM_MEMCPY: DOIT(usm_memcpy, op); break; + case ACL_DEVICE_OP_HOSTPIPE_READ: + DOIT(hostpipe_read, op); + break; + case ACL_DEVICE_OP_HOSTPIPE_WRITE: + DOIT(hostpipe_write, op); + break; default: break; } diff --git a/src/acl_event.cpp b/src/acl_event.cpp index 40754dec..df1395f9 100644 --- a/src/acl_event.cpp +++ b/src/acl_event.cpp @@ -650,6 +650,10 @@ static void l_release_command_resources(acl_command_info_t &cmd) { cmd.info.memory_migration.num_alloc = 0; break; + case CL_COMMAND_READ_HOST_PIPE_INTEL: + case CL_COMMAND_WRITE_HOST_PIPE_INTEL: + // Nothing to cleanup + break; default: break; } @@ -1150,6 +1154,8 @@ void acl_dump_event(cl_event event) { NNN(CL_COMMAND_MAP_BUFFER) NNN(CL_COMMAND_WAIT_FOR_EVENTS_INTELFPGA) NNN(CL_COMMAND_PROGRAM_DEVICE_INTELFPGA) + NNN(CL_COMMAND_READ_HOST_PIPE_INTEL) + NNN(CL_COMMAND_WRITE_HOST_PIPE_INTEL) default: break; } diff --git a/src/acl_hal_mmd.cpp b/src/acl_hal_mmd.cpp index 7f7394d2..8d9810a0 100644 --- a/src/acl_hal_mmd.cpp +++ b/src/acl_hal_mmd.cpp @@ -126,7 +126,7 @@ size_t acl_hal_mmd_hostchannel_pull(unsigned int physical_device_id, int channel_handle, void *host_buffer, size_t read_size, int *status); size_t acl_hal_mmd_hostchannel_push(unsigned int physical_device_id, - int 
channel_handle, void *host_buffer, + int channel_handle, const void *host_buffer, size_t write_size, int *status); void *acl_hal_mmd_hostchannel_get_buffer(unsigned int physical_device_id, int channel_handle, @@ -161,6 +161,12 @@ void acl_hal_mmd_simulation_streaming_kernel_done( unsigned int physical_device_id, const std::string &kernel_name, unsigned int &finish_counter); +size_t acl_hal_mmd_read_csr(unsigned int physical_device_id, uintptr_t offset, + void *ptr, size_t size); + +size_t acl_hal_mmd_write_csr(unsigned int physical_device_id, uintptr_t offset, + const void *ptr, size_t size); + static size_t acl_kernel_if_read(acl_bsp_io *io, dev_addr_t src, char *dest, size_t size); static size_t acl_kernel_if_write(acl_bsp_io *io, dev_addr_t dest, @@ -354,6 +360,8 @@ static acl_hal_t acl_hal_mmd = { acl_hal_mmd_shared_alloc, // shared_alloc acl_hal_mmd_simulation_streaming_kernel_start, // simulation_streaming_kernel_start acl_hal_mmd_simulation_streaming_kernel_done, // simulation_streaming_kernel_done + acl_hal_mmd_read_csr, // read_csr + acl_hal_mmd_write_csr, // write_csr }; // This will contain the device physical id to tell us which device across all @@ -2200,6 +2208,7 @@ size_t acl_hal_mmd_hostchannel_pull(unsigned int physical_device_id, pull_buffer = device_info[physical_device_id] .mmd_dispatch->aocl_mmd_hostchannel_get_buffer( pcie_dev_handle, channel_handle, &buffer_size, status); + if ((NULL == pull_buffer) || (0 == buffer_size)) { return 0; } @@ -2226,7 +2235,7 @@ size_t acl_hal_mmd_hostchannel_pull(unsigned int physical_device_id, } size_t acl_hal_mmd_hostchannel_push(unsigned int physical_device_id, - int channel_handle, void *host_buffer, + int channel_handle, const void *host_buffer, size_t write_size, int *status) { size_t buffer_size = 0; size_t pushed; @@ -2242,6 +2251,7 @@ size_t acl_hal_mmd_hostchannel_push(unsigned int physical_device_id, push_buffer = device_info[physical_device_id] .mmd_dispatch->aocl_mmd_hostchannel_get_buffer( 
pcie_dev_handle, channel_handle, &buffer_size, status); + if ((NULL == push_buffer) || (0 == buffer_size)) { return 0; } @@ -2250,6 +2260,7 @@ size_t acl_hal_mmd_hostchannel_push(unsigned int physical_device_id, buffer_size = (write_size > buffer_size) ? buffer_size : write_size; // Copy the data into the push buffer + safe_memcpy(push_buffer, host_buffer, buffer_size, buffer_size, buffer_size); // Acknowledge host channel MMD that copy of data to its buffer is done @@ -2263,7 +2274,6 @@ size_t acl_hal_mmd_hostchannel_push(unsigned int physical_device_id, // amount of space get buffer said was available are not equal, something went // wrong assert(pushed == buffer_size); - return pushed; } @@ -2854,3 +2864,17 @@ void acl_hal_mmd_simulation_streaming_kernel_done( .mmd_dispatch->aocl_mmd_simulation_streaming_kernel_done( device_info[physical_device_id].handle, kernel_name, finish_counter); } + +size_t acl_hal_mmd_read_csr(unsigned int physical_device_id, uintptr_t offset, + void *ptr, size_t size) { + return device_info[physical_device_id].mmd_dispatch->aocl_mmd_read( + device_info[physical_device_id].handle, NULL, size, (void *)ptr, + kernel_interface, (size_t)offset); +} + +size_t acl_hal_mmd_write_csr(unsigned int physical_device_id, uintptr_t offset, + const void *ptr, size_t size) { + return device_info[physical_device_id].mmd_dispatch->aocl_mmd_write( + device_info[physical_device_id].handle, NULL, size, (const void *)ptr, + kernel_interface, (size_t)offset); +} diff --git a/src/acl_hostch.cpp b/src/acl_hostch.cpp index 0f9580fe..1e38d31f 100644 --- a/src/acl_hostch.cpp +++ b/src/acl_hostch.cpp @@ -14,8 +14,11 @@ // Internal headers. 
#include #include +#include +#include #include #include +#include #include #ifdef __GNUC__ @@ -24,7 +27,7 @@ /* Local Functions */ static cl_int l_push_packet(unsigned int physical_device_id, int channel_handle, - void *host_buffer, size_t write_size) { + const void *host_buffer, size_t write_size) { size_t pushed_data; int status = 0; @@ -682,6 +685,404 @@ clUnmapHostPipeIntelFPGA(cl_mem pipe, void *mapped_ptr, size_t size_to_unmap, return CL_SUCCESS; } +// Ideally this should be passed from the autodiscovery string. +static constexpr unsigned csr_pipe_address_offet = 8; + +// Device op queue callback that executes a program scoped hostpipe read. +// Reads host_pipe_info.size bytes from the pipe named by +// host_pipe_info.logical_name into host_pipe_info.ptr, through the CSR +// registers when the pipe is implemented in the CSR and through the host +// channel otherwise. The op ends as CL_COMPLETE on success or -1 on failure. +void acl_read_program_hostpipe(void *user_data, acl_device_op_t *op) { + + cl_event event = op->info.event; + cl_int status = 0; + size_t pulled_data = 0; + acl_assert_locked(); + + if (!acl_event_is_valid(event) || + !acl_command_queue_is_valid(event->command_queue)) { + acl_set_device_op_execution_status(op, -1); + return; + } + // Only touch the command once the event is known to be valid. + const bool blocking = event->cmd.info.host_pipe_info.blocking; + + acl_device_program_info_t *dev_prog = + event->command_queue->device->loaded_bin->get_dev_prog(); + // Bind by reference: a copy would lock a private copy of m_lock and give no + // mutual exclusion on the real pipe. + auto &host_pipe_info = dev_prog->program_hostpipe_map.at( + std::string(event->cmd.info.host_pipe_info.logical_name)); + acl_mutex_lock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, CL_SUBMITTED); + acl_set_device_op_execution_status(op, CL_RUNNING); + + if (host_pipe_info.implement_in_csr) { + // CSR read, currently only blocking version is implemented + unsigned long long parsed; + uintptr_t data_reg, ready_reg, valid_reg; + // Convert the CSR address to a pointer + try { + parsed = std::stoull(host_pipe_info.csr_address, nullptr); + } catch (const std::exception &) { + // Do not leave the pipe locked on the error path. + acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, -1); + return; + } + + data_reg = static_cast(parsed); + ready_reg = static_cast( + parsed + + csr_pipe_address_offet); // ready reg is data reg shift by 8 byte + valid_reg = static_cast( + parsed + + csr_pipe_address_offet * 2); // valid reg is ready reg shift by 8 byte + unsigned 
ready = 1; + // The status registers are transferred as uintptr_t-sized words, so the + // buffers handed to the HAL must be at least that large. + uintptr_t valid_value = 0; + uintptr_t *valid_value_pointer = &valid_value; + + // start the CSR read + + // Checking if the data is valid, blocking. Only the low word is compared, + // which is what the previous 'unsigned' buffer observed on a little-endian + // host. + do { + acl_get_hal()->read_csr(host_pipe_info.m_physical_device_id, valid_reg, + (void *)valid_value_pointer, + sizeof(valid_value)); + } while (static_cast<unsigned>(valid_value) != 1); + + pulled_data = + acl_get_hal()->read_csr(host_pipe_info.m_physical_device_id, data_reg, + event->cmd.info.host_pipe_info.ptr, + event->cmd.info.host_pipe_info.size); + // Tell CSR it's ready. 'ready' is widened first so the HAL never reads past + // the end of the source object. + const uintptr_t ready_word = ready; + acl_get_hal()->write_csr(host_pipe_info.m_physical_device_id, ready_reg, + &ready_word, sizeof(ready_word)); + } else { + // Non CSR Case + pulled_data = acl_get_hal()->hostchannel_pull( + host_pipe_info.m_physical_device_id, host_pipe_info.m_channel_handle, + event->cmd.info.host_pipe_info.ptr, event->cmd.info.host_pipe_info.size, + &status); + + if (!blocking) { + // If it is a non-blocking read, fail the op right away when the full data was not pulled + if (status != 0 || pulled_data != event->cmd.info.host_pipe_info.size) { + acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, -1); + return; + } + } else { + // If it is a blocking read, this call won't return until the kernel + // writes the data into the pipe. 
+ while (status != 0 || + pulled_data != event->cmd.info.host_pipe_info.size) { + pulled_data = acl_get_hal()->hostchannel_pull( + host_pipe_info.m_physical_device_id, + host_pipe_info.m_channel_handle, event->cmd.info.host_pipe_info.ptr, + event->cmd.info.host_pipe_info.size, &status); + } + } + } + + acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, CL_COMPLETE); +} + +// Device op queue callback that executes a program scoped hostpipe write. +// Writes host_pipe_info.size bytes from host_pipe_info.write_ptr into the pipe +// named by host_pipe_info.logical_name, through the CSR registers when the +// pipe is implemented in the CSR and through the host channel otherwise. +// The op ends as CL_COMPLETE on success or -1 on failure. +void acl_write_program_hostpipe(void *user_data, acl_device_op_t *op) { + + cl_int status = CL_SUCCESS; + cl_event event = op->info.event; + acl_assert_locked(); + + if (!acl_event_is_valid(event) || + !acl_command_queue_is_valid(event->command_queue)) { + acl_set_device_op_execution_status(op, -1); + return; + } + // Only touch the command once the event is known to be valid. + const bool blocking = event->cmd.info.host_pipe_info.blocking; + + acl_device_program_info_t *dev_prog = + event->command_queue->device->loaded_bin->get_dev_prog(); + // Bind by reference: a copy would lock a private copy of m_lock and give no + // mutual exclusion on the real pipe. + auto &host_pipe_info = dev_prog->program_hostpipe_map.at( + std::string(event->cmd.info.host_pipe_info.logical_name)); + acl_mutex_lock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, CL_SUBMITTED); + acl_set_device_op_execution_status(op, CL_RUNNING); + + if (host_pipe_info.implement_in_csr) { + // Get CSR address + unsigned long long parsed; + uintptr_t data_reg, valid_reg; + size_t pushed_data; + try { + parsed = std::stoull(host_pipe_info.csr_address, nullptr); + } catch (const std::exception &) { + // Do not leave the pipe locked on the error path. + acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, -1); + return; + } + data_reg = static_cast(parsed); + valid_reg = static_cast( + parsed + + csr_pipe_address_offet); // valid reg is data reg shift by 8 byte, move + // this to the autodiscovery string maybe + unsigned int valid = 1; + // start the write + pushed_data = + acl_get_hal()->write_csr(host_pipe_info.m_physical_device_id, data_reg, + event->cmd.info.host_pipe_info.write_ptr, + event->cmd.info.host_pipe_info.size); + if (pushed_data != event->cmd.info.host_pipe_info.size) { 
+ acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, -1); + return; + } + // Tell CSR it's valid. 'valid' is widened to the uintptr_t-sized word that + // is transferred, so the HAL never reads past the end of the source object. + const uintptr_t valid_word = valid; + acl_get_hal()->write_csr(host_pipe_info.m_physical_device_id, valid_reg, + &valid_word, sizeof(valid_word)); + } else { + // Regular hostpipe + // Attempt to write once + status = l_push_packet(host_pipe_info.m_physical_device_id, + host_pipe_info.m_channel_handle, + event->cmd.info.host_pipe_info.write_ptr, + event->cmd.info.host_pipe_info.size); + if (!blocking) { + // If it is a non-blocking write, fail the op right away when the single + // attempt did not succeed + if (status != CL_SUCCESS) { + acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, -1); + return; + } + } else { + // If it's a blocking write, this function won't return until the write + // succeeds. + while (status != CL_SUCCESS) { + status = l_push_packet(host_pipe_info.m_physical_device_id, + host_pipe_info.m_channel_handle, + event->cmd.info.host_pipe_info.write_ptr, + event->cmd.info.host_pipe_info.size); + } + } + } + acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, CL_COMPLETE); +} + +// Submit an op to the device op queue to read hostpipe. +// Return 1 if we made forward progress, 0 otherwise. +cl_int acl_submit_read_program_hostpipe_device_op(cl_event event) { + int result = 0; + acl_assert_locked(); + + // No user-level scheduling blocks this hostpipe read + // So submit it to the device op queue. + // But only if it isn't already enqueued there. + if (!acl_event_is_valid(event)) { + return result; + } + // Already enqueued. + if (event->last_device_op) { + return result; + } + + acl_device_op_queue_t *doq = &(acl_platform.device_op_queue); + acl_device_op_t *last_op = 0; + + // Precautionary, but it also nudges the device scheduler to try + // to free up old operation slots. 
+ acl_forget_proposed_device_ops(doq); + + last_op = acl_propose_device_op(doq, ACL_DEVICE_OP_HOSTPIPE_READ, + event); // Later executed by acl_read_program_hostpipe + + if (last_op) { + // We managed to enqueue everything. + event->last_device_op = last_op; + acl_commit_proposed_device_ops(doq); + result = 1; + } else { + // Back off, and wait until later when we have more space in the + // device op queue. + acl_forget_proposed_device_ops(doq); + } + return result; +} + +// Submit an op to the device op queue to write hostpipe. +// Return 1 if we made forward progress (the op was committed), 0 otherwise. +cl_int acl_submit_write_program_hostpipe_device_op(cl_event event) { + int result = 0; + acl_assert_locked(); + + // No user-level scheduling blocks this hostpipe write op + // So submit it to the device op queue. + // But only if it isn't already enqueued there. + if (!acl_event_is_valid(event)) { + return result; + } + // Already enqueued. + if (event->last_device_op) { + return result; + } + + acl_device_op_queue_t *doq = &(acl_platform.device_op_queue); + acl_device_op_t *last_op = 0; + + // Precautionary, but it also nudges the device scheduler to try + // to free up old operation slots. + acl_forget_proposed_device_ops(doq); + + last_op = acl_propose_device_op(doq, ACL_DEVICE_OP_HOSTPIPE_WRITE, event); + + if (last_op) { + // We managed to enqueue everything. + event->last_device_op = last_op; + acl_commit_proposed_device_ops(doq); + result = 1; + } else { + // Back off, and wait until later when we have more space in the + // device op queue. 
+    acl_forget_proposed_device_ops(doq);
+  }
+  return result;
+}
+
+// Enqueue a command that reads `size` bytes from the program scoped hostpipe
+// named `pipe_symbol` into `ptr`.
+//
+// The pipe is looked up in the hostpipe map reached through the `loaded_bin`
+// of the queue's device; `program` only supplies the context used for error
+// reporting. `num_events_in_wait_list` and `event_wait_list` are passed on to
+// acl_create_event. When `blocking_read` is set the call waits for the
+// command's event before returning. When `event` is non-NULL it receives the
+// command's event, otherwise that event is released here.
+//
+// Returns:
+//   CL_INVALID_COMMAND_QUEUE  command_queue is NULL
+//   CL_INVALID_PROGRAM        program is NULL
+//   CL_INVALID_VALUE          ptr or pipe_symbol is NULL, or the pipe symbol
+//                             cannot be found for the device
+//   the error from acl_create_event when the command cannot be created
+//   the status of clWaitForEvents for a blocking read
+//   CL_SUCCESS                otherwise
+ACL_EXPORT
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadHostPipeINTEL(
+    cl_command_queue command_queue, cl_program program, const char *pipe_symbol,
+    cl_bool blocking_read, void *ptr, size_t size,
+    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+    cl_event *event) {
+  // Reject NULL handles before any of their fields is read.
+  if (command_queue == NULL) {
+    return CL_INVALID_COMMAND_QUEUE;
+  }
+  if (program == NULL) {
+    return CL_INVALID_PROGRAM;
+  }
+
+  std::scoped_lock lock{acl_mutex_wrapper};
+
+  // Get context from program and device from command_queue
+  cl_context context = program->context;
+  cl_device_id device = command_queue->device;
+
+  if (ptr == NULL) {
+    ERR_RET(CL_INVALID_VALUE, context,
+            "Invalid pointer was provided to host data");
+  }
+
+  if (pipe_symbol == NULL) {
+    ERR_RET(CL_INVALID_VALUE, context, "Invalid Pipe Symbol");
+  }
+
+  // Without a loaded binary there is no hostpipe map to search.
+  if (!device->loaded_bin) {
+    ERR_RET(CL_INVALID_VALUE, context,
+            "Pipe Symbol is not found in the device");
+  }
+
+  acl_device_program_info_t *dev_prog = device->loaded_bin->get_dev_prog();
+
+  auto search = dev_prog->program_hostpipe_map.find(std::string(pipe_symbol));
+  if (search == dev_prog->program_hostpipe_map.end()) {
+    ERR_RET(CL_INVALID_VALUE, context,
+            "Pipe Symbol is not found in the device");
+  }
+
+  cl_event local_event = 0; // used for blocking
+
+  // Create an event/command to actually move the data at the appropriate
+  // time.
+  cl_int status =
+      acl_create_event(command_queue, num_events_in_wait_list, event_wait_list,
+                       CL_COMMAND_READ_HOST_PIPE_INTEL, &local_event);
+
+  if (status != CL_SUCCESS)
+    return status;
+
+  local_event->cmd.info.host_pipe_info.size = size;
+  local_event->cmd.info.host_pipe_info.ptr = ptr;
+  local_event->cmd.info.host_pipe_info.blocking = blocking_read;
+  // NOTE(review): if logical_name is a raw pointer, the caller's string has
+  // to outlive the command -- confirm the member's type.
+  local_event->cmd.info.host_pipe_info.logical_name = pipe_symbol;
+
+  acl_idle_update(
+      command_queue
+          ->context); // If nothing's blocking, then complete right away
+
+  if (blocking_read) {
+    status = clWaitForEvents(1, &local_event);
+  }
+
+  if (event) {
+    *event = local_event;
+  } else {
+    // User didn't care, so forget about the event.
+    clReleaseEvent(local_event);
+    acl_idle_update(command_queue->context); // Clean up early
+  }
+
+  // Still CL_SUCCESS here unless the blocking wait reported a failure.
+  return status;
+}
+
+// Enqueue a command that writes `size` bytes from `ptr` to the program scoped
+// hostpipe named `pipe_symbol`.
+//
+// The pipe is looked up in the hostpipe map reached through the `loaded_bin`
+// of the queue's device; `program` only supplies the context used for error
+// reporting. `num_events_in_wait_list` and `event_wait_list` are passed on to
+// acl_create_event. When `blocking_write` is set the call waits for the
+// command's event before returning. When `event` is non-NULL it receives the
+// command's event, otherwise that event is released here.
+//
+// Returns:
+//   CL_INVALID_COMMAND_QUEUE  command_queue is NULL
+//   CL_INVALID_PROGRAM        program is NULL
+//   CL_INVALID_VALUE          ptr or pipe_symbol is NULL, or the pipe symbol
+//                             cannot be found for the device
+//   the error from acl_create_event when the command cannot be created
+//   the status of clWaitForEvents for a blocking write
+//   CL_SUCCESS                otherwise
+ACL_EXPORT
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteHostPipeINTEL(
+    cl_command_queue command_queue, cl_program program, const char *pipe_symbol,
+    cl_bool blocking_write, const void *ptr, size_t size,
+    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
+    cl_event *event) {
+  // Reject NULL handles before any of their fields is read.
+  if (command_queue == NULL) {
+    return CL_INVALID_COMMAND_QUEUE;
+  }
+  if (program == NULL) {
+    return CL_INVALID_PROGRAM;
+  }
+
+  std::scoped_lock lock{acl_mutex_wrapper};
+
+  // Get context from program and device from command_queue
+  cl_context context = program->context;
+  cl_device_id device = command_queue->device;
+
+  if (ptr == NULL) {
+    ERR_RET(CL_INVALID_VALUE, context,
+            "Invalid pointer was provided to host data");
+  }
+
+  if (pipe_symbol == NULL) {
+    ERR_RET(CL_INVALID_VALUE, context, "Invalid Pipe Symbol");
+  }
+
+  // Without a loaded binary there is no hostpipe map to search.
+  if (!device->loaded_bin) {
+    ERR_RET(CL_INVALID_VALUE, context,
+            "Pipe Symbol is not found in the device");
+  }
+
+  acl_device_program_info_t *dev_prog = device->loaded_bin->get_dev_prog();
+
+  auto search = dev_prog->program_hostpipe_map.find(std::string(pipe_symbol));
+  if (search == dev_prog->program_hostpipe_map.end()) {
+    ERR_RET(CL_INVALID_VALUE, context,
+            "Pipe Symbol is not found in the device");
+  }
+
+  cl_event local_event = 0; // used for blocking
+
+  // Create an event/command to actually move the data at the appropriate time.
+  cl_int status =
+      acl_create_event(command_queue, num_events_in_wait_list, event_wait_list,
+                       CL_COMMAND_WRITE_HOST_PIPE_INTEL, &local_event);
+
+  if (status != CL_SUCCESS)
+    return status;
+
+  local_event->cmd.info.host_pipe_info.size = size;
+  local_event->cmd.info.host_pipe_info.write_ptr = ptr;
+  local_event->cmd.info.host_pipe_info.blocking = blocking_write;
+  // NOTE(review): if logical_name is a raw pointer, the caller's string has
+  // to outlive the command -- confirm the member's type.
+  local_event->cmd.info.host_pipe_info.logical_name = pipe_symbol;
+
+  acl_idle_update(
+      command_queue
+          ->context); // If nothing's blocking, then complete right away
+
+  if (blocking_write) {
+    status = clWaitForEvents(1, &local_event);
+  }
+
+  if (event) {
+    *event = local_event;
+  } else {
+    // User didn't care, so forget about the event.
+    clReleaseEvent(local_event);
+    acl_idle_update(command_queue->context); // Clean up early
+  }
+
+  // Still CL_SUCCESS here unless the blocking wait reported a failure.
+  return status;
+}
+
 #ifdef __GNUC__
 #pragma GCC visibility pop
 #endif
diff --git a/src/acl_icd_dispatch.cpp b/src/acl_icd_dispatch.cpp
index 18de2b3a..2e16b512 100644
--- a/src/acl_icd_dispatch.cpp
+++ b/src/acl_icd_dispatch.cpp
@@ -50,6 +50,8 @@ clGetExtensionFunctionAddressIntelFPGA(const char *func_name) {
   ADDFUNCTIONLOOKUP(clResetKernelsIntelFPGA);
   ADDFUNCTIONLOOKUP(clSetBoardLibraryIntelFPGA);
   ADDFUNCTIONLOOKUP(clCreateBufferWithPropertiesINTEL);
+  ADDFUNCTIONLOOKUP(clEnqueueReadHostPipeINTEL);
+  ADDFUNCTIONLOOKUP(clEnqueueWriteHostPipeINTEL);
 
 // USM APIs are not currently supported on 32bit devices
 #ifndef __arm__
diff --git a/src/acl_platform.cpp b/src/acl_platform.cpp
index 514c8642..492987fc 100644
--- a/src/acl_platform.cpp
+++ b/src/acl_platform.cpp
@@ -282,7 +282,8 @@ const char *acl_platform_extensions() {
 #endif
          " cl_intel_create_buffer_with_properties"
          " cl_intel_mem_channel_property"
-         " cl_intel_mem_alloc_buffer_location";
+         " cl_intel_mem_alloc_buffer_location"
+         " cl_intel_program_scope_host_pipe";
 }
 
 // Initialize the internal bookkeeping based on the system definition
diff --git a/src/acl_profiler.cpp b/src/acl_profiler.cpp
index f3608000..2b0510d2
100644
--- a/src/acl_profiler.cpp
+++ b/src/acl_profiler.cpp
@@ -836,6 +836,10 @@ int acl_process_profiler_scan_chain(acl_device_op_t *op) {
     snprintf(name, MAX_NAME_SIZE, ".mem_migration");
   } else if (op_type == ACL_DEVICE_OP_USM_MEMCPY) {
     snprintf(name, MAX_NAME_SIZE, ".usm_memcpy");
+  } else if (op_type == ACL_DEVICE_OP_HOSTPIPE_READ) {
+    snprintf(name, MAX_NAME_SIZE, ".hostpipe_read");
+  } else if (op_type == ACL_DEVICE_OP_HOSTPIPE_WRITE) {
+    snprintf(name, MAX_NAME_SIZE, ".hostpipe_write");
   } else {
     // Ignore unknown op_type (don't attempt to extract any profiling from it or
     // get timestamps)
diff --git a/src/acl_program.cpp b/src/acl_program.cpp
index 32ed55c0..5d13dfc2 100644
--- a/src/acl_program.cpp
+++ b/src/acl_program.cpp
@@ -107,6 +107,10 @@ static void l_try_to_eagerly_program_device(cl_program program);
 static void
 l_device_memory_definition_copy(acl_device_def_autodiscovery_t *dest_dev,
                                 acl_device_def_autodiscovery_t *src_dev);
+static cl_int
+l_register_hostpipes_to_program(acl_device_program_info_t *dev_prog,
+                                unsigned int physical_device_id,
+                                cl_context context);
 
 //////////////////////////////
 // OpenCL API
@@ -428,6 +432,11 @@ CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinaryIntelFPGA(
 
   l_try_to_eagerly_program_device(program);
 
+  // Register the program scoped hostpipe to each dev_prog
+  for (idev = 0; idev < num_devices; idev++) {
+    l_register_hostpipes_to_program(program->dev_prog[idev], idev, context);
+  }
+
   return program;
 }
 
@@ -1303,9 +1312,69 @@ l_create_dev_prog(cl_program program, cl_device_id device, size_t binary_len,
   return result;
 }
 
+// Loop through the hostpipe mappings of the auto-discovery definition and
+// store the program scope hostpipe information in the device program info.
+// A mapping whose logical name is already in program_hostpipe_map is skipped.
+//
+// dev_prog            - device program whose hostpipe map is filled in.
+// physical_device_id  - recorded in each entry and passed to the HAL when a
+//                       host channel has to be created.
+// context             - context used by ERR_RET to report errors.
+//
+// Returns CL_SUCCESS when every mapping was handled, CL_INVALID_OPERATION
+// when a mapping is not exactly one of read or write, and CL_INVALID_VALUE
+// when the host channel of a non-CSR pipe could not be created. Entries
+// registered before a failing mapping stay in the map.
+static cl_int
+l_register_hostpipes_to_program(acl_device_program_info_t *dev_prog,
+                                unsigned int physical_device_id,
+                                cl_context context) {
+  for (const auto &hostpipe : dev_prog->device_binary.get_devdef()
+                                  .autodiscovery_def.hostpipe_mappings) {
+    // Skip if the hostpipe is already registered in the program
+    auto search = dev_prog->program_hostpipe_map.find(hostpipe.logical_name);
+    if (search != dev_prog->program_hostpipe_map.end()) {
+      continue;
+    }
+
+    // Check the direction before any resource is created for the pipe.
+    if (hostpipe.is_read && hostpipe.is_write) {
+      ERR_RET(CL_INVALID_OPERATION, context,
+              "Hostpipes don't allow both read and write operations from the "
+              "host.");
+    }
+    if (!hostpipe.is_read && !hostpipe.is_write) {
+      ERR_RET(CL_INVALID_OPERATION, context,
+              "The hostpipe direction is not set.");
+    }
+
+    // Value-initialized so that no member is copied into the map unset.
+    host_pipe_t host_pipe_info{};
+    host_pipe_info.m_physical_device_id = physical_device_id;
+
+    if (hostpipe.implement_in_csr) {
+      // CSR hostpipe read and write from the given CSR address directly
+      host_pipe_info.implement_in_csr = true;
+      host_pipe_info.csr_address = hostpipe.csr_address;
+      // CSR pipe doesn't use m_channel_handle but we want to have it
+      // initialized.
+      host_pipe_info.m_channel_handle = -1;
+    } else {
+      host_pipe_info.implement_in_csr = false;
+      host_pipe_info.m_channel_handle = acl_get_hal()->hostchannel_create(
+          physical_device_id,
+          const_cast<char *>(hostpipe.physical_name.c_str()),
+          hostpipe.pipe_depth, hostpipe.pipe_width,
+          hostpipe.is_read); // If it's a read pipe, pass 1 to the
+                             // hostchannel_create, which is HOST_TO_DEVICE
+      if (host_pipe_info.m_channel_handle <= 0) {
+        ERR_RET(CL_INVALID_VALUE, context,
+                "Failed to create the host channel of the hostpipe");
+      }
+    }
+    // The following property is not used by the program scoped hostpipe but we
+    // don't want to leave it uninitialized
+    host_pipe_info.binded = false;
+    host_pipe_info.m_binded_kernel = NULL;
+    host_pipe_info.size_buffered = 0;
+
+    // Initialize the lock on the entry stored in the map rather than on the
+    // local that gets copied: an initialized lock object is not, in general,
+    // safe to duplicate by copying (e.g. a POSIX mutex).
+    auto &registered = dev_prog->program_hostpipe_map[hostpipe.logical_name];
+    registered = host_pipe_info;
+    acl_mutex_init(&(registered.m_lock), NULL);
+  }
+
+  return CL_SUCCESS;
+}
+
 static cl_int l_build_program_for_device(cl_program program,
                                          unsigned int dev_idx,
                                          const char *options) {
+
   acl_device_program_info_t *dev_prog = 0;
   cl_context context;
   int build_status; // CL_BUILD_IN_PROGRESS,
CL_BUILD_ERROR, or
@@ -1318,7 +1387,6 @@ static cl_int l_build_program_for_device(cl_program program,
   if (!program->source_text) {
     // Program was created from binary.
     dev_prog = program->dev_prog[dev_idx];
-
     // User might have provided a bad binary (e.g. random bytes).
     // Need to check that once we can.
     // So we can only do a NULL check, but
diff --git a/test/acl_auto_configure_test.cpp b/test/acl_auto_configure_test.cpp
index b52d710d..8bd52295 100644
--- a/test/acl_auto_configure_test.cpp
+++ b/test/acl_auto_configure_test.cpp
@@ -1487,3 +1487,84 @@ TEST(auto_configure, cra_ring_root_exist) {
 
   CHECK_EQUAL(1, devdef.cra_ring_root_exist);
 }
+
+// Checks that acl_load_device_def_from_str() turns the hostpipe entries of an
+// auto-discovery string into devdef.hostpipe_mappings, in the order in which
+// the entries appear in the string.
+TEST(auto_configure, hostpipe_mappings) {
+  // Each hostpipe entry is spelled out in the string as:
+  //   logical_name physical_name implement_in_csr csr_address is_read
+  //   is_write pipe_width pipe_depth
+  // The "5 8" ahead of the first entry presumably gives the number of entries
+  // and the number of fields per entry -- TODO confirm against the parser.
+  const std::string config_str{
+      "23 66 " RANDOM_HASH
+      " pac_a10 0 1 13 DDR 2 2 24 1 2 0 4294967296 4294967296 8589934592 0 - 0 "
+      "0 0 0 0 0 1 5 8 pipe_logical_name1 pipe_physical_name1 1 12345 0 1 4 10 "
+      "pipe_logical_name2 pipe_physical_name2 0 12323 1 0 8 20 "
+      "pipe_logical_name3 "
+      "pipe_physical_name1 1 12313 0 1 4 10 pipe_logical_name5 "
+      "pipe_physical_name1 0 "
+      "12316 1 0 8 20 pipe_logical_name4 pipe_physical_name3 0 12342 0 1 4 10 "
+      "3 90 "
+      "_ZTS3CRCILi0EE 512 256 1 0 0 1 0 1 0 9 6 0 0 8 1 0 0 6 2 1 8 1024 0 3 6 "
+      "0 0 8 1 0 0 6 0 0 8 1 0 0 6 0 0 8 1 0 0 6 2 1 8 1024 0 2 6 0 0 8 1 0 0 "
+      "6 0 0 8 1 0 0 6 0 0 8 1 0 0 0 0 1 2 64 4096 1 1 1 3 1 1 1 3 1 0 64 "
+      "_ZTS11LZReductionILi0EE 0 256 1 0 0 0 0 1 0 5 6 0 0 8 1 0 0 6 2 1 8 "
+      "1024 0 3 6 0 0 8 1 0 0 6 0 0 8 1 0 0 6 0 0 8 1 0 0 0 0 2 2 64 131072 65 "
+      "32768 1 1 1 3 1 1 1 3 1 0 125 _ZTS13StaticHuffmanILi0EE 256 256 1 0 0 1 "
+      "0 1 0 10 6 0 0 8 1 0 0 6 0 0 4 1 0 0 6 2 1 8 1024 0 2 6 0 0 8 1 0 0 6 0 "
+      "0 8 1 0 0 6 0 0 8 1 0 0 6 2 1 8 1024 0 2 6 0 0 8 1 0 0 6 0 0 8 1 0 0 6 "
+      "0 0 8 1 0 0 0 0 15 2 64 116 65 116 66 1152 67 512 68 256 69 120 70 120 "
+      "71 1152 72 116 73 1152 74 512 75 256 76 120 77 120 78 1152 1 1 1 3 1 1 "
+      "1 3 1 0"};
+
+  acl_device_def_autodiscovery_t devdef;
+  {
+    bool result;
+    std::string err_str;
+    ACL_LOCKED(result =
+                   acl_load_device_def_from_str(config_str, devdef, err_str));
+    // Echo the parser's error text so a failing CHECK below can be diagnosed.
+    std::cerr << err_str;
+    CHECK(result);
+  }
+
+  // All five entries listed in the string must have been picked up.
+  CHECK_EQUAL(5, devdef.hostpipe_mappings.size());
+
+  // Entry 0: implemented in CSR, is_write set.
+  CHECK(devdef.hostpipe_mappings[0].logical_name == "pipe_logical_name1");
+  CHECK(devdef.hostpipe_mappings[0].physical_name == "pipe_physical_name1");
+  CHECK(devdef.hostpipe_mappings[0].implement_in_csr);
+  CHECK(devdef.hostpipe_mappings[0].csr_address == "12345");
+  CHECK(!devdef.hostpipe_mappings[0].is_read);
+  CHECK(devdef.hostpipe_mappings[0].is_write);
+  CHECK(devdef.hostpipe_mappings[0].pipe_width == 4);
+  CHECK(devdef.hostpipe_mappings[0].pipe_depth == 10);
+
+  // Entry 1: not in CSR, is_read set.
+  CHECK(devdef.hostpipe_mappings[1].logical_name == "pipe_logical_name2");
+  CHECK(devdef.hostpipe_mappings[1].physical_name == "pipe_physical_name2");
+  CHECK(!devdef.hostpipe_mappings[1].implement_in_csr);
+  CHECK(devdef.hostpipe_mappings[1].csr_address == "12323");
+  CHECK(devdef.hostpipe_mappings[1].is_read);
+  CHECK(!devdef.hostpipe_mappings[1].is_write);
+  CHECK(devdef.hostpipe_mappings[1].pipe_width == 8);
+  CHECK(devdef.hostpipe_mappings[1].pipe_depth == 20);
+
+  // Entry 2: reuses the physical name of entry 0 under another logical name.
+  CHECK(devdef.hostpipe_mappings[2].logical_name == "pipe_logical_name3");
+  CHECK(devdef.hostpipe_mappings[2].physical_name == "pipe_physical_name1");
+  CHECK(devdef.hostpipe_mappings[2].implement_in_csr);
+  CHECK(devdef.hostpipe_mappings[2].csr_address == "12313");
+  CHECK(!devdef.hostpipe_mappings[2].is_read);
+  CHECK(devdef.hostpipe_mappings[2].is_write);
+  CHECK(devdef.hostpipe_mappings[2].pipe_width == 4);
+  CHECK(devdef.hostpipe_mappings[2].pipe_depth == 10);
+
+  // Entry 3: "pipe_logical_name5" comes before "pipe_logical_name4" in the
+  // string, and the parsed order has to follow the string.
+  CHECK(devdef.hostpipe_mappings[3].logical_name == "pipe_logical_name5");
+  CHECK(devdef.hostpipe_mappings[3].physical_name == "pipe_physical_name1");
+  CHECK(!devdef.hostpipe_mappings[3].implement_in_csr);
+  CHECK(devdef.hostpipe_mappings[3].csr_address == "12316");
+  CHECK(devdef.hostpipe_mappings[3].is_read);
+  CHECK(!devdef.hostpipe_mappings[3].is_write);
+  CHECK(devdef.hostpipe_mappings[3].pipe_width == 8);
+  CHECK(devdef.hostpipe_mappings[3].pipe_depth == 20);
+
+  // Entry 4: not in CSR, is_write set.
+  CHECK(devdef.hostpipe_mappings[4].logical_name == "pipe_logical_name4");
+  CHECK(devdef.hostpipe_mappings[4].physical_name == "pipe_physical_name3");
+  CHECK(!devdef.hostpipe_mappings[4].implement_in_csr);
+  CHECK(devdef.hostpipe_mappings[4].csr_address == "12342");
+  CHECK(!devdef.hostpipe_mappings[4].is_read);
+  CHECK(devdef.hostpipe_mappings[4].is_write);
+  CHECK(devdef.hostpipe_mappings[4].pipe_width == 4);
+  CHECK(devdef.hostpipe_mappings[4].pipe_depth == 10);
+}
diff --git a/test/acl_device_op_test.cpp b/test/acl_device_op_test.cpp
index 05f7b16a..728a39cc 100644
--- a/test/acl_device_op_test.cpp
+++ b/test/acl_device_op_test.cpp
@@ -277,6 +277,7 @@ TEST(device_op, conflict_type) {
 
   op = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, 0);
   CHECK(op);
+  assert(op != NULL);
 
   CHECK_EQUAL(ACL_CONFLICT_NONE, acl_device_op_conflict_type(op));
 
@@ -347,6 +348,7 @@ TEST(device_op, submit_action) {
   acl_device_op_t *op = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, 0);
   acl_commit_proposed_device_ops(&m_doq);
   CHECK(op);
+  assert(op != NULL);
 
   // Already submitted because committing nudges device opthe scheduler.
CHECK_EQUAL(CL_SUBMITTED, op->status); @@ -443,6 +445,7 @@ TEST(device_op, post_status) { CHECK(e); acl_device_op_t *op = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, e); + assert(op != NULL); CHECK_EQUAL(e, op->info.event); e->execution_status = CL_COMPLETE; @@ -470,6 +473,7 @@ TEST(device_op, post_status) { cl_event e2 = clCreateUserEvent(m_context, 0); acl_device_op_t *op2 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, e2); + assert(op2 != NULL); op2->timestamp[CL_COMPLETE] = 13; op2->execution_status = -5; @@ -496,6 +500,11 @@ TEST(device_op, err_status) { acl_device_op_t *op0 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, e); acl_device_op_t *op1 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, e); acl_device_op_t *op2 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, e); + + assert(op0 != NULL); + assert(op1 != NULL); + assert(op2 != NULL); + CHECK_EQUAL(e, op0->info.event); CHECK_EQUAL(e, op1->info.event); CHECK_EQUAL(e, op2->info.event); @@ -593,6 +602,7 @@ TEST(device_op, exhaust) { (unsigned)i) : acl_propose_device_op(&m_doq, ACL_DEVICE_OP_NONE, e); CHECK(op); + assert(op != NULL); ops[i] = op; CHECK(op); acl_print_debug_msg( @@ -847,6 +857,8 @@ TEST(device_op, prune) { acl_device_op_t *op0 = acl_propose_device_op(doq, ACL_DEVICE_OP_NONE, e0); acl_device_op_t *op1 = acl_propose_device_op(doq, ACL_DEVICE_OP_NONE, e0); + assert(op0 != NULL); + assert(op1 != NULL); acl_commit_proposed_device_ops(doq); CHECK_EQUAL(0, doq->first_live); @@ -938,6 +950,7 @@ TEST(device_op, inter_group_blocking) { acl_device_op_t *op0 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_MEM_TRANSFER_READ, 0); CHECK(op0); + assert(op0 != NULL); acl_commit_proposed_device_ops(&m_doq); acl_device_op_t *op1 = @@ -945,6 +958,8 @@ TEST(device_op, inter_group_blocking) { acl_device_op_t *op3 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_KERNEL, 0); CHECK(op1); CHECK(op3); + assert(op1 != NULL); + assert(op3 != NULL); acl_commit_proposed_device_ops(&m_doq); 
acl_update_device_op_queue(&m_doq); @@ -1002,6 +1017,7 @@ TEST(device_op, inter_group_all_conflict_types) { // where it belongs. acl_device_op_t *k00 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_KERNEL, 0); + assert(k00 != NULL); acl_commit_proposed_device_ops(&m_doq); CHECK_EQUAL(CL_SUBMITTED, k00->status); @@ -1011,7 +1027,9 @@ TEST(device_op, inter_group_all_conflict_types) { acl_device_op_t *p1 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_REPROGRAM, 0); + assert(p1 != NULL); acl_device_op_t *k10 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_KERNEL, 0); + assert(k10 != NULL); acl_commit_proposed_device_ops(&m_doq); CHECK_EQUAL(CL_RUNNING, k00->status); @@ -1019,6 +1037,7 @@ TEST(device_op, inter_group_all_conflict_types) { CHECK_EQUAL(CL_QUEUED, k10->status); acl_device_op_t *k11 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_KERNEL, 0); + assert(k11 != NULL); acl_commit_proposed_device_ops(&m_doq); CHECK_EQUAL(CL_RUNNING, k00->status); @@ -1053,10 +1072,13 @@ TEST(device_op, inter_group_concurrent_kernels) { acl_device_op_t *op0 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_KERNEL, 0); CHECK(op0); + assert(op0 != NULL); acl_commit_proposed_device_ops(&m_doq); acl_device_op_t *op1 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_KERNEL, 0); acl_device_op_t *op2 = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_KERNEL, 0); + assert(op1 != NULL); + assert(op2 != NULL); CHECK(op1); CHECK(op2); acl_commit_proposed_device_ops(&m_doq); @@ -1397,6 +1419,9 @@ TEST(device_op, full_duplex) { acl_commit_proposed_device_ops(&m_doq); acl_device_op_t *op_rw = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_MEM_TRANSFER_COPY, event); + assert(op_read != NULL); + assert(op_write != NULL); + assert(op_rw != NULL); acl_commit_proposed_device_ops(&m_doq); acl_update_device_op_queue(&m_doq); CHECK_EQUAL(CL_SUBMITTED, op_read->status); @@ -1426,9 +1451,11 @@ TEST(device_op, full_duplex) { acl_set_device_op_execution_status(op_rw, CL_RUNNING); op_read = acl_propose_device_op(&m_doq, 
ACL_DEVICE_OP_MEM_TRANSFER_READ, event); + assert(op_read != NULL); acl_commit_proposed_device_ops(&m_doq); op_write = acl_propose_device_op(&m_doq, ACL_DEVICE_OP_MEM_TRANSFER_WRITE, event); + assert(op_write != NULL); acl_commit_proposed_device_ops(&m_doq); acl_update_device_op_queue(&m_doq); CHECK_EQUAL(CL_RUNNING, op_rw->status); @@ -1480,8 +1507,10 @@ TEST(device_op, multi_device_rw_conflict) { event = &myevents[k]; acl_device_op_t *op1 = acl_propose_device_op(&m_doq, ops[i], event); + assert(op1 != NULL); acl_commit_proposed_device_ops(&m_doq); acl_device_op_t *op2 = acl_propose_device_op(&m_doq, ops[j], event); + assert(op2 != NULL); acl_commit_proposed_device_ops(&m_doq); acl_update_device_op_queue(&m_doq); CHECK_EQUAL(CL_SUBMITTED, op1->status);