diff --git a/include/acl.h b/include/acl.h index 57de815d..b3e6d6f2 100644 --- a/include/acl.h +++ b/include/acl.h @@ -542,6 +542,9 @@ struct acl_hostpipe_mapping { int protocol = -1; // avalon_streaming = 0, avalon_streaming_uses_ready = 1 // avalon_mm = 2, avalon_mm_uses_ready = 3 + + // Introduced in 2024.2 + int is_stall_free = -1; // -1 means unset, set value is 0 or 1; }; // Mapping of sideband signals to logical pipe diff --git a/include/acl_types.h b/include/acl_types.h index f537a6cb..a3556da5 100644 --- a/include/acl_types.h +++ b/include/acl_types.h @@ -349,6 +349,9 @@ typedef struct host_pipe_struct { // Sideband signals vector std::vector side_band_signals_vector; + // Introduced in 2024.2 + int is_stall_free = -1; // -1 means unset, set value is 0 or 1; + } host_pipe_t; // The device-specific information about a program. diff --git a/src/acl_auto_configure.cpp b/src/acl_auto_configure.cpp index 6032a81e..219d0bb1 100644 --- a/src/acl_auto_configure.cpp +++ b/src/acl_auto_configure.cpp @@ -663,11 +663,21 @@ static bool read_hostpipe_mappings( // Start from 2024.0, there is a new field called protocol in the // auto-discovery string + // This field isn't currently being used by the Runtime. + // It is reserved for the future when new protocols are + // supported and the Runtime needs to differentiate. 
if (result && counters.back() > 0) { result = result && read_int_counters(config_str, curr_pos, mapping.protocol, counters); } + // Start from 2024.2, there is a new field called is_stall_free in the + // auto-discovery string + if (result && counters.back() > 0) { + result = result && read_int_counters(config_str, curr_pos, + mapping.is_stall_free, counters); + } + hostpipe_mappings.emplace_back(mapping); while (result && counters.back() > 0) { diff --git a/src/acl_hostch.cpp b/src/acl_hostch.cpp index 406f339f..1ca6cd38 100644 --- a/src/acl_hostch.cpp +++ b/src/acl_hostch.cpp @@ -823,9 +823,24 @@ void acl_read_program_hostpipe(void *user_data, acl_device_op_t *op) { acl_set_device_op_execution_status(op, CL_RUNNING); if (host_pipe_info.implement_in_csr) { - // CSR read, currently only blocking version is implemented + // Here is the logic for CSR pipe read + // Compiler initializes ready register to 1, if the ready register exists + // Non-Blocking uses_ready + // 1. if ready == 1, fail. + // 2. Read data. + // 3. write 1 to ready. + + // Blocking uses_ready + // 1. wait until ready = 0. + // 2. read data. + // 3. write 1 to ready. + + // Without uses_ready (is_stall_free != 0, including the unset value -1) + // Both Blocking and NonBlocking + // 1. 
Read data (always succeeds) + unsigned long long parsed; - uintptr_t data_reg, ready_reg, valid_reg; + uintptr_t data_reg, ready_reg; // Convert the CSR address to a pointer try { parsed = std::stoull(host_pipe_info.csr_address, nullptr); @@ -839,27 +854,21 @@ void acl_read_program_hostpipe(void *user_data, acl_device_op_t *op) { ready_reg = static_cast( parsed + csr_pipe_address_offet); // ready reg is data reg shift by 8 byte - valid_reg = static_cast( - parsed + - csr_pipe_address_offet * 2); // valid reg is ready reg shift by 8 byte unsigned ready = 1; - unsigned valid_value; - unsigned *valid_value_pointer = &valid_value; + unsigned ready_value; + unsigned *ready_value_pointer = &ready_value; - // protocol 3 is the avalon_mm_uses_ready protocol - // Only this uses_ready protocol requires reading/writing to ready&valid - // signals - if (host_pipe_info.protocol == 3) { - // If Blocking, wait until the data is valid. - // If Non-blocking, just read once and report failure if not valid. + if (host_pipe_info.is_stall_free == 0) { + // If Blocking, wait until the ready register = 0 + // If Non-blocking, just read once and report failure if ready == 1 do { - acl_get_hal()->read_csr(host_pipe_info.m_physical_device_id, valid_reg, - (void *)valid_value_pointer, + acl_get_hal()->read_csr(host_pipe_info.m_physical_device_id, ready_reg, + (void *)ready_value_pointer, (size_t)sizeof(uintptr_t)); - } while (blocking && valid_value != 1); + } while (blocking && ready_value != 0); - // If non-blocking and valid bit is not set, set the op to fail. - if (!blocking && valid_value == 0) { + // If non-blocking and ready bit is 1, set the op to fail. 
+ if (!blocking && ready_value == 1) { acl_mutex_unlock(&(host_pipe_info.m_lock)); acl_set_device_op_execution_status(op, -1); return; @@ -875,9 +884,8 @@ void acl_read_program_hostpipe(void *user_data, acl_device_op_t *op) { acl_set_device_op_execution_status(op, -1); return; } - // Tell CSR it's ready - // Same reason as above, only avalon_mm_uses_ready needs to do this. - if (host_pipe_info.protocol == 3) { + // Tell CSR it's ready if the ready register exists + if (host_pipe_info.is_stall_free == 0) { acl_get_hal()->write_csr(host_pipe_info.m_physical_device_id, ready_reg, (void *)&ready, (size_t)sizeof(uintptr_t)); } @@ -968,10 +976,19 @@ void acl_write_program_hostpipe(void *user_data, acl_device_op_t *op) { // Get CSR address // Here is the logic for CSR pipe write - // 1. If blocking, read valid reg, wait until valid is 0. - // 2. If non-blocking, read valid reg once ->return failure if valid is 1. - // 3. write to the pipe. - // 4. write 1 to the valid. + // Blocking uses_valid: + // 1. read valid reg, wait until valid is 0 + // 2. write to the pipe. + // 3. write 1 to the valid. + + // Non-blocking uses_valid + // 1. read valid reg once ->return failure if valid is 1 + // 2. write to the pipe. + // 3. write 1 to the valid. + + // Without uses_valid (is_stall_free != 0, including the unset value -1) + // Both Blocking and NonBlocking + // 1. Write data (always succeeds) unsigned long long parsed; uintptr_t data_reg, valid_reg; @@ -990,23 +1007,24 @@ void acl_write_program_hostpipe(void *user_data, acl_device_op_t *op) { unsigned valid_value = 1; unsigned *valid_value_pointer = &valid_value; - if (blocking) { - // Wait until the valid reg is 0, before the write. - while (valid_value != 0) { + if (host_pipe_info.is_stall_free == 0) { + if (blocking) { + while (valid_value != 0) { + acl_get_hal()->read_csr(host_pipe_info.m_physical_device_id, + valid_reg, (void *)valid_value_pointer, + (size_t)sizeof(uintptr_t)); + } + } else { + // Non-blocking, if valid reg is 1, return failure. 
acl_get_hal()->read_csr(host_pipe_info.m_physical_device_id, valid_reg, (void *)valid_value_pointer, (size_t)sizeof(uintptr_t)); - } - } else { - // Non-blocking, if valid reg is 1, return failure. - acl_get_hal()->read_csr(host_pipe_info.m_physical_device_id, valid_reg, - (void *)valid_value_pointer, - (size_t)sizeof(uintptr_t)); - if (valid_value == 1) { - acl_mutex_unlock(&(host_pipe_info.m_lock)); - acl_set_device_op_execution_status(op, -1); - return; + if (valid_value == 1) { + acl_mutex_unlock(&(host_pipe_info.m_lock)); + acl_set_device_op_execution_status(op, -1); + return; + } } } @@ -1015,19 +1033,18 @@ void acl_write_program_hostpipe(void *user_data, acl_device_op_t *op) { host_pipe_info.m_physical_device_id, data_reg, event->cmd.info.host_pipe_dynamic_info.write_ptr, event->cmd.info.host_pipe_dynamic_info.size); + if (status != 0) { acl_mutex_unlock(&(host_pipe_info.m_lock)); acl_set_device_op_execution_status(op, -1); return; } - // For now, we trust the AVALON_MM by default uses valid. - // TODO: fix this later by using the new protocol info - // provided by the compiler. 
- - const unsigned valid = 1; - acl_get_hal()->write_csr(host_pipe_info.m_physical_device_id, valid_reg, - (void *)&valid, (size_t)sizeof(uintptr_t)); + if (host_pipe_info.is_stall_free == 0) { + const unsigned valid = 1; + acl_get_hal()->write_csr(host_pipe_info.m_physical_device_id, valid_reg, + (void *)&valid, (size_t)sizeof(uintptr_t)); + } } else { // Regular hostpipe diff --git a/src/acl_program.cpp b/src/acl_program.cpp index e4698650..86929df2 100644 --- a/src/acl_program.cpp +++ b/src/acl_program.cpp @@ -1364,6 +1364,7 @@ l_register_hostpipes_to_program(acl_device_program_info_t *dev_prog, } } host_pipe_info.protocol = hostpipe.protocol; + host_pipe_info.is_stall_free = hostpipe.is_stall_free; acl_mutex_init(&(host_pipe_info.m_lock), NULL); // The following property is not used by the program scoped hostpipe but we // don't want to leave it uninitialized diff --git a/test/acl_auto_configure_test.cpp b/test/acl_auto_configure_test.cpp index fb3adfd1..e763ffae 100644 --- a/test/acl_auto_configure_test.cpp +++ b/test/acl_auto_configure_test.cpp @@ -1496,14 +1496,14 @@ TEST(auto_configure, cra_ring_root_exist) { TEST(auto_configure, hostpipe_mappings) { const std::string config_str{ - "23 71 " RANDOM_HASH + "23 76 " RANDOM_HASH " pac_a10 0 1 13 DDR 2 2 24 1 2 0 4294967296 4294967296 8589934592 0 - 0 " - "0 0 0 0 0 1 5 9 " // 5 Hostpipes, 9 in each mapping - "pipe_logical_name1 pipe_physical_name1 1 12345 0 1 4 10 0 " - "pipe_logical_name2 pipe_physical_name2 0 12323 1 0 8 20 1 " - "pipe_logical_name3 pipe_physical_name1 1 12313 0 1 4 10 2 " - "pipe_logical_name5 pipe_physical_name1 0 12316 1 0 8 20 3 " - "pipe_logical_name4 pipe_physical_name3 0 12342 0 1 4 10 3 " + "0 0 0 0 0 1 5 10 " // 5 Hostpipes, 10 in each mapping + "pipe_logical_name1 pipe_physical_name1 1 12345 0 1 4 10 0 0 " + "pipe_logical_name2 pipe_physical_name2 0 12323 1 0 8 20 1 1 " + "pipe_logical_name3 pipe_physical_name1 1 12313 0 1 4 10 2 0 " + "pipe_logical_name5 pipe_physical_name1 0 12316 
1 0 8 20 3 1 " + "pipe_logical_name4 pipe_physical_name3 0 12342 0 1 4 10 3 0 " "3 90 " "_ZTS3CRCILi0EE 512 256 1 0 0 1 0 1 0 9 6 0 0 8 1 0 0 6 2 1 8 1024 0 3 6 " "0 0 8 1 0 0 6 0 0 8 1 0 0 6 0 0 8 1 0 0 6 2 1 8 1024 0 2 6 0 0 8 1 0 0 " @@ -1538,6 +1538,7 @@ TEST(auto_configure, hostpipe_mappings) { CHECK(devdef.hostpipe_mappings[0].pipe_width == 4); CHECK(devdef.hostpipe_mappings[0].pipe_depth == 10); CHECK(devdef.hostpipe_mappings[0].protocol == 0); + CHECK(devdef.hostpipe_mappings[0].is_stall_free == 0); CHECK(devdef.hostpipe_mappings[1].logical_name == "pipe_logical_name2"); CHECK(devdef.hostpipe_mappings[1].physical_name == "pipe_physical_name2"); @@ -1548,6 +1549,7 @@ TEST(auto_configure, hostpipe_mappings) { CHECK(devdef.hostpipe_mappings[1].pipe_width == 8); CHECK(devdef.hostpipe_mappings[1].pipe_depth == 20); CHECK(devdef.hostpipe_mappings[1].protocol == 1); + CHECK(devdef.hostpipe_mappings[1].is_stall_free == 1); CHECK(devdef.hostpipe_mappings[2].logical_name == "pipe_logical_name3"); CHECK(devdef.hostpipe_mappings[2].physical_name == "pipe_physical_name1"); @@ -1558,6 +1560,7 @@ TEST(auto_configure, hostpipe_mappings) { CHECK(devdef.hostpipe_mappings[2].pipe_width == 4); CHECK(devdef.hostpipe_mappings[2].pipe_depth == 10); CHECK(devdef.hostpipe_mappings[2].protocol == 2); + CHECK(devdef.hostpipe_mappings[2].is_stall_free == 0); CHECK(devdef.hostpipe_mappings[3].logical_name == "pipe_logical_name5"); CHECK(devdef.hostpipe_mappings[3].physical_name == "pipe_physical_name1"); @@ -1568,6 +1571,7 @@ TEST(auto_configure, hostpipe_mappings) { CHECK(devdef.hostpipe_mappings[3].pipe_width == 8); CHECK(devdef.hostpipe_mappings[3].pipe_depth == 20); CHECK(devdef.hostpipe_mappings[3].protocol == 3); + CHECK(devdef.hostpipe_mappings[3].is_stall_free == 1); CHECK(devdef.hostpipe_mappings[4].logical_name == "pipe_logical_name4"); CHECK(devdef.hostpipe_mappings[4].physical_name == "pipe_physical_name3"); @@ -1578,18 +1582,19 @@ TEST(auto_configure, hostpipe_mappings) { 
CHECK(devdef.hostpipe_mappings[4].pipe_width == 4); CHECK(devdef.hostpipe_mappings[4].pipe_depth == 10); CHECK(devdef.hostpipe_mappings[4].protocol == 3); + CHECK(devdef.hostpipe_mappings[4].is_stall_free == 0); } TEST(auto_configure, sideband_mappings) { const std::string config_str{ - "23 102 " RANDOM_HASH + "23 107 " RANDOM_HASH " pac_a10 0 1 13 DDR 2 2 24 1 2 0 4294967296 4294967296 8589934592 0 - 0 " - "0 0 0 0 0 1 5 9 " // 5 Hostpipes, 9 in each mapping - "pipe_logical_name1 pipe_physical_name1 1 12345 0 1 4 10 0 " - "pipe_logical_name2 pipe_physical_name2 0 12323 1 0 8 20 1 " - "pipe_logical_name3 pipe_physical_name1 1 12313 0 1 4 10 2 " - "pipe_logical_name5 pipe_physical_name1 0 12316 1 0 8 20 3 " - "pipe_logical_name4 pipe_physical_name3 0 12342 0 1 4 10 3 " + "0 0 0 0 0 1 5 10 " // 5 Hostpipes, 10 in each mapping + "pipe_logical_name1 pipe_physical_name1 1 12345 0 1 4 10 0 0 " + "pipe_logical_name2 pipe_physical_name2 0 12323 1 0 8 20 1 1 " + "pipe_logical_name3 pipe_physical_name1 1 12313 0 1 4 10 2 0 " + "pipe_logical_name5 pipe_physical_name1 0 12316 1 0 8 20 3 1 " + "pipe_logical_name4 pipe_physical_name3 0 12342 0 1 4 10 3 0 " "2 " // 2 Sideband groups "pipe_logical_name1 4 3 0 0 320 1 320 8 2 328 8 3 352 32 " "pipe_logical_name2 4 3 0 0 320 1 320 8 2 328 8 3 352 32 "