From 5c1df40ac8ab0bd745659ee0dad5ae670a660d42 Mon Sep 17 00:00:00 2001 From: Sophie Mao Date: Wed, 1 May 2024 13:55:34 -0700 Subject: [PATCH] Fix USM mem blocking free corruption If the event being waited on is completed and deleted during the traversal of command_queue->{commands|inorder_commands}, this would lead to corruption of the corresponding data structure. This change fixes this issue by deferring the deletion till the end of the current iteration. --- include/acl_types.h | 7 +++ src/acl_command_queue.cpp | 18 ++++++-- src/acl_usm.cpp | 90 +++++++++++++++++++++++++-------------- 3 files changed, 79 insertions(+), 36 deletions(-) diff --git a/include/acl_types.h b/include/acl_types.h index 9a437b77..af1cca53 100644 --- a/include/acl_types.h +++ b/include/acl_types.h @@ -744,6 +744,10 @@ typedef struct _cl_event { int is_on_device_op_queue; // bool whether this event is submitted to DevQ bool support_profiling; // bool whether this event can be profiled + + // Flag to indicate the event is being removed during command/inorder_command + // traversal and the removal should be deferred + bool defer_removal = false; } _cl_event; ACL_DECLARE_CL_OBJECT_ALLOC_FUNCTIONS(cl_event); @@ -764,6 +768,9 @@ typedef struct _cl_command_queue { int num_commands; // Number of commands in the queue. int num_commands_submitted; // Number of events submitted to device_op_queue + // Flag to indicate whether commands/inorder_commands is being traversed + bool waiting_for_events = false; + // Used only for in-order command queues. Owner of all events managed by this // queue std::deque inorder_commands; diff --git a/src/acl_command_queue.cpp b/src/acl_command_queue.cpp index bd41dc14..b7f414f8 100644 --- a/src/acl_command_queue.cpp +++ b/src/acl_command_queue.cpp @@ -764,7 +764,13 @@ int acl_update_ooo_queue(cl_command_queue command_queue) { command_queue->last_barrier = NULL; } acl_maybe_delete_event(event); - command_queue->commands.erase(event); + if (command_queue->waiting_for_events) { + // We are in the middle of traversing command_queue->commands, defer + // the removal till later to avoid corruption + event->defer_removal = true; + } else { + command_queue->commands.erase(event); + } event->not_popped = false; command_queue->num_commands--; acl_release(command_queue); @@ -820,8 +826,14 @@ int acl_update_inorder_queue(cl_command_queue command_queue) { // popping it the second time. // Deleted event. Remove it from our queue. - command_queue->inorder_commands.erase( - command_queue->inorder_commands.begin() + queue_idx); + if (command_queue->waiting_for_events) { + // We are in the middle of traversing command_queue->inorder_commands, + // defer the removal till later to avoid corruption + event->defer_removal = true; + } else { + command_queue->inorder_commands.erase( + command_queue->inorder_commands.begin() + queue_idx); + } command_queue->num_commands--; num_updates++; event->not_popped = false; diff --git a/src/acl_usm.cpp b/src/acl_usm.cpp index 3accda38..a4881553 100644 --- a/src/acl_usm.cpp +++ b/src/acl_usm.cpp @@ -1224,48 +1224,72 @@ void l_cl_mem_blocking_free(cl_context context, void *ptr) { for (int i = 0; i < num_command_queues; i++) { cl_command_queue current_cq = context->command_queue[i]; - // check events in commands - for (const auto &event : current_cq->commands) { - if (event->execution_status != CL_COMPLETE) { - // check if ptr is used by kernels when we submit to queue - if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) { - clWaitForEvents(1, &event); - } - // check if ptr is used in queues - if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) || - (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) { - src_usm_alloc = acl_get_usm_alloc_from_ptr( - context, event->cmd.info.usm_xfer.src_ptr); - dst_usm_alloc = acl_get_usm_alloc_from_ptr( - context, event->cmd.info.usm_xfer.dst_ptr); - if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) || - (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) { + // Set a flag to indicate the command set of this command queue is being + // traversed, and any event deletion should be deferred + current_cq->waiting_for_events = true; + + if (current_cq->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + // Current queue is ooo queue, check events in commands + for (auto it = current_cq->commands.begin(); + it != current_cq->commands.end();) { + auto event = *it; + if (event->execution_status != CL_COMPLETE) { + // check if ptr is used by kernels when we submit to queue + if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) { clWaitForEvents(1, &event); } + // check if ptr is used in queues + if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) || + (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) { + src_usm_alloc = acl_get_usm_alloc_from_ptr( + context, event->cmd.info.usm_xfer.src_ptr); + dst_usm_alloc = acl_get_usm_alloc_from_ptr( + context, event->cmd.info.usm_xfer.dst_ptr); + if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) || + (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) { + clWaitForEvents(1, &event); + } + } } - } - } - // check events in inorder commands - for (const auto &event : current_cq->inorder_commands) { - if (event->execution_status != CL_COMPLETE) { - // check if ptr is used by kernels when we submit to queue - if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) { - clWaitForEvents(1, &event); + if (event->defer_removal) { + it = current_cq->commands.erase(it); + event->defer_removal = false; // Reset as this event might get reused + } else { + ++it; } - // check if ptr is used in queues - if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) || - (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) { - src_usm_alloc = acl_get_usm_alloc_from_ptr( - context, event->cmd.info.usm_xfer.src_ptr); - dst_usm_alloc = acl_get_usm_alloc_from_ptr( - context, event->cmd.info.usm_xfer.dst_ptr); - if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) || - (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) { + } + } else { + // Current queue is inorder queue, check events in inorder commands + for (auto it = current_cq->inorder_commands.begin(); + it != current_cq->inorder_commands.end();) { + auto event = *it; + if (event->execution_status != CL_COMPLETE) { + // check if ptr is used by kernels when we submit to queue + if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) { clWaitForEvents(1, &event); } + // check if ptr is used in queues + if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) || + (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) { + src_usm_alloc = acl_get_usm_alloc_from_ptr( + context, event->cmd.info.usm_xfer.src_ptr); + dst_usm_alloc = acl_get_usm_alloc_from_ptr( + context, event->cmd.info.usm_xfer.dst_ptr); + if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) || + (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) { + clWaitForEvents(1, &event); + } + } + } + if (event->defer_removal) { + it = current_cq->inorder_commands.erase(it); + event->defer_removal = false; // Reset as this event might get reused + } else { + ++it; } } } + current_cq->waiting_for_events = false; } }