Skip to content

[ET-VK] 8/n Split dispatches between multiple command buffers. This diff adds a config to limit the maximum number of command buffers created when splitting execution between multiple command buffers. #13204

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,33 @@ void ComputeGraph::prepare_pipelines() {
pipeline_descriptors_ = std::unordered_set<
vkapi::ComputePipelineCache::Key,
vkapi::ComputePipelineCache::Hasher>();

const size_t total_node_count = execute_nodes_.size();
size_t init_threshold = config_.execute_initial_threshold_node_count;
size_t count_threshold = config_.execute_threshold_node_count;

// If max command buffer count is set, we need to adjust the thresholds to
// accommodate execution within the limit, if total command buffers with
// current thresholds would exceed execute_max_cmds
if (config_.execute_max_cmds > 0) {
    // Worst-case scenario: we have one command buffer for nodes before the
    // init threshold and config_.execute_max_cmds - 1 command buffers for the
    // rest of the dispatches

// If command buffers created after offsetting init_threshold would exceed
// max command buffer count, we need to adjust init and count thresholds
const bool slicing_exceeds_max_cmds = (total_node_count - init_threshold) >
count_threshold * (config_.execute_max_cmds - 1);
if (total_node_count > init_threshold && slicing_exceeds_max_cmds) {
      // Increase count threshold so the remaining nodes after offsetting init
      // fit in config_.execute_max_cmds - 1 command buffers
count_threshold = static_cast<size_t>(ceil(
(total_node_count - init_threshold) /
double(config_.execute_max_cmds - 1)));
}
}

execute_threshold_node_count_ = count_threshold;
}

void ComputeGraph::submit_current_cmd(const bool final_use) {
Expand Down Expand Up @@ -888,6 +915,7 @@ void ComputeGraph::execute() {
context_->set_cmd(/*reusable = */ true);

context_->cmd_reset_querypool();
const size_t total_node_count = execute_nodes_.size();
uint32_t encoded_node_count = 0;

for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
Expand All @@ -900,11 +928,13 @@ void ComputeGraph::execute() {
const bool reached_threshold =
encoded_node_count >= config_.execute_initial_threshold_node_count &&
((encoded_node_count - config_.execute_initial_threshold_node_count) %
config_.execute_threshold_node_count ==
execute_threshold_node_count_ ==
0);

    // Create a new command buffer when the threshold is reached
if (reached_threshold) {
// But avoid it if this is the last node, since last cmd buf is submitted
// after the loop
if (reached_threshold && encoded_node_count != total_node_count) {
context_->submit_cmd_to_gpu(VK_NULL_HANDLE, false);
deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
context_->set_cmd(true);
Expand Down
8 changes: 8 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,14 @@ class ComputeGraph final {
// current Context's command buffer is submitted now.
size_t staging_nbytes_in_cmd_ = 0;

  // Number of execute nodes to encode before submitting the current command
  // buffer. If the number of command buffers created with
  // config.execute_threshold_node_count would exceed config.execute_max_cmds,
  // then this value is increased so that execution fits within the limit.
  // Otherwise, it is set to config.execute_threshold_node_count.
size_t execute_threshold_node_count_ = 0;

public:
//
// Accessors
Expand Down
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/graph/GraphConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ struct GraphConfig final {
// by taking more advantage of parallelism between the CPU and GPU.
size_t execute_initial_threshold_node_count = 0;

  // If this number is greater than 0, then during execute, create at most this
  // many command buffers.
size_t execute_max_cmds = 0;

vkapi::Adapter* external_adapter;

// Generate a default graph config with pre-configured settings
Expand Down
Loading