Commit 264fc3b

[ET-VK] 8/n Split dispatches between multiple command buffers. This diff adds a config to limit the maximum number of command buffers created when splitting execution between multiple command buffers.
Pull Request resolved: #13113

This diff introduces a new configuration option, `execute_max_cmds`, to limit the maximum number of command buffers created when splitting execution between multiple command buffers. This allows for more efficient management of command buffers, particularly when the number of nodes in the graph is large.

ghstack-source-id: 301393815
@exported-using-ghexport

Differential Revision: [D79575908](https://our.internmc.facebook.com/intern/diff/D79575908/)
1 parent 0a2bf93 commit 264fc3b

3 files changed (+44, −2 lines)

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 32 additions & 2 deletions
@@ -799,6 +799,33 @@ void ComputeGraph::prepare_pipelines() {
   pipeline_descriptors_ = std::unordered_set<
       vkapi::ComputePipelineCache::Key,
       vkapi::ComputePipelineCache::Hasher>();
+
+  const size_t total_node_count = execute_nodes_.size();
+  size_t init_threshold = config_.execute_initial_threshold_node_count;
+  size_t count_threshold = config_.execute_threshold_node_count;
+
+  // If a max command buffer count is set, adjust the thresholds so execution
+  // fits within the limit whenever the total command buffer count under the
+  // current thresholds would exceed execute_max_cmds.
+  if (config_.execute_max_cmds > 0) {
+    // Worst case scenario: one command buffer for the nodes before the
+    // initial threshold and config_.execute_max_cmds - 1 command buffers
+    // for the rest of the dispatches.
+
+    // If the command buffers created after offsetting init_threshold would
+    // exceed the max command buffer count, adjust the count threshold.
+    const bool slicing_exceeds_max_cmds = (total_node_count - init_threshold) >
+        count_threshold * (config_.execute_max_cmds - 1);
+    if (total_node_count > init_threshold && slicing_exceeds_max_cmds) {
+      // Increase the count threshold so the nodes remaining after the
+      // initial threshold fit in config_.execute_max_cmds - 1 buffers.
+      count_threshold = static_cast<size_t>(ceil(
+          (total_node_count - init_threshold) /
+          double(config_.execute_max_cmds - 1)));
+    }
+  }
+
+  execute_threshold_node_count_ = count_threshold;
 }
 
 void ComputeGraph::submit_current_cmd(const bool final_use) {
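To make the adjustment concrete, here is a minimal standalone sketch of the same math with hypothetical numbers (100 nodes, initial threshold 10, count threshold 20, execute_max_cmds 3; none of these values come from the diff):

#include <cmath>
#include <cstddef>
#include <cstdio>

int main() {
  const size_t total_node_count = 100; // execute_nodes_.size()
  const size_t init_threshold = 10;    // execute_initial_threshold_node_count
  size_t count_threshold = 20;         // execute_threshold_node_count
  const size_t execute_max_cmds = 3;   // the new config added by this diff

  // Unadjusted, the 90 nodes after the initial buffer would need
  // ceil(90 / 20) = 5 more command buffers, exceeding the limit of 3.
  const bool slicing_exceeds_max_cmds = (total_node_count - init_threshold) >
      count_threshold * (execute_max_cmds - 1);
  if (execute_max_cmds > 0 && total_node_count > init_threshold &&
      slicing_exceeds_max_cmds) {
    // Spread the remaining 90 nodes over execute_max_cmds - 1 = 2 buffers.
    count_threshold = static_cast<size_t>(std::ceil(
        (total_node_count - init_threshold) / double(execute_max_cmds - 1)));
  }
  std::printf("adjusted count_threshold = %zu\n", count_threshold); // prints 45
}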
@@ -888,6 +915,7 @@ void ComputeGraph::execute() {
   context_->set_cmd(/*reusable = */ true);
 
   context_->cmd_reset_querypool();
+  const size_t total_node_count = execute_nodes_.size();
   uint32_t encoded_node_count = 0;
 
   for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
@@ -900,11 +928,13 @@ void ComputeGraph::execute() {
     const bool reached_threshold =
         encoded_node_count >= config_.execute_initial_threshold_node_count &&
         ((encoded_node_count - config_.execute_initial_threshold_node_count) %
-             config_.execute_threshold_node_count ==
+             execute_threshold_node_count_ ==
          0);
 
     // Create a new command buffer when the threshold is reached,
-    if (reached_threshold) {
+    // but avoid it if this is the last node, since the last command buffer
+    // is submitted after the loop.
+    if (reached_threshold && encoded_node_count != total_node_count) {
       context_->submit_cmd_to_gpu(VK_NULL_HANDLE, false);
       deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
       context_->set_cmd(true);
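A rough simulation of the loop above shows how the adjusted threshold caps the number of command buffers. The counts below are hypothetical and the loop is a simplification of execute(), not code from this diff:

#include <cstddef>
#include <cstdio>

int main() {
  const size_t total_node_count = 100;
  const size_t init_threshold = 10;  // execute_initial_threshold_node_count
  const size_t count_threshold = 45; // execute_threshold_node_count_ after
                                     // the adjustment in prepare_pipelines()

  size_t cmd_buffers = 1; // the command buffer opened before the loop
  for (size_t encoded_node_count = 1; encoded_node_count <= total_node_count;
       ++encoded_node_count) {
    const bool reached_threshold = encoded_node_count >= init_threshold &&
        (encoded_node_count - init_threshold) % count_threshold == 0;
    // As in the diff: skip the split on the last node, since the final
    // command buffer is submitted after the loop.
    if (reached_threshold && encoded_node_count != total_node_count) {
      ++cmd_buffers; // submit the current buffer and open a new one
    }
  }
  std::printf("command buffers created: %zu\n", cmd_buffers); // prints 3
}

With the adjusted threshold of 45, the run stays at the three-buffer limit; with the original threshold of 20, the same loop would create six buffers.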

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 8 additions & 0 deletions
@@ -207,6 +207,14 @@ class ComputeGraph final {
   // current Context's command buffer is submitted now.
   size_t staging_nbytes_in_cmd_ = 0;
 
+  // Number of nodes to encode before the current command buffer is submitted.
+  // If the command buffers created with config.execute_threshold_node_count
+  // would exceed config.execute_max_cmds, execute_threshold_node_count_ is
+  // increased so the command buffers fit within the limit; otherwise it is
+  // set to config.execute_threshold_node_count.
+  size_t execute_threshold_node_count_ = 0;
+
  public:
   //
   // Accessors

backends/vulkan/runtime/graph/GraphConfig.h

Lines changed: 4 additions & 0 deletions
@@ -61,6 +61,10 @@ struct GraphConfig final {
   // by taking more advantage of parallelism between the CPU and GPU.
   size_t execute_initial_threshold_node_count = 0;
 
+  // If this number is greater than 0, create at most this many command
+  // buffers during execute.
+  size_t execute_max_cmds = 0;
+
   vkapi::Adapter* external_adapter;
 
   // Generate a default graph config with pre-configured settings
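A minimal usage sketch for the new option. The three field names are from this diff; the `vkcompute` namespace, the include path, and the surrounding setup are assumptions:

#include <executorch/backends/vulkan/runtime/graph/GraphConfig.h>

// Hypothetical configuration: cap execution at 4 command buffers while
// keeping the existing thresholds as hints. Only the three field names are
// confirmed by this diff; everything else here is illustrative.
vkcompute::GraphConfig make_config() {
  vkcompute::GraphConfig config{};
  config.execute_initial_threshold_node_count = 10;
  config.execute_threshold_node_count = 20;
  config.execute_max_cmds = 4; // 0 (the default) leaves the count unlimited
  return config;
}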
