Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Capacity aware partitioning #22766

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
5 changes: 4 additions & 1 deletion include/onnxruntime/core/framework/execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ struct OrtRunOptions;

namespace onnxruntime {

class IResourceAccountant;

/**
Logical device representation.
*/
Expand Down Expand Up @@ -130,7 +132,8 @@ class IExecutionProvider {
*/
virtual std::vector<std::unique_ptr<ComputeCapability>>
GetCapability(const onnxruntime::GraphViewer& graph_viewer,
const IKernelLookup& kernel_lookup) const;
const IKernelLookup& kernel_lookup,
IResourceAccountant* resource_accountant = nullptr) const;

/**
Get kernel registry per execution provider type.
Expand Down
48 changes: 48 additions & 0 deletions include/onnxruntime/core/framework/resource_accountant.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <optional>
#include <variant>

namespace onnxruntime {
#ifndef SHARED_PROVIDER
class Graph;
#else
struct Graph;
#endif

// Common holder for potentially different resource accounting
// for different EPs
using ResourceCount = std::variant<size_t>;

/// <summary>
/// This class is used for graph partitioning by EPs
/// It stores the cumulative amount of the resource such as
/// memory that would be consumed by the graph nodes if it is assigned to the EP.
///
/// It provides interfaces to add, remove and query the resource consumption.
///
/// Each provider may assign its own meaning to the resource according to its constraints.
/// </summary>
class IResourceAccountant {
protected:
IResourceAccountant() = default;
IResourceAccountant(const ResourceCount& threshold) : threshold_(threshold) {}

Check warning on line 32 in include/onnxruntime/core/framework/resource_accountant.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Single-parameter constructors should be marked explicit. [runtime/explicit] [4] Raw Output: include/onnxruntime/core/framework/resource_accountant.h:32: Single-parameter constructors should be marked explicit. [runtime/explicit] [4]

public:
virtual ~IResourceAccountant() = default;
virtual ResourceCount GetConsumedAmount() const = 0;
virtual void AddConsumedAmount(const ResourceCount& amount) = 0;
virtual void RemoveConsumedAmount(const ResourceCount& amount) = 0;
virtual ResourceCount ComputeResourceCount(const Graph&, size_t node_index) const = 0;
std::optional<ResourceCount> GetThreshold() const {
return threshold_;
}

private:
std::optional<ResourceCount> threshold_;
};

} // namespace onnxruntime
7 changes: 7 additions & 0 deletions include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,13 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
return ConstGraphNodes(nodes_, std::move(filter_func));
}

/** Compute node memory requirements, which is mostly initializers
and large attributes that are copied on device (special cases for some nodes)

Returns no value if the node was not found.
*/
size_t ComputeNodeMemoryUsage(NodeIndex) const;

/** Gets the maximum NodeIndex value used in the Graph.
WARNING: This actually returns the max index value used + 1.
*/
Expand Down
49 changes: 49 additions & 0 deletions include/onnxruntime/core/graph/indexed_sub_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <string>
#include <vector>

#include "core/common/inlined_containers_fwd.h"
#include "core/framework/resource_accountant.h"
#include "core/graph/basic_types.h"
#include "core/graph/onnx_protobuf.h"

Expand Down Expand Up @@ -70,9 +72,56 @@ struct IndexedSubGraph {
return meta_def_.get();
}

// Check if the accounting is enabled for the current EP
bool IsAccountingEnabled() const {
return resource_accountant != nullptr &&
nodes_costs.size() == nodes.size();
}

// Should call IsAccountingEnabled() first
// Takes the previously computed ResourceCount for the node
// (usually during GetCapabiilty())
// if present and adds it to the consumed amount
void AccountForNode(size_t cost_index) const {
assert(cost_index < nodes_costs.size());
if (nodes_costs[cost_index].has_value()) {
resource_accountant->AddConsumedAmount(*nodes_costs[cost_index]);
}
}

// This computes and accounts for the resource cost for the node that just
// been fused from other nodes, and the EP did not had a chance to compute the costs.
void ComputeAndAccountForNode(const Graph& graph, size_t node_index) const {
assert(resource_accountant != nullptr);
resource_accountant->AddConsumedAmount(resource_accountant->ComputeResourceCount(graph, node_index));
}

void SetAccountant(IResourceAccountant* res_accountant) {
resource_accountant = res_accountant;
}

// Append resource count to the list of costs for the nodes.
void AppendNodeCost(const ResourceCount& cost) {
assert(resource_accountant != nullptr);
nodes_costs.emplace_back(cost);
}

// Append an absent cost for the node that was already accounted for.
void AppendNodeEmptyCost() {
assert(resource_accountant != nullptr);
nodes_costs.emplace_back();
}

private:
// subgraph meta definition.
std::unique_ptr<MetaDef> meta_def_;
// Optional resource accountant for this subgraph.
IResourceAccountant* resource_accountant = nullptr;
// Vector with resource costs for nodes above. Should have the same size
// Some nodes that were previously accounted for because they already been assigned to an EP
// for example during multiple calls to GetCapabiility() will not have resource count present.
// may not have a resource count present, we skip it.
InlinedVector<std::optional<ResourceCount>> nodes_costs;
};

} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@
// The file saves configuration for partitioning node among logic streams
static const char* const kNodePartitionConfigFile = "session.node_partition_config_file";

/// "number > 0": enables Capacity Aware Partitioning for Cuda EP. The EP will place nodes on device
/// "0" : disables Capacity Aware Partitioning for Cuda EP. The EP will place nodes on device based on the default policy.

Check warning on line 201 in include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h:201: Lines should be <= 120 characters long [whitespace/line_length] [2]
/// until the device memory usage reaches the specified threshold in Kb. The default value is 0.
static const char* const kOrtSessionOptionsConfigPartitionSetCudaMemoryLimitKb = "session.node_partition_memory_limit_kb";

Check warning on line 203 in include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h:203: Lines should be <= 120 characters long [whitespace/line_length] [2]

// This Option allows setting affinities for intra op threads.
// Affinity string follows format:
// logical_processor_id,logical_processor_id;logical_processor_id,logical_processor_id
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/framework/execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

std::vector<std::unique_ptr<ComputeCapability>>
IExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
const IKernelLookup& kernel_lookup) const {
const IKernelLookup& kernel_lookup,

Check warning on line 16 in onnxruntime/core/framework/execution_provider.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Do not indent within a namespace. [whitespace/indent_namespace] [4] Raw Output: onnxruntime/core/framework/execution_provider.cc:16: Do not indent within a namespace. [whitespace/indent_namespace] [4]
IResourceAccountant*) const {

Check warning on line 17 in onnxruntime/core/framework/execution_provider.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Do not indent within a namespace. [whitespace/indent_namespace] [4] Raw Output: onnxruntime/core/framework/execution_provider.cc:17: Do not indent within a namespace. [whitespace/indent_namespace] [4]
std::vector<std::unique_ptr<ComputeCapability>> result;
for (const auto& node : graph.Nodes()) {
if (const KernelCreateInfo* kernel_create_info = kernel_lookup.LookUpKernel(node);
Expand Down
Loading
Loading