Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 224 additions & 14 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,209 @@ Status ReadBinaryFromFile(const std::string& file_path, uint8_t* buffer, size_t
return Status::OK();
}

// Reports whether the sustained-performance timer is currently counting down.
//
// Returns true only when a timer object exists, it reports being in use, it can
// supply its remaining duration, and that remaining time is strictly between 0
// and TimerResource::sustained_timer_duration_ (both expressed in microseconds).
bool QnnBackendManager::IsTimerThreadRunning() {
  if (!timer_ || !timer_->TimerInUse()) {
    return false;
  }

  std::chrono::microseconds time_left = std::chrono::microseconds::zero();
  if (!timer_->RemainingDuration(time_left)) {
    return false;
  }

  const auto time_left_us = static_cast<unsigned long>(time_left.count());
  return time_left_us > 0 && time_left_us < timer_resource_.sustained_timer_duration_;
}

// Applies an already-populated HTP power configuration for the given client id.
//
// The RPC polling time, the RPC control latency and the performance config are
// staged on the power config manager (in that order) and then submitted with
// SetPowerConfig(). Fails up front if backend setup has not completed; any
// failing step returns its error immediately and the remaining steps are skipped.
Status QnnBackendManager::SetHtpPowerCustomConfigs(uint32_t htp_power_config_client_id,
                                                   QnnHtpPerfInfrastructure_PowerConfig_t power_config,
                                                   uint32_t rpc_polling_time,
                                                   uint32_t rpc_control_latency) {
  ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete.");

  auto& config_manager = htp_power_config_manager_;

  // Stage the individual settings.
  ORT_RETURN_IF_ERROR(config_manager.AddRpcPollingTime(rpc_polling_time));
  ORT_RETURN_IF_ERROR(config_manager.AddRpcControlLatency(rpc_control_latency));
  ORT_RETURN_IF_ERROR(config_manager.AddHtpPerformanceConfig(power_config));

  // Submit everything staged above for this client id.
  ORT_RETURN_IF_ERROR(config_manager.SetPowerConfig(htp_power_config_client_id, GetQnnInterface()));

  return Status::OK();
}

// Drives the timer-based power handling used for the sustained-high-performance
// and burst modes, based on the state currently stored in graph_state_.
//
//   RUN_DONE    - abort a running countdown, (re)launch the timer for
//                 sustained_timer_duration_ and mark the caller as idle.
//   RUN_START,
//   INIT_START  - if a countdown is running just abort it; otherwise apply the
//                 requested performance mode. Marks the caller as busy.
//   INIT_DONE   - apply the relaxed (DCVS default) config and mark the caller idle.
//   TIMEOUT     - apply the relaxed config, but only when the caller is idle.
//
// Every handled state resets graph_state_ to NONE, except TIMEOUT while the
// caller is busy and RUN_DONE when the timer cannot be launched (early return).
//
// Preconditions: timer_ must be non-null (SetState() checks this before calling).
// Holds perf_mutex_ for the duration of the call.
//
// Returns the status of the last power-config operation attempted, or an error
// if the timer could not be launched.
Status QnnBackendManager::SetSustainedPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode, uint32_t rpc_polling_time, uint32_t rpc_control_latency) {
  std::lock_guard<std::mutex> lk(perf_mutex_);
  Status status = Status::OK();

  // Builds the relaxed config and applies it. The config is only applied when
  // building it succeeded, so a build failure is reported to the caller instead
  // of being overwritten by the status of the apply step.
  const auto apply_relaxed_config = [&]() -> Status {
    QnnHtpPerfInfrastructure_PowerConfig_t relaxed_cfg{};
    Status result = htp_power_config_manager_.SetRelaxedPerfPowerConfig(relaxed_cfg, htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
    if (result.IsOK()) {
      result = SetHtpPowerCustomConfigs(htp_power_config_client_id, relaxed_cfg, rpc_polling_time, rpc_control_latency);
    }
    return result;
  };

  switch (graph_state_) {
    case GraphState::RUN_DONE: {
      if (IsTimerThreadRunning()) {
        timer_->AbortTimer();
      }
      // Copy the constant into a local before building the duration: the
      // std::chrono::duration constructor takes its argument by const reference,
      // and binding the in-class-initialized static member directly would
      // odr-use it (which would require an out-of-class definition).
      const unsigned long sustained_duration_us = timer_resource_.sustained_timer_duration_;
      std::chrono::microseconds sustained_duration(sustained_duration_us);
      ORT_RETURN_IF_NOT(timer_->Launch(sustained_duration), "Not able to launch timer thread.");
      graph_state_ = GraphState::NONE;
      timer_resource_.caller_busy_ = false;
      break;
    }
    case GraphState::RUN_START:
    case GraphState::INIT_START:
      // A running countdown is simply cancelled; the requested mode is only
      // (re)applied when no countdown was in flight.
      if (IsTimerThreadRunning()) {
        timer_->AbortTimer();
      } else {
        status = SetHtpPowerConfigs(htp_power_config_client_id, performance_mode, rpc_polling_time, rpc_control_latency);
      }
      graph_state_ = GraphState::NONE;
      timer_resource_.caller_busy_ = true;
      break;
    case GraphState::INIT_DONE:
      status = apply_relaxed_config();
      graph_state_ = GraphState::NONE;
      timer_resource_.caller_busy_ = false;
      break;
    case GraphState::TIMEOUT:
      // Leave the current configuration alone while the caller is still busy.
      if (!timer_resource_.caller_busy_) {
        status = apply_relaxed_config();
        graph_state_ = GraphState::NONE;
      }
      break;
    default:
      LOGS(*logger_, VERBOSE) << "Invalid graph state";
      break;
  }
  return status;
}

// Applies a power configuration for the non-timer performance modes, based on
// the state currently stored in graph_state_.
//
//   RUN_DONE / INIT_DONE   - pick a config from performance_mode:
//                              low-balanced, balanced, high-performance -> relaxed config
//                              extreme power saver                      -> extreme-low config
//                              low/high/plain power saver               -> released config
//                            An unrecognised mode is only logged.
//   RUN_START / INIT_START - apply the requested performance mode directly.
//
// Handled states reset graph_state_ to NONE; any other state is only logged.
// Holds perf_mutex_ for the duration of the call.
//
// Returns the status of the first failing step, or OK.
Status QnnBackendManager::SetPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode, uint32_t rpc_polling_time, uint32_t rpc_control_latency) {
  std::lock_guard<std::mutex> lk(perf_mutex_);
  Status status = Status::OK();

  switch (graph_state_) {
    case GraphState::RUN_DONE:
    case GraphState::INIT_DONE: {
      // In every branch below the config is applied only if it was built
      // successfully, so a build failure is not hidden by the apply status.
      QnnHtpPerfInfrastructure_PowerConfig_t htp_performance_cfg{};
      switch (performance_mode) {
        case qnn::HtpPerformanceMode::kHtpLowBalanced:
        case qnn::HtpPerformanceMode::kHtpBalanced:
        case qnn::HtpPerformanceMode::kHtpHighPerformance:
          status = htp_power_config_manager_.SetRelaxedPerfPowerConfig(htp_performance_cfg, htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
          if (status.IsOK()) {
            status = SetHtpPowerCustomConfigs(htp_power_config_client_id, htp_performance_cfg, rpc_polling_time, rpc_control_latency);
          }
          break;
        case qnn::HtpPerformanceMode::kHtpExtremePowerSaver:
          status = htp_power_config_manager_.SetExtremeLowPerfPowerConfig(htp_performance_cfg, htp_power_config_client_id);
          if (status.IsOK()) {
            status = SetHtpPowerCustomConfigs(htp_power_config_client_id, htp_performance_cfg, rpc_polling_time, rpc_control_latency);
          }
          break;
        case qnn::HtpPerformanceMode::kHtpLowPowerSaver:
        case qnn::HtpPerformanceMode::kHtpHighPowerSaver:
        case qnn::HtpPerformanceMode::kHtpPowerSaver:
          status = htp_power_config_manager_.SetReleasedPerfPowerConfig(htp_performance_cfg, htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
          if (status.IsOK()) {
            status = SetHtpPowerCustomConfigs(htp_power_config_client_id, htp_performance_cfg, rpc_polling_time, rpc_control_latency);
          }
          break;
        default:
          LOGS(*logger_, VERBOSE) << "Invalid performance mode";
          break;
      }
      graph_state_ = GraphState::NONE;
      break;
    }
    case GraphState::RUN_START:
    case GraphState::INIT_START:
      status = SetHtpPowerConfigs(htp_power_config_client_id, performance_mode, rpc_polling_time, rpc_control_latency);
      graph_state_ = GraphState::NONE;
      break;
    default:
      LOGS(*logger_, VERBOSE) << "Invalid graph state";
      break;
  }
  return status;
}

// Records a new graph state and applies the matching power configuration.
//
// Nothing happens when `state` equals the stored state. Otherwise the state is
// stored and handling depends on perfMode:
//   - sustained high performance / burst: requires the timer to exist and
//     delegates to SetSustainedPerformance();
//   - default: aborts an in-use timer and returns OK;
//   - any other mode: aborts an in-use timer and delegates to SetPerformance().
//
// Holds state_mutex_ for the duration of the call.
Status QnnBackendManager::SetState(GraphState state, uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode perfMode, uint32_t rpc_polling_time, uint32_t rpc_control_latency) {
  std::lock_guard<std::mutex> lk(state_mutex_);

  if (state == graph_state_) {
    return Status::OK();
  }
  graph_state_ = state;

  const bool timer_driven_mode = perfMode == qnn::HtpPerformanceMode::kHtpSustainedHighPerformance ||
                                 perfMode == qnn::HtpPerformanceMode::kHtpBurst;
  if (timer_driven_mode) {
    ORT_RETURN_IF(timer_ == nullptr, "timer is not started");
    return SetSustainedPerformance(htp_power_config_client_id, perfMode, rpc_polling_time, rpc_control_latency);
  }

  // The remaining modes do not use the timer, so stop it if it is in use.
  if (timer_ && timer_->TimerInUse()) {
    timer_->AbortTimer();
  }

  if (perfMode == qnn::HtpPerformanceMode::kHtpDefault) {
    return Status::OK();
  }
  return SetPerformance(htp_power_config_client_id, perfMode, rpc_polling_time, rpc_control_latency);
}

void QnnBackendManager::TimerCallback(void* user_data) {
TimerCallbackArg* args = static_cast<TimerCallbackArg*>(user_data);
QnnBackendManager* instance = args->instance_;
auto rt = instance->SetState(GraphState::TIMEOUT, args->power_config_id_, qnn::HtpPerformanceMode::kHtpSustainedHighPerformance, 0, 0);
if (rt != Status::OK()) {
LOGS_DEFAULT(VERBOSE) << "State update failed";
}
}

void QnnBackendManager::CreateTimerThread(uint32_t htp_power_config_client_id) {
std::lock_guard<std::mutex> lk(state_mutex_);
if (timer_ == nullptr) {
std::unique_ptr<Timer> temp(new Timer());
if (temp != nullptr) {
timer_ = std::move(temp);
timer_callback_arg_ = std::make_unique<TimerCallbackArg>(htp_power_config_client_id, this);
if (timer_callback_arg_ == nullptr) {
LOGS(*logger_, VERBOSE) << "Failed to create Timer argument";
timer_.reset();
return;
}
if (!timer_->Initialize(TimerCallback, timer_callback_arg_.get())) {
LOGS(*logger_, VERBOSE) << "Failed to create timer to set performance";
timer_callback_arg_.reset();
timer_.reset();
}
} else {
LOGS(*logger_, VERBOSE) << "Failed: Timer is nullptr";
}
} else {
LOGS(*logger_, VERBOSE) << "Timer already created";
}
}

// Tears down the sustained-performance timer and applies the released power
// configuration for the given client id.
//
// If a timer exists it is de-initialized and the graph state / caller-busy flag
// are reset. The callback argument and the timer are then destroyed (in that
// order). Finally the released config is built and, only if that succeeded,
// applied with zero RPC polling time / control latency. Any failure in building
// or applying the config is logged; nothing is returned to the caller.
//
// Holds state_mutex_ for the duration of the call.
// NOTE(review): TimerCallback() -> SetState() also takes state_mutex_. If
// Timer::DeInitialize() waits for an in-flight callback, this could block
// forever - confirm against the Timer implementation.
void QnnBackendManager::ReleaseTimerThread(uint32_t htp_power_config_client_id) {
  std::lock_guard<std::mutex> lk(state_mutex_);

  if (timer_ != nullptr) {
    timer_->DeInitialize();
    graph_state_ = GraphState::NONE;
    timer_resource_.caller_busy_ = false;
  }

  timer_callback_arg_.reset();
  timer_.reset();

  QnnHtpPerfInfrastructure_PowerConfig_t htp_performance_cfg{};
  Status status = htp_power_config_manager_.SetReleasedPerfPowerConfig(htp_performance_cfg, htp_power_config_client_id, onnxruntime::qnn::DcvsState_t::DCVS_DEFAULT);
  if (status.IsOK()) {
    // Only apply a config that was built successfully; otherwise keep the
    // build error so it is reported below.
    status = SetHtpPowerCustomConfigs(htp_power_config_client_id, htp_performance_cfg, 0, 0);
  }
  if (!status.IsOK()) {
    LOGS_DEFAULT(VERBOSE) << "Not able to set Power config to release";
  }
}

Status QnnBackendManager::ParseLoraConfig(std::string lora_config_path) {
LOGS_DEFAULT(INFO) << "Acquiring the QnnInterface " << lora_config_path;

Expand Down Expand Up @@ -1173,7 +1376,6 @@ Status QnnBackendManager::CreateContext(bool enable_htp_weight_sharing) {
LOGS_DEFAULT(INFO) << "Context created already.";
return Status::OK();
}

QnnContext_Config_t context_config_weight_sharing = QNN_CONTEXT_CONFIG_INIT;
QnnHtpContext_CustomConfig_t custom_config;
custom_config.option = QNN_HTP_CONTEXT_CONFIG_OPTION_WEIGHT_SHARING_ENABLED;
Expand Down Expand Up @@ -1693,10 +1895,21 @@ Status QnnBackendManager::SetupBackend(const logging::Logger& logger,
LOGS_DEFAULT(WARNING) << "Failed to setup so cleaning up";
ReleaseResources();
}

return status;
}

// Obtains an HTP power config id and then sets up the performance timer for it.
//
// On success `htp_power_config_id` holds the id produced by
// CreateHtpPowerCfgId(). If creating the id fails, that error is returned and
// no timer is created. A timer-creation failure is not reported here
// (CreateTimerThread() returns void).
Status QnnBackendManager::InitializePowerCfgId(uint32_t device_id,
                                               uint32_t core_id,
                                               uint32_t& htp_power_config_id) {
  ORT_RETURN_IF_ERROR(CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id));

  CreateTimerThread(htp_power_config_id);

  return Status::OK();
}

// Counterpart of InitializePowerCfgId(): releases the performance timer for the
// given power config id and then destroys the id itself.
//
// The timer is released first, while the id is still valid, because
// ReleaseTimerThread() applies a final power config using that id.
// Returns the error from DestroyHTPPowerConfigID() if destroying the id fails.
Status QnnBackendManager::DeInitializePowerCfgId(uint32_t htp_power_config_id) {
  ReleaseTimerThread(htp_power_config_id);

  ORT_RETURN_IF_ERROR(DestroyHTPPowerConfigID(htp_power_config_id));

  return Status::OK();
}

Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) {
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
Expand Down Expand Up @@ -1742,21 +1955,18 @@ Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id& thr

auto htp_power_config_id = htp_power_configs.power_config_id;
if (pre_run) {
// add in htp_power_configs the default power config id also so to run when we execute
if (htp_power_configs.pre_run_perf_mode.has_value()) {
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.pre_run_perf_mode,
htp_power_config_id));
}

if (htp_power_configs.rpc_control_latency.has_value()) {
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(*htp_power_configs.rpc_control_latency));
ORT_RETURN_IF_ERROR(SetState(onnxruntime::qnn::GraphState::RUN_START, htp_power_config_id, *htp_power_configs.pre_run_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency));
} else if (htp_power_configs.default_perf_mode.has_value()) {
ORT_RETURN_IF_ERROR(SetState(onnxruntime::qnn::GraphState::RUN_START, htp_power_config_id, *htp_power_configs.default_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency));
}

if (htp_power_configs.rpc_polling_time.has_value()) {
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(*htp_power_configs.rpc_polling_time));
} else {
if (htp_power_configs.post_run_perf_mode.has_value()) {
ORT_RETURN_IF_ERROR(SetState(onnxruntime::qnn::GraphState::RUN_DONE, htp_power_config_id, *htp_power_configs.post_run_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency));
} else if (htp_power_configs.default_perf_mode.has_value()) {
ORT_RETURN_IF_ERROR(SetState(onnxruntime::qnn::GraphState::RUN_DONE, htp_power_config_id, *htp_power_configs.default_perf_mode, *htp_power_configs.rpc_polling_time, *htp_power_configs.rpc_control_latency));
}
} else if (htp_power_configs.post_run_perf_mode.has_value()) {
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.post_run_perf_mode,
htp_power_config_id));
}

ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_id, GetQnnInterface()));
Expand Down
62 changes: 55 additions & 7 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include "core/providers/qnn/builder/qnn_htp_power_config_manager.h"
#include "core/providers/qnn/builder/qnn_profile_serializer.h"
#include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h"
#include "core/providers/qnn/builder/timer.h"
#include <HTP/QnnHtpPerfInfrastructure.h>

#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
#include "core/providers/qnn/builder/qnn_file_mapping_interface.h"
Expand Down Expand Up @@ -126,6 +128,16 @@ struct QnnBackendManagerConfig {
bool skip_qnn_version_check;
};

// Graph states to tune the power/performance configurations.
// A state is reported through QnnBackendManager::SetState(), which stores it and
// lets SetSustainedPerformance() / SetPerformance() pick the power configuration.
enum class GraphState {
// Handled the same way as RUN_START by both performance setters.
INIT_START,
// Sustained/burst handling applies the relaxed config and marks the caller idle;
// otherwise the config is chosen from the requested performance mode.
INIT_DONE,
// Sustained/burst handling aborts a running timer or else applies the requested
// mode, and marks the caller busy; otherwise the requested mode is applied.
RUN_START,
// Sustained/burst handling (re)launches the timer and marks the caller idle;
// otherwise the config is chosen from the requested performance mode.
RUN_DONE,
// Reported by the timer callback; the relaxed config is applied only while the
// caller is not busy.
TIMEOUT,
// Initial value; also stored once a reported state has been handled.
NONE
};

class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager> {
private:
// private tag to pass to constructor to ensure that constructor cannot be directly called externally
Expand Down Expand Up @@ -173,12 +185,9 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
std::shared_ptr<qnn::RpcMemLibrary> rpcmem_library,
std::unordered_map<std::string, std::unique_ptr<std::vector<std::string>>>& context_bin_map);

Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);
Status InitializePowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);

Status SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode,
uint32_t rpc_polling_time,
uint32_t rpc_control_latency);
Status DeInitializePowerCfgId(uint32_t htp_power_config_id);

Status SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run);

Expand Down Expand Up @@ -224,8 +233,6 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>

const std::string& GetSdkVersion() { return sdk_build_version_; }

Status DestroyHTPPowerConfigID(uint32_t htp_power_config_id);

Status GetMaxSpillFillBufferSize(unsigned char* buffer,
uint64_t buffer_length,
uint64_t& max_spill_fill_buffer_size);
Expand Down Expand Up @@ -256,6 +263,7 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
bool ProfilingEnabled() { return profiling_enabled_; }
#endif

Status SetState(GraphState state, uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode perfMode, uint32_t rpc_polling_time, uint32_t rpc_control_latency);
bool FileMappingIsEnabled() {
return file_mapped_weights_enabled_;
}
Expand Down Expand Up @@ -343,6 +351,29 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>

void* LibFunction(void* handle, const char* symbol, std::string& error_msg);

// True while the timer is in use and its remaining time is strictly between 0
// and the sustained timer duration.
bool IsTimerThreadRunning();

// Timer-based power handling for the sustained-high-performance and burst modes;
// called from SetState().
Status SetSustainedPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode, uint32_t rpc_polling_time, uint32_t rpc_control_latency);

// Power handling for the remaining (non-default) performance modes; called from
// SetState().
Status SetPerformance(uint32_t htp_power_config_client_id, qnn::HtpPerformanceMode performance_mode, uint32_t rpc_polling_time, uint32_t rpc_control_latency);

// Callback registered with the timer; user_data is the TimerCallbackArg passed
// to Timer::Initialize(). Reports GraphState::TIMEOUT via SetState().
static void TimerCallback(void* user_data);

// Called by InitializePowerCfgId() before the timer is created.
Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);

// Called by DeInitializePowerCfgId() after the timer has been released.
Status DestroyHTPPowerConfigID(uint32_t htp_power_config_id);

// Creates and initializes timer_ and its callback argument (no-op if a timer
// already exists).
void CreateTimerThread(uint32_t htp_power_config_client_id);

// De-initializes and destroys the timer, then applies the released power config.
void ReleaseTimerThread(uint32_t htp_power_config_client_id);

// Used by SetSustainedPerformance() and SetPerformance() for the RUN_START and
// INIT_START states.
Status SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode,
uint32_t rpc_polling_time,
uint32_t rpc_control_latency);

// Stages the RPC polling time, RPC control latency and the given power config,
// then submits them for the client id. Requires backend setup to be complete.
Status SetHtpPowerCustomConfigs(uint32_t htp_power_config_client_id, QnnHtpPerfInfrastructure_PowerConfig_t power_config, uint32_t rpc_polling_time, uint32_t rpc_control_latency);

template <class T>
inline T ResolveSymbol(void* lib_handle, const char* sym, const logging::Logger& logger) {
std::string error_msg = "";
Expand Down Expand Up @@ -530,6 +561,23 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
// Mapping of thread id to on-run-start/end power configs
std::mutex per_thread_power_configs_mutex_;
std::unordered_map<std::thread::id, PerThreadHtpPowerConfigs_t> per_thread_power_configs_;
std::mutex perf_mutex_;
std::mutex state_mutex_;
std::unique_ptr<Timer> timer_;
// State shared between the performance setters and the timer handling.
struct TimerResource {
  // Duration, in microseconds, for which the timer is launched after a run
  // completes. Declared constexpr (implicitly inline in C++17) so the member has
  // a definition even when it is bound to a const reference, e.g. when it is
  // passed to a std::chrono::duration constructor.
  static constexpr unsigned long sustained_timer_duration_ = 300000;

  // Set to true when an INIT_START/RUN_START state is handled and to false on
  // INIT_DONE/RUN_DONE; a TIMEOUT is ignored while it is true.
  std::atomic<bool> caller_busy_{false};
};
TimerResource timer_resource_;
std::atomic<GraphState> graph_state_ = GraphState::NONE;
// Argument bundle handed to TimerCallback() through the timer's user-data pointer.
struct TimerCallbackArg {
  TimerCallbackArg(uint32_t power_config_id, QnnBackendManager* backend_manager)
      : power_config_id_{power_config_id}, instance_{backend_manager} {}

  // Power config client id forwarded to SetState() when the timer fires.
  uint32_t power_config_id_;
  // Manager whose SetState() is invoked by the callback (not owned).
  QnnBackendManager* instance_;
};
std::unique_ptr<TimerCallbackArg> timer_callback_arg_;

std::shared_ptr<qnn::RpcMemLibrary> rpcmem_library_ = nullptr;
};
Expand Down
Loading