Skip to content

Commit 3bd94e2

Browse files
authored
raft: Fix the max concurrency of IngestSST (release-8.5) (#10610)
close #10605 * Parse `raftstore.apply-low-priority-pool-size` in `KVStore::fetchProxyConfig` * `KVStore::getMaxPrehandleSubtaskSize` return different max concurrency according to the job_type Signed-off-by: JaySon-Huang <tshent@qq.com>
1 parent e3fe665 commit 3bd94e2

File tree

7 files changed

+382
-40
lines changed

7 files changed

+382
-40
lines changed

dbms/src/Debug/MockKVStore/MockRaftStoreProxy.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ struct MockRaftStoreProxy : MutexLockWrap
274274
, table_id(1)
275275
, cluster_ver(RaftstoreVer::V1)
276276
{
277-
proxy_config_string = R"({"raftstore":{"snap-handle-pool-size":4},"server":{"engine-addr":"123"}})";
277+
proxy_config_string
278+
= R"({"raftstore":{"snap-handle-pool-size":4,"apply-low-priority-pool-size":8},"server":{"engine-addr":"123"}})";
278279
}
279280

280281
LoggerPtr log;

dbms/src/Storages/KVStore/KVStore.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,22 +133,28 @@ void KVStore::fetchProxyConfig(const TiFlashRaftProxyHelper * proxy_helper)
133133
Poco::JSON::Parser parser;
134134
auto obj = parser.parse(cpp_string);
135135
auto ptr = obj.extract<Poco::JSON::Object::Ptr>();
136+
136137
auto raftstore = ptr->getObject("raftstore");
137138
proxy_config_summary.snap_handle_pool_size = raftstore->getValue<uint64_t>("snap-handle-pool-size");
139+
proxy_config_summary.apply_low_priority_pool_size
140+
= raftstore->getValue<uint64_t>("apply-low-priority-pool-size");
141+
138142
auto server = ptr->getObject("server");
139143
proxy_config_summary.engine_addr = server->getValue<std::string>("engine-addr");
144+
140145
LOG_INFO(
141146
log,
142-
"Parsed proxy config: snap_handle_pool_size={} engine_addr={}",
147+
"Parsed proxy config: snap_handle_pool_size={} apply_low_priority_pool_size={} engine_addr={}",
143148
proxy_config_summary.snap_handle_pool_size,
149+
proxy_config_summary.apply_low_priority_pool_size,
144150
proxy_config_summary.engine_addr);
145151
proxy_config_summary.valid = true;
146152
}
147153
catch (...)
148154
{
149155
proxy_config_summary.valid = false;
150-
// we don't care
151-
LOG_WARNING(log, "Can't parse config from proxy {}", cpp_string);
156+
// ignore the error and log an error message
157+
tryLogCurrentWarningException(log, fmt::format("Can't parse config from proxy {}", cpp_string));
152158
}
153159
}
154160
}

dbms/src/Storages/KVStore/KVStore.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,10 @@ static_assert(magic_enum::enum_count<PersistRegionReason>() == sizeof(PersistReg
113113
struct ProxyConfigSummary
114114
{
115115
bool valid = false;
116+
// The max concurrency of PreHandleSnapshot tasks in proxy.
116117
size_t snap_handle_pool_size = 0;
118+
// The max concurrency of IngestSST tasks in proxy.
119+
size_t apply_low_priority_pool_size = 0;
117120
std::string engine_addr;
118121
};
119122

@@ -144,7 +147,7 @@ class KVStore final : private boost::noncopyable
144147
metapb::Store & debugMutStoreMeta();
145148
FileUsageStatistics getFileUsageStatistics() const;
146149
// Proxy will validate and refit the config items from the toml file.
147-
const ProxyConfigSummary & getProxyConfigSummay() const { return proxy_config_summary; }
150+
const ProxyConfigSummary & getProxyConfigSummary() const { return proxy_config_summary; }
148151
void reportThreadAllocInfo(std::string_view, ReportThreadAllocateInfoType type, uint64_t value);
149152
static void reportThreadAllocBatch(std::string_view, ReportThreadAllocateInfoBatch data);
150153
JointThreadInfoJeallocMapPtr getJointThreadInfoJeallocMap() const { return joint_memory_allocation_map; }
@@ -229,7 +232,8 @@ class KVStore final : private boost::noncopyable
229232
size_t getOngoingPrehandleTaskCount() const;
230233
size_t getOngoingPrehandleSubtaskCount() const;
231234
EngineStoreApplyRes handleIngestSST(UInt64 region_id, SSTViewVec, UInt64 index, UInt64 term, TMTContext & tmt);
232-
size_t getMaxParallelPrehandleSize() const;
235+
size_t getMaxParallelPrehandleSize(DM::FileConvertJobType job_type) const;
236+
size_t getMaxPrehandleSubtaskSize(DM::FileConvertJobType job_type) const;
233237

234238
public: // Raft Read
235239
void addReadIndexEvent(Int64 f) { read_index_event_flag += f; }

dbms/src/Storages/KVStore/MultiRaft/PrehandleSnapshot.cpp

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,11 @@ void PreHandlingTrace::waitForSubtaskResources(uint64_t region_id, size_t parall
122122
.Observe(watch.elapsedSeconds());
123123
LOG_INFO(
124124
log,
125-
"Prehandle resource acquired after {:.3f} seconds, region_id={} parallel={}",
125+
"Prehandle resource acquired after {:.3f} seconds, region_id={} parallel={} limit={}",
126126
watch.elapsedSeconds(),
127127
region_id,
128-
parallel);
128+
parallel,
129+
parallel_subtask_limit);
129130
CurrentMetrics::sub(CurrentMetrics::RaftNumWaitedParallelPrehandlingTasks);
130131
}
131132

@@ -301,20 +302,41 @@ PrehandleResult KVStore::preHandleSnapshotToFiles(
301302
return PrehandleResult{};
302303
}
303304

304-
size_t KVStore::getMaxParallelPrehandleSize() const
305+
size_t KVStore::getMaxParallelPrehandleSize(DM::FileConvertJobType job_type) const
305306
{
306-
const auto & proxy_config = getProxyConfigSummay();
307-
size_t total_concurrency = 0;
308-
if (proxy_config.valid)
307+
return getMaxPrehandleSubtaskSize(job_type);
308+
}
309+
310+
size_t KVStore::getMaxPrehandleSubtaskSize(DM::FileConvertJobType job_type) const
311+
{
312+
const auto & proxy_config = getProxyConfigSummary();
313+
switch (job_type)
314+
{
315+
case DM::FileConvertJobType::ApplySnapshot:
309316
{
310-
total_concurrency = proxy_config.snap_handle_pool_size;
317+
if (proxy_config.valid && proxy_config.snap_handle_pool_size > 0)
318+
{
319+
return proxy_config.snap_handle_pool_size;
320+
}
321+
else
322+
{
323+
auto cpu_num = std::thread::hardware_concurrency();
324+
return static_cast<size_t>(std::clamp(cpu_num * 0.7, 2.0, 16.0));
325+
}
311326
}
312-
else
327+
case DM::FileConvertJobType::IngestSST:
313328
{
314-
auto cpu_num = std::thread::hardware_concurrency();
315-
total_concurrency = static_cast<size_t>(std::clamp(cpu_num * 0.7, 2.0, 16.0));
329+
if (proxy_config.valid && proxy_config.apply_low_priority_pool_size > 0)
330+
{
331+
return proxy_config.apply_low_priority_pool_size;
332+
}
333+
else
334+
{
335+
auto cpu_num = std::thread::hardware_concurrency();
336+
return std::max(1, static_cast<size_t>(cpu_num));
337+
}
338+
}
316339
}
317-
return total_concurrency;
318340
}
319341

320342
// If size is 0, do not parallel prehandle for this snapshot, which is regular.
@@ -323,7 +345,8 @@ static inline std::pair<std::vector<std::string>, size_t> getSplitKey(
323345
LoggerPtr log,
324346
KVStore * kvstore,
325347
RegionPtr new_region,
326-
std::shared_ptr<DM::SSTFilesToBlockInputStream> sst_stream)
348+
std::shared_ptr<DM::SSTFilesToBlockInputStream> sst_stream,
349+
DM::FileConvertJobType job_type)
327350
{
328351
// We don't use this is the single snapshot is small, due to overhead in decoding.
329352
constexpr size_t default_parallel_prehandle_threshold = 1 * 1024 * 1024 * 1024;
@@ -356,7 +379,12 @@ static inline std::pair<std::vector<std::string>, size_t> getSplitKey(
356379
// so we must add 1 here.
357380
auto ongoing_count = kvstore->getOngoingPrehandleSubtaskCount() + 1;
358381
uint64_t want_split_parts = 0;
359-
auto total_concurrency = kvstore->getMaxParallelPrehandleSize();
382+
// If total_concurrency is 4, and prehandle-pool is sized 8,
383+
// and if there are 4 ongoing snapshots, then we will not parallel prehandling any new snapshot.
384+
// This is because in serverless, too much parallelism causes performance reduction.
385+
// So, if there is already enough parallelism that is used to prehandle,
386+
// it is not necessary to manually split a snapshot.
387+
auto total_concurrency = kvstore->getMaxParallelPrehandleSize(job_type);
360388
if (total_concurrency + 1 > ongoing_count)
361389
{
362390
// Current thread takes 1 which is in `ongoing_count`.
@@ -385,7 +413,7 @@ static inline std::pair<std::vector<std::string>, size_t> getSplitKey(
385413
if (split_keys.size() + 1 < want_split_parts)
386414
{
387415
// If there are too few split keys, the `split_keys` itself may be not be uniformly distributed,
388-
// it is even better that we still handle it sequantially.
416+
// it is even better that we still handle it sequentially.
389417
split_keys.clear();
390418
LOG_INFO(
391419
log,
@@ -717,8 +745,11 @@ PrehandleResult KVStore::preHandleSSTsToDTFiles(
717745
};
718746

719747
// `split_keys` do not begin with 'z'.
720-
auto [split_keys, approx_bytes] = getSplitKey(log, this, new_region, sst_stream);
721-
prehandling_trace.waitForSubtaskResources(region_id, split_keys.size() + 1, getMaxParallelPrehandleSize());
748+
auto [split_keys, approx_bytes] = getSplitKey(log, this, new_region, sst_stream, job_type);
749+
prehandling_trace.waitForSubtaskResources(
750+
region_id,
751+
split_keys.size() + 1,
752+
getMaxPrehandleSubtaskSize(job_type));
722753
ReadFromStreamResult result;
723754
if (split_keys.empty())
724755
{

dbms/src/Storages/KVStore/TMTContext.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -173,15 +173,13 @@ void TMTContext::initS3GCManager(const TiFlashRaftProxyHelper * proxy_helper)
173173
if (S3::ClientFactory::instance().isEnabled() && !context.getSharedContextDisagg()->isDisaggregatedComputeMode())
174174
{
175175
kvstore->fetchProxyConfig(proxy_helper);
176-
if (kvstore->getProxyConfigSummay().valid)
176+
if (kvstore->getProxyConfigSummary().valid)
177177
{
178-
LOG_INFO(
179-
Logger::get(),
180-
"Build s3gc manager from proxy's conf engine_addr={}",
181-
kvstore->getProxyConfigSummay().engine_addr);
178+
auto engine_addr = kvstore->getProxyConfigSummary().engine_addr;
179+
LOG_INFO(Logger::get(), "Build s3gc manager from proxy's conf engine_addr={}", engine_addr);
182180
s3gc_owner = OwnerManager::createS3GCOwner(
183181
context,
184-
/*id*/ kvstore->getProxyConfigSummay().engine_addr,
182+
/*id*/ engine_addr,
185183
etcd_client);
186184
}
187185
else if (!raftproxy_config.pd_addrs.empty())

dbms/src/Storages/KVStore/tests/gtest_raftstore_v2.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,8 @@ try
871871
{
872872
auto & ctx = TiFlashTestEnv::getGlobalContext();
873873
proxy_instance->cluster_ver = RaftstoreVer::V2;
874-
proxy_instance->proxy_config_string = R"({"raftstore":{"snap-handle-pool-size":3},"server":{"engine-addr":"123"}})";
874+
proxy_instance->proxy_config_string
875+
= R"({"raftstore":{"snap-handle-pool-size":3, "apply-low-priority-pool-size":8},"server":{"engine-addr":"123"}})";
875876
KVStore & kvs = getKVS();
876877
kvs.fetchProxyConfig(proxy_helper.get());
877878
ASSERT_NE(proxy_helper->sst_reader_interfaces.fn_key, nullptr);
@@ -912,7 +913,7 @@ try
912913
MockSSTReader::getMockSSTData().clear();
913914
DB::FailPointHelper::enablePauseFailPoint(DB::FailPoints::pause_before_prehandle_subtask, 100);
914915
std::vector<std::thread> ths;
915-
auto runId = [&](size_t ths_id) {
916+
auto run_id = [&](size_t ths_id) {
916917
auto [value_write, value_default] = proxy_instance->generateTiKVKeyValue(111, 999);
917918
MockSSTGenerator default_cf{region_ids[ths_id], table_id, ColumnFamilyType::Default};
918919
for (HandleID h = table_limits[ths_id]; h < table_limits[ths_id + 1]; h++)
@@ -942,13 +943,13 @@ try
942943
std::nullopt);
943944
}
944945
};
945-
ths.push_back(std::thread(runId, 0));
946+
ths.push_back(std::thread(run_id, 0));
946947
std::this_thread::sleep_for(std::chrono::milliseconds(300));
947948

948949
ASSERT_EQ(kvs.getOngoingPrehandleTaskCount(), 1);
949950
for (size_t ths_id = 1; ths_id < region_ids.size(); ths_id++)
950951
{
951-
ths.push_back(std::thread(runId, ths_id));
952+
ths.push_back(std::thread(run_id, ths_id));
952953
}
953954

954955
auto loop = 0;

0 commit comments

Comments (0)