Skip to content

Commit 62b07d2

Browse files
*: Downgrade some logging level (#10603) (#10604)
close #10574 * Downgrade some logging level from ERROR to WARN * Downgrade S3 returning 404 http response code cause it is expected when using HEAD to check whether an object exist or not Signed-off-by: JaySon-Huang <tshent@qq.com> Co-authored-by: JaySon-Huang <tshent@qq.com>
1 parent 3bd94e2 commit 62b07d2

File tree

9 files changed

+58
-39
lines changed

9 files changed

+58
-39
lines changed

dbms/src/Common/ProfileEvents.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
M(ContextLock) \
3636
M(CreatedHTTPConnections) \
3737
M(DNSError) \
38-
M(S3ReadMicroseconds) \
39-
M(S3WriteMicroseconds) \
4038
M(S3ReadRequestsCount) \
4139
M(S3WriteRequestsCount) \
4240
M(S3ReadRequestsErrors) \
@@ -45,6 +43,8 @@
4543
M(S3WriteRequestsThrottling) \
4644
M(S3ReadRequestsRedirects) \
4745
M(S3WriteRequestsRedirects) \
46+
M(S3ReadRequestsNotFound) \
47+
M(S3WriteRequestsNotFound) \
4848
\
4949
M(RWLockAcquiredReadLocks) \
5050
M(RWLockAcquiredWriteLocks) \

dbms/src/Flash/ResourceControl/LocalAdmissionController.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ void LocalAdmissionController::requestGACLoop()
491491
}
492492
catch (...)
493493
{
494-
LOG_ERROR(
494+
LOG_WARNING(
495495
log,
496496
"doRequestGAC got error: {}, retry {} sec later",
497497
getCurrentExceptionMessage(false),

dbms/src/Storages/KVStore/tests/gtest_kvstore_fast_add_peer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -788,8 +788,10 @@ try
788788
}
789789
CATCH
790790

791+
// FIXME: Disabled because the flaky test result errors in CI.
792+
// And we don't plan to support enable FAP in short time.
791793
// Test cancel and regular snapshot
792-
TEST_F(RegionKVStoreTestFAP, Cancel4)
794+
TEST_F(RegionKVStoreTestFAP, DISABLED_Cancel4)
793795
try
794796
{
795797
using namespace std::chrono_literals;
@@ -856,7 +858,7 @@ try
856858
CATCH
857859

858860
// Test cancel when building segments
859-
TEST_F(RegionKVStoreTestFAP, Cancel5)
861+
TEST_F(RegionKVStoreTestFAP, DISABLED_Cancel5)
860862
try
861863
{
862864
auto mock_data = prepareForRestart(FAPTestOpt{.second_region = true});

dbms/src/Storages/S3/FileCache.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ void FileCache::downloadImpl(const String & s3_key, FileSegmentPtr & file_seg)
733733
sw.elapsedMilliseconds());
734734
}
735735

736-
void FileCache::download(const String & s3_key, FileSegmentPtr & file_seg)
736+
void FileCache::bgDownloadExecutor(const String & s3_key, FileSegmentPtr & file_seg)
737737
{
738738
try
739739
{
@@ -742,7 +742,8 @@ void FileCache::download(const String & s3_key, FileSegmentPtr & file_seg)
742742
}
743743
catch (...)
744744
{
745-
tryLogCurrentException(log, fmt::format("Download s3_key={} failed", s3_key));
745+
// ignore the exception here, and log as warning.
746+
tryLogCurrentWarningException(log, fmt::format("Download s3_key={} failed", s3_key));
746747
}
747748

748749
if (!file_seg->isReadyToRead())
@@ -774,7 +775,7 @@ void FileCache::bgDownload(const String & s3_key, FileSegmentPtr & file_seg)
774775
bg_downloading_count.load(std::memory_order_relaxed),
775776
s3_key);
776777
S3FileCachePool::get().scheduleOrThrowOnError(
777-
[this, s3_key = s3_key, file_seg = file_seg]() mutable { download(s3_key, file_seg); });
778+
[this, s3_key = s3_key, file_seg = file_seg]() mutable { bgDownloadExecutor(s3_key, file_seg); });
778779
}
779780

780781
void FileCache::fgDownload(const String & s3_key, FileSegmentPtr & file_seg)

dbms/src/Storages/S3/FileCache.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ class FileCache
265265

266266
void bgDownload(const String & s3_key, FileSegmentPtr & file_seg);
267267
void fgDownload(const String & s3_key, FileSegmentPtr & file_seg);
268-
void download(const String & s3_key, FileSegmentPtr & file_seg);
268+
void bgDownloadExecutor(const String & s3_key, FileSegmentPtr & file_seg);
269269
void downloadImpl(const String & s3_key, FileSegmentPtr & file_seg);
270270

271271
static String toTemporaryFilename(const String & fname);

dbms/src/Storages/S3/PocoHTTPClient.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,25 @@
3838

3939
#include <algorithm>
4040
#include <functional>
41+
#include <magic_enum.hpp>
4142
#include <utility>
4243

4344
static const int SUCCESS_RESPONSE_MIN = 200;
4445
static const int SUCCESS_RESPONSE_MAX = 299;
4546

4647
namespace ProfileEvents
4748
{
48-
extern const Event S3ReadMicroseconds;
4949
extern const Event S3ReadRequestsCount;
5050
extern const Event S3ReadRequestsErrors;
5151
extern const Event S3ReadRequestsThrottling;
5252
extern const Event S3ReadRequestsRedirects;
53+
extern const Event S3ReadRequestsNotFound;
5354

54-
extern const Event S3WriteMicroseconds;
5555
extern const Event S3WriteRequestsCount;
5656
extern const Event S3WriteRequestsErrors;
5757
extern const Event S3WriteRequestsThrottling;
5858
extern const Event S3WriteRequestsRedirects;
59+
extern const Event S3WriteRequestsNotFound;
5960

6061
extern const Event S3GetRequestThrottlerCount;
6162
extern const Event S3GetRequestThrottlerSleepMicroseconds;
@@ -175,14 +176,14 @@ PocoHTTPClient::S3MetricKind PocoHTTPClient::getMetricKind(const Aws::Http::Http
175176

176177
void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount)
177178
{
178-
const ProfileEvents::Event events_map[static_cast<size_t>(S3MetricType::EnumSize)]
179-
[static_cast<size_t>(S3MetricKind::EnumSize)]
179+
const ProfileEvents::Event events_map[magic_enum::enum_count<S3MetricType>()]
180+
[magic_enum::enum_count<S3MetricKind>()]
180181
= {
181-
{ProfileEvents::S3ReadMicroseconds, ProfileEvents::S3WriteMicroseconds},
182182
{ProfileEvents::S3ReadRequestsCount, ProfileEvents::S3WriteRequestsCount},
183183
{ProfileEvents::S3ReadRequestsErrors, ProfileEvents::S3WriteRequestsErrors},
184184
{ProfileEvents::S3ReadRequestsThrottling, ProfileEvents::S3WriteRequestsThrottling},
185185
{ProfileEvents::S3ReadRequestsRedirects, ProfileEvents::S3WriteRequestsRedirects},
186+
{ProfileEvents::S3ReadRequestsNotFound, ProfileEvents::S3WriteRequestsNotFound},
186187
};
187188

188189
S3MetricKind kind = getMetricKind(request);
@@ -272,6 +273,7 @@ void PocoHTTPClient::makeRequestInternal(
272273
}
273274
}
274275

276+
// Return redirect uri if redirect happens, otherwise return nullopt
275277
template <typename Session>
276278
std::optional<String> PocoHTTPClient::makeRequestOnce(
277279
const Poco::URI & target_uri,
@@ -342,8 +344,6 @@ std::optional<String> PocoHTTPClient::makeRequestOnce(
342344
for (const auto & [header_name, header_value] : extra_headers)
343345
poco_request.set(boost::algorithm::to_lower_copy(header_name), header_value);
344346

345-
Poco::Net::HTTPResponse poco_response;
346-
347347
Stopwatch watch;
348348

349349
Poco::Net::HTTPSendMetrics metrics;
@@ -372,28 +372,38 @@ std::optional<String> PocoHTTPClient::makeRequestOnce(
372372

373373
if (enable_s3_requests_logging)
374374
LOG_DEBUG(tracing_logger, "Receiving response...");
375+
Poco::Net::HTTPResponse poco_response;
375376
auto & response_body_stream = session->receiveResponse(poco_response);
376377
GET_METRIC(tiflash_storage_s3_http_request_seconds, type_response)
377378
.Observe(watch.elapsedMillisecondsFromLastTime() / 1000.0);
378379

379380
int status_code = static_cast<int>(poco_response.getStatus());
380-
if (status_code >= SUCCESS_RESPONSE_MIN && status_code <= SUCCESS_RESPONSE_MAX)
381+
if (status_code >= SUCCESS_RESPONSE_MIN && status_code <= SUCCESS_RESPONSE_MAX) // 2xx
381382
{
382383
if (enable_s3_requests_logging)
383384
LOG_DEBUG(tracing_logger, "Response status: {}, {}", status_code, poco_response.getReason());
384385
}
386+
else if (status_code == Poco::Net::HTTPResponse::HTTP_NOT_FOUND) // 404
387+
{
388+
/// 404 Not Found is a valid response for some requests (e.g., GET/HEAD of non-existing object)
389+
/// so we just log it at DEBUG level when `enable_s3_requests_logging` is on.
390+
if (enable_s3_requests_logging)
391+
LOG_DEBUG(tracing_logger, "Response status: {}, {}", status_code, poco_response.getReason());
392+
}
385393
else
386394
{
387395
/// Error statuses are more important so we show them even if `enable_s3_requests_logging == false`.
388396
LOG_INFO(tracing_logger, "Response status: {}, {}", status_code, poco_response.getReason());
389397
}
398+
399+
// handle redirect status codes
390400
if (poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT
391401
|| poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_FOUND)
392402
{
393403
auto location = poco_response.get("location");
394404
remote_host_filter->checkURL(Poco::URI(location));
395405
if (enable_s3_requests_logging)
396-
LOG_DEBUG(tracing_logger, "Redirecting request to new location: {}", location);
406+
LOG_INFO(tracing_logger, "Redirecting request to new location: {}", location);
397407

398408
addMetric(request, S3MetricType::Redirects);
399409

@@ -420,7 +430,7 @@ std::optional<String> PocoHTTPClient::makeRequestOnce(
420430
response->AddHeader(header_name, header_value);
421431
}
422432

423-
/// Request is successful but for some special requests we can have actual error message in body
433+
/// Request is successful (2xx) but for some special requests we can have actual error message in body
424434
if (status_code >= SUCCESS_RESPONSE_MIN && status_code <= SUCCESS_RESPONSE_MAX
425435
&& checkRequestCanReturn2xxAndErrorInBody(request))
426436
{
@@ -456,10 +466,20 @@ std::optional<String> PocoHTTPClient::makeRequestOnce(
456466
}
457467
else
458468
{
459-
if (status_code == 429 || status_code == 503)
460-
{ // API throttling
469+
if (status_code == Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS
470+
|| status_code == Poco::Net::HTTPResponse::HTTP_SERVICE_UNAVAILABLE)
471+
{
472+
// API throttling from the server side
473+
// 429 Too Many Requests
474+
// 503 Service Unavailable
461475
addMetric(request, S3MetricType::Throttling);
462476
}
477+
else if (status_code == Poco::Net::HTTPResponse::HTTP_NOT_FOUND)
478+
{
479+
// Get/Head request to a non-existing object, returning 404 Not Found
480+
// is a valid response.
481+
addMetric(request, S3MetricType::ErrorNotFound);
482+
}
463483
else if (status_code >= 300)
464484
{
465485
addMetric(request, S3MetricType::Errors);

dbms/src/Storages/S3/PocoHTTPClient.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
#include <aws/core/http/standard/StandardHttpResponse.h>
2727

2828
#include <string>
29-
#include <vector>
3029

3130
namespace Aws::Http::Standard
3231
{
@@ -141,21 +140,17 @@ class PocoHTTPClient : public Aws::Http::HttpClient
141140

142141
enum class S3MetricType
143142
{
144-
Microseconds,
145143
Count,
146144
Errors,
147145
Throttling,
148146
Redirects,
149-
150-
EnumSize,
147+
ErrorNotFound,
151148
};
152149

153150
enum class S3MetricKind
154151
{
155152
Read,
156153
Write,
157-
158-
EnumSize,
159154
};
160155

161156
static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request);

dbms/src/Storages/StorageDisaggregatedRemote.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -288,28 +288,29 @@ void StorageDisaggregated::buildReadTaskForWriteNode(
288288
for (const auto & region_id : error.region_ids())
289289
retry_regions.insert(region_id);
290290

291-
LOG_INFO(
292-
log,
293-
"Received EstablishDisaggregated response with retryable error: {}, addr={} retry_regions={}",
291+
String error_msg = fmt::format(
292+
"Received EstablishDisaggTask response with retryable error: {}, addr={} retry_regions={}",
294293
error.msg(),
295294
batch_cop_task.store_addr,
296295
retry_regions);
296+
LOG_INFO(log, error_msg);
297297

298298
dropRegionCache(cluster->region_cache, req, std::move(retry_regions));
299299

300-
throw Exception(error.msg(), ErrorCodes::DISAGG_ESTABLISH_RETRYABLE_ERROR);
300+
throw Exception(error_msg, ErrorCodes::DISAGG_ESTABLISH_RETRYABLE_ERROR);
301301
}
302302
else if (resp.error().has_error_locked())
303303
{
304304
using namespace pingcap;
305305

306306
const auto & error = resp.error().error_locked();
307307

308-
LOG_INFO(
309-
log,
310-
"Received EstablishDisaggregated response with retryable error: {}, addr={}",
308+
String error_msg = fmt::format(
309+
"Received EstablishDisaggTask response with retryable error: {}, addr={} lock_info_size={}",
311310
error.msg(),
312-
batch_cop_task.store_addr);
311+
batch_cop_task.store_addr,
312+
error.locked().size());
313+
LOG_INFO(log, error_msg);
313314

314315
Stopwatch w_resolve_lock;
315316

@@ -337,7 +338,7 @@ void StorageDisaggregated::buildReadTaskForWriteNode(
337338
GET_METRIC(tiflash_disaggregated_breakdown_duration_seconds, type_resolve_lock)
338339
.Observe(w_resolve_lock.elapsedSeconds());
339340

340-
throw Exception(error.msg(), ErrorCodes::DISAGG_ESTABLISH_RETRYABLE_ERROR);
341+
throw Exception(error_msg, ErrorCodes::DISAGG_ESTABLISH_RETRYABLE_ERROR);
341342
}
342343
else
343344
{

dbms/src/TiDB/Schema/SchemaSyncService.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ void SchemaSyncService::addKeyspaceGCTasks()
109109
}
110110
catch (const Exception & e)
111111
{
112-
LOG_ERROR(
112+
LOG_WARNING(
113113
ks_log,
114114
"{}, keyspace={} failed by {} \n stack : {}",
115115
stage,
@@ -119,11 +119,11 @@ void SchemaSyncService::addKeyspaceGCTasks()
119119
}
120120
catch (const Poco::Exception & e)
121121
{
122-
LOG_ERROR(ks_log, "{}, keyspace={} failed by {}", stage, keyspace, e.displayText());
122+
LOG_WARNING(ks_log, "{}, keyspace={} failed by {}", stage, keyspace, e.displayText());
123123
}
124124
catch (const std::exception & e)
125125
{
126-
LOG_ERROR(ks_log, "{}, keyspace={} failed by {}", stage, keyspace, e.what());
126+
LOG_WARNING(ks_log, "{}, keyspace={} failed by {}", stage, keyspace, e.what());
127127
}
128128
return false;
129129
},

0 commit comments

Comments
 (0)