Skip to content

Commit 021a2b2

Browse files
ordishs and claude authored
feat(pruner): add incremental Prometheus metrics for real-time monitoring (#492)
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent: f9e3a6d; commit: 021a2b2

File tree

2 files changed

+36
-5
lines changed

2 files changed

+36
-5
lines changed

services/pruner/blob_deletion_worker.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ func (s *Server) processBlobDeletionsAtHeight(height uint32, blockHash *chainhas
130130
} else {
131131
completedIDs = append(completedIDs, deletion.Id)
132132
successCount++
133+
blobDeletionProcessedTotal.Inc()
133134
}
134135
}
135136

@@ -148,7 +149,6 @@ func (s *Server) processBlobDeletionsAtHeight(height uint32, blockHash *chainhas
148149
duration := time.Since(batchStartTime).Round(time.Second)
149150
s.logger.Infof("[pruner][%s:%d] blob deletion: batch complete - %s succeeded, %s failed (took %s)",
150151
hashStr, height, humanize.Comma(successCount), humanize.Comma(failCount), duration)
151-
blobDeletionProcessedTotal.Add(float64(successCount))
152152

153153
// Notify observer if registered (for testing)
154154
if s.blobDeletionObserver != nil {

stores/utxo/aerospike/pruner/pruner_service.go

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,13 @@ var _ pruner.Service = (*Service)(nil)
3939
var IndexName, _ = gocore.Config().Get("pruner_IndexName", "pruner_dah_index")
4040

4141
var (
42-
prometheusMetricsInitOnce sync.Once
43-
prometheusUtxoCleanupBatch prometheus.Histogram
44-
prometheusUtxoRecordErrors prometheus.Counter
45-
prometheusUtxoBatchQueryError prometheus.Counter
42+
prometheusMetricsInitOnce sync.Once
43+
prometheusUtxoCleanupBatch prometheus.Histogram
44+
prometheusUtxoRecordErrors prometheus.Counter
45+
prometheusUtxoBatchQueryError prometheus.Counter
46+
prometheusUtxoRecordsDeleted prometheus.Counter
47+
prometheusUtxoParentsUpdated prometheus.Counter
48+
prometheusUtxoExternalFilesDeleted prometheus.Counter
4649
)
4750

4851
// Options contains configuration options for the cleanup service
@@ -173,6 +176,18 @@ func NewService(settings *settings.Settings, opts Options) (*Service, error) {
173176
Name: "utxo_pruner_batch_query_errors_total",
174177
Help: "Total number of Aerospike batch query errors during child verification",
175178
})
179+
prometheusUtxoRecordsDeleted = promauto.NewCounter(prometheus.CounterOpts{
180+
Name: "utxo_pruner_records_deleted_total",
181+
Help: "Total number of UTXO records deleted during pruning (updated incrementally)",
182+
})
183+
prometheusUtxoParentsUpdated = promauto.NewCounter(prometheus.CounterOpts{
184+
Name: "utxo_pruner_parents_updated_total",
185+
Help: "Total number of parent records updated during pruning (updated incrementally)",
186+
})
187+
prometheusUtxoExternalFilesDeleted = promauto.NewCounter(prometheus.CounterOpts{
188+
Name: "utxo_pruner_external_files_deleted_total",
189+
Help: "Total number of external files deleted during pruning (updated incrementally)",
190+
})
176191
})
177192

178193
// Use the configured query policy from settings (configured via aerospike_queryPolicy URL)
@@ -455,6 +470,12 @@ func (s *Service) partitionWorker(
455470
totalProcessed += int64(processed)
456471
totalSkipped += int64(skipped)
457472
mu.Unlock()
473+
474+
// Update Prometheus counter incrementally for real-time rate calculation
475+
if processed > 0 {
476+
prometheusUtxoRecordsDeleted.Add(float64(processed))
477+
}
478+
458479
return nil
459480
})
460481
}
@@ -1331,6 +1352,11 @@ func (s *Service) executeBatchParentUpdates(ctx context.Context, updates map[str
13311352
return errors.NewStorageError("%d parent update operations failed", errorCount)
13321353
}
13331354

1355+
// Update metric with successful parent updates
1356+
if successCount > 0 {
1357+
prometheusUtxoParentsUpdated.Add(float64(successCount))
1358+
}
1359+
13341360
return nil
13351361
}
13361362

@@ -1453,6 +1479,11 @@ func (s *Service) executeBatchExternalFileDeletions(ctx context.Context, files [
14531479

14541480
s.logger.Debugf("External file deletion batch - success: %d, already deleted: %d, errors: %d", successCount, alreadyDeletedCount, errorCount)
14551481

1482+
// Update metric with successful deletions
1483+
if successCount > 0 {
1484+
prometheusUtxoExternalFilesDeleted.Add(float64(successCount))
1485+
}
1486+
14561487
// Return error if any deletions failed
14571488
if errorCount > 0 {
14581489
return errors.NewStorageError("%d external file deletions failed", errorCount)

0 commit comments

Comments
 (0)