Skip to content

Commit 8207068

Browse files
authored
refactor(pruner): remove persisted height coordination (#494)
1 parent 5601c0e commit 8207068

File tree

12 files changed

+41
-571
lines changed

12 files changed

+41
-571
lines changed

services/pruner/blob_deletion_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ func TestBlobDeletionSchedulingAndExecution(t *testing.T) {
217217
// Set up mock settings
218218
server.settings = &settings.Settings{
219219
Pruner: settings.PrunerSettings{
220-
BlobDeletionEnabled: true,
220+
SkipBlobDeletion: false,
221221
BlobDeletionSafetyWindow: 0,
222222
BlobDeletionBatchSize: 100,
223223
BlobDeletionMaxRetries: 3,
@@ -377,7 +377,7 @@ func TestBlobDeletionIdempotency(t *testing.T) {
377377

378378
server.settings = &settings.Settings{
379379
Pruner: settings.PrunerSettings{
380-
BlobDeletionEnabled: true,
380+
SkipBlobDeletion: false,
381381
BlobDeletionBatchSize: 100,
382382
BlobDeletionMaxRetries: 3,
383383
},

services/pruner/blob_deletion_worker.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ func (s *Server) blobDeletionWorker() {
3939
}
4040

4141
func (s *Server) processBlobDeletionsAtHeight(height uint32, blockHash *chainhash.Hash) {
42-
if !s.settings.Pruner.BlobDeletionEnabled {
42+
if s.settings.Pruner.SkipBlobDeletion {
4343
return
4444
}
4545

services/pruner/server.go

Lines changed: 8 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,6 @@ func (s *Server) Init(ctx context.Context) error {
127127
return errors.NewServiceError("pruner service not available from UTXO store")
128128
}
129129

130-
// Set persisted height getter for block persister coordination
131-
s.prunerService.SetPersistedHeightGetter(s.GetLastPersistedHeight)
132-
133130
// Validate block trigger mode
134131
blockTrigger := s.settings.Pruner.BlockTrigger
135132
if blockTrigger != settings.PrunerBlockTriggerOnBlockPersisted && blockTrigger != settings.PrunerBlockTriggerOnBlockMined {
@@ -234,31 +231,18 @@ func (s *Server) Init(ctx context.Context) error {
234231
continue
235232
}
236233

237-
// Get height from block assembly state
238-
state, err := s.blockAssemblyClient.GetBlockAssemblyState(ctx)
234+
// Get height from blockchain client using the block hash
235+
// This is the authoritative source and avoids race conditions with Block Assembly state updates
236+
header, meta, err := s.blockchainClient.GetBlockHeader(ctx, blockHash)
239237
if err != nil {
240-
s.logger.Debugf("Failed to get block assembly state on Block notification: %v", err)
238+
s.logger.Debugf("Failed to get block header for Block notification hash %s: %v", blockHash, err)
241239
continue
242240
}
243-
244-
// If block assembly hasn't initialized yet (height=0), get height from blockchain
245-
// This handles the edge case where Block notifications arrive before block assembly
246-
// has processed its first block
247-
var height uint32
248-
if state.CurrentHeight == 0 {
249-
header, meta, err := s.blockchainClient.GetBlockHeader(ctx, blockHash)
250-
if err != nil {
251-
s.logger.Debugf("Failed to get block header for Block notification hash %s: %v", blockHash, err)
252-
continue
253-
}
254-
if header == nil || meta == nil {
255-
s.logger.Debugf("Block notification for hash %s has no header/meta", blockHash)
256-
continue
257-
}
258-
height = meta.Height
259-
} else {
260-
height = state.CurrentHeight
241+
if header == nil || meta == nil {
242+
s.logger.Debugf("Block notification for hash %s has no header/meta", blockHash)
243+
continue
261244
}
245+
height := meta.Height
262246

263247
// Queue pruning request immediately - processor will wait for mined_set if block assembly is running
264248
if height > s.lastProcessedHeight.Load() {
@@ -426,13 +410,6 @@ func (s *Server) HealthGRPC(ctx context.Context, _ *pruner_api.EmptyMessage) (*p
426410
}, errors.WrapGRPC(err)
427411
}
428412

429-
// GetLastPersistedHeight returns the last known block height that has been persisted
430-
// by the block persister service. This is used to coordinate cleanup operations to
431-
// avoid deleting data that the block persister still needs.
432-
func (s *Server) GetLastPersistedHeight() uint32 {
433-
return s.lastPersistedHeight.Load()
434-
}
435-
436413
// SetBlobDeletionObserver sets an optional observer for blob deletion completion events.
437414
// This is primarily used for testing to synchronize test execution with deletion processing.
438415
func (s *Server) SetBlobDeletionObserver(observer BlobDeletionObserver) {

services/pruner/worker.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ func (s *Server) prunerProcessor(ctx context.Context) {
240240

241241
// Phase 1: Preserve parents of old unmined transactions
242242
// This must run before Phase 2 to protect parents from deletion
243-
if s.utxoStore != nil {
243+
if s.utxoStore != nil && !s.settings.Pruner.SkipPreserveParents {
244244
hashStr := "<unknown>"
245245
if latestReq.BlockHash != nil {
246246
hashStr = latestReq.BlockHash.String()
@@ -260,6 +260,12 @@ func (s *Server) prunerProcessor(ctx context.Context) {
260260
prunerUpdatingParents.Add(float64(count))
261261
}
262262
}
263+
} else if s.settings.Pruner.SkipPreserveParents {
264+
hashStr := "<unknown>"
265+
if latestReq.BlockHash != nil {
266+
hashStr = latestReq.BlockHash.String()
267+
}
268+
s.logger.Infof("[pruner][%s:%d] phase 1: skipped (pruner_skipPreserveParents=true)", hashStr, latestReq.Height)
263269
}
264270

265271
// Phase 2: DAH pruning (deletion)

settings/pruner_settings.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@ type PrunerSettings struct {
1818
UTXOProgressLogInterval time.Duration `key:"pruner_utxoProgressLogInterval" desc:"Interval for logging pruning progress" default:"30s" category:"Pruner" usage:"How often to log progress during UTXO pruning" type:"duration" longdesc:"### Purpose\nSets how often to log progress during UTXO pruning operations.\n\n### How It Works\nDuring long-running pruning operations, progress is logged at this interval to help operators monitor the operation without flooding logs.\n\n### Recommendations\n- **30s** (default) - Regular updates without excessive logging\n- Decrease for more frequent updates during debugging\n- Increase for quieter logs in stable environments"`
1919
UTXOPartitionQueries int `key:"pruner_utxoPartitionQueries" desc:"Number of parallel Aerospike partition queries" default:"0" category:"Pruner" usage:"0 = auto-detect based on CPU cores" type:"int" longdesc:"### Purpose\nControls the number of parallel partition workers when scanning Aerospike for prunable records.\n\n### How It Works\n- Aerospike keyspace is divided into 4096 partitions\n- This setting controls how many workers scan partitions in parallel\n- Each worker processes a range of partitions independently\n- Partition-based scanning achieves up to 100x performance improvement over sequential queries\n\n### Values\n- **0** (default) - Auto-detect based on CPU cores and Aerospike query-threads-limit\n- **N > 0** - Fixed number of partition workers (capped at 4096)\n\n### Trade-offs\n| Setting | Benefit | Drawback |\n|---------|---------|----------|\n| Higher | Faster scanning | More Aerospike load, more connections |\n| Lower | Reduced cluster pressure | Slower pruning |\n\n### Recommendations\n- Use **0** for automatic scaling based on available resources\n- Set explicitly to match your Aerospike cluster's capacity"`
2020
UTXOSetTTL bool `key:"pruner_utxoSetTTL" desc:"Use TTL expiration instead of hard delete" default:"false" category:"Pruner" usage:"Set record TTL instead of deleting" type:"bool" longdesc:"### Purpose\nControls whether the pruner uses TTL-based expiration instead of hard deletes for UTXO records.\n\n### How It Works\n- When enabled, instead of deleting records directly, the pruner sets the Aerospike record TTL to 1 second\n- Aerospike's nsup (namespace supervisor) thread handles the actual expiration in the background\n- This produces optimized tombstones and reduces write amplification compared to hard deletes\n- The record becomes inaccessible within 1 second and is fully cleaned up by nsup\n\n### Trade-offs\n| Setting | Benefit | Drawback |\n|---------|---------|----------|\n| true | Reduced write amplification, optimized tombstones | Records persist ~1 second longer |\n| false (default) | Immediate deletion | Higher write amplification, standard tombstones |\n\n### Recommendations\n- **false** (default) - Standard hard delete behavior\n- **true** - Recommended for high-throughput environments where nsup-managed expiration is preferred"`
21-
BlobDeletionEnabled bool `key:"pruner_blobDeletionEnabled" desc:"Enable blob deletion scheduling" default:"true" category:"Pruner" usage:"Enable deletion of expired blobs" type:"bool" longdesc:"### Purpose\nEnables scheduled deletion of blob store data (transactions and subtrees) based on Delete-At-Height (DAH) values.\n\n### How It Works\n- When enabled, pruner schedules deletions for blobs that have reached their DAH height\n- Works with all blob store types (file, S3, memory, etc.)\n- Deletion is triggered after BlobDeletionSafetyWindow blocks past the persister height\n\n### Recommendations\n- **true** (default) - Enable for normal operations\n- **false** - Disable if you want to retain all blob data indefinitely"`
21+
SkipBlobDeletion bool `key:"pruner_skipBlobDeletion" desc:"Skip blob deletion scheduling" default:"false" category:"Pruner" usage:"Skip deletion of expired blobs" type:"bool" longdesc:"### Purpose\nSkips scheduled deletion of blob store data (transactions and subtrees) based on Delete-At-Height (DAH) values.\n\n### How It Works\n- When enabled, pruner skips deletions for blobs that have reached their DAH height\n- Works with all blob store types (file, S3, memory, etc.)\n- Deletion would normally be triggered after BlobDeletionSafetyWindow blocks past the persister height\n\n### Recommendations\n- **false** (default) - Enable blob deletion for normal operations\n- **true** - Skip blob deletion if you want to retain all blob data indefinitely"`
2222
BlobDeletionSafetyWindow uint32 `key:"pruner_blobDeletionSafetyWindow" desc:"Blocks to wait after persister before deletion" default:"10" category:"Pruner" usage:"Safety margin for blob deletion" type:"uint32" longdesc:"### Purpose\nNumber of blocks to wait after Block Persister height before deleting blobs.\n\n### How It Works\nProvides a safety margin to ensure persisted blocks are stable before deleting their associated blobs. Prevents deletion of data that might be needed during reorg scenarios.\n\n### Recommendations\n- **10** (default) - Good balance between storage and safety\n- Increase for deeper reorg protection\n- Decrease to free storage faster (not recommended)"`
2323
BlobDeletionBatchSize int `key:"pruner_blobDeletionBatchSize" desc:"Maximum deletions per trigger" default:"1000" category:"Pruner" usage:"Limits deletions per pruning cycle" type:"int" longdesc:"### Purpose\nMaximum number of blob deletions to process per pruning trigger.\n\n### How It Works\nLimits the number of deletions per cycle to prevent overwhelming the blob store and database. Remaining deletions are processed in subsequent triggers.\n\n### Recommendations\n- **1000** (default) - Good throughput without excessive load\n- Increase for faster cleanup if system can handle it\n- Decrease to reduce deletion load"`
2424
BlobDeletionMaxRetries int `key:"pruner_blobDeletionMaxRetries" desc:"Maximum retry attempts for failed deletions" default:"3" category:"Pruner" usage:"Retries for transient failures" type:"int" longdesc:"### Purpose\nMaximum number of retry attempts for failed blob deletions.\n\n### How It Works\nWhen a blob deletion fails (network error, temporary unavailability), the pruner retries up to this many times before logging an error.\n\n### Recommendations\n- **3** (default) - Good balance for transient failures\n- Increase for unreliable storage backends\n- Decrease for faster failure detection"`
25+
SkipPreserveParents bool `key:"pruner_skipPreserveParents" desc:"Skip Phase 1: preserve parents of unmined transactions" default:"false" category:"Pruner" usage:"Skip parent preservation phase" type:"bool" longdesc:"### Purpose\nSkips Phase 1 of pruning which preserves parent transactions of old unmined transactions.\n\n### How It Works\nWhen enabled, the pruner skips calling PreserveParentsOfOldUnminedTransactions.\nThis means parent transactions will not be protected from deletion even if they have unmined children.\n\n### Trade-offs\n| Setting | Benefit | Drawback |\n|---------|---------|----------|\n| false (default) | Parents preserved for unmined tx resubmission | Additional processing overhead |\n| true | Faster pruning, reduced processing | Parents may be deleted, breaking unmined tx chains |\n\n### Recommendations\n- **false** (default) - Normal operation, preserves parent transactions\n- **true** - Skip parent preservation if you don't need to resubmit unmined transactions"`
2526
SkipParentUpdates bool `key:"pruner_skipParentUpdates" desc:"Skip parent update operations" default:"false" category:"Pruner" usage:"Skip parent update operations" type:"bool" longdesc:"### Purpose\nSkips parent update operations during pruning.\n\n### How It Works\nWhen enabled, the pruner skips parent update operations during pruning.\n\n### Recommendations\n- **false** (default) - Normal operation\n- **true** - Skip parent update operations"`
2627
SkipDeletions bool `key:"pruner_skipDeletions" desc:"Skip deletion operations" default:"false" category:"Pruner" usage:"Skip deletion operations" type:"bool" longdesc:"### Purpose\nSkips deletion operations during pruning.\n\n### How It Works\nWhen enabled, the pruner skips deletion operations during pruning.\n\n### Recommendations\n- **false** (default) - Normal operation\n- **true** - Skip deletion operations"`
2728
}

settings/settings.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,10 +503,11 @@ func NewSettings(alternativeContext ...string) *Settings {
503503
UTXOProgressLogInterval: getDuration("pruner_utxoProgressLogInterval", 30*time.Second, alternativeContext...), // Progress every 30s
504504
UTXOPartitionQueries: getInt("pruner_utxoPartitionQueries", 0, alternativeContext...), // 0 = auto-detect based on CPU cores
505505
UTXOSetTTL: getBool("pruner_utxoSetTTL", false, alternativeContext...), // Use TTL instead of delete (false = hard delete)
506-
BlobDeletionEnabled: getBool("pruner_blobDeletionEnabled", true, alternativeContext...), // Enable blob deletion by default
506+
SkipBlobDeletion: getBool("pruner_skipBlobDeletion", false, alternativeContext...), // Skip blob deletion disabled by default (deletion enabled)
507507
BlobDeletionSafetyWindow: getUint32("pruner_blobDeletionSafetyWindow", 10, alternativeContext...), // Wait 10 blocks after persister
508508
BlobDeletionBatchSize: getInt("pruner_blobDeletionBatchSize", 1000, alternativeContext...), // Process 1000 deletions per batch
509509
BlobDeletionMaxRetries: getInt("pruner_blobDeletionMaxRetries", 3, alternativeContext...), // Retry failed deletions up to 3 times
510+
SkipPreserveParents: getBool("pruner_skipPreserveParents", false, alternativeContext...), // Skip Phase 1: preserve parents
510511
SkipParentUpdates: getBool("pruner_skipParentUpdates", false, alternativeContext...), // Skip parent updates for performance
511512
SkipDeletions: getBool("pruner_skipDeletions", false, alternativeContext...), // Skip deletions for performance
512513
},

0 commit comments

Comments
 (0)