Skip to content
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 6 additions & 25 deletions beacon_node/network/src/sync/backfill_sync/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,6 @@ pub struct BackFillSync<T: BeaconChainTypes> {
/// Batches validated by this chain.
validated_batches: u64,

/// We keep track of peers that are participating in the backfill sync. Unlike RangeSync,
/// BackFillSync uses all synced peers to download the chain from. If BackFillSync fails, we don't
/// want to penalize all our synced peers, so we use this variable to keep track of peers that
/// have participated and only penalize these peers if backfill sync fails.
participating_peers: HashSet<PeerId>,

/// When a backfill sync fails, we keep track of whether a new fully synced peer has joined.
/// This signifies that we are able to attempt to restart a failed chain.
restart_failed_sync: bool,
Expand Down Expand Up @@ -186,7 +180,6 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
network_globals,
current_processing_batch: None,
validated_batches: 0,
participating_peers: HashSet::new(),
restart_failed_sync: false,
beacon_chain,
};
Expand Down Expand Up @@ -294,13 +287,10 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
/// A peer has disconnected.
/// If the peer has active batches, those are considered failed and re-requested.
#[must_use = "A failure here indicates the backfill sync has failed and the global sync state should be updated"]
pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Result<(), BackFillError> {
pub fn peer_disconnected(&mut self, _peer_id: &PeerId) -> Result<(), BackFillError> {
if matches!(self.state(), BackFillState::Failed) {
return Ok(());
}

// Remove the peer from the participation list
self.participating_peers.remove(peer_id);
Ok(())
}

Expand Down Expand Up @@ -440,9 +430,8 @@ impl<T: BeaconChainTypes> BackFillSync<T> {

// Set the state
self.set_state(BackFillState::Failed);
// Remove all batches and active requests and participating peers.
// Remove all batches and reset local sync state.
self.batches.clear();
self.participating_peers.clear();
self.restart_failed_sync = false;

// Reset all downloading and processing targets
Expand Down Expand Up @@ -620,29 +609,21 @@ impl<T: BeaconChainTypes> BackFillSync<T> {
imported_blocks,
penalty,
} => {
// Penalize the peer that provided the faulty batch.
network.report_peer(*peer, *penalty, "backfill_faulty_batch");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: we need to fix this for backfill sync and range sync to penalize the block peer or column(s) peer


match batch.processing_completed(BatchProcessingResult::FaultyFailure) {
Err(e) => {
// Batch was in the wrong state
self.fail_sync(BackFillError::BatchInvalidState(batch_id, e.0))
.map(|_| ProcessResult::Successful)
}
Ok(BatchOperationOutcome::Failed { blacklist: _ }) => {
// check that we have not exceeded the re-process retry counter
// If a batch has exceeded the invalid batch lookup attempts limit, it means
// that it is likely all peers are sending invalid batches
// repeatedly and are either malicious or faulty. We stop the backfill sync and
// report all synced peers that have participated.
warn!(
score_adjustment = %penalty,
batch_epoch = %batch_id,
"Backfill batch failed to download. Penalizing peers"
"Backfill batch failed to download after exceeding retry limit"
);

for peer in self.participating_peers.drain() {
// TODO(das): `participating_peers` only includes block peers. Should we
// penalize the custody column peers too?
network.report_peer(peer, *penalty, "backfill_batch_failed");
}
self.fail_sync(BackFillError::BatchProcessingFailed(batch_id))
.map(|_| ProcessResult::Successful)
}
Expand Down