Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions anchor/network/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,26 @@ use metrics::*;
pub static PEERS_CONNECTED: LazyLock<Result<IntGauge>> = LazyLock::new(|| {
try_create_int_gauge("libp2p_peers", "Count of libp2p peers currently connected")
});

pub static HANDSHAKE_SUCCESSFUL: LazyLock<Result<IntCounter>> = LazyLock::new(|| {
try_create_int_counter(
"libp2p_handshake_successful_total",
"Total count of successful handshakes",
)
});

pub static HANDSHAKE_FAILED: LazyLock<Result<IntCounterVec>> = LazyLock::new(|| {
try_create_int_counter_vec(
"libp2p_handshake_failed_total",
"Total count of failed handshakes by reason",
&["reason"],
)
});

pub static HANDSHAKE_SUBNET_MATCHES: LazyLock<Result<IntGaugeVec>> = LazyLock::new(|| {
try_create_int_gauge_vec(
"libp2p_handshake_subnet_matches",
"Count of successful handshakes by number of matching subnets",
&["match_count"],
)
});
86 changes: 83 additions & 3 deletions anchor/network/src/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,26 @@ use crate::{

const MAX_TRANSMIT_SIZE_BYTES: usize = 5_000_000;

/// Count the number of matching subnet bits between two hex-encoded subnet strings
fn count_matching_subnets(our_subnets: &str, their_subnets: &str) -> usize {
// Decode both subnet strings
let our_bytes = match hex::decode(our_subnets) {
Ok(bytes) => bytes,
Err(_) => return 0,
};
let their_bytes = match hex::decode(their_subnets) {
Ok(bytes) => bytes,
Err(_) => return 0,
};

// Count matching bits using bitwise AND
our_bytes
.iter()
.zip(their_bytes.iter())
.map(|(a, b)| (a & b).count_ones() as usize)
.sum()
}

#[derive(Debug, Error)]
pub enum NetworkError {
#[error("Unable to listen on address {address}: {source}")]
Expand Down Expand Up @@ -551,11 +571,71 @@ impl<R: MessageReceiver> Network<R> {
peer_id,
their_info,
}) => {
debug!(%peer_id, ?their_info, "Handshake completed");
// Update peer store with their_info
// Record successful handshake
if let Ok(counter) = crate::metrics::HANDSHAKE_SUCCESSFUL.as_ref() {
counter.inc();
}

// Count and record matching subnets
if let (Some(our_metadata), Some(their_metadata)) =
(&self.node_info.metadata, &their_info.metadata)
{
let matching_count =
count_matching_subnets(&our_metadata.subnets, &their_metadata.subnets);

debug!(
%peer_id,
our_subnets = %our_metadata.subnets,
their_subnets = %their_metadata.subnets,
matching_subnets = matching_count,
"Handshake completed"
);

// Record subnet match count
if let Ok(gauge_vec) = crate::metrics::HANDSHAKE_SUBNET_MATCHES.as_ref() {
let label = if matching_count == 0 {
"0"
} else {
// For 1+ use the actual count
&matching_count.to_string()
};
if let Ok(gauge) = gauge_vec.get_metric_with_label_values(&[label]) {
gauge.inc();
}
}
} else {
debug!(%peer_id, ?their_info, "Handshake completed");
}
}
Err(handshake::Failed { peer_id, error }) => {
debug!(%peer_id, ?error, "Handshake failed");
// Determine failure reason for metrics
let failure_reason = match error.as_ref() {
handshake::Error::NetworkMismatch { .. } => "network_mismatch",
handshake::Error::NodeInfo(_) => "nodeinfo_error",
handshake::Error::Inbound(_) => "inbound_failure",
handshake::Error::Outbound(outbound_err) => match outbound_err {
libp2p::request_response::OutboundFailure::DialFailure => {
"outbound_dial_failure"
}
libp2p::request_response::OutboundFailure::Timeout => "outbound_timeout",
libp2p::request_response::OutboundFailure::ConnectionClosed => {
"outbound_connection_closed"
}
libp2p::request_response::OutboundFailure::UnsupportedProtocols => {
"outbound_unsupported_protocols"
}
libp2p::request_response::OutboundFailure::Io(_) => "outbound_io_error",
},
};

// Record failed handshake with reason
if let Ok(counter_vec) = crate::metrics::HANDSHAKE_FAILED.as_ref()
&& let Ok(counter) = counter_vec.get_metric_with_label_values(&[failure_reason])
{
counter.inc();
}

debug!(%peer_id, ?error, reason = failure_reason, "Handshake failed");

// Disconnect the peer on handshake failure
self.disconnect_peer(&peer_id, "Handshake failed");
Expand Down
Loading