Skip to content

Commit da9fad3

Browse files
committed
fix: retry holepunch on path validation failure
When path validation fails (e.g., after interface down/up), schedule a holepunch retry after 500ms instead of just closing the path. This handles cases where routing isn't fully established yet.
1 parent 8b8abc6 commit da9fad3

File tree

1 file changed

+65
-38
lines changed

1 file changed

+65
-38
lines changed

iroh/src/socket/remote_map/remote_state.rs

Lines changed: 65 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -960,51 +960,78 @@ impl RemoteStateActor {
960960
self.paths.abandoned_path(&addr);
961961
}
962962
}
963-
PathEvent::Closed { id, .. } | PathEvent::LocallyClosed { id, .. } => {
964-
let Some(path_remote) = conn_state.paths.get(&id).cloned() else {
965-
debug!("path not in path_id_map");
966-
return;
967-
};
968-
event!(
969-
target: "iroh::_events::path::closed",
970-
Level::DEBUG,
971-
remote = %self.endpoint_id.fmt_short(),
972-
?path_remote,
973-
?conn_id,
974-
path_id = ?id,
975-
);
976-
conn_state.remove_open_path(&id);
977-
978-
// If one connection closes this path, close it on all connections.
979-
for (conn_id, conn_state) in self.connections.iter_mut() {
980-
let Some(path_id) = conn_state.path_ids.get(&path_remote) else {
981-
continue;
982-
};
983-
let Some(conn) = conn_state.handle.upgrade() else {
984-
continue;
985-
};
986-
if let Some(path) = conn.path(*path_id) {
987-
trace!(?path_remote, ?conn_id, %path_id, "closing path");
988-
if let Err(err) = path.close() {
989-
trace!(
990-
?path_remote,
991-
?conn_id,
992-
%path_id,
993-
"path close failed: {err:#}"
994-
);
995-
}
996-
}
997-
}
998-
999-
// If the remote closed our selected path, select a new one.
1000-
self.select_path();
963+
PathEvent::Closed { id, .. } => {
964+
drop(conn);
965+
self.handle_path_closed(conn_id, id, false);
966+
}
967+
PathEvent::LocallyClosed { id, error } => {
968+
drop(conn);
969+
let validation_failed = error == PathError::ValidationFailed;
970+
self.handle_path_closed(conn_id, id, validation_failed);
1001971
}
1002972
PathEvent::RemoteStatus { .. } | PathEvent::ObservedAddr { .. } => {
1003973
// Nothing to do for these events.
1004974
}
1005975
}
1006976
}
1007977

978+
/// Handles a path being closed (either remotely or locally).
979+
fn handle_path_closed(&mut self, conn_id: ConnId, id: PathId, validation_failed: bool) {
980+
// If path validation failed, retry holepunching after a short delay.
981+
// This handles cases where paths fail to validate after network changes
982+
// (e.g., interface down/up) due to routing not being fully established.
983+
// Note: this must be checked BEFORE the early returns below, because paths
984+
// opened by initiate_nat_traversal_round() are not tracked in our path map.
985+
if validation_failed {
986+
debug!("path validation failed, scheduling holepunch retry");
987+
let retry_delay = Duration::from_millis(500);
988+
self.last_holepunch = None;
989+
self.scheduled_holepunch = Some(Instant::now() + retry_delay);
990+
}
991+
992+
let Some(conn_state) = self.connections.get_mut(&conn_id) else {
993+
debug!("path closed for removed connection");
994+
return;
995+
};
996+
let Some(path_remote) = conn_state.paths.get(&id).cloned() else {
997+
debug!("path not in path_id_map");
998+
return;
999+
};
1000+
event!(
1001+
target: "iroh::_events::path::closed",
1002+
Level::DEBUG,
1003+
remote = %self.endpoint_id.fmt_short(),
1004+
?path_remote,
1005+
?conn_id,
1006+
path_id = ?id,
1007+
);
1008+
conn_state.remove_open_path(&id);
1009+
1010+
// If one connection closes this path, close it on all connections.
1011+
for (other_conn_id, other_conn_state) in self.connections.iter_mut() {
1012+
let Some(path_id) = other_conn_state.path_ids.get(&path_remote) else {
1013+
continue;
1014+
};
1015+
let Some(conn) = other_conn_state.handle.upgrade() else {
1016+
continue;
1017+
};
1018+
if let Some(path) = conn.path(*path_id) {
1019+
trace!(?path_remote, conn_id = ?other_conn_id, %path_id, "closing path");
1020+
if let Err(err) = path.close() {
1021+
trace!(
1022+
?path_remote,
1023+
conn_id = ?other_conn_id,
1024+
%path_id,
1025+
"path close failed: {err:#}"
1026+
);
1027+
}
1028+
}
1029+
}
1030+
1031+
// If the closed path was our selected path, select a new one.
1032+
self.select_path();
1033+
}
1034+
10081035
/// Selects the path with the lowest RTT, prefers direct paths.
10091036
///
10101037
/// If there are direct paths, this selects the direct path with the lowest RTT. If

0 commit comments

Comments
 (0)