Skip to content

Commit 6cef874

Browse files
authored
TQ: Support adding sleds via trust quorum (#9650)
This PR introduces two new external APIs to allow adding multiple sleds to a rack at once and to query the status of the ongoing operation. It also adds an omdb command for more detailed status. Much more omdb to come in the near future. This PR also introduces a background task for driving the trust quorum reconfiguration to completion. Reconfiguration is driven in two steps: synchronously updating the DB in the new external endpoint handler, and then asynchronously trying to commit the operation via the background task. I tested this on a4x2 and it works as expected. See the trace from the original external API test below: ``` ➜ oxide.rs git:(main) ✗ echo '{"rack_id": "0dbef452-a6dd-4831-bbdc-769ea3353f28", "sled_ids": [{"part": "PPP-PPPPPPP","serial": "00000000002"}]}' | target/debug/oxide --profile recovery api /v1/trust-quorum/new-members --method POST --input - ➜ oxide.rs git:(main) ✗ target/debug/oxide --profile recovery api /v1/trust-quorum/config/latest/0dbef452-a6dd-4831-bbdc-769ea3353f28 { "abort_reason": null, "commit_crash_tolerance": 1, "coordinator": { "part_number": "PPP-PPPPPPP", "serial_number": "00000000003" }, "encrypted_rack_secrets": null, "epoch": 2, "last_committed_epoch": 1, "members": { "PPP-PPPPPPP:00000000000": { "share_digest": null, "state": "unacked", "time_committed": null, "time_prepared": null }, "PPP-PPPPPPP:00000000001": { "share_digest": null, "state": "unacked", "time_committed": null, "time_prepared": null }, "PPP-PPPPPPP:00000000002": { "share_digest": null, "state": "unacked", "time_committed": null, "time_prepared": null }, "PPP-PPPPPPP:00000000003": { "share_digest": null, "state": "unacked", "time_committed": null, "time_prepared": null } }, "rack_id": "0dbef452-a6dd-4831-bbdc-769ea3353f28", "state": "preparing", "threshold": 3, "time_aborted": null, "time_committed": null, "time_committing": null, "time_created": "2026-01-14T21:32:18.780136Z" } ➜ oxide.rs git:(main) ✗ target/debug/oxide --profile recovery api 
/v1/trust-quorum/config/latest/0dbef452-a6dd-4831-bbdc-769ea3353f28 { "abort_reason": null, "commit_crash_tolerance": 1, "coordinator": { "part_number": "PPP-PPPPPPP", "serial_number": "00000000003" }, "encrypted_rack_secrets": null, "epoch": 2, "last_committed_epoch": 1, "members": { "PPP-PPPPPPP:00000000000": { "share_digest": "fcfb09128c84d82cc81b200c6c682510f63160a4417856f4041b1886445e8b14", "state": "prepared", "time_committed": null, "time_prepared": "2026-01-14T21:32:55.826622Z" }, "PPP-PPPPPPP:00000000001": { "share_digest": "d8cad02bd3bccd08109a79e3bf6d8dab0d460a0ba879bf42887dc0fc8d855786", "state": "prepared", "time_committed": null, "time_prepared": "2026-01-14T21:32:55.848235Z" }, "PPP-PPPPPPP:00000000002": { "share_digest": "dd57ad8e271734fabfe97d6180d6da3e5c3805e17dacf58e0f2a6d5ed7f1242b", "state": "prepared", "time_committed": null, "time_prepared": "2026-01-14T21:32:55.806644Z" }, "PPP-PPPPPPP:00000000003": { "share_digest": "6b27327ca49976ccca83972e6578ef195c99489e62811e8d0a0cb061fca9c0c4", "state": "prepared", "time_committed": null, "time_prepared": "2026-01-14T21:32:55.837154Z" } }, "rack_id": "0dbef452-a6dd-4831-bbdc-769ea3353f28", "state": "preparing", "threshold": 3, "time_aborted": null, "time_committed": null, "time_committing": null, "time_created": "2026-01-14T21:32:18.780136Z" } ➜ oxide.rs git:(main) ✗ target/debug/oxide --profile recovery api /v1/trust-quorum/config/latest/0dbef452-a6dd-4831-bbdc-769ea3353f28 { "abort_reason": null, "commit_crash_tolerance": 1, "coordinator": { "part_number": "PPP-PPPPPPP", "serial_number": "00000000003" }, "encrypted_rack_secrets": { "data": "53de7731deec3f298a7f5067e256a63bb2869a91c9710d9b23dbf3d261d1b730039d9cb11b543c14906ff77cd409d32953959e9ff8933858", "salt": "ec609ed5ff7aee94e2e88ad94af56e0cbb8a66a683294005c7888f60a627956a" }, "epoch": 2, "last_committed_epoch": 1, "members": { "PPP-PPPPPPP:00000000000": { "share_digest": "fcfb09128c84d82cc81b200c6c682510f63160a4417856f4041b1886445e8b14", "state": 
"committed", "time_committed": "2026-01-14T21:33:03.864617Z", "time_prepared": "2026-01-14T21:32:55.826622Z" }, "PPP-PPPPPPP:00000000001": { "share_digest": "d8cad02bd3bccd08109a79e3bf6d8dab0d460a0ba879bf42887dc0fc8d855786", "state": "committed", "time_committed": "2026-01-14T21:33:03.864617Z", "time_prepared": "2026-01-14T21:32:55.848235Z" }, "PPP-PPPPPPP:00000000002": { "share_digest": "dd57ad8e271734fabfe97d6180d6da3e5c3805e17dacf58e0f2a6d5ed7f1242b", "state": "committed", "time_committed": "2026-01-14T21:33:03.864617Z", "time_prepared": "2026-01-14T21:32:55.806644Z" }, "PPP-PPPPPPP:00000000003": { "share_digest": "6b27327ca49976ccca83972e6578ef195c99489e62811e8d0a0cb061fca9c0c4", "state": "committed", "time_committed": "2026-01-14T21:33:03.864617Z", "time_prepared": "2026-01-14T21:32:55.837154Z" } }, "rack_id": "0dbef452-a6dd-4831-bbdc-769ea3353f28", "state": "committed", "threshold": 3, "time_aborted": null, "time_committed": "2026-01-14T21:33:04.652543Z", "time_committing": "2026-01-14T21:32:55.861158Z", "time_created": "2026-01-14T21:32:18.780136Z" } ➜ oxide.rs git:(main) ✗ ```
1 parent 5eb1337 commit 6cef874

File tree

40 files changed

+32970
-16
lines changed

40 files changed

+32970
-16
lines changed

Cargo.lock

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

clients/sled-agent-client/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,7 @@ schemars.workspace = true
2424
serde.workspace = true
2525
serde_json.workspace = true
2626
sled-agent-types.workspace = true
27+
sled-hardware-types.workspace = true
28+
trust-quorum-types.workspace = true
2729
slog.workspace = true
2830
uuid.workspace = true

clients/sled-agent-client/src/lib.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use std::convert::TryFrom;
1313
use uuid::Uuid;
1414

1515
pub use propolis_client::{CrucibleOpts, VolumeConstructionRequest};
16-
1716
progenitor::generate_api!(
1817
spec = "../../openapi/sled-agent/sled-agent-latest.json",
1918
interface = Positional,
@@ -47,14 +46,19 @@ progenitor::generate_api!(
4746
},
4847
replace = {
4948
Baseboard = sled_agent_types_versions::latest::inventory::Baseboard,
49+
BaseboardId = sled_hardware_types::BaseboardId,
5050
ByteCount = omicron_common::api::external::ByteCount,
51+
CommitRequest = trust_quorum_types::messages::CommitRequest,
52+
CommitStatus = trust_quorum_types::status::CommitStatus,
53+
CoordinatorStatus = trust_quorum_types::status::CoordinatorStatus,
5154
DatasetsConfig = omicron_common::disk::DatasetsConfig,
5255
DatasetManagementStatus = omicron_common::disk::DatasetManagementStatus,
5356
DatasetKind = omicron_common::api::internal::shared::DatasetKind,
5457
DiskIdentity = omicron_common::disk::DiskIdentity,
5558
DiskManagementStatus = omicron_common::disk::DiskManagementStatus,
5659
DiskManagementError = omicron_common::disk::DiskManagementError,
5760
DiskVariant = omicron_common::disk::DiskVariant,
61+
Epoch = trust_quorum_types::types::Epoch,
5862
ExternalIpGatewayMap = omicron_common::api::internal::shared::ExternalIpGatewayMap,
5963
ExternalIpConfig = omicron_common::api::internal::shared::ExternalIpConfig,
6064
ExternalIpv4Config = omicron_common::api::internal::shared::ExternalIpv4Config,
@@ -79,15 +83,18 @@ progenitor::generate_api!(
7983
OmicronZonesConfig = sled_agent_types_versions::latest::inventory::OmicronZonesConfig,
8084
PortFec = omicron_common::api::internal::shared::PortFec,
8185
PortSpeed = omicron_common::api::internal::shared::PortSpeed,
82-
RouterId = omicron_common::api::internal::shared::RouterId,
86+
PrepareAndCommitRequest = trust_quorum_types::messages::PrepareAndCommitRequest,
87+
ReconfigureMsg = trust_quorum_types::messages::ReconfigureMsg,
8388
ResolvedVpcFirewallRule = omicron_common::api::internal::shared::ResolvedVpcFirewallRule,
8489
ResolvedVpcRoute = omicron_common::api::internal::shared::ResolvedVpcRoute,
8590
ResolvedVpcRouteSet = omicron_common::api::internal::shared::ResolvedVpcRouteSet,
91+
RouterId = omicron_common::api::internal::shared::RouterId,
8692
RouterTarget = omicron_common::api::internal::shared::RouterTarget,
8793
RouterVersion = omicron_common::api::internal::shared::RouterVersion,
8894
SledRole = sled_agent_types_versions::latest::inventory::SledRole,
8995
SourceNatConfigGeneric = omicron_common::api::internal::shared::SourceNatConfigGeneric,
9096
SwitchLocation = omicron_common::api::external::SwitchLocation,
97+
Threshold = trust_quorum_types::types::Threshold,
9198
Vni = omicron_common::api::external::Vni,
9299
VpcFirewallIcmpFilter = omicron_common::api::external::VpcFirewallIcmpFilter,
93100
ZpoolKind = omicron_common::zpool_name::ZpoolKind,

dev-tools/omdb/src/bin/omdb/nexus.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ use nexus_types::internal_api::background::SupportBundleCleanupReport;
7474
use nexus_types::internal_api::background::SupportBundleCollectionReport;
7575
use nexus_types::internal_api::background::SupportBundleCollectionStepStatus;
7676
use nexus_types::internal_api::background::SupportBundleEreportStatus;
77+
use nexus_types::internal_api::background::TrustQuorumManagerStatus;
7778
use nexus_types::internal_api::background::TufArtifactReplicationCounters;
7879
use nexus_types::internal_api::background::TufArtifactReplicationRequest;
7980
use nexus_types::internal_api::background::TufArtifactReplicationStatus;
@@ -1250,6 +1251,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
12501251
"fm_sitrep_gc" => {
12511252
print_task_fm_sitrep_gc(details);
12521253
}
1254+
"trust_quorum_manager" => {
1255+
print_task_trust_quorum_manager(details);
1256+
}
12531257
_ => {
12541258
println!(
12551259
"warning: unknown background task: {:?} \
@@ -3243,6 +3247,40 @@ fn print_task_fm_sitrep_gc(details: &serde_json::Value) {
32433247
);
32443248
}
32453249

3250+
/// Print the details reported by the `trust_quorum_manager` background task.
///
/// `details` is deserialized into a `TrustQuorumManagerStatus`. If that fails,
/// a warning containing the error and the raw details is written to stderr
/// and nothing else is printed.
fn print_task_trust_quorum_manager(details: &serde_json::Value) {
3251+
let status = match serde_json::from_value::<TrustQuorumManagerStatus>(
3252+
details.clone(),
3253+
) {
3254+
Ok(status) => status,
3255+
Err(error) => {
3256+
eprintln!(
3257+
"warning: failed to interpret task details: {:?}: {:#?}",
3258+
error, details
3259+
);
3260+
return;
3261+
}
3262+
};
3263+
3264+
match status {
3265+
TrustQuorumManagerStatus::PerRackStatus { statuses, errors } => {
3266+
// With neither statuses nor errors to show, print a single summary
// line instead of producing no output at all.
if statuses.is_empty() && errors.is_empty() {
3267+
println!("No active reconfigurations");
3268+
return;
3269+
}
3270+
// All statuses are printed first, followed by all errors.
for status in statuses {
3271+
println!("{status}");
3272+
}
3273+
3274+
for error in errors {
3275+
println!("{error}");
3276+
}
3277+
}
3278+
// The task reported a single error rather than per-rack results.
TrustQuorumManagerStatus::Error(error) => {
3279+
println!(" task did not complete successfully: {error}");
3280+
}
3281+
}
3282+
}
3283+
32463284
const ERRICON: &str = "/!\\";
32473285

32483286
fn warn_if_nonzero(n: usize) -> &'static str {

dev-tools/omdb/tests/env.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ task: "switch_port_config_manager"
216216
manages switch port settings for rack switches
217217

218218

219+
task: "trust_quorum_manager"
220+
Drive trust quorum reconfigurations to completion
221+
222+
219223
task: "tuf_artifact_replication"
220224
replicate update repo artifacts across sleds
221225

@@ -449,6 +453,10 @@ task: "switch_port_config_manager"
449453
manages switch port settings for rack switches
450454

451455

456+
task: "trust_quorum_manager"
457+
Drive trust quorum reconfigurations to completion
458+
459+
452460
task: "tuf_artifact_replication"
453461
replicate update repo artifacts across sleds
454462

@@ -669,6 +677,10 @@ task: "switch_port_config_manager"
669677
manages switch port settings for rack switches
670678

671679

680+
task: "trust_quorum_manager"
681+
Drive trust quorum reconfigurations to completion
682+
683+
672684
task: "tuf_artifact_replication"
673685
replicate update repo artifacts across sleds
674686

dev-tools/omdb/tests/successes.out

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,10 @@ task: "switch_port_config_manager"
451451
manages switch port settings for rack switches
452452

453453

454+
task: "trust_quorum_manager"
455+
Drive trust quorum reconfigurations to completion
456+
457+
454458
task: "tuf_artifact_replication"
455459
replicate update repo artifacts across sleds
456460

@@ -857,6 +861,12 @@ task: "switch_port_config_manager"
857861
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
858862
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})
859863

864+
task: "trust_quorum_manager"
865+
configured period: every <REDACTED_DURATION>m
866+
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
867+
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
868+
No active reconfigurations
869+
860870
task: "tuf_artifact_replication"
861871
configured period: every <REDACTED_DURATION>h
862872
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
@@ -1425,6 +1435,12 @@ task: "switch_port_config_manager"
14251435
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
14261436
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})
14271437

1438+
task: "trust_quorum_manager"
1439+
configured period: every <REDACTED_DURATION>m
1440+
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
1441+
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
1442+
No active reconfigurations
1443+
14281444
task: "tuf_artifact_replication"
14291445
configured period: every <REDACTED_DURATION>h
14301446
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>

nexus-config/src/nexus_config.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,8 @@ pub struct BackgroundTaskConfig {
431431
pub probe_distributor: ProbeDistributorConfig,
432432
/// configuration for multicast reconciler (group+members) task
433433
pub multicast_reconciler: MulticastGroupReconcilerConfig,
434+
/// configuration for trust quorum manager task
435+
pub trust_quorum: TrustQuorumConfig,
434436
}
435437

436438
#[serde_as]
@@ -962,6 +964,15 @@ pub struct ProbeDistributorConfig {
962964
pub period_secs: Duration,
963965
}
964966

967+
/// Configuration for the trust quorum manager background task.
#[serde_as]
968+
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
969+
pub struct TrustQuorumConfig {
970+
/// period (in seconds) for periodic activations of the background task that
971+
/// completes trust quorum reconfigurations.
972+
// (De)serialized as a whole number of seconds via `DurationSeconds<u64>`.
#[serde_as(as = "DurationSeconds<u64>")]
973+
pub period_secs: Duration,
974+
}
975+
965976
/// Configuration for a nexus server
966977
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
967978
pub struct PackageConfig {
@@ -1229,6 +1240,7 @@ mod test {
12291240
fm.sitrep_gc_period_secs = 49
12301241
probe_distributor.period_secs = 50
12311242
multicast_reconciler.period_secs = 60
1243+
trust_quorum.period_secs = 60
12321244
[default_region_allocation_strategy]
12331245
type = "random"
12341246
seed = 0
@@ -1486,6 +1498,9 @@ mod test {
14861498
sled_cache_ttl_secs: MulticastGroupReconcilerConfig::default_sled_cache_ttl_secs(),
14871499
backplane_cache_ttl_secs: MulticastGroupReconcilerConfig::default_backplane_cache_ttl_secs(),
14881500
},
1501+
trust_quorum: TrustQuorumConfig {
1502+
period_secs: Duration::from_secs(60),
1503+
},
14891504
},
14901505
multicast: MulticastConfig { enabled: false },
14911506
default_region_allocation_strategy:
@@ -1589,6 +1604,7 @@ mod test {
15891604
fm.sitrep_gc_period_secs = 46
15901605
probe_distributor.period_secs = 47
15911606
multicast_reconciler.period_secs = 60
1607+
trust_quorum.period_secs = 60
15921608
15931609
[default_region_allocation_strategy]
15941610
type = "random"

nexus/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ slog.workspace = true
109109
slog-async.workspace = true
110110
slog-dtrace.workspace = true
111111
slog-error-chain.workspace = true
112+
swrite.workspace = true
112113
display-error-chain.workspace = true
113114
slog-term.workspace = true
114115
static_assertions.workspace = true
@@ -119,6 +120,7 @@ tokio = { workspace = true, features = ["full"] }
119120
tokio-postgres = { workspace = true, features = ["with-serde_json-1"] }
120121
tokio-util = { workspace = true, features = ["codec", "rt"] }
121122
tough.workspace = true
123+
trust-quorum-types.workspace = true
122124
tufaceous-artifact.workspace = true
123125
usdt.workspace = true
124126
uuid.workspace = true

nexus/background-task-interface/src/init.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ pub struct BackgroundTasks {
5555
pub task_fm_sitrep_gc: Activator,
5656
pub task_probe_distributor: Activator,
5757
pub task_multicast_reconciler: Activator,
58+
pub task_trust_quorum_manager: Activator,
5859

5960
// Handles to activate background tasks that do not get used by Nexus
6061
// at-large. These background tasks are implementation details as far as

nexus/db-queries/src/db/datastore/sled.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,32 @@ impl DataStore {
737737
Ok(rack_id.map(RackUuid::from))
738738
}
739739

740+
/// Return the commissioned sled in the given rack that matches the given
741+
/// `BaseboardId`, if one exists.
///
/// The caller must be authorized for `ListChildren` on the fleet. The lookup
/// matches on part number, serial number and rack ID, skips rows with
/// `time_deleted` set, and applies `SledFilter::Commissioned`.
///
/// Returns `Ok(None)` when no row matches. Database errors are converted
/// with `public_error_from_diesel` using `ErrorHandler::Server`.
742+
pub async fn sled_get_commissioned_by_baseboard_and_rack_id(
743+
&self,
744+
opctx: &OpContext,
745+
rack_id: RackUuid,
746+
baseboard_id: BaseboardId,
747+
) -> Result<Option<Sled>, Error> {
748+
opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
749+
let conn = &*self.pool_connection_authorized(opctx).await?;
750+
use nexus_db_schema::schema::sled::dsl;
751+
let sled = dsl::sled
752+
.filter(dsl::time_deleted.is_null())
753+
.filter(dsl::part_number.eq(baseboard_id.part_number))
754+
.filter(dsl::serial_number.eq(baseboard_id.serial_number))
755+
.filter(dsl::rack_id.eq(rack_id.into_untyped_uuid()))
756+
.sled_filter(SledFilter::Commissioned)
757+
.select(Sled::as_select())
758+
.get_result_async::<Sled>(conn)
759+
.await
760+
// A missing row becomes `None` instead of a "not found" error.
.optional()
761+
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
762+
763+
Ok(sled)
764+
}
765+
740766
pub async fn sled_list(
741767
&self,
742768
opctx: &OpContext,

0 commit comments

Comments
 (0)