Skip to content

Commit

Permalink
random beacon: Write restored VSS public key to output after crash recovery (MystenLabs#15568)
Browse files Browse the repository at this point in the history

## Description 

Without this fix, narwhal nodes will not be able to sign any headers
containing randomness after they restart.

Also adds additional detail to some relevant log messages.

## Test Plan 

Fixes simtest failure in `test_upgrade_compatibility`.
  • Loading branch information
aschran authored Jan 5, 2024
1 parent 46aef16 commit e1877e9
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 9 deletions.
8 changes: 5 additions & 3 deletions crates/sui-benchmark/tests/simtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,10 +407,11 @@ mod test {
let finished = Arc::new(AtomicBool::new(false));
let finished_clone = finished.clone();
let _handle = tokio::task::spawn(async move {
info!("Running from version {starting_version} to version {max_ver}");
for version in starting_version..=max_ver {
info!("Targeting protocol version: {}", version);
info!("Targeting protocol version: {version}");
test_cluster.wait_for_all_nodes_upgrade_to(version).await;
info!("All nodes are at protocol version: {}", version);
info!("All nodes are at protocol version: {version}");
// Let all nodes run for a few epochs at this version.
tokio::time::sleep(Duration::from_secs(50)).await;
if version == max_ver {
Expand Down Expand Up @@ -439,12 +440,13 @@ mod test {
for package in new_framework_ref {
framework_injection::set_override(*package.id(), package.modules().clone());
}
info!("Framework injected");
info!("Framework injected for next_version {next_version}");
test_cluster
.update_validator_supported_versions(
SupportedProtocolVersions::new_for_testing(starting_version, next_version),
)
.await;
info!("Updated validator supported versions to include next_version {next_version}")
}
finished_clone.store(true, Ordering::SeqCst);
});
Expand Down
17 changes: 14 additions & 3 deletions crates/sui-core/src/authority/authority_per_epoch_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2436,7 +2436,11 @@ impl AuthorityPerEpochStore {
..
}) = transaction
{
debug!("Received EndOfPublish from {:?}", authority.concise());
debug!(
"Received EndOfPublish for epoch {} from {:?}",
self.committee.epoch,
authority.concise()
);

// It is ok to just release lock here as this function is the only place that transition into RejectAllCerts state
// And this function itself is always executed from consensus task
Expand All @@ -2459,8 +2463,9 @@ impl AuthorityPerEpochStore {
if collected_end_of_publish {
assert!(lock.is_none());
debug!(
"Collected enough end_of_publish messages with last message from validator {:?}",
authority.concise()
"Collected enough end_of_publish messages for epoch {} with last message from validator {:?}",
self.committee.epoch,
authority.concise(),
);
let mut l = self.get_reconfig_state_write_lock_guard();
l.close_all_certs();
Expand Down Expand Up @@ -2493,6 +2498,12 @@ impl AuthorityPerEpochStore {

if !is_reject_all_certs || !self.deferred_transactions_empty() || commit_has_deferred_txns {
// Don't end epoch until all deferred transactions are processed.
if is_reject_all_certs {
debug!(
"Blocking end of epoch on deferred transactions, from previous commits?={}, from this commit?={commit_has_deferred_txns}",
!self.deferred_transactions_empty(),
);
}
return Ok((lock, false));
}

Expand Down
18 changes: 15 additions & 3 deletions narwhal/primary/src/state_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,21 @@ impl RandomnessState {
// Load existing data from store.
let dkg_output = store.dkg_output();
if let Some(dkg_output) = &dkg_output {
info!(
"random beacon: loaded existing DKG output for epoch {}",
committee.epoch()
);
metrics
.state_handler_random_beacon_dkg_num_shares
.set(dkg_output.shares.as_ref().map_or(0, |shares| shares.len()) as i64);
if let Err(e) = vss_key_output.set(dkg_output.vss_pk.clone()) {
error!("random beacon: unable to write VSS key to output during startup: {e:?}")
}
} else {
info!(
"random beacon: no existing DKG output found for epoch {}",
committee.epoch()
);
}
metrics
.state_handler_current_randomness_round
Expand Down Expand Up @@ -222,6 +234,9 @@ impl RandomnessState {
self.metrics
.state_handler_random_beacon_dkg_num_shares
.set(output.shares.as_ref().map_or(0, |shares| shares.len()) as i64);
if let Err(e) = self.vss_key_output.set(output.vss_pk.clone()) {
error!("random beacon: unable to write VSS key to output: {e:?}")
}
self.store.set_dkg_output(&output);
self.dkg_output = Some(output);
}
Expand Down Expand Up @@ -305,9 +320,6 @@ impl RandomnessState {
&mut rand::thread_rng(),
) {
Ok(output) => {
if let Err(e) = self.vss_key_output.set(output.vss_pk.clone()) {
error!("random beacon: unable to write VSS key to output: {e:?}")
}
let num_shares = output.shares.as_ref().map_or(0, |shares| shares.len());
self.set_dkg_output(output);
info!("random beacon: DKG complete with {num_shares} shares for this node");
Expand Down

0 comments on commit e1877e9

Please sign in to comment.