Skip to content

Commit

Permalink
expose chain identifier as a metric (MystenLabs#11823)
Browse files Browse the repository at this point in the history
## Description 

As the title says: expose the chain identifier as a metric. This will be useful to watch during a testnet wipe.

## Test Plan 

Tested locally.

---
If your changes are not user-facing and not a breaking change, you can
skip the following section. Otherwise, please indicate what changed, and
then add to the Release Notes section as highlighted during the release
process.

### Type of Change (Check all that apply)

- [ ] user-visible impact
- [ ] breaking change for client SDKs
- [ ] breaking change for FNs (FN binary must upgrade)
- [ ] breaking change for validators or node operators (must upgrade
binaries)
- [ ] breaking change for on-chain data layout
- [ ] necessitates either a data wipe or a data migration

### Release notes
Expose chain identifier as a metric
  • Loading branch information
longbowlu authored May 9, 2023
1 parent bae32e9 commit 909f351
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 13 deletions.
9 changes: 6 additions & 3 deletions crates/mysten-metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,11 @@ impl RegistryService {

/// Create a metric that measures the uptime from when this metric was constructed.
/// The metric is labeled with the provided 'version' label (this should generally be of the
/// format: 'semver-gitrevision').
pub fn uptime_metric(version: &'static str) -> Box<dyn prometheus::core::Collector> {
/// format: 'semver-gitrevision') and the provided 'chain_identifier' label.
pub fn uptime_metric(
version: &'static str,
chain_identifier: &str,
) -> Box<dyn prometheus::core::Collector> {
let opts = prometheus::opts!("uptime", "uptime of the node service in seconds")
.variable_label("version");

Expand All @@ -320,7 +323,7 @@ pub fn uptime_metric(version: &'static str) -> Box<dyn prometheus::core::Collect
opts,
prometheus_closure_metric::ValueType::Counter,
uptime,
&[version],
&[version, chain_identifier],
)
.unwrap();

Expand Down
30 changes: 21 additions & 9 deletions crates/sui-node/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0

use clap::Parser;
use fastcrypto::encoding::{Encoding, Hex};
use mysten_common::sync::async_once_cell::AsyncOnceCell;
use std::path::PathBuf;
use std::sync::Arc;
Expand Down Expand Up @@ -63,9 +64,6 @@ fn main() {
metrics::start_prometheus_server(config.metrics_address)
};
let prometheus_registry = registry_service.default_registry();
prometheus_registry
.register(mysten_metrics::uptime_metric(VERSION))
.unwrap();

// Initialize logging
let (_guard, filter_handle) = telemetry_subscribers::TelemetryConfig::new()
Expand Down Expand Up @@ -102,15 +100,15 @@ fn main() {

// Run node in a separate runtime so that admin/monitoring functions continue to work
// if it deadlocks.
let node_one_cell = Arc::new(AsyncOnceCell::<Arc<sui_node::SuiNode>>::new());
let node_one_cell_clone = node_one_cell.clone();
let node_once_cell = Arc::new(AsyncOnceCell::<Arc<sui_node::SuiNode>>::new());
let node_once_cell_clone = node_once_cell.clone();
let rpc_runtime = runtimes.json_rpc.handle().clone();

runtimes.sui_node.spawn(async move {
if let Err(e) = sui_node::SuiNode::start_async(
&config,
registry_service,
node_one_cell_clone,
node_once_cell_clone,
Some(rpc_runtime),
)
.await
Expand All @@ -124,14 +122,28 @@ fn main() {
}
});

let node_one_cell_clone = node_one_cell.clone();
let node_once_cell_clone = node_once_cell.clone();
runtimes.metrics.spawn(async move {
let node = node_one_cell_clone.get().await;
let node = node_once_cell_clone.get().await;
let chain_identifier = match node.state().get_chain_identifier() {
// Unwrap safe: Checkpoint Digest is 32 bytes long
Some(chain_identifier) => Hex::encode(chain_identifier.into_inner().get(0..4).unwrap()),
None => "Unknown".to_string(),
};

info!("Sui chain identifier: {chain_identifier}");
prometheus_registry
.register(mysten_metrics::uptime_metric(
VERSION,
chain_identifier.as_str(),
))
.unwrap();

sui_node::admin::run_admin_server(node, admin_interface_port, filter_handle).await
});

runtimes.metrics.spawn(async move {
let node = node_one_cell.get().await;
let node = node_once_cell.get().await;
let state = node.state();
loop {
send_telemetry_event(state.clone(), is_validator).await;
Expand Down
2 changes: 1 addition & 1 deletion crates/sui-proxy/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ async fn main() -> Result<()> {
let registry_service = metrics::start_prometheus_server(metrics_listener);
let prometheus_registry = registry_service.default_registry();
prometheus_registry
.register(mysten_metrics::uptime_metric(VERSION))
.register(mysten_metrics::uptime_metric(VERSION, "sui-proxy"))
.unwrap();
let app = app(
Labels {
Expand Down

0 comments on commit 909f351

Please sign in to comment.