Skip to content

Commit

Permalink
Switch to use Multiaddr to configure prometheus server for consiste…
Browse files Browse the repository at this point in the history
…ncy (MystenLabs/narwhal#625)

* SocketAddr -> Multiaddr

* fixup! SocketAddr -> Multiaddr

* fixup! fixup! SocketAddr -> Multiaddr

* fixup! fixup! fixup! SocketAddr -> Multiaddr

* add comment

* fixup! add comment
  • Loading branch information
mwtian authored Aug 1, 2022
1 parent 24d683b commit 28f3101
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 81 deletions.
27 changes: 1 addition & 26 deletions narwhal/Docker/gen.validators.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,32 +98,7 @@ do
${node} generate_keys --filename ${val}/key.json
done

cat > ${target}/parameters.json <<EOF
{
"batch_size": 500000,
"block_synchronizer": {
"certificates_synchronize_timeout": "2_000ms",
"handler_certificate_deliver_timeout": "2_000ms",
"payload_availability_timeout": "2_000ms",
"payload_synchronize_timeout": "2_000ms"
},
"consensus_api_grpc": {
"get_collections_timeout": "5_000ms",
"remove_collections_timeout": "5_000ms",
"socket_addr": "/ip4/0.0.0.0/tcp/8000/http"
},
"gc_depth": 50,
"header_size": 1000,
"max_batch_delay": "200ms",
"max_concurrent_requests": 500000,
"max_header_delay": "2000ms",
"sync_retry_delay": "10_000ms",
"sync_retry_nodes": 3,
"prometheus_metrics": {
"socket_addr": "0.0.0.0:8010"
}
}
EOF
cp validators/parameters.json ${target}/parameters.json

./scripts/gen.committee.py -n ${num} -d ${target} > ${target}/committee.json

Expand Down
4 changes: 2 additions & 2 deletions narwhal/Docker/validators/parameters.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@
"sync_retry_delay": "10_000ms",
"sync_retry_nodes": 3,
"prometheus_metrics": {
"socket_addr": "0.0.0.0:8010"
"socket_addr": "/ip4/0.0.0.0/tcp/8010/http"
}
}
}
2 changes: 1 addition & 1 deletion narwhal/benchmark/benchmark/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self, mean_tps, mean_latency, std_tps=0, std_latency=0):
self.std_latency = std_latency

def __str__(self):
return(
return (
f' TPS: {self.mean_tps} +/- {self.std_tps} tx/s\n'
f' Latency: {self.mean_latency} +/- {self.std_latency} ms\n'
)
Expand Down
2 changes: 1 addition & 1 deletion narwhal/benchmark/data/paper-data/plot-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def __init__(self, mean_tps, mean_latency, std_tps=0, std_latency=0):
self.std_latency = std_latency

def __str__(self):
return(
return (
f' TPS: {self.mean_tps} +/- {self.std_tps} tx/s\n'
f' Latency: {self.mean_latency} +/- {self.std_latency} ms\n'
)
Expand Down
11 changes: 6 additions & 5 deletions narwhal/benchmark/fabfile.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# Copyright(C) Facebook, Inc. and its affiliates.
from fabric import task
from benchmark.seed import SeedData

from benchmark.seed import SeedData
from benchmark.local import LocalBench
from benchmark.full_demo import Demo

from benchmark.logs import ParseError, LogParser
from benchmark.utils import Print
from benchmark.plot import Ploter, PlotError
Expand Down Expand Up @@ -45,7 +44,7 @@ def local(ctx, debug=True):
},
'max_concurrent_requests': 500_000,
'prometheus_metrics': {
"socket_addr": "127.0.0.1:0"
"socket_addr": "/ip4/127.0.0.1/tcp/0/http"
}
}
try:
Expand Down Expand Up @@ -77,6 +76,7 @@ def demo(ctx, debug=True):
"consensus_api_grpc": {
"get_collections_timeout": "5_000ms",
"remove_collections_timeout": "5_000ms",
# Use a random available local port.
"socket_addr": "/ip4/0.0.0.0/tcp/0/http"
},
"gc_depth": 50, # rounds
Expand All @@ -87,7 +87,8 @@ def demo(ctx, debug=True):
"sync_retry_delay": "10_000ms", # ms
"sync_retry_nodes": 3, # number of nodes
'prometheus_metrics': {
"socket_addr": "127.0.0.1:0"
# Use a random available local port.
"socket_addr": "/ip4/127.0.0.1/tcp/0/http"
}
}
try:
Expand Down Expand Up @@ -201,7 +202,7 @@ def remote(ctx, debug=False):
},
'max_concurrent_requests': 500_000,
'prometheus_metrics': {
"socket_addr": "127.0.0.1:0"
"socket_addr": "/ip4/127.0.0.1/tcp/0/http"
}
}
try:
Expand Down
7 changes: 3 additions & 4 deletions narwhal/config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use std::{
collections::{BTreeMap, HashMap},
fs::{self, OpenOptions},
io::{BufWriter, Write as _},
net::SocketAddr,
ops::Deref,
sync::Arc,
time::Duration,
Expand Down Expand Up @@ -136,13 +135,13 @@ pub struct Parameters {
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct PrometheusMetricsParameters {
/// Socket address the server should be listening to.
pub socket_addr: SocketAddr,
pub socket_addr: Multiaddr,
}

impl Default for PrometheusMetricsParameters {
fn default() -> Self {
Self {
socket_addr: format!("127.0.0.1:{}", get_available_port())
socket_addr: format!("/ip4/127.0.0.1/tcp/{}/http", get_available_port())
.parse()
.unwrap(),
}
Expand Down Expand Up @@ -562,7 +561,7 @@ mod tests {
assert!(logs_contain("Remove collections timeout set to 5000 ms"));
assert!(logs_contain("Max concurrent requests set to 500000"));
assert!(logs_contain(
"Prometheus metrics server will run on 127.0.0.1"
"Prometheus metrics server will run on /ip4/127.0.0.1/tcp"
));
}
}
71 changes: 39 additions & 32 deletions narwhal/config/tests/config_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ fn update_primary_network_info_test() {
}
}

// If one or both of the parameters_xx_matches() tests are broken by a change, the following additional places are
// highly likely needed to be updated as well:
// 1. Docker/validators/parameters.json for starting Narwhal cluster with Docker Compose.
// 2. benchmark/fabfile.py for benchmarking a Narwhal cluster locally.
// 3. Sui configurations & snapshot tests when upgrading Narwhal in Sui to include the change.

#[test]
fn parameters_snapshot_matches() {
// This configuration is load-bearing in the NW benchmarks,
Expand All @@ -118,7 +124,7 @@ fn parameters_snapshot_matches() {
..ConsensusAPIGrpcParameters::default()
};
let prometheus_metrics_parameters = PrometheusMetricsParameters {
socket_addr: "127.0.0.1:8081".parse().unwrap(),
socket_addr: "/ip4/127.0.0.1/tcp/8081/http".parse().unwrap(),
};

let parameters = Parameters {
Expand All @@ -129,35 +135,6 @@ fn parameters_snapshot_matches() {
assert_json_snapshot!("parameters", parameters)
}

#[test]
fn commmittee_snapshot_matches() {
// The shape of this configuration is load-bearing in the NW benchmarks,
// and in Sui (prod)
let keys = test_utils::keys(None);

let committee = Committee {
epoch: Epoch::default(),
authorities: keys
.iter()
.map(|kp| {
let mut port = 0;
let increment_port_getter = || {
port += 1;
port
};
(
kp.public().clone(),
make_authority_with_port_getter(increment_port_getter),
)
})
.collect(),
};
// we need authorities to be serialized in order
let mut settings = insta::Settings::clone_current();
settings.set_sort_maps(true);
settings.bind(|| assert_json_snapshot!("committee", committee));
}

#[test]
fn parameters_import_snapshot_matches() {
// GIVEN
Expand All @@ -182,7 +159,7 @@ fn parameters_import_snapshot_matches() {
},
"max_concurrent_requests": 500000,
"prometheus_metrics": {
"socket_addr": "127.0.0.1:0"
"socket_addr": "/ip4/127.0.0.1/tcp/0/http"
}
}"#;

Expand All @@ -196,8 +173,38 @@ fn parameters_import_snapshot_matches() {
writeln!(file, "{input}").expect("Couldn't write to file");

// WHEN
let params = Parameters::import(file_path.to_str().unwrap()).expect("Error raised");
let params = Parameters::import(file_path.to_str().unwrap())
.expect("Failed to import given Parameters json");

// THEN
assert_json_snapshot!("parameters_import", params)
}

#[test]
fn commmittee_snapshot_matches() {
// The shape of this configuration is load-bearing in the NW benchmarks,
// and in Sui (prod)
let keys = test_utils::keys(None);

let committee = Committee {
epoch: Epoch::default(),
authorities: keys
.iter()
.map(|kp| {
let mut port = 0;
let increment_port_getter = || {
port += 1;
port
};
(
kp.public().clone(),
make_authority_with_port_getter(increment_port_getter),
)
})
.collect(),
};
// we need authorities to be serialized in order
let mut settings = insta::Settings::clone_current();
settings.set_sort_maps(true);
settings.bind(|| assert_json_snapshot!("committee", committee));
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ expression: parameters
},
"max_concurrent_requests": 500000,
"prometheus_metrics": {
"socket_addr": "127.0.0.1:8081"
"socket_addr": "/ip4/127.0.0.1/tcp/8081/http"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ expression: params
},
"max_concurrent_requests": 500000,
"prometheus_metrics": {
"socket_addr": "127.0.0.1:0"
"socket_addr": "/ip4/127.0.0.1/tcp/0/http"
}
}
11 changes: 6 additions & 5 deletions narwhal/node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ bytes = "1.2.0"
cfg-if = "1.0.0"
clap = "2.34"
futures = "0.3.21"
multiaddr = "0.14.0"
mysten-network = { git = "https://github.com/mystenlabs/mysten-infra.git", rev = "20ef52a00135114eb361e28673cfaa9bf4560f6f" }
rand = "0.7.3"
store = { git = "https://github.com/mystenlabs/mysten-infra.git", package = "typed-store", rev = "123c9e40b529315e1c1d91a54fb717111c3e349c" }
thiserror = "1.0.31"
Expand Down Expand Up @@ -47,16 +49,15 @@ serde-reflection = "0.3.6"
serde_yaml = "0.8.26"
structopt = "0.3.26"
test_utils = { path = "../test_utils" }
mysten-network = { git = "https://github.com/mystenlabs/mysten-infra.git", rev = "c6dc7a23a40b3517f138d122a76d3bc15f844f67" }

[features]
benchmark = ["worker/benchmark", "primary/benchmark", "consensus/benchmark"]
dhat-heap = ["dhat"] # if you are doing heap profiling

[[bin]]
name = "benchmark_client"
path = "src/benchmark_client.rs"
required-features = ["benchmark"]
[[bin]]
name = "benchmark_client"
path = "src/benchmark_client.rs"
required-features = ["benchmark"]

[[example]]
name = "generate-format"
Expand Down
10 changes: 7 additions & 3 deletions narwhal/node/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
use axum::{http::StatusCode, routing::get, Extension, Router};
use config::WorkerId;
use crypto::PublicKey;
use multiaddr::Multiaddr;
use mysten_network::multiaddr::to_socket_addr;
use prometheus::{Registry, TextEncoder};
use std::{collections::HashMap, net::SocketAddr};
use std::collections::HashMap;
use tokio::task::JoinHandle;

const METRICS_ROUTE: &str = "/metrics";
Expand All @@ -26,13 +28,15 @@ pub fn worker_metrics_registry(worker_id: WorkerId, name: PublicKey) -> Registry
Registry::new_custom(Some(WORKER_METRICS_PREFIX.to_string()), Some(labels)).unwrap()
}

pub fn start_prometheus_server(addr: SocketAddr, registry: &Registry) -> JoinHandle<()> {
pub fn start_prometheus_server(addr: Multiaddr, registry: &Registry) -> JoinHandle<()> {
let app = Router::new()
.route(METRICS_ROUTE, get(metrics))
.layer(Extension(registry.clone()));

let socket_addr = to_socket_addr(&addr).expect("failed to convert Multiaddr to SocketAddr");

tokio::spawn(async move {
axum::Server::bind(&addr)
axum::Server::bind(&socket_addr)
.serve(app.into_make_service())
.await
.unwrap();
Expand Down

0 comments on commit 28f3101

Please sign in to comment.