bug: metrics no longer panics when attempting to register multiple replicas (#735)

buddies-main-deployment
James Prestwich 3 years ago committed by GitHub
parent c4de0a65eb
commit c9a1cd520b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      rust/agents/processor/src/processor.rs
  2. 9
      rust/agents/updater/src/updater.rs
  3. 62
      rust/optics-base/src/metrics.rs

@ -299,16 +299,11 @@ impl OpticsAgent for Processor {
tokio::spawn(async move { tokio::spawn(async move {
let replica = replica_opt.ok_or_else(|| eyre!("No replica named {}", name))?; let replica = replica_opt.ok_or_else(|| eyre!("No replica named {}", name))?;
let next_to_inspect = prometheus::IntGauge::new( let next_to_inspect = metrics.new_replica_int_gauge(
&name,
"optics_processor_next_to_inspect_idx", "optics_processor_next_to_inspect_idx",
"Index of the next message to inspect", "Index of the next message to inspect",
) )?;
.expect("metric description failed validation");
metrics
.registry
.register(Box::new(next_to_inspect.clone()))
.expect("must be able to register agent metrics");
Replica { Replica {
interval, interval,

@ -227,15 +227,12 @@ impl AsRef<AgentCore> for Updater {
impl Updater { impl Updater {
/// Instantiate a new updater /// Instantiate a new updater
pub fn new(signer: Signers, interval_seconds: u64, update_pause: u64, core: AgentCore) -> Self { pub fn new(signer: Signers, interval_seconds: u64, update_pause: u64, core: AgentCore) -> Self {
let signed_attestation_count = IntCounter::new( let signed_attestation_count = core
.metrics
.new_int_counter(
"optics_updater_signed_attestation_count", "optics_updater_signed_attestation_count",
"Number of attestations signed", "Number of attestations signed",
) )
.expect("metric description failed validation");
core.metrics
.registry
.register(Box::new(signed_attestation_count.clone()))
.expect("must be able to register agent metrics"); .expect("must be able to register agent metrics");
Self { Self {

@ -1,8 +1,8 @@
//! Useful metrics that all agents should track. //! Useful metrics that all agents should track.
use std::sync::Arc; use color_eyre::Result;
use prometheus::{Encoder, HistogramOpts, HistogramVec, IntGaugeVec, Opts, Registry}; use prometheus::{Encoder, HistogramOpts, HistogramVec, IntGaugeVec, Opts, Registry};
use std::sync::Arc;
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
#[derive(Debug)] #[derive(Debug)]
@ -15,7 +15,7 @@ pub struct CoreMetrics {
rpc_latencies: Box<HistogramVec>, rpc_latencies: Box<HistogramVec>,
listen_port: Option<u16>, listen_port: Option<u16>,
/// Metrics registry for adding new metrics and gathering reports /// Metrics registry for adding new metrics and gathering reports
pub registry: Arc<Registry>, registry: Arc<Registry>,
} }
impl CoreMetrics { impl CoreMetrics {
@ -77,6 +77,56 @@ impl CoreMetrics {
Ok(metrics) Ok(metrics)
} }
/// Register an int gauge.
///
/// If this metric is per-replica, use `new_replica_int_gauge`
pub fn new_int_gauge(&self, metric_name: &str, help: &str) -> Result<prometheus::IntGauge> {
let gauge = prometheus::IntGauge::new(metric_name, help)
.expect("metric description failed validation");
self.registry.register(Box::new(gauge.clone()))?;
Ok(gauge)
}
/// Register an int gauge for a specific replica.
///
/// The name will be prefixed with the replica name to avoid accidental
/// duplication
pub fn new_replica_int_gauge(
&self,
replica_name: &str,
metric_name: &str,
help: &str,
) -> Result<prometheus::IntGauge> {
self.new_int_gauge(&format!("{}_{}", replica_name, metric_name), help)
}
/// Register an int counter.
///
/// If this metric is per-replica, use `new_replica_int_counter`
pub fn new_int_counter(&self, metric_name: &str, help: &str) -> Result<prometheus::IntCounter> {
let gauge = prometheus::IntCounter::new(metric_name, help)
.expect("metric description failed validation");
self.registry.register(Box::new(gauge.clone()))?;
Ok(gauge)
}
/// Register an int counter for a specific replica.
///
/// The name will be prefixed with the replica name to avoid accidental
/// duplication
pub fn new_replica_int_counter(
&self,
replica_name: &str,
metric_name: &str,
help: &str,
) -> Result<prometheus::IntCounter> {
self.new_int_counter(&format!("{}_{}", replica_name, metric_name), help)
}
/// Call with the new balance when gas is spent. /// Call with the new balance when gas is spent.
pub fn wallet_balance_changed( pub fn wallet_balance_changed(
&self, &self,
@ -123,7 +173,11 @@ impl CoreMetrics {
tokio::spawn(std::future::ready(())) tokio::spawn(std::future::ready(()))
} }
Some(port) => { Some(port) => {
tracing::info!("starting prometheus server on 0.0.0.0:{port}", port = port); tracing::info!(
port,
"starting prometheus server on 0.0.0.0:{port}",
port = port
);
tokio::spawn(async move { tokio::spawn(async move {
warp::serve(warp::path!("metrics").map(move || { warp::serve(warp::path!("metrics").map(move || {
warp::reply::with_header( warp::reply::with_header(

Loading…
Cancel
Save