From c9a1cd520bd43ec8c7ea0410bf5e1ea0957bafc5 Mon Sep 17 00:00:00 2001 From: James Prestwich <10149425+prestwich@users.noreply.github.com> Date: Mon, 6 Sep 2021 14:49:13 -0700 Subject: [PATCH] bug: metrics no longer panics when attempting to register multiple replicas (#735) --- rust/agents/processor/src/processor.rs | 11 ++--- rust/agents/updater/src/updater.rs | 15 +++---- rust/optics-base/src/metrics.rs | 62 ++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/rust/agents/processor/src/processor.rs b/rust/agents/processor/src/processor.rs index 51b1773df..d6f967562 100644 --- a/rust/agents/processor/src/processor.rs +++ b/rust/agents/processor/src/processor.rs @@ -299,16 +299,11 @@ impl OpticsAgent for Processor { tokio::spawn(async move { let replica = replica_opt.ok_or_else(|| eyre!("No replica named {}", name))?; - let next_to_inspect = prometheus::IntGauge::new( + let next_to_inspect = metrics.new_replica_int_gauge( + &name, "optics_processor_next_to_inspect_idx", "Index of the next message to inspect", - ) - .expect("metric description failed validation"); - - metrics - .registry - .register(Box::new(next_to_inspect.clone())) - .expect("must be able to register agent metrics"); + )?; Replica { interval, diff --git a/rust/agents/updater/src/updater.rs b/rust/agents/updater/src/updater.rs index b98d84234..6fa2ca494 100644 --- a/rust/agents/updater/src/updater.rs +++ b/rust/agents/updater/src/updater.rs @@ -227,15 +227,12 @@ impl AsRef for Updater { impl Updater { /// Instantiate a new updater pub fn new(signer: Signers, interval_seconds: u64, update_pause: u64, core: AgentCore) -> Self { - let signed_attestation_count = IntCounter::new( - "optics_updater_signed_attestation_count", - "Number of attestations signed", - ) - .expect("metric description failed validation"); - - core.metrics - .registry - .register(Box::new(signed_attestation_count.clone())) + let signed_attestation_count = core + .metrics + .new_int_counter( + "optics_updater_signed_attestation_count", + "Number of attestations signed", + ) .expect("must be able to register agent metrics"); Self { diff --git a/rust/optics-base/src/metrics.rs b/rust/optics-base/src/metrics.rs index 8920cb7cb..4ee3028f1 100644 --- a/rust/optics-base/src/metrics.rs +++ b/rust/optics-base/src/metrics.rs @@ -1,8 +1,8 @@ //! Useful metrics that all agents should track. -use std::sync::Arc; - +use color_eyre::Result; use prometheus::{Encoder, HistogramOpts, HistogramVec, IntGaugeVec, Opts, Registry}; +use std::sync::Arc; use tokio::task::JoinHandle; #[derive(Debug)] @@ -15,7 +15,7 @@ pub struct CoreMetrics { rpc_latencies: Box, listen_port: Option, /// Metrics registry for adding new metrics and gathering reports - pub registry: Arc, + registry: Arc, } impl CoreMetrics { @@ -77,6 +77,56 @@ impl CoreMetrics { Ok(metrics) } + /// Register an int gauge. + /// + /// If this metric is per-replica, use `new_replica_int_gauge` + pub fn new_int_gauge(&self, metric_name: &str, help: &str) -> Result { + let gauge = prometheus::IntGauge::new(metric_name, help) + .expect("metric description failed validation"); + + self.registry.register(Box::new(gauge.clone()))?; + + Ok(gauge) + } + + /// Register an int gauge for a specific replica. + /// + /// The name will be prefixed with the replica name to avoid accidental + /// duplication + pub fn new_replica_int_gauge( + &self, + replica_name: &str, + metric_name: &str, + help: &str, + ) -> Result { + self.new_int_gauge(&format!("{}_{}", replica_name, metric_name), help) + } + + /// Register an int counter. + /// + /// If this metric is per-replica, use `new_replica_int_counter` + pub fn new_int_counter(&self, metric_name: &str, help: &str) -> Result { + let gauge = prometheus::IntCounter::new(metric_name, help) + .expect("metric description failed validation"); + + self.registry.register(Box::new(gauge.clone()))?; + + Ok(gauge) + } + + /// Register an int counter for a specific replica. + /// + /// The name will be prefixed with the replica name to avoid accidental + /// duplication + pub fn new_replica_int_counter( + &self, + replica_name: &str, + metric_name: &str, + help: &str, + ) -> Result { + self.new_int_counter(&format!("{}_{}", replica_name, metric_name), help) + } + /// Call with the new balance when gas is spent. pub fn wallet_balance_changed( &self, @@ -123,7 +173,11 @@ impl CoreMetrics { tokio::spawn(std::future::ready(())) } Some(port) => { - tracing::info!("starting prometheus server on 0.0.0.0:{port}", port = port); + tracing::info!( + port, + "starting prometheus server on 0.0.0.0:{port}", + port = port + ); tokio::spawn(async move { warp::serve(warp::path!("metrics").map(move || { warp::reply::with_header(