From 6140e6f397de2ed15529ae23169461fd90e4772a Mon Sep 17 00:00:00 2001 From: Daniel Savu <23065004+daniel-savu@users.noreply.github.com> Date: Thu, 25 Jul 2024 11:38:24 +0100 Subject: [PATCH] feat: Relayer heap profiling (#4180) ### Description During the scalability audit we've identified relayer memory usage as a cause of concern. This PR runs the profiler each time e2e is run and outputs a report called `dhat-heap.json` in the current dir. That file can be visualised with a dhat viewer, such as the one hosted by the author [here](https://nnethercote.github.io/dh_view/dh_view.html). It adds a conditionally compiled heap memory profiler ([dhat-rs](https://docs.rs/dhat/latest/dhat/)), which is only included when both the `memory-profiling` feature is enabled and the `memory-profiling` cargo profile is used. The report can order the profiling results by total lifetime allocated bytes, peak allocated bytes, total allocation count, etc. Below is the result of sorting by total lifetime allocated bytes, which reveals that the biggest memory hogs are the hook indexing channel [senders](https://github.com/hyperlane-xyz/hyperlane-monorepo/blob/main/rust/hyperlane-base/src/contract_sync/mod.rs#L52) (one per origin chain), taking up 100+ MB. Its size can most likely be reduced from 1M to 1k. ![Screenshot 2024-07-23 at 14 32 21](https://github.com/user-attachments/assets/f551ebfe-3ee0-40de-b53f-e5de1bc56dfa) ### Drive-by changes Adds the remaining IDs in the channel as a field to the hook indexing log, to know if it's safe to lower the channel size. ### Related issues - Fixes https://github.com/hyperlane-xyz/issues/issues/1293 ### Backward compatibility Yes, since everything is conditionally compiled / isolated in a new cargo profile. ### Testing Manual --- rust/.gitignore | 1 + rust/Cargo.lock | 32 ++++++++++++- rust/Cargo.toml | 1 + rust/agents/relayer/Cargo.toml | 3 ++ rust/agents/relayer/src/main.rs | 13 ++++- rust/agents/relayer/src/memory_profiler.rs | 48 +++++++++++++++++++ rust/agents/relayer/src/server/mod.rs | 2 +- .../src/contract_sync/cursors/mod.rs | 3 +- rust/hyperlane-base/src/contract_sync/mod.rs | 1 + rust/utils/run-locally/src/main.rs | 2 +- 10 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 rust/agents/relayer/src/memory_profiler.rs diff --git a/rust/.gitignore b/rust/.gitignore index dcdf8bcd7..9eb7b8339 100644 --- a/rust/.gitignore +++ b/rust/.gitignore @@ -5,3 +5,4 @@ kathydb hyperlane_db config/test_config.json validator_db_anvil* +dhat-heap* diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 1ae801fed..ef75d9fa3 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2128,6 +2128,22 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "dhat" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cd11d84628e233de0ce467de10b8633f4ddaecafadefc86e13b84b8739b827" +dependencies = [ + "backtrace", + "lazy_static", + "mintex", + "parking_lot 0.12.1", + "rustc-hash", + "serde", + "serde_json", + "thousands", +] + [[package]] name = "dialoguer" version = "0.10.4" @@ -4345,7 +4361,7 @@ dependencies = [ "futures-util", "hex 0.4.3", "hyperlane-core", - "itertools 0.10.5", + "itertools 0.12.0", "num 0.4.1", "num-traits", "reqwest", @@ -5501,6 +5517,12 @@ dependencies = [ "adler", ] +[[package]] +name = "mintex" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bec4598fddb13cc7b528819e697852653252b760f1228b7642679bf2ff2cd07" + [[package]] name = "mio" version = "0.8.10" @@ -7045,8 +7067,10 @@ dependencies = [ "config", "console-subscriber", "convert_case 0.6.0", + "ctrlc", "derive-new", "derive_more", + "dhat", "ethers", "ethers-contract", "eyre", @@ -9954,6 +9978,12 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "thousands" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + [[package]] name = "thread_local" version = "1.1.7" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index bfe2564f0..8e7c1a62d 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -80,6 +80,7 @@ curve25519-dalek = { version = "~3.2", features = ["serde"] } derive-new = "0.5" derive_builder = "0.12" derive_more = "0.99" +dhat = "0.3.3" ed25519-dalek = "~1.0" eyre = "=0.6.8" fixed-hash = "0.8.0" diff --git a/rust/agents/relayer/Cargo.toml b/rust/agents/relayer/Cargo.toml index f8a7eb97b..41fff1b24 100644 --- a/rust/agents/relayer/Cargo.toml +++ b/rust/agents/relayer/Cargo.toml @@ -15,8 +15,10 @@ axum.workspace = true config.workspace = true console-subscriber.workspace = true convert_case.workspace = true +ctrlc = { workspace = true, features = ["termination"], optional = true } derive-new.workspace = true derive_more.workspace = true +dhat = { workspace = true, optional = true } ethers-contract.workspace = true ethers.workspace = true eyre.workspace = true @@ -55,3 +57,4 @@ default = ["color-eyre", "oneline-errors"] oneline-errors = ["hyperlane-base/oneline-errors"] color-eyre = ["hyperlane-base/color-eyre"] test-utils = ["hyperlane-base/test-utils"] +memory-profiling = ["dep:ctrlc", "dep:dhat"] diff --git a/rust/agents/relayer/src/main.rs b/rust/agents/relayer/src/main.rs index 7d085f529..f9ac628ac 100644 --- a/rust/agents/relayer/src/main.rs +++ b/rust/agents/relayer/src/main.rs @@ -13,7 +13,18 @@ use hyperlane_base::agent_main; use relayer::Relayer; +#[cfg(feature = "memory-profiling")] +mod memory_profiler; + #[tokio::main(flavor = "multi_thread", worker_threads = 20)] async fn main() -> Result<()> { - agent_main::().await + let agent_main_fut = agent_main::(); + + #[cfg(feature = "memory-profiling")] + memory_profiler::run_future(agent_main_fut).await?; + + #[cfg(not(feature = "memory-profiling"))] + agent_main_fut.await?; + + Ok(()) } diff --git a/rust/agents/relayer/src/memory_profiler.rs b/rust/agents/relayer/src/memory_profiler.rs new file mode 100644 index 000000000..9027862fb --- /dev/null +++ b/rust/agents/relayer/src/memory_profiler.rs @@ -0,0 +1,48 @@ +use dhat::Profiler; +use eyre::Error as Report; + +#[global_allocator] +static ALLOC: dhat::Alloc = dhat::Alloc; + +fn initialize() -> Option { + let profiler = Profiler::new_heap(); + Some(profiler) +} + +fn termination_handler(profiler_singleton: &mut Option) { + // only call drop on the profiler once + if let Some(profiler) = profiler_singleton.take() { + drop(profiler); + } +} + +pub(crate) async fn run_future(fut: F) -> Result +where + F: std::future::Future>, + T: Default, +{ + let mut profiler_singleton = initialize(); + + let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + let mut shutdown_tx_singleton = Some(shutdown_tx); + + ctrlc::set_handler(move || { + termination_handler(&mut profiler_singleton); + + // only send the shutdown signal once + let Some(shutdown_tx) = shutdown_tx_singleton.take() else { + return; + }; + if let Err(_) = shutdown_tx.send(()) { + eprintln!("failed to send shutdown signal"); + } + }) + .expect("Error setting termination handler"); + + // this `select!` isn't cancellation-safe if `fut` owns state + // but for profiling scenarios this is not a risk + tokio::select! { + res = fut => { return res; }, + _ = shutdown_rx => { return Ok(T::default()) }, + }; +} diff --git a/rust/agents/relayer/src/server/mod.rs b/rust/agents/relayer/src/server/mod.rs index ccc27fbb7..4b60e7bb5 100644 --- a/rust/agents/relayer/src/server/mod.rs +++ b/rust/agents/relayer/src/server/mod.rs @@ -5,7 +5,7 @@ use tokio::sync::broadcast::Sender; use crate::msg::op_queue::OperationPriorityQueue; -pub const ENDPOINT_MESSAGES_QUEUE_SIZE: usize = 1_000; +pub const ENDPOINT_MESSAGES_QUEUE_SIZE: usize = 100; pub use list_messages::*; pub use message_retry::*; diff --git a/rust/hyperlane-base/src/contract_sync/cursors/mod.rs b/rust/hyperlane-base/src/contract_sync/cursors/mod.rs index 016454d04..54b79ca4a 100644 --- a/rust/hyperlane-base/src/contract_sync/cursors/mod.rs +++ b/rust/hyperlane-base/src/contract_sync/cursors/mod.rs @@ -13,8 +13,7 @@ pub enum CursorType { RateLimited, } -// H256 * 1M = 32MB per origin chain worst case -// With one such channel per origin chain. +// H512 * 1M = 64MB per origin chain const TX_ID_CHANNEL_CAPACITY: Option = Some(1_000_000); pub trait Indexable { diff --git a/rust/hyperlane-base/src/contract_sync/mod.rs b/rust/hyperlane-base/src/contract_sync/mod.rs index 9c8ba75d6..a8c39e48d 100644 --- a/rust/hyperlane-base/src/contract_sync/mod.rs +++ b/rust/hyperlane-base/src/contract_sync/mod.rs @@ -116,6 +116,7 @@ where num_logs, ?tx_id, sequences = ?logs.iter().map(|(log, _)| log.sequence).collect::>(), + pending_ids = ?recv.len(), "Found log(s) for tx id" ); } diff --git a/rust/utils/run-locally/src/main.rs b/rust/utils/run-locally/src/main.rs index 1bf299075..e5498b5f6 100644 --- a/rust/utils/run-locally/src/main.rs +++ b/rust/utils/run-locally/src/main.rs @@ -319,7 +319,7 @@ fn main() -> ExitCode { log!("Building rust..."); let build_rust = Program::new("cargo") .cmd("build") - .arg("features", "test-utils") + .arg("features", "test-utils memory-profiling") .arg("bin", "relayer") .arg("bin", "validator") .arg("bin", "scraper")