From 5a31e7b5d39000af78d5fe206ef1e9c3627a2652 Mon Sep 17 00:00:00 2001 From: Daniel Savu <23065004+daniel-savu@users.noreply.github.com> Date: Tue, 11 Jun 2024 13:51:56 +0100 Subject: [PATCH] chore: debug slow relayer startup (#3932) ### Description Opened this PR to debug slow relayer startup, but it actually looks like that's already been fixed by hook indexing. The requirement for fast startup is that the processor sees new messages and then one of the following happens: - (1) if the message is meant for an unknown domain, it drops it ([this log](https://github.com/hyperlane-xyz/hyperlane-monorepo/blob/6d132b02a3e181c929aee7d452ca765cc039ea4e/rust/agents/relayer/src/msg/processor.rs#L269)), otherwise - it forwards the message to the submitter, which then: - (2) adds it straight to the confirm queue because it's already been submitted ([here](https://github.com/hyperlane-xyz/hyperlane-monorepo/blob/6d132b02a3e181c929aee7d452ca765cc039ea4e/rust/agents/relayer/src/msg/pending_message.rs#L159)) - (3) successfully builds metadata ([here](https://github.com/hyperlane-xyz/hyperlane-monorepo/blob/6d132b02a3e181c929aee7d452ca765cc039ea4e/rust/agents/relayer/src/msg/op_submitter.rs#L231)) - (4) in some cases building metadata fails because the ism doesn't exist The relayer logs either of the four cases (in most cases (2), because we're running a second relayer for now). To be even more confident in fast startup, I ran a single relayer and restarted it. New messages were successfully processed according to one of the four cases above, and the prep queues didn't increase ### Drive-by changes ### Related issues ### Backward compatibility ### Testing --- rust/agents/relayer/src/msg/op_submitter.rs | 1 + rust/agents/relayer/src/msg/processor.rs | 12 ++++++++++-- .../infra/config/environments/mainnet3/agent.ts | 6 +++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/rust/agents/relayer/src/msg/op_submitter.rs b/rust/agents/relayer/src/msg/op_submitter.rs index 20eb93616..84731aa63 100644 --- a/rust/agents/relayer/src/msg/op_submitter.rs +++ b/rust/agents/relayer/src/msg/op_submitter.rs @@ -244,6 +244,7 @@ async fn prepare_task( metrics.ops_dropped.inc(); } PendingOperationResult::Confirm => { + debug!(?op, "Pushing operation to confirm queue"); confirm_queue.push(op).await; } } diff --git a/rust/agents/relayer/src/msg/processor.rs b/rust/agents/relayer/src/msg/processor.rs index 6545d480a..1c81c3017 100644 --- a/rust/agents/relayer/src/msg/processor.rs +++ b/rust/agents/relayer/src/msg/processor.rs @@ -138,7 +138,10 @@ impl DirectionalNonceIterator { #[instrument] fn iterate(&mut self) { match self.direction { - NonceDirection::High => self.nonce = self.nonce.map(|n| n.saturating_add(1)), + NonceDirection::High => { + self.nonce = self.nonce.map(|n| n.saturating_add(1)); + debug!(?self, "Iterating high nonce"); + } NonceDirection::Low => { if let Some(nonce) = self.nonce { // once the message with nonce zero is processed, we should stop going backwards @@ -155,6 +158,7 @@ impl DirectionalNonceIterator { if let Some(message) = self.indexed_message_with_nonce()? { Self::update_max_nonce_gauge(&message, metrics); if !self.is_message_processed()? { + debug!(?message, iterator=?self, "Found processable message"); return Ok(MessageStatus::Processable(message)); } else { return Ok(MessageStatus::Processed); @@ -235,7 +239,11 @@ impl ProcessorExt for MessageProcessor { // nonce. // Scan until we find next nonce without delivery confirmation. if let Some(msg) = self.try_get_unprocessed_message().await? { - debug!(?msg, "Processor working on message"); + debug!( + ?msg, + cursor = ?self.nonce_iterator, + "Processor working on message" + ); let destination = msg.destination; // Skip if not whitelisted. diff --git a/typescript/infra/config/environments/mainnet3/agent.ts b/typescript/infra/config/environments/mainnet3/agent.ts index 971e30530..7d24fcc20 100644 --- a/typescript/infra/config/environments/mainnet3/agent.ts +++ b/typescript/infra/config/environments/mainnet3/agent.ts @@ -209,7 +209,7 @@ const hyperlane: RootAgentConfig = { rpcConsensusType: RpcConsensusType.Fallback, docker: { repo, - tag: 'd6bb976-20240520-164138', + tag: '939fa81-20240607-194607', }, gasPaymentEnforcement: gasPaymentEnforcement, metricAppContexts, @@ -226,7 +226,7 @@ const hyperlane: RootAgentConfig = { rpcConsensusType: RpcConsensusType.Fallback, docker: { repo, - tag: 'd6bb976-20240520-164138', + tag: '939fa81-20240607-194607', }, }, }; @@ -240,7 +240,7 @@ const releaseCandidate: RootAgentConfig = { rpcConsensusType: RpcConsensusType.Fallback, docker: { repo, - tag: 'c9c5d37-20240510-014327', + tag: '939fa81-20240607-194607', }, // We're temporarily (ab)using the RC relayer as a way to increase // message throughput.