From eb17a9af15f928520033039f8d25fa3cf0c7f10d Mon Sep 17 00:00:00 2001 From: Yorke Rhodes Date: Wed, 7 Jun 2023 11:20:14 -0400 Subject: [PATCH] Block validator submitter on nonzero mailbox count (#2341) ### Description Ensures all checkpoint submitters wait for nonzero mailbox count ### Drive-by Changes Notably always spawns backfill submitter after seeing nonzero mailbox count ### Related issues Fixes https://github.com/hyperlane-xyz/hyperlane-monorepo/issues/2340 ### Backward compatibility Yes ### Testing Updated E2E Tests --- rust/agents/validator/src/submit.rs | 12 ------- rust/agents/validator/src/validator.rs | 46 +++++++++++++++++--------- rust/utils/run-locally/src/main.rs | 27 +++++++++++++-- 3 files changed, 55 insertions(+), 30 deletions(-) diff --git a/rust/agents/validator/src/submit.rs b/rust/agents/validator/src/submit.rs index 9b787577d..6bb57e9e3 100644 --- a/rust/agents/validator/src/submit.rs +++ b/rust/agents/validator/src/submit.rs @@ -133,18 +133,6 @@ impl ValidatorSubmitter { } pub(crate) async fn legacy_checkpoint_submitter(self) -> Result<()> { - // Ensure that the mailbox has > 0 messages before we enter the main - // validator submit loop. This is to avoid an underflow / reverted - // call when we invoke the `mailbox.latest_checkpoint()` method, - // which returns the **index** of the last element in the tree - // rather than just the size. See - // https://github.com/hyperlane-network/hyperlane-monorepo/issues/575 for - // more details. - while self.mailbox.count(self.reorg_period).await? == 0 { - info!("Waiting for first message to mailbox"); - sleep(self.interval).await; - } - // current_index will be None if the validator cannot find // a previously signed checkpoint let mut current_index = self.checkpoint_syncer.latest_index().await?; diff --git a/rust/agents/validator/src/validator.rs b/rust/agents/validator/src/validator.rs index 17afe4d67..585712eb5 100644 --- a/rust/agents/validator/src/validator.rs +++ b/rust/agents/validator/src/validator.rs @@ -105,6 +105,21 @@ impl BaseAgent for Validator { async fn run(&self) -> Instrumented>> { self.announce().await.expect("Failed to announce validator"); + let reorg_period = NonZeroU64::new(self.reorg_period); + + // Ensure that the mailbox has count > 0 before we begin indexing + // messages or submitting checkpoints. + while self + .mailbox + .count(reorg_period) + .await + .expect("Failed to get count of mailbox") + == 0 + { + info!("Waiting for first message to mailbox"); + sleep(self.interval).await; + } + let mut tasks = vec![]; tasks.push(self.run_message_sync().await); @@ -146,28 +161,27 @@ impl Validator { ); let empty_tree = IncrementalMerkle::default(); - let lag = NonZeroU64::new(self.reorg_period); + let reorg_period = NonZeroU64::new(self.reorg_period); let tip_tree = self .mailbox - .tree(lag) + .tree(reorg_period) .await .expect("failed to get mailbox tree"); - let mut tasks = vec![]; + assert!(tip_tree.count() > 0, "mailbox tree is empty"); + let backfill_target = submitter.checkpoint(&tip_tree); let legacy_submitter = submitter.clone(); + let backfill_submitter = submitter.clone(); - if tip_tree.count() > 0 { - let backfill_target = submitter.checkpoint(&tip_tree); - let backfill_submitter = submitter.clone(); - tasks.push( - tokio::spawn(async move { - backfill_submitter - .checkpoint_submitter(empty_tree, Some(backfill_target)) - .await - }) - .instrument(info_span!("BackfillCheckpointSubmitter")), - ); - } + let mut tasks = vec![]; + tasks.push( + tokio::spawn(async move { + backfill_submitter + .checkpoint_submitter(empty_tree, Some(backfill_target)) + .await + }) + .instrument(info_span!("BackfillCheckpointSubmitter")), + ); tasks.push( tokio::spawn(async move { submitter.checkpoint_submitter(tip_tree, None).await }) @@ -222,6 +236,7 @@ impl Validator { validator_address=?announcement.validator, "Please send tokens to the validator address to announce", ); + sleep(self.interval).await; } else { let outcome = self .validator_announce @@ -235,7 +250,6 @@ impl Validator { } } } - sleep(self.interval).await; } Ok(()) } diff --git a/rust/utils/run-locally/src/main.rs b/rust/utils/run-locally/src/main.rs index ad03bd9d2..2277a618d 100644 --- a/rust/utils/run-locally/src/main.rs +++ b/rust/utils/run-locally/src/main.rs @@ -373,7 +373,29 @@ fn main() -> ExitCode { state.watchers.push(scraper_stderr); state.scraper = Some(scraper); - // Send half the kathy messages before starting the agents + let mut validator_iter = validator_envs.iter(); + + // spawn 1st validator before any messages have been sent to test empty mailbox + let validator1_env = validator_iter.next().unwrap(); + let (validator, validator_stdout, validator_stderr) = run_agent( + "validator", + &common_env + .clone() + .into_iter() + .chain(validator1_env.clone()) + .collect(), + &[], + "VAL1", + log_all, + &log_dir, + ); + state.watchers.push(validator_stdout); + state.watchers.push(validator_stderr); + state.validators.push(validator); + + sleep(Duration::from_secs(5)); + + // Send half the kathy messages before starting the rest of the agents let mut kathy = Command::new("yarn"); kathy .arg("kathy") @@ -392,7 +414,8 @@ fn main() -> ExitCode { state.watchers.push(kathy_stderr); kathy.wait().unwrap(); - for (i, validator_env) in validator_envs.iter().enumerate() { + // spawn the rest of the validators + for (i, validator_env) in validator_iter.enumerate() { let (validator, validator_stdout, validator_stderr) = run_agent( "validator", &common_env