Agent Config Parsing Casing Fixes (#2666)

### Description

This is a fix for an issue in the arguments and environment where we are
unable to correctly support attribute names due to the casing. The main
issue cannot be fully fixed practically by this PR because it would
cause some configuration cases to break. For now it adds manual
exceptions for the command line argument case and leaves envs as they
were. There are then two new parsers that can be used with the new
config format.

### Drive-by changes

Also adds this to argument parser since it was trivial to copy/paste it
and it will allow us to harden parsing when we switch over.

### Related issues

- Fixes #2662 
- Fixes #2663 
- Progress on #2215 

### Backward compatibility

Yes

### Testing

Unit Tests
pull/2677/head
Mattie Conover 1 year ago committed by GitHub
parent 0d3dd6076d
commit 7c989e7eaa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      rust/Cargo.lock
  2. 2
      rust/Cargo.toml
  3. 2
      rust/hyperlane-base/Cargo.toml
  4. 40
      rust/hyperlane-base/src/settings/loader/arguments.rs
  5. 343
      rust/hyperlane-base/src/settings/loader/deprecated_arguments.rs
  6. 174
      rust/hyperlane-base/src/settings/loader/environment.rs
  7. 35
      rust/hyperlane-base/src/settings/loader/mod.rs

1
rust/Cargo.lock generated

@ -3601,6 +3601,7 @@ dependencies = [
"bs58 0.5.0",
"color-eyre",
"config",
"convert_case 0.6.0",
"derive-new",
"ed25519-dalek",
"ethers",

@ -60,7 +60,7 @@ bs58 = "0.5.0"
bytes = "1"
clap = "4"
color-eyre = "0.6"
config = "~0.13.3"
config = "0.13.3"
convert_case = "0.6"
crunchy = "0.2"
ctrlc = "3.2"

@ -10,11 +10,11 @@ publish.workspace = true
version.workspace = true
[dependencies]
# Main block
async-trait.workspace = true
bs58.workspace = true
color-eyre = { workspace = true, optional = true }
config.workspace = true
convert_case.workspace = true
derive-new.workspace = true
ed25519-dalek.workspace = true
ethers.workspace = true

@ -1,10 +1,11 @@
use std::ffi::{OsStr, OsString};
use config::{ConfigError, Map, Source, Value, ValueKind};
use convert_case::Case;
use crate::settings::loader::split_and_recase_key;
/// A source for loading configuration from command line arguments.
/// Command line argument keys are case-insensitive, and the following forms are
/// supported:
///
/// * `--key=value`
/// * `--key="value"`
@ -23,6 +24,10 @@ pub struct CommandLineArguments {
/// Ignore empty env values (treat as unset).
ignore_empty: bool,
/// What casing to use for the keys in the environment. By default it will not mutate the key
/// value.
casing: Option<Case>,
/// Alternate source for the environment. This can be used when you want to
/// test your own code using this source, without the need to change the
/// actual system environment variables.
@ -41,6 +46,11 @@ impl CommandLineArguments {
self
}
pub fn casing(mut self, casing: Case) -> Self {
self.casing = Some(casing);
self
}
pub fn source<I, S>(mut self, source: I) -> Self
where
I: IntoIterator<Item = S>,
@ -77,13 +87,7 @@ impl Source for CommandLineArguments {
continue;
}
let mut key = key.to_lowercase();
// If separator is given replace with `.`
if !separator.is_empty() && separator != "." {
key = key.replace(separator, ".");
}
let key = split_and_recase_key(separator, self.casing, key);
m.insert(key, Value::new(Some(&uri), ValueKind::String(value)));
}
@ -283,7 +287,8 @@ mod test {
Some(Value::new(
Some(&origin),
ValueKind::String($value.to_owned())
))
)),
$key
);
};
}
@ -291,11 +296,12 @@ mod test {
const ARGUMENTS: &[&str] = &[
"--key-a",
"value-a",
"--keY-b=value-b",
"--key-c=\"value c\"",
"--KEY-d='valUE d'",
"--key-b=value-b",
"--key-c-partA=\"value c a\"",
"--key-c-PartB=\"value c b\"",
"--key-d='valUE d'",
"--key-e=''",
"--key-F",
"--key-f",
"--key-g=value-g",
"--key-h",
];
@ -309,7 +315,8 @@ mod test {
assert_arg!(config, "key.a", "value-a");
assert_arg!(config, "key.b", "value-b");
assert_arg!(config, "key.c", "value c");
assert_arg!(config, "key.c.partA", "value c a");
assert_arg!(config, "key.c.PartB", "value c b");
assert_arg!(config, "key.d", "valUE d");
assert_arg!(config, "key.e", "");
assert_arg!(config, "key.f", "");
@ -329,7 +336,8 @@ mod test {
assert_arg!(config, "key.a", "value-a");
assert_arg!(config, "key.b", "value-b");
assert_arg!(config, "key.c", "value c");
assert_arg!(config, "key.c.partA", "value c a");
assert_arg!(config, "key.c.PartB", "value c b");
assert_arg!(config, "key.d", "valUE d");
assert_arg!(config, "key.g", "value-g");

@ -0,0 +1,343 @@
// TODO: Remove this file after deprecated config parsing has been removed.
use std::ffi::{OsStr, OsString};
use config::{ConfigError, Map, Source, Value, ValueKind};
use convert_case::Case;
use crate::settings::loader::split_and_recase_key;
/// A source for loading configuration from command line arguments.
/// Command line argument keys are case-insensitive, and the following forms are
/// supported:
///
/// * `--key=value`
/// * `--key="value"`
/// * `--key='value'`
/// * `--key value`
/// * `--key` (value is an empty string)
#[must_use]
#[derive(Clone, Debug, Default)]
pub struct DeprecatedCommandLineArguments {
/// Optional character sequence that separates each key segment in an
/// environment key pattern. Consider a nested configuration such as
/// `redis.password`, a separator of `-` would allow an environment key
/// of `redis-password` to match.
separator: Option<String>,
/// Ignore empty env values (treat as unset).
ignore_empty: bool,
/// Alternate source for the environment. This can be used when you want to
/// test your own code using this source, without the need to change the
/// actual system environment variables.
source: Option<Vec<OsString>>,
}
#[allow(unused)]
impl DeprecatedCommandLineArguments {
pub fn separator(mut self, s: &str) -> Self {
self.separator = Some(s.into());
self
}
pub fn ignore_empty(mut self, ignore: bool) -> Self {
self.ignore_empty = ignore;
self
}
pub fn source<I, S>(mut self, source: I) -> Self
where
I: IntoIterator<Item = S>,
S: AsRef<OsStr>,
{
self.source = Some(source.into_iter().map(|s| s.as_ref().to_owned()).collect());
self
}
}
impl Source for DeprecatedCommandLineArguments {
fn clone_into_box(&self) -> Box<dyn Source + Send + Sync> {
Box::new((*self).clone())
}
fn collect(&self) -> Result<Map<String, Value>, ConfigError> {
let mut m = Map::new();
let uri: String = "program argument".into();
let separator = self.separator.as_deref().unwrap_or("-");
let mut args = if let Some(source) = &self.source {
ArgumentParser::from_vec(source.clone())
} else {
ArgumentParser::from_env()
};
while let Some((key, value)) = args
.next()
.transpose()
.map_err(|e| ConfigError::Foreign(Box::new(e)))?
{
if self.ignore_empty && value.is_empty() {
continue;
}
let mut key = split_and_recase_key(separator, Some(Case::Flat), key);
if key.ends_with("interchaingaspaymaster") {
key = key.replace("interchaingaspaymaster", "interchainGasPaymaster");
} else if key.ends_with("validatorannounce") {
key = key.replace("validatorannounce", "validatorAnnounce");
}
m.insert(key, Value::new(Some(&uri), ValueKind::String(value)));
}
let remaining = args.finish();
if remaining.is_empty() {
Ok(m)
} else {
Err(ConfigError::Message("Could not parse all arguments".into()))
}
}
}
/// An ultra simple CLI arguments parser.
/// Adapted from pico-args 0.5.0.
#[derive(Clone, Debug)]
pub struct ArgumentParser(Vec<OsString>);
impl ArgumentParser {
/// Creates a parser from a vector of arguments.
///
/// The executable path **must** be removed.
///
/// This can be used for supporting `--` arguments to forward to another
/// program.
fn from_vec(args: Vec<OsString>) -> Self {
ArgumentParser(args)
}
/// Creates a parser from [`env::args_os`].
///
/// The executable path will be removed.
///
/// [`env::args_os`]: https://doc.rust-lang.org/stable/std/env/fn.args_os.html
fn from_env() -> Self {
let mut args: Vec<_> = std::env::args_os().collect();
args.remove(0);
ArgumentParser(args)
}
/// Returns a list of remaining arguments.
///
/// It's up to the caller what to do with them.
/// One can report an error about unused arguments,
/// other can use them for further processing.
fn finish(self) -> Vec<OsString> {
self.0
}
}
impl Iterator for ArgumentParser {
type Item = Result<(String, String), Error>;
fn next(&mut self) -> Option<Self::Item> {
let (k, v, kind, idx) = match self.find_next_kv_pair() {
Ok(Some(tup)) => tup,
Ok(None) => return None,
Err(e) => return Some(Err(e)),
};
match kind {
PairKind::SingleArgument => {
self.0.remove(idx);
}
PairKind::TwoArguments => {
self.0.remove(idx + 1);
self.0.remove(idx);
}
}
Some(Ok((k, v)))
}
}
// internal workings
impl ArgumentParser {
#[inline(never)]
fn find_next_kv_pair(&mut self) -> Result<Option<(String, String, PairKind, usize)>, Error> {
let Some(idx) = self.index_of_next_key() else {
return Ok(None);
};
// full term without leading '--'
let term = &os_to_str(&self.0[idx])?[2..];
if term.is_empty() {
return Err(Error::EmptyKey);
}
if let Some((key, value)) = term.split_once('=') {
// Parse a `--key=value` pair.
let key = key.to_owned();
// Check for quoted value.
let value = if starts_with(value, b'"') {
if !ends_with(value, b'"') {
// A closing quote must be the same as an opening one.
return Err(Error::UnmatchedQuote(key));
}
&value[1..value.len() - 1]
} else if starts_with(value, b'\'') {
if !ends_with(value, b'\'') {
// A closing quote must be the same as an opening one.
return Err(Error::UnmatchedQuote(key));
}
&value[1..value.len() - 1]
} else {
value
};
Ok(Some((key, value.to_owned(), PairKind::SingleArgument, idx)))
} else {
// Parse a `--key value` pair.
let key = term.to_owned();
let value = self
.0
.get(idx + 1)
.map(|v| os_to_str(v))
.transpose()?
.unwrap_or("");
if value.is_empty() || value.starts_with('-') {
// the next value is another key
Ok(Some((key, "".to_owned(), PairKind::SingleArgument, idx)))
} else {
Ok(Some((key, value.to_owned(), PairKind::TwoArguments, idx)))
}
}
}
fn index_of_next_key(&self) -> Option<usize> {
self.0.iter().position(|v| {
#[cfg(unix)]
{
use std::os::unix::ffi::OsStrExt;
v.len() >= 2 && &v.as_bytes()[0..2] == b"--"
}
#[cfg(not(unix))]
{
v.len() >= 2 && v.to_str().map(|v| v.starts_with("--")).unwrap_or(false)
}
})
}
}
#[inline]
fn starts_with(text: &str, c: u8) -> bool {
if text.is_empty() {
false
} else {
text.as_bytes()[0] == c
}
}
#[inline]
fn ends_with(text: &str, c: u8) -> bool {
if text.is_empty() {
false
} else {
text.as_bytes()[text.len() - 1] == c
}
}
#[inline]
fn os_to_str(text: &OsStr) -> Result<&str, Error> {
text.to_str().ok_or(Error::NonUtf8Argument)
}
/// A list of possible errors.
#[derive(Clone, Debug, thiserror::Error)]
pub enum Error {
/// Arguments must be a valid UTF-8 strings.
#[error("argument is not a UTF-8 string")]
NonUtf8Argument,
/// Found '--` or a key with nothing after the prefix
#[error("key name is empty (possibly after removing prefix)")]
EmptyKey,
/// Could not find closing quote for a value.
#[error("unmatched quote in `{0}`")]
UnmatchedQuote(String),
}
#[derive(Clone, Copy, PartialEq, Eq)]
enum PairKind {
SingleArgument,
TwoArguments,
}
#[cfg(test)]
mod test {
use super::*;
macro_rules! assert_arg {
($config:expr, $key:literal, $value:literal) => {
let origin = "program argument".to_owned();
assert_eq!(
$config.remove($key),
Some(Value::new(
Some(&origin),
ValueKind::String($value.to_owned())
))
);
};
}
const ARGUMENTS: &[&str] = &[
"--key-a",
"value-a",
"--keY-b=value-b",
"--key-c=\"value c\"",
"--KEY-d='valUE d'",
"--key-e=''",
"--key-F",
"--key-g=value-g",
"--key-h",
];
#[test]
fn default_case() {
let mut config = DeprecatedCommandLineArguments::default()
.source(ARGUMENTS)
.collect()
.unwrap();
assert_arg!(config, "key.a", "value-a");
assert_arg!(config, "key.b", "value-b");
assert_arg!(config, "key.c", "value c");
assert_arg!(config, "key.d", "valUE d");
assert_arg!(config, "key.e", "");
assert_arg!(config, "key.f", "");
assert_arg!(config, "key.g", "value-g");
assert_arg!(config, "key.h", "");
assert!(config.is_empty());
}
#[test]
fn ignore_empty() {
let mut config = DeprecatedCommandLineArguments::default()
.source(ARGUMENTS)
.ignore_empty(true)
.collect()
.unwrap();
assert_arg!(config, "key.a", "value-a");
assert_arg!(config, "key.b", "value-b");
assert_arg!(config, "key.c", "value c");
assert_arg!(config, "key.d", "valUE d");
assert_arg!(config, "key.g", "value-g");
assert!(config.is_empty());
}
}

@ -0,0 +1,174 @@
use std::env;
use config::{ConfigError, Map, Source, Value, ValueKind};
use convert_case::Case;
use crate::settings::loader::split_and_recase_key;
#[must_use]
#[derive(Clone, Debug, Default)]
pub struct Environment {
/// Optional prefix that will limit access to the environment to only keys that
/// begin with the defined prefix.
///
/// A prefix must include any desired separator. e.g. `CONFIG_`.
///
/// For example, the key `CONFIG_DEBUG` would become `DEBUG` with a prefix of `CONFIG_`.
prefix: Option<String>,
/// Optional character sequence that separates each key segment in an environment key pattern.
/// Consider a nested configuration such as `redis.password`, a separator of `_` would allow
/// an environment key of `REDIS_PASSWORD` to match. Defaults to `_`.
separator: Option<String>,
/// What casing to use for the keys in the environment. By default it will not mutate the key
/// value. Case conversion will be performed after the prefix has been removed on each of the
/// seperated path components individually.
casing: Option<Case>,
/// Ignore empty env values (treat as unset).
ignore_empty: bool,
/// Alternate source for the environment. This can be used when you want to test your own code
/// using this source, without the need to change the actual system environment variables.
source: Option<Map<String, String>>,
}
#[allow(unused)]
impl Environment {
pub fn prefix(mut self, s: &str) -> Self {
self.prefix = Some(s.into());
self
}
pub fn separator(mut self, s: &str) -> Self {
self.separator = Some(s.into());
self
}
pub fn ignore_empty(mut self, ignore: bool) -> Self {
self.ignore_empty = ignore;
self
}
pub fn casing(mut self, casing: Case) -> Self {
self.casing = Some(casing);
self
}
pub fn source<'a, I, S>(mut self, source: I) -> Self
where
I: IntoIterator<Item = &'a (S, S)>,
S: AsRef<str> + 'a,
{
self.source = Some(
source
.into_iter()
.map(|(k, v)| (k.as_ref().to_owned(), v.as_ref().to_owned()))
.collect(),
);
self
}
}
impl Source for Environment {
fn clone_into_box(&self) -> Box<dyn Source + Send + Sync> {
Box::new((*self).clone())
}
fn collect(&self) -> Result<Map<String, Value>, ConfigError> {
let uri: String = "program environment".into();
let separator = self.separator.as_deref().unwrap_or("_");
// Define a prefix pattern to test and exclude from keys
let prefix_pattern = self.prefix.as_deref().unwrap_or("");
let mapper = |(key, value): (String, String)| -> Option<(String, Value)> {
let key = if prefix_pattern.is_empty() {
key
} else if let Some(key) = key.strip_prefix(prefix_pattern) {
key.into()
} else {
return None;
};
// Treat empty environment variables as unset
if self.ignore_empty && value.is_empty() {
return None;
}
let key = split_and_recase_key(separator, self.casing, key);
Some((key, Value::new(Some(&uri), ValueKind::String(value))))
};
Ok(if let Some(source) = &self.source {
source.clone().into_iter().filter_map(mapper).collect()
} else {
env::vars().filter_map(mapper).collect()
})
}
}
#[cfg(test)]
mod test {
use super::*;
macro_rules! assert_env {
($config:expr, $key:literal, $value:literal) => {
let origin = "program environment".to_owned();
assert_eq!(
$config.remove($key),
Some(Value::new(
Some(&origin),
ValueKind::String($value.to_owned())
)),
$key
);
};
}
const ENVS: &[(&str, &str)] = &[
("PRE__KEY__A", "value-a"),
("PRE__key__b", ""),
("PRE__KEY__C__PART_A", "value c a"),
("PRE__KEY__C_PART_B", "value c b"),
];
#[test]
fn default_case() {
let mut config = Environment::default()
.source(ENVS)
.prefix("PRE__")
.separator("__")
.casing(Case::Camel)
.collect()
.unwrap();
assert_env!(config, "key.a", "value-a");
assert_env!(config, "key.b", "");
assert_env!(config, "key.c.partA", "value c a");
assert_env!(config, "key.cPartB", "value c b");
assert!(config.is_empty());
}
#[test]
fn ignore_empty() {
let mut config = Environment::default()
.source(ENVS)
.ignore_empty(true)
.source(ENVS)
.prefix("PRE__")
.separator("__")
.casing(Case::Snake)
.collect()
.unwrap();
assert_env!(config, "key.a", "value-a");
assert_env!(config, "key.c.part_a", "value c a");
assert_env!(config, "key.c_part_b", "value c b");
assert!(config.is_empty());
}
}

@ -1,16 +1,16 @@
use std::collections::HashMap;
use std::env;
use std::error::Error;
use std::path::PathBuf;
use std::{collections::HashMap, env, error::Error, path::PathBuf};
use crate::settings::loader::arguments::CommandLineArguments;
use config::{Config, Environment, File};
use config::{Config, Environment as DeprecatedEnvironment, File};
use convert_case::{Case, Casing};
use eyre::{bail, Context, Result};
use itertools::Itertools;
use serde::Deserialize;
use crate::settings::RawSettings;
use crate::{settings::loader::deprecated_arguments::DeprecatedCommandLineArguments, RawSettings};
mod arguments;
mod deprecated_arguments;
mod environment;
/// Load a settings object from the config locations.
/// Further documentation can be found in the `settings` module.
@ -77,16 +77,16 @@ where
let config_deserializer = builder
// Use a base configuration env variable prefix
.add_source(
Environment::with_prefix("HYP_BASE")
DeprecatedEnvironment::with_prefix("HYP_BASE")
.separator("_")
.source(Some(filtered_env.clone())),
)
.add_source(
Environment::with_prefix(&prefix)
DeprecatedEnvironment::with_prefix(&prefix)
.separator("_")
.source(Some(filtered_env)),
)
.add_source(CommandLineArguments::default().separator("."))
.add_source(DeprecatedCommandLineArguments::default().separator("."))
.build()?;
let formatted_config = {
@ -125,3 +125,18 @@ where
}
}
}
/// Load a settings object from the config locations and re-join the components with the standard
/// `config` crate separator `.`.
fn split_and_recase_key(sep: &str, case: Option<Case>, key: String) -> String {
if let Some(case) = case {
// if case is given, replace case of each key component and separate them with `.`
key.split(sep).map(|s| s.to_case(case)).join(".")
} else if !sep.is_empty() && sep != "." {
// Just standardize the separator to `.`
key.replace(sep, ".")
} else {
// no changes needed if there was no separator defined and we are preserving case.
key
}
}

Loading…
Cancel
Save