[PAN-2074] Add --metrics-category CLI to only enable select metrics (#969)

We are growing our list of metrics by the day, and some of them we don't
always need.  So we should provide a way to trim them.

* If metrics are not enabled, we supply a no-op metrics system
* If only selected metric categories are enabled, we supply real metrics
  for the selected categories and no-op metrics for all the rest
Signed-off-by: Adrian Sutton <adrian.sutton@consensys.net>
pull/2/head
Danno Ferrin 6 years ago committed by GitHub
parent b2d41e41ef
commit ddf1bd2d24
  1. 6
      metrics/src/main/java/tech/pegasys/pantheon/metrics/MetricCategory.java
  2. 15
      metrics/src/main/java/tech/pegasys/pantheon/metrics/noop/NoOpMetricsSystem.java
  3. 18
      metrics/src/main/java/tech/pegasys/pantheon/metrics/prometheus/MetricsConfiguration.java
  4. 84
      metrics/src/main/java/tech/pegasys/pantheon/metrics/prometheus/PrometheusMetricsSystem.java
  5. 8
      metrics/src/test/java/tech/pegasys/pantheon/metrics/prometheus/MetricsHttpServiceTest.java
  6. 32
      metrics/src/test/java/tech/pegasys/pantheon/metrics/prometheus/PrometheusMetricsSystemTest.java
  7. 25
      pantheon/src/main/java/tech/pegasys/pantheon/cli/PantheonCommand.java
  8. 18
      pantheon/src/test/java/tech/pegasys/pantheon/cli/PantheonCommandTest.java
  9. 1
      pantheon/src/test/resources/everything_config.toml

@ -12,6 +12,9 @@
*/
package tech.pegasys.pantheon.metrics;
import java.util.EnumSet;
import java.util.Set;
public enum MetricCategory {
BIG_QUEUE("big_queue"),
BLOCKCHAIN("blockchain"),
@ -24,6 +27,9 @@ public enum MetricCategory {
RPC("rpc"),
SYNCHRONIZER("synchronizer");
/**
 * Default set of metric categories: contains every {@link MetricCategory} constant.
 *
 * <p>NOTE(review): {@code EnumSet.allOf} returns a mutable set, so this shared instance can be
 * altered by any holder of the reference -- confirm that no caller mutates it.
 */
public static final Set<MetricCategory> DEFAULT_METRIC_CATEGORIES =
EnumSet.allOf(MetricCategory.class);
private final String name;
private final boolean pantheonSpecific;

@ -20,9 +20,13 @@ import tech.pegasys.pantheon.metrics.Observation;
import tech.pegasys.pantheon.metrics.OperationTimer;
import tech.pegasys.pantheon.metrics.OperationTimer.TimingContext;
import java.util.Collections;
import java.util.List;
import java.util.function.Supplier;
import java.util.stream.Stream;
import io.prometheus.client.Collector;
public class NoOpMetricsSystem implements MetricsSystem {
private static final Counter NO_OP_COUNTER = new NoOpCounter();
@ -30,6 +34,13 @@ public class NoOpMetricsSystem implements MetricsSystem {
private static final OperationTimer NO_OP_TIMER = () -> NO_OP_TIMING_CONTEXT;
public static final LabelledMetric<OperationTimer> NO_OP_LABELLED_TIMER = label -> NO_OP_TIMER;
public static final LabelledMetric<Counter> NO_OP_LABELLED_COUNTER = label -> NO_OP_COUNTER;
/** Shared {@link Collector} whose {@code collect()} always returns an empty list of samples. */
public static final Collector NO_OP_COLLECTOR =
new Collector() {
@Override
public List<MetricFamilySamples> collect() {
// Nothing is ever recorded, so there is never anything to report.
return Collections.emptyList();
}
};
@Override
public LabelledMetric<Counter> createLabelledCounter(
@ -37,7 +48,7 @@ public class NoOpMetricsSystem implements MetricsSystem {
final String name,
final String help,
final String... labelNames) {
return labels -> NO_OP_COUNTER;
return NO_OP_LABELLED_COUNTER;
}
@Override
@ -46,7 +57,7 @@ public class NoOpMetricsSystem implements MetricsSystem {
final String name,
final String help,
final String... labelNames) {
return labels -> NO_OP_TIMER;
return NO_OP_LABELLED_TIMER;
}
@Override

@ -12,9 +12,13 @@
*/
package tech.pegasys.pantheon.metrics.prometheus;
import tech.pegasys.pantheon.metrics.MetricCategory;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;
import com.google.common.collect.Lists;
@ -28,6 +32,7 @@ public class MetricsConfiguration {
private boolean enabled;
private int port;
private String host;
private Set<MetricCategory> metricCategories;
private boolean pushEnabled;
private int pushPort;
private String pushHost;
@ -40,6 +45,7 @@ public class MetricsConfiguration {
metricsConfiguration.setEnabled(false);
metricsConfiguration.setPort(DEFAULT_METRICS_PORT);
metricsConfiguration.setHost(DEFAULT_METRICS_HOST);
metricsConfiguration.setMetricCategories(EnumSet.allOf(MetricCategory.class));
metricsConfiguration.setPushEnabled(false);
metricsConfiguration.setPushPort(DEFAULT_METRICS_PUSH_PORT);
metricsConfiguration.setPushHost(DEFAULT_METRICS_PUSH_HOST);
@ -75,6 +81,14 @@ public class MetricsConfiguration {
this.host = host;
}
/**
 * Returns the metric categories held by this configuration.
 *
 * @return the stored set itself (not a copy)
 */
public Set<MetricCategory> getMetricCategories() {
return metricCategories;
}
/**
 * Replaces the metric categories held by this configuration.
 *
 * @param metricCategories the categories to store; the reference is kept as-is, without a
 *     defensive copy
 */
public void setMetricCategories(final Set<MetricCategory> metricCategories) {
this.metricCategories = metricCategories;
}
public int getPushPort() {
return pushPort;
}
@ -133,6 +147,8 @@ public class MetricsConfiguration {
+ ", host='"
+ host
+ '\''
+ ", categories="
+ metricCategories.toString()
+ ", pushEnabled="
+ pushEnabled
+ ", pushPort="
@ -157,6 +173,7 @@ public class MetricsConfiguration {
final MetricsConfiguration that = (MetricsConfiguration) o;
return enabled == that.enabled
&& port == that.port
&& Objects.equals(metricCategories, that.metricCategories)
&& pushEnabled == that.pushEnabled
&& pushPort == that.pushPort
&& pushInterval == that.pushInterval
@ -173,6 +190,7 @@ public class MetricsConfiguration {
enabled,
port,
host,
metricCategories,
pushEnabled,
pushPort,
pushHost,

@ -14,16 +14,19 @@ package tech.pegasys.pantheon.metrics.prometheus;
import static java.util.Arrays.asList;
import static java.util.Collections.singleton;
import static tech.pegasys.pantheon.metrics.noop.NoOpMetricsSystem.NO_OP_COLLECTOR;
import tech.pegasys.pantheon.metrics.LabelledMetric;
import tech.pegasys.pantheon.metrics.MetricCategory;
import tech.pegasys.pantheon.metrics.MetricsSystem;
import tech.pegasys.pantheon.metrics.Observation;
import tech.pegasys.pantheon.metrics.OperationTimer;
import tech.pegasys.pantheon.metrics.noop.NoOpMetricsSystem;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@ -55,20 +58,31 @@ public class PrometheusMetricsSystem implements MetricsSystem {
cachedTimers = new ConcurrentHashMap<>();
private final Map<String, Collector> cachedGauges = new ConcurrentHashMap<>();
private final EnumSet<MetricCategory> enabledCategories = EnumSet.allOf(MetricCategory.class);
PrometheusMetricsSystem() {}
public static MetricsSystem init() {
public static MetricsSystem init(final MetricsConfiguration metricsConfiguration) {
if (!metricsConfiguration.isEnabled()) {
return new NoOpMetricsSystem();
}
final PrometheusMetricsSystem metricsSystem = new PrometheusMetricsSystem();
metricsSystem.collectors.put(
MetricCategory.PROCESS, singleton(new StandardExports().register(metricsSystem.registry)));
metricsSystem.collectors.put(
MetricCategory.JVM,
asList(
new MemoryPoolsExports().register(metricsSystem.registry),
new BufferPoolsExports().register(metricsSystem.registry),
new GarbageCollectorExports().register(metricsSystem.registry),
new ThreadExports().register(metricsSystem.registry),
new ClassLoadingExports().register(metricsSystem.registry)));
metricsSystem.enabledCategories.retainAll(metricsConfiguration.getMetricCategories());
if (metricsSystem.enabledCategories.contains(MetricCategory.PROCESS)) {
metricsSystem.collectors.put(
MetricCategory.PROCESS,
singleton(new StandardExports().register(metricsSystem.registry)));
}
if (metricsSystem.enabledCategories.contains(MetricCategory.JVM)) {
metricsSystem.collectors.put(
MetricCategory.JVM,
asList(
new MemoryPoolsExports().register(metricsSystem.registry),
new BufferPoolsExports().register(metricsSystem.registry),
new GarbageCollectorExports().register(metricsSystem.registry),
new ThreadExports().register(metricsSystem.registry),
new ClassLoadingExports().register(metricsSystem.registry)));
}
return metricsSystem;
}
@ -82,9 +96,13 @@ public class PrometheusMetricsSystem implements MetricsSystem {
return cachedCounters.computeIfAbsent(
metricName,
(k) -> {
final Counter counter = Counter.build(metricName, help).labelNames(labelNames).create();
addCollector(category, counter);
return new PrometheusCounter(counter);
if (enabledCategories.contains(category)) {
final Counter counter = Counter.build(metricName, help).labelNames(labelNames).create();
addCollector(category, counter);
return new PrometheusCounter(counter);
} else {
return NoOpMetricsSystem.NO_OP_LABELLED_COUNTER;
}
});
}
@ -98,18 +116,22 @@ public class PrometheusMetricsSystem implements MetricsSystem {
return cachedTimers.computeIfAbsent(
metricName,
(k) -> {
final Summary summary =
Summary.build(metricName, help)
.quantile(0.2, 0.02)
.quantile(0.5, 0.05)
.quantile(0.8, 0.02)
.quantile(0.95, 0.005)
.quantile(0.99, 0.001)
.quantile(1.0, 0)
.labelNames(labelNames)
.create();
addCollector(category, summary);
return new PrometheusTimer(summary);
if (enabledCategories.contains(category)) {
final Summary summary =
Summary.build(metricName, help)
.quantile(0.2, 0.02)
.quantile(0.5, 0.05)
.quantile(0.8, 0.02)
.quantile(0.95, 0.005)
.quantile(0.99, 0.001)
.quantile(1.0, 0)
.labelNames(labelNames)
.create();
addCollector(category, summary);
return new PrometheusTimer(summary);
} else {
return NoOpMetricsSystem.NO_OP_LABELLED_TIMER;
}
});
}
@ -123,9 +145,13 @@ public class PrometheusMetricsSystem implements MetricsSystem {
cachedGauges.computeIfAbsent(
metricName,
(k) -> {
Collector collector = new CurrentValueCollector(metricName, help, valueSupplier);
addCollector(category, collector);
return collector;
if (enabledCategories.contains(category)) {
final Collector collector = new CurrentValueCollector(metricName, help, valueSupplier);
addCollector(category, collector);
return collector;
} else {
return NO_OP_COLLECTOR;
}
});
}

@ -49,15 +49,19 @@ public class MetricsHttpServiceTest {
}
private static MetricsHttpService createMetricsHttpService(final MetricsConfiguration config) {
return new MetricsHttpService(vertx, config, PrometheusMetricsSystem.init());
return new MetricsHttpService(vertx, config, PrometheusMetricsSystem.init(config));
}
private static MetricsHttpService createMetricsHttpService() {
return new MetricsHttpService(vertx, createMetricsConfig(), PrometheusMetricsSystem.init());
final MetricsConfiguration metricsConfiguration = createMetricsConfig();
metricsConfiguration.setEnabled(true);
return new MetricsHttpService(
vertx, metricsConfiguration, PrometheusMetricsSystem.init(metricsConfiguration));
}
private static MetricsConfiguration createMetricsConfig() {
final MetricsConfiguration config = MetricsConfiguration.createDefault();
config.setEnabled(true);
config.setPort(0);
config.setHostsWhitelist(Collections.singletonList("*"));
return config;

@ -17,17 +17,21 @@ import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static org.assertj.core.api.Assertions.assertThat;
import static tech.pegasys.pantheon.metrics.MetricCategory.JVM;
import static tech.pegasys.pantheon.metrics.MetricCategory.NETWORK;
import static tech.pegasys.pantheon.metrics.MetricCategory.PEERS;
import static tech.pegasys.pantheon.metrics.MetricCategory.RPC;
import tech.pegasys.pantheon.metrics.Counter;
import tech.pegasys.pantheon.metrics.LabelledMetric;
import tech.pegasys.pantheon.metrics.MetricCategory;
import tech.pegasys.pantheon.metrics.MetricsSystem;
import tech.pegasys.pantheon.metrics.Observation;
import tech.pegasys.pantheon.metrics.OperationTimer;
import tech.pegasys.pantheon.metrics.OperationTimer.TimingContext;
import tech.pegasys.pantheon.metrics.noop.NoOpMetricsSystem;
import java.util.Comparator;
import java.util.EnumSet;
import org.junit.Test;
@ -132,7 +136,8 @@ public class PrometheusMetricsSystemTest {
final LabelledMetric<OperationTimer> timer =
metricsSystem.createLabelledTimer(RPC, "request", "Some help", "methodName");
try (final TimingContext context = timer.labels("method").startTimer()) {}
//noinspection EmptyTryBlock
try (final TimingContext ignored = timer.labels("method").startTimer()) {}
assertThat(metricsSystem.getMetrics())
.usingElementComparator(IGNORE_VALUES) // We don't know how long it will actually take.
@ -163,4 +168,29 @@ public class PrometheusMetricsSystemTest {
assertThat(metricsSystem.getMetrics())
.containsExactlyInAnyOrder(new Observation(JVM, "myValue", 7d, emptyList()));
}
/**
 * Checks that a metrics system initialised with only the RPC category enabled hands out the
 * shared no-op counter for another category (NETWORK) and records observations only for RPC.
 */
@Test
public void shouldOnlyObserveEnabledMetrics() {
// Enable metrics, but restrict the tracked categories to RPC only.
final MetricsConfiguration metricsConfiguration = MetricsConfiguration.createDefault();
metricsConfiguration.setMetricCategories(EnumSet.of(MetricCategory.RPC));
metricsConfiguration.setEnabled(true);
final MetricsSystem localMetricSystem = PrometheusMetricsSystem.init(metricsConfiguration);
// do a category we are not watching
final LabelledMetric<Counter> counterN =
localMetricSystem.createLabelledCounter(NETWORK, "ABC", "Not that kind of network", "show");
assertThat(counterN).isSameAs(NoOpMetricsSystem.NO_OP_LABELLED_COUNTER);
// Incrementing the no-op counter must not produce any observation.
counterN.labels("show").inc();
assertThat(localMetricSystem.getMetrics()).isEmpty();
// do a category we are watching
final LabelledMetric<Counter> counterR =
localMetricSystem.createLabelledCounter(RPC, "name", "Not useful", "method");
assertThat(counterR).isNotSameAs(NoOpMetricsSystem.NO_OP_LABELLED_COUNTER);
// A single increment on the real counter shows up as exactly one observation.
counterR.labels("op").inc();
assertThat(localMetricSystem.getMetrics())
.containsExactly(new Observation(RPC, "name", 1.0, singletonList("op")));
}
}

@ -21,6 +21,7 @@ import static tech.pegasys.pantheon.ethereum.jsonrpc.RpcApis.DEFAULT_JSON_RPC_AP
import static tech.pegasys.pantheon.ethereum.jsonrpc.websocket.WebSocketConfiguration.DEFAULT_WEBSOCKET_PORT;
import static tech.pegasys.pantheon.ethereum.jsonrpc.websocket.WebSocketConfiguration.DEFAULT_WEBSOCKET_REFRESH_DELAY;
import static tech.pegasys.pantheon.ethereum.p2p.peers.DefaultPeer.DEFAULT_PORT;
import static tech.pegasys.pantheon.metrics.MetricCategory.DEFAULT_METRIC_CATEGORIES;
import static tech.pegasys.pantheon.metrics.prometheus.MetricsConfiguration.DEFAULT_METRICS_PORT;
import static tech.pegasys.pantheon.metrics.prometheus.MetricsConfiguration.DEFAULT_METRICS_PUSH_PORT;
import static tech.pegasys.pantheon.metrics.prometheus.MetricsConfiguration.createDefault;
@ -49,6 +50,7 @@ import tech.pegasys.pantheon.ethereum.jsonrpc.RpcApis;
import tech.pegasys.pantheon.ethereum.jsonrpc.websocket.WebSocketConfiguration;
import tech.pegasys.pantheon.ethereum.permissioning.PermissioningConfiguration;
import tech.pegasys.pantheon.ethereum.permissioning.PermissioningConfigurationBuilder;
import tech.pegasys.pantheon.metrics.MetricCategory;
import tech.pegasys.pantheon.metrics.MetricsSystem;
import tech.pegasys.pantheon.metrics.prometheus.MetricsConfiguration;
import tech.pegasys.pantheon.metrics.prometheus.PrometheusMetricsSystem;
@ -69,6 +71,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Stream;
@ -137,8 +140,6 @@ public class PantheonCommand implements DefaultCommandValues, Runnable {
private final SynchronizerConfiguration.Builder synchronizerConfigurationBuilder;
private final RunnerBuilder runnerBuilder;
private final MetricsSystem metricsSystem = PrometheusMetricsSystem.init();
// Public IP stored to prevent having to research it each time we need it.
private InetAddress autoDiscoveredDefaultIP = null;
@ -356,6 +357,15 @@ public class PantheonCommand implements DefaultCommandValues, Runnable {
arity = "1")
private final Integer metricsPort = DEFAULT_METRICS_PORT;
// Metric categories selected on the command line; takes one or more values, which may be given
// as a comma-separated list. Defaults to DEFAULT_METRIC_CATEGORIES.
// NOTE(review): the field initially aliases the shared default set -- verify that option parsing
// replaces the collection rather than adding to it in place.
@Option(
names = {"--metrics-category", "--metrics-categories"},
paramLabel = "<category name>",
split = ",",
arity = "1..*",
description =
"Comma separated list of categories to track metrics for (default: ${DEFAULT-VALUE})")
private final Set<MetricCategory> metricCategories = DEFAULT_METRIC_CATEGORIES;
@Option(
names = {"--metrics-push-enabled"},
description = "Enable the metrics push gateway integration (default: ${DEFAULT-VALUE})")
@ -489,8 +499,10 @@ public class PantheonCommand implements DefaultCommandValues, Runnable {
}
}
private Supplier<PantheonExceptionHandler> exceptionHandlerSupplier =
private final Supplier<PantheonExceptionHandler> exceptionHandlerSupplier =
Suppliers.memoize(PantheonExceptionHandler::new);
// Memoized supplier: the metrics system is built from metricsConfiguration() on the first get()
// and the same instance is returned afterwards.
// NOTE(review): presumably deferred so that the parsed CLI options are available when the
// configuration is read -- confirm against the command's parse/run ordering.
private final Supplier<MetricsSystem> metricsSystem =
Suppliers.memoize(() -> PrometheusMetricsSystem.init(metricsConfiguration()));
public PantheonCommand(
final Logger logger,
@ -643,7 +655,7 @@ public class PantheonCommand implements DefaultCommandValues, Runnable {
new MiningParameters(coinbase, minTransactionGasPrice, extraData, isMiningEnabled))
.devMode(NetworkName.DEV.equals(getNetwork()))
.nodePrivateKeyFile(nodePrivateKeyFile())
.metricsSystem(metricsSystem)
.metricsSystem(metricsSystem.get())
.privacyParameters(privacyParameters())
.build();
} catch (final InvalidConfigurationException e) {
@ -760,6 +772,7 @@ public class PantheonCommand implements DefaultCommandValues, Runnable {
metricsConfiguration.setEnabled(isMetricsEnabled);
metricsConfiguration.setHost(metricsHost);
metricsConfiguration.setPort(metricsPort);
metricsConfiguration.setMetricCategories(metricCategories);
metricsConfiguration.setPushEnabled(isMetricsPushEnabled);
metricsConfiguration.setPushHost(metricsPushHost);
metricsConfiguration.setPushPort(metricsPushPort);
@ -848,7 +861,7 @@ public class PantheonCommand implements DefaultCommandValues, Runnable {
.webSocketConfiguration(webSocketConfiguration)
.dataDir(dataDir())
.bannedNodeIds(bannedNodeIds)
.metricsSystem(metricsSystem)
.metricsSystem(metricsSystem.get())
.metricsConfiguration(metricsConfiguration)
.build();
@ -1060,7 +1073,7 @@ public class PantheonCommand implements DefaultCommandValues, Runnable {
}
public MetricsSystem getMetricsSystem() {
return metricsSystem;
return metricsSystem.get();
}
public PantheonExceptionHandler exceptionHandler() {

@ -19,6 +19,7 @@ import static org.junit.Assume.assumeFalse;
import static org.junit.Assume.assumeTrue;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.ArgumentMatchers.isNotNull;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyZeroInteractions;
import static tech.pegasys.pantheon.cli.NetworkName.DEV;
@ -43,6 +44,7 @@ import tech.pegasys.pantheon.ethereum.jsonrpc.JsonRpcConfiguration;
import tech.pegasys.pantheon.ethereum.jsonrpc.RpcApi;
import tech.pegasys.pantheon.ethereum.jsonrpc.websocket.WebSocketConfiguration;
import tech.pegasys.pantheon.ethereum.permissioning.PermissioningConfiguration;
import tech.pegasys.pantheon.metrics.MetricCategory;
import tech.pegasys.pantheon.metrics.prometheus.MetricsConfiguration;
import tech.pegasys.pantheon.util.bytes.BytesValue;
@ -1543,6 +1545,20 @@ public class PantheonCommandTest extends CommandTestAbstract {
assertThat(commandErrorOutput.toString()).isEmpty();
}
/**
 * Checks that a category passed with {@code --metrics-category} ends up in the
 * {@link MetricsConfiguration} handed to the runner builder, with no console output or errors.
 */
@Test
public void metricsCategoryPropertyMustBeUsed() {
parseCommand("--metrics-enabled", "--metrics-category", MetricCategory.JVM.toString());
// Capture the configuration that was passed on to the runner builder.
verify(mockRunnerBuilder).metricsConfiguration(metricsConfigArgumentCaptor.capture());
verify(mockRunnerBuilder).build();
assertThat(metricsConfigArgumentCaptor.getValue().getMetricCategories())
.containsExactly(MetricCategory.JVM);
assertThat(commandOutput.toString()).isEmpty();
assertThat(commandErrorOutput.toString()).isEmpty();
}
@Test
public void metricsPushEnabledPropertyMustBeUsed() {
parseCommand("--metrics-push-enabled");
@ -1940,7 +1956,7 @@ public class PantheonCommandTest extends CommandTestAbstract {
*/
private void verifyOptionsConstraintLoggerCall(
final String dependentOptions, final String mainOption) {
verify(mockLogger)
verify(mockLogger, atLeast(1))
.warn(
stringArgumentCaptor.capture(),
stringArgumentCaptor.capture(),

@ -58,6 +58,7 @@ rpc-ws-authentication-credentials-file="none"
metrics-enabled=false
metrics-host="8.6.7.5"
metrics-port=309
metrics-category=["RPC"]
metrics-push-enabled=false
metrics-push-host="5.5.5.1"
metrics-push-port=212

Loading…
Cancel
Save