From e39e5a68edc6658d71e3446f0fc6fc2b69bf13f1 Mon Sep 17 00:00:00 2001 From: jack-berg <34418638+jack-berg@users.noreply.github.com> Date: Tue, 15 Nov 2022 16:01:36 -0600 Subject: [PATCH] Replace runtime.jvm.gc.time/runtime.jvm.gc.count metrics with process.runtime.jvm.gc.duration histogram (#6964) Replaces #6362. I've reduced the attributes to only record the gc name and the action that was taken (i.e. I've removed the gc cause). If needed we can add the cause later, but for now this should be sufficient to determine total time spent in GC, and categorize time spent as stop the world or parallel. --- .../javaagent/build.gradle.kts | 6 - .../RuntimeMetricsInstaller.java | 6 +- .../runtimemetrics/RuntimeMetricsTest.groovy | 5 +- .../runtime-metrics/library/README.md | 6 + .../runtimemetrics/GarbageCollector.java | 118 ++++++++++----- .../runtimemetrics/GarbageCollectorTest.java | 134 ++++++++++++++++++ 6 files changed, 225 insertions(+), 50 deletions(-) create mode 100644 instrumentation/runtime-metrics/library/src/test/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollectorTest.java diff --git a/instrumentation/runtime-metrics/javaagent/build.gradle.kts b/instrumentation/runtime-metrics/javaagent/build.gradle.kts index 18994108833e..343b15624f28 100644 --- a/instrumentation/runtime-metrics/javaagent/build.gradle.kts +++ b/instrumentation/runtime-metrics/javaagent/build.gradle.kts @@ -7,9 +7,3 @@ dependencies { compileOnly("io.opentelemetry:opentelemetry-sdk-extension-autoconfigure") } - -tasks { - withType().configureEach { - jvmArgs("-Dotel.instrumentation.runtime-metrics.experimental-metrics.enabled=true") - } -} diff --git a/instrumentation/runtime-metrics/javaagent/src/main/java/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsInstaller.java b/instrumentation/runtime-metrics/javaagent/src/main/java/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsInstaller.java index 025e2298d04a..ac3b61750b4e 100644 --- a/instrumentation/runtime-metrics/javaagent/src/main/java/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsInstaller.java +++ b/instrumentation/runtime-metrics/javaagent/src/main/java/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsInstaller.java @@ -38,10 +38,6 @@ public void afterAgent(AutoConfiguredOpenTelemetrySdk autoConfiguredSdk) { Cpu.registerObservers(openTelemetry); MemoryPools.registerObservers(openTelemetry); Threads.registerObservers(openTelemetry); - - if (config.getBoolean( - "otel.instrumentation.runtime-metrics.experimental-metrics.enabled", false)) { - GarbageCollector.registerObservers(openTelemetry); - } + GarbageCollector.registerObservers(openTelemetry); } } diff --git a/instrumentation/runtime-metrics/javaagent/src/test/groovy/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsTest.groovy b/instrumentation/runtime-metrics/javaagent/src/test/groovy/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsTest.groovy index 64b6a9c65241..17393506cecd 100644 --- a/instrumentation/runtime-metrics/javaagent/src/test/groovy/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsTest.groovy +++ b/instrumentation/runtime-metrics/javaagent/src/test/groovy/io/opentelemetry/instrumentation/javaagent/runtimemetrics/RuntimeMetricsTest.groovy @@ -13,6 +13,8 @@ class RuntimeMetricsTest extends AgentInstrumentationSpecification { def "test runtime metrics is enabled"() { when: def conditions = new PollingConditions(timeout: 10, initialDelay: 1.5, factor: 1.25) + // Force a gc to ensure gc metrics + System.gc() then: conditions.eventually { @@ -22,8 +24,7 @@ class RuntimeMetricsTest extends AgentInstrumentationSpecification { assert getMetrics().any { it.name == "process.runtime.jvm.system.cpu.load_1m" } assert getMetrics().any { it.name == "process.runtime.jvm.system.cpu.utilization" } assert getMetrics().any { it.name == "process.runtime.jvm.cpu.utilization" } - assert getMetrics().any { it.name == "runtime.jvm.gc.time" } - assert getMetrics().any { it.name == "runtime.jvm.gc.count" } + assert getMetrics().any { it.name == "process.runtime.jvm.gc.duration" } assert getMetrics().any { it.name == "process.runtime.jvm.memory.init" } assert getMetrics().any { it.name == "process.runtime.jvm.memory.usage" } assert getMetrics().any { it.name == "process.runtime.jvm.memory.committed" } diff --git a/instrumentation/runtime-metrics/library/README.md b/instrumentation/runtime-metrics/library/README.md index eafeb540bfbc..6155cd79b1d7 100644 --- a/instrumentation/runtime-metrics/library/README.md +++ b/instrumentation/runtime-metrics/library/README.md @@ -54,33 +54,39 @@ The following lists attributes reported for a variety of garbage collectors. Not * `process.runtime.jvm.memory.committed`: {pool=Compressed Class Space,type=non_heap}, {pool=Par Eden Space,type=heap}, {pool=Tenured Gen,type=heap}, {pool=Par Survivor Space,type=heap}, {pool=Code Cache,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.limit`: {pool=Compressed Class Space,type=non_heap}, {pool=Par Eden Space,type=heap}, {pool=Tenured Gen,type=heap}, {pool=Par Survivor Space,type=heap}, {pool=Code Cache,type=non_heap} * `process.runtime.jvm.memory.usage_after_last_gc`: {pool=Par Eden Space,type=heap}, {pool=Tenured Gen,type=heap}, {pool=Par Survivor Space,type=heap} + * `process.runtime.jvc.gc.duration`: {action=end of minor GC,gc=ParNew}, {action=end of major GC,gc=MarkSweepCompact} * G1 Garbage Collector * `process.runtime.jvm.memory.init`: {pool=G1 Survivor Space,type=heap}, {pool=G1 Eden Space,type=heap}, {pool=CodeCache,type=non_heap}, {pool=G1 Old Gen,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.usage`: {pool=G1 Survivor Space,type=heap}, {pool=G1 Eden Space,type=heap}, {pool=CodeCache,type=non_heap}, {pool=G1 Old Gen,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.committed`: {pool=G1 Survivor Space,type=heap}, {pool=G1 Eden Space,type=heap}, {pool=CodeCache,type=non_heap}, {pool=G1 Old Gen,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.limit`: {pool=CodeCache,type=non_heap}, {pool=G1 Old Gen,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.usage_after_last_gc`: {pool=G1 Survivor Space,type=heap}, {pool=G1 Eden Space,type=heap}, {pool=G1 Old Gen,type=heap} + * `process.runtime.jvc.gc.duration`: {action=end of minor GC,gc=G1 Young Generation}, {action=end of major GC,gc=G1 Old Generation} * Parallel Garbage Collector * `process.runtime.jvm.memory.init`: {pool=CodeCache,type=non_heap}, {pool=PS Survivor Space,type=heap}, {pool=PS Old Gen,type=heap}, {pool=PS Eden Space,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.usage`: {pool=CodeCache,type=non_heap}, {pool=PS Survivor Space,type=heap}, {pool=PS Old Gen,type=heap}, {pool=PS Eden Space,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.committed`: {pool=CodeCache,type=non_heap}, {pool=PS Survivor Space,type=heap}, {pool=PS Old Gen,type=heap}, {pool=PS Eden Space,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.limit`: {pool=CodeCache,type=non_heap}, {pool=PS Survivor Space,type=heap}, {pool=PS Old Gen,type=heap}, {pool=PS Eden Space,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.usage_after_last_gc`: {pool=PS Survivor Space,type=heap}, {pool=PS Old Gen,type=heap}, {pool=PS Eden Space,type=heap} + * `process.runtime.jvc.gc.duration`: {action=end of major GC,gc=PS MarkSweep}, {action=end of minor GC,gc=PS Scavenge} * Serial Garbage Collector * `process.runtime.jvm.memory.init`: {pool=CodeCache,type=non_heap}, {pool=Tenured Gen,type=heap}, {pool=Eden Space,type=heap}, {pool=Survivor Space,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.usage`: {pool=CodeCache,type=non_heap}, {pool=Tenured Gen,type=heap}, {pool=Eden Space,type=heap}, {pool=Survivor Space,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.committed`: {pool=CodeCache,type=non_heap}, {pool=Tenured Gen,type=heap}, {pool=Eden Space,type=heap}, {pool=Survivor Space,type=heap}, {pool=Compressed Class Space,type=non_heap}, {pool=Metaspace,type=non_heap} * `process.runtime.jvm.memory.limit`: {pool=CodeCache,type=non_heap}, {pool=Tenured Gen,type=heap}, {pool=Eden Space,type=heap}, {pool=Survivor Space,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.usage_after_last_gc`: {pool=Tenured Gen,type=heap}, {pool=Eden Space,type=heap}, {pool=Survivor Space,type=heap} + * `process.runtime.jvc.gc.duration`: {action=end of minor GC,gc=Copy}, {action=end of major GC,gc=MarkSweepCompact} * Shenandoah Garbage Collector * `process.runtime.jvm.memory.init`: {pool=Metaspace,type=non_heap}, {pool=CodeCache,type=non_heap}, {pool=Shenandoah,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.usage`: {pool=Metaspace,type=non_heap}, {pool=CodeCache,type=non_heap}, {pool=Shenandoah,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.committed`: {pool=Metaspace,type=non_heap}, {pool=CodeCache,type=non_heap}, {pool=Shenandoah,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.limit`: {pool=CodeCache,type=non_heap}, {pool=Shenandoah,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.usage_after_last_gc`: {pool=Shenandoah,type=heap} + * `process.runtime.jvc.gc.duration`: {action=end of GC cycle,gc=Shenandoah Cycles}, {action=end of GC pause,gc=Shenandoah Pauses} * Z Garbage Collector * `process.runtime.jvm.memory.init`: {pool=Metaspace,type=non_heap}, {pool=CodeCache,type=non_heap}, {pool=ZHeap,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.usage`: {pool=Metaspace,type=non_heap}, {pool=CodeCache,type=non_heap}, {pool=ZHeap,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.committed`: {pool=Metaspace,type=non_heap}, {pool=CodeCache,type=non_heap}, {pool=ZHeap,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.limit`: {pool=CodeCache,type=non_heap}, {pool=ZHeap,type=heap}, {pool=Compressed Class Space,type=non_heap} * `process.runtime.jvm.memory.usage_after_last_gc`: {pool=ZHeap,type=heap} + * `process.runtime.jvc.gc.duration`: {action=end of GC cycle,gc=ZGC Cycles}, {action=end of GC pause,gc=ZGC Pauses} diff --git a/instrumentation/runtime-metrics/library/src/main/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollector.java b/instrumentation/runtime-metrics/library/src/main/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollector.java index 5556060b891e..37a7f3920817 100644 --- a/instrumentation/runtime-metrics/library/src/main/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollector.java +++ b/instrumentation/runtime-metrics/library/src/main/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollector.java @@ -5,65 +5,109 @@ package io.opentelemetry.instrumentation.runtimemetrics; +import com.sun.management.GarbageCollectionNotificationInfo; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.common.AttributeKey; import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.LongHistogram; import io.opentelemetry.api.metrics.Meter; import java.lang.management.GarbageCollectorMXBean; import java.lang.management.ManagementFactory; -import java.util.ArrayList; import java.util.List; +import java.util.function.Function; +import javax.management.Notification; +import javax.management.NotificationEmitter; +import javax.management.NotificationFilter; +import javax.management.NotificationListener; +import javax.management.openmbean.CompositeData; /** - * Registers observers that generate metrics about JVM garbage collectors. + * Registers instruments that generate metrics about JVM garbage collection. * *

Example usage: * *

{@code
  * GarbageCollector.registerObservers(GlobalOpenTelemetry.get());
  * }
- * - *

Example metrics being exported: - * - *

- *   runtime.jvm.gc.time{gc="PS1"} 6.7
- *   runtime.jvm.gc.count{gc="PS1"} 1
- * 
*/ public final class GarbageCollector { + private static final AttributeKey GC_KEY = AttributeKey.stringKey("gc"); + private static final AttributeKey ACTION_KEY = AttributeKey.stringKey("action"); - /** Register observers for java runtime garbage collector metrics. */ + private static final NotificationFilter GC_FILTER = + notification -> + notification + .getType() + .equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION); + + /** Register observers for java runtime memory metrics. */ public static void registerObservers(OpenTelemetry openTelemetry) { - List garbageCollectors = ManagementFactory.getGarbageCollectorMXBeans(); + registerObservers( + openTelemetry, + ManagementFactory.getGarbageCollectorMXBeans(), + GarbageCollector::extractNotificationInfo); + } + + // Visible for testing + static void registerObservers( + OpenTelemetry openTelemetry, + List gcBeans, + Function notificationInfoExtractor) { Meter meter = RuntimeMetricsUtil.getMeter(openTelemetry); - List labelSets = new ArrayList<>(garbageCollectors.size()); - for (GarbageCollectorMXBean gc : garbageCollectors) { - labelSets.add(Attributes.of(GC_KEY, gc.getName())); + + LongHistogram gcTime = + meter + .histogramBuilder("process.runtime.jvm.gc.duration") + .setDescription("Duration of JVM garbage collection actions") + .setUnit("ms") + .ofLongs() + .build(); + + for (GarbageCollectorMXBean gcBean : gcBeans) { + if (!(gcBean instanceof NotificationEmitter)) { + continue; + } + NotificationEmitter notificationEmitter = (NotificationEmitter) gcBean; + notificationEmitter.addNotificationListener( + new GcNotificationListener(gcTime, notificationInfoExtractor), GC_FILTER, null); } - meter - .counterBuilder("runtime.jvm.gc.time") - .setDescription("Time spent in a given JVM garbage collector in milliseconds.") - .setUnit("ms") - .buildWithCallback( - resultLongObserver -> { - for (int i = 0; i < garbageCollectors.size(); i++) { - resultLongObserver.record( - garbageCollectors.get(i).getCollectionTime(), labelSets.get(i)); - } - }); - meter - .counterBuilder("runtime.jvm.gc.count") - .setDescription( - "The number of collections that have occurred for a given JVM garbage collector.") - .setUnit("{collections}") - .buildWithCallback( - resultLongObserver -> { - for (int i = 0; i < garbageCollectors.size(); i++) { - resultLongObserver.record( - garbageCollectors.get(i).getCollectionCount(), labelSets.get(i)); - } - }); + } + + private static final class GcNotificationListener implements NotificationListener { + + private final LongHistogram gcTime; + private final Function + notificationInfoExtractor; + + private GcNotificationListener( + LongHistogram gcTime, + Function notificationInfoExtractor) { + this.gcTime = gcTime; + this.notificationInfoExtractor = notificationInfoExtractor; + } + + @Override + public void handleNotification(Notification notification, Object unused) { + GarbageCollectionNotificationInfo notificationInfo = + notificationInfoExtractor.apply(notification); + gcTime.record( + notificationInfo.getGcInfo().getDuration(), + Attributes.of( + GC_KEY, notificationInfo.getGcName(), ACTION_KEY, notificationInfo.getGcAction())); + } + } + + /** + * Extract {@link GarbageCollectionNotificationInfo} from the {@link Notification}. + * + *

Note: this exists as a separate function so that the behavior can be overridden with mocks + * in tests. It's very challenging to create a mock {@link CompositeData} that can be parsed by + * {@link GarbageCollectionNotificationInfo#from(CompositeData)}. + */ + private static GarbageCollectionNotificationInfo extractNotificationInfo( + Notification notification) { + return GarbageCollectionNotificationInfo.from((CompositeData) notification.getUserData()); } private GarbageCollector() {} diff --git a/instrumentation/runtime-metrics/library/src/test/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollectorTest.java b/instrumentation/runtime-metrics/library/src/test/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollectorTest.java new file mode 100644 index 000000000000..3232e6549847 --- /dev/null +++ b/instrumentation/runtime-metrics/library/src/test/java/io/opentelemetry/instrumentation/runtimemetrics/GarbageCollectorTest.java @@ -0,0 +1,134 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.instrumentation.runtimemetrics; + +import static io.opentelemetry.instrumentation.runtimemetrics.ScopeUtil.EXPECTED_SCOPE; +import static io.opentelemetry.sdk.testing.assertj.OpenTelemetryAssertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.sun.management.GarbageCollectionNotificationInfo; +import com.sun.management.GcInfo; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.instrumentation.testing.junit.InstrumentationExtension; +import io.opentelemetry.instrumentation.testing.junit.LibraryInstrumentationExtension; +import java.lang.management.GarbageCollectorMXBean; +import java.util.Collections; +import java.util.concurrent.atomic.AtomicLong; +import javax.management.Notification; +import javax.management.NotificationEmitter; +import javax.management.NotificationListener; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) +class GarbageCollectorTest { + + @RegisterExtension + static final InstrumentationExtension testing = LibraryInstrumentationExtension.create(); + + @Mock(extraInterfaces = NotificationEmitter.class) + private GarbageCollectorMXBean gcBean; + + @Captor private ArgumentCaptor listenerCaptor; + + @Test + void registerObservers() { + GarbageCollector.registerObservers( + testing.getOpenTelemetry(), + Collections.singletonList(gcBean), + GarbageCollectorTest::getGcNotificationInfo); + + NotificationEmitter notificationEmitter = (NotificationEmitter) gcBean; + verify(notificationEmitter).addNotificationListener(listenerCaptor.capture(), any(), any()); + NotificationListener listener = listenerCaptor.getValue(); + + listener.handleNotification( + createTestNotification("G1 Young Generation", "end of minor GC", 10), null); + listener.handleNotification( + createTestNotification("G1 Young Generation", "end of minor GC", 12), null); + listener.handleNotification( + createTestNotification("G1 Old Generation", "end of major GC", 11), null); + + testing.waitAndAssertMetrics( + "io.opentelemetry.runtime-metrics", + "process.runtime.jvm.gc.duration", + metrics -> + metrics.anySatisfy( + metricData -> + assertThat(metricData) + .hasInstrumentationScope(EXPECTED_SCOPE) + .hasDescription("Duration of JVM garbage collection actions") + .hasUnit("ms") + .hasHistogramSatisfying( + histogram -> + histogram.hasPointsSatisfying( + point -> + point + .hasCount(2) + .hasSum(22) + .hasAttributes( + Attributes.builder() + .put("gc", "G1 Young Generation") + .put("action", "end of minor GC") + .build()), + point -> + point + .hasCount(1) + .hasSum(11) + .hasAttributes( + Attributes.builder() + .put("gc", "G1 Old Generation") + .put("action", "end of major GC") + .build()))))); + } + + private static Notification createTestNotification( + String gcName, String gcAction, long duration) { + GarbageCollectionNotificationInfo gcNotificationInfo = + mock(GarbageCollectionNotificationInfo.class); + when(gcNotificationInfo.getGcName()).thenReturn(gcName); + when(gcNotificationInfo.getGcAction()).thenReturn(gcAction); + GcInfo gcInfo = mock(GcInfo.class); + when(gcInfo.getDuration()).thenReturn(duration); + when(gcNotificationInfo.getGcInfo()).thenReturn(gcInfo); + return new TestNotification(gcNotificationInfo); + } + + private static GarbageCollectionNotificationInfo getGcNotificationInfo( + Notification notification) { + return ((TestNotification) notification).gcNotificationInfo; + } + + /** + * A {@link Notification} when is initialized with a mock {@link + * GarbageCollectionNotificationInfo}. + */ + private static class TestNotification extends Notification { + + private static final AtomicLong sequence = new AtomicLong(0); + + private final GarbageCollectionNotificationInfo gcNotificationInfo; + + private TestNotification(GarbageCollectionNotificationInfo gcNotificationInfo) { + super( + GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION, + "test", + sequence.incrementAndGet()); + this.gcNotificationInfo = gcNotificationInfo; + } + } +}