diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index e9bfa358103c8..3d37056956c69 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -46,6 +46,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.stats.IndexingPressureStats; import org.opensearch.index.stats.ShardIndexingPressureStats; import org.opensearch.index.store.remote.filecache.FileCacheStats; @@ -129,6 +130,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private SearchBackpressureStats searchBackpressureStats; + @Nullable + private SegmentReplicationRejectionStats segmentReplicationRejectionStats; + @Nullable private ClusterManagerThrottlingStats clusterManagerThrottlingStats; @@ -211,6 +215,12 @@ public NodeStats(StreamInput in) throws IOException { } else { resourceUsageStats = null; } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + segmentReplicationRejectionStats = in.readOptionalWriteable(SegmentReplicationRejectionStats::new); + } else { + segmentReplicationRejectionStats = null; + } if (in.getVersion().onOrAfter(Version.V_2_12_0)) { repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); } else { @@ -244,6 +254,7 @@ public NodeStats( @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, @Nullable SearchPipelineStats searchPipelineStats, + @Nullable SegmentReplicationRejectionStats segmentReplicationRejectionStats, @Nullable RepositoriesStats repositoriesStats ) { super(node); @@ -271,6 
+282,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.segmentReplicationRejectionStats = segmentReplicationRejectionStats; this.repositoriesStats = repositoriesStats; } @@ -415,6 +427,14 @@ public SearchPipelineStats getSearchPipelineStats() { } + /** + * Returns the segment replication rejection stats held by this response, or {@code null} when none were set. + */ + @Nullable + public SegmentReplicationRejectionStats getSegmentReplicationRejectionStats() { + return segmentReplicationRejectionStats; + } + @Nullable public RepositoriesStats getRepositoriesStats() { return repositoriesStats; } @@ -465,6 +485,10 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(resourceUsageStats); } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(segmentReplicationRejectionStats); + } if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(repositoriesStats); } @@ -561,6 +585,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getSegmentReplicationRejectionStats() != null) { + getSegmentReplicationRejectionStats().toXContent(builder, params); + } + if (getRepositoriesStats() != null) { getRepositoriesStats().toXContent(builder, params); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 88dff20354aa2..fc72668d36413 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -215,6 +215,7 @@ public enum Metric { TASK_CANCELLATION("task_cancellation"),
SEARCH_PIPELINE("search_pipeline"), RESOURCE_USAGE_STATS("resource_usage_stats"), + SEGMENT_REPLICATION_BACKPRESSURE("segment_replication_backpressure"), REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index aa02f8e580f4a..99cf42cfdc4d0 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -126,6 +126,7 @@ protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.SEGMENT_REPLICATION_BACKPRESSURE.containedIn(metrics), NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index f51fabbfb2388..5efec8b876435 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -170,6 +170,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java index 4284daf9ffef4..d9d480e7b2b27 100644 --- 
a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java @@ -106,10 +106,11 @@ public SegmentReplicationPressureService( ClusterService clusterService, IndicesService indicesService, ShardStateAction shardStateAction, + SegmentReplicationStatsTracker tracker, ThreadPool threadPool ) { this.indicesService = indicesService; - this.tracker = new SegmentReplicationStatsTracker(this.indicesService); + this.tracker = tracker; this.shardStateAction = shardStateAction; this.threadPool = threadPool; diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java new file mode 100644 index 0000000000000..9f9f150ebe2d7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.Version; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Segment replication rejection stats. 
+ * + * @opensearch.internal + */ +public class SegmentReplicationRejectionStats implements Writeable, ToXContentFragment { + + /** + * Total rejections due to segment replication backpressure + */ + private final long totalRejectionCount; + + public SegmentReplicationRejectionStats(final long totalRejectionCount) { + this.totalRejectionCount = totalRejectionCount; + } + + public SegmentReplicationRejectionStats(StreamInput in) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + // Streams older than the gated version carry no counter, so fall back to zero explicitly. + final boolean hasCounter = in.getVersion().onOrAfter(Version.V_3_0_0); + this.totalRejectionCount = hasCounter ? in.readVLong() : 0L; + } + + public long getTotalRejectionCount() { + return totalRejectionCount; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("segment_replication_backpressure"); + builder.field("total_rejected_requests", totalRejectionCount); + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeVLong(totalRejectionCount); + } + } + + @Override + public String toString() { + return "SegmentReplicationRejectionStats{ totalRejectedRequestCount=" + totalRejectionCount + '}'; + } + +} diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java index 6d5c00c08caff..f5fc8aa1c1eea 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java @@ -33,6 +33,14 @@ public SegmentReplicationStatsTracker(IndicesService indicesService) { rejectionCount = ConcurrentCollections.newConcurrentMap(); } + public SegmentReplicationRejectionStats getTotalRejectionStats() { + return new 
SegmentReplicationRejectionStats(this.rejectionCount.values().stream().mapToLong(AtomicInteger::get).sum()); + } + + protected Map getRejectionCount() { + return rejectionCount; + } + public SegmentReplicationStats getStats() { Map stats = new HashMap<>(); for (IndexService indexService : indicesService) { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index c9148f382a028..711a90d424ac3 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -136,6 +136,7 @@ import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; @@ -977,6 +978,7 @@ protected Node( transportService.getTaskManager() ); + final SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); RepositoriesModule repositoriesModule = new RepositoriesModule( this.environment, pluginsService.filterPlugins(RepositoryPlugin.class), @@ -1116,6 +1118,7 @@ protected Node( fileCache, taskCancellationMonitoringService, resourceUsageCollectorService, + segmentReplicationStatsTracker, repositoryService ); @@ -1246,6 +1249,7 @@ protected Node( b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); + b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); }); injector = modules.createInjector(); diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index 
e2d7bc2c86ba3..49dde0b81cac7 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -48,6 +48,7 @@ import org.opensearch.discovery.Discovery; import org.opensearch.http.HttpServerTransport; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.indices.IndicesService; import org.opensearch.ingest.IngestService; @@ -96,6 +97,8 @@ public class NodeService implements Closeable { private final TaskCancellationMonitoringService taskCancellationMonitoringService; private final RepositoriesService repositoriesService; + private final SegmentReplicationStatsTracker segmentReplicationStatsTracker; + NodeService( Settings settings, ThreadPool threadPool, @@ -119,6 +122,7 @@ public class NodeService implements Closeable { FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, ResourceUsageCollectorService resourceUsageCollectorService, + SegmentReplicationStatsTracker segmentReplicationStatsTracker, RepositoriesService repositoriesService ) { this.settings = settings; @@ -146,6 +150,7 @@ public class NodeService implements Closeable { this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); + this.segmentReplicationStatsTracker = segmentReplicationStatsTracker; } public NodeInfo info( @@ -226,6 +231,7 @@ public NodeStats stats( boolean taskCancellation, boolean searchPipelineStats, boolean resourceUsageStats, + boolean segmentReplicationTrackerStats, boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will @@ -256,6 +262,7 @@ public NodeStats stats( fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? 
this.taskCancellationMonitoringService.stats() : null, searchPipelineStats ? this.searchPipelineService.stats() : null, + segmentReplicationTrackerStats ? this.segmentReplicationStatsTracker.getTotalRejectionStats() : null, repositoriesStats ? this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 3491f18da9550..ebdd012006fb2 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -49,6 +49,7 @@ import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; import org.opensearch.index.ReplicationStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.remote.RemoteSegmentStats; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.RemoteTranslogStats; @@ -417,6 +418,17 @@ public void testSerialization() throws IOException { assertEquals(aResourceUsageStats.getTimestamp(), bResourceUsageStats.getTimestamp()); }); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = nodeStats.getSegmentReplicationRejectionStats(); + SegmentReplicationRejectionStats deserializedSegmentReplicationRejectionStats = deserializedNodeStats + .getSegmentReplicationRejectionStats(); + if (segmentReplicationRejectionStats == null) { + assertNull(deserializedSegmentReplicationRejectionStats); + } else { + assertEquals( + segmentReplicationRejectionStats.getTotalRejectionCount(), + deserializedSegmentReplicationRejectionStats.getTotalRejectionCount() + ); + } ScriptCacheStats scriptCacheStats = nodeStats.getScriptCacheStats(); ScriptCacheStats deserializedScriptCacheStats = deserializedNodeStats.getScriptCacheStats(); if 
(scriptCacheStats == null) { @@ -812,6 +824,11 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { } nodesResourceUsageStats = new NodesResourceUsageStats(resourceUsageStatsMap); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = null; + if (frequently()) { + segmentReplicationRejectionStats = new SegmentReplicationRejectionStats(randomNonNegativeLong()); + } + ClusterManagerThrottlingStats clusterManagerThrottlingStats = null; if (frequently()) { clusterManagerThrottlingStats = new ClusterManagerThrottlingStats(); @@ -853,6 +870,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { null, null, null, + segmentReplicationRejectionStats, null ); } diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 6f03e87bf5824..f037b75dc16a3 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -192,6 +192,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -220,6 +221,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -248,6 +250,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -307,6 +310,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -335,6 +339,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -363,6 +368,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java index 34fa13f0ba62c..478fdcb24f76a 100644 --- 
a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java @@ -278,6 +278,13 @@ private SegmentReplicationPressureService buildPressureService(Settings settings ClusterService clusterService = mock(ClusterService.class); when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); - return new SegmentReplicationPressureService(settings, clusterService, indicesService, shardStateAction, mock(ThreadPool.class)); + return new SegmentReplicationPressureService( + settings, + clusterService, + indicesService, + shardStateAction, + new SegmentReplicationStatsTracker(indicesService), + mock(ThreadPool.class) + ); } } diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java new file mode 100644 index 0000000000000..04423d583e8f9 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index; + +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.indices.IndicesService; +import org.opensearch.test.OpenSearchTestCase; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; + +public class SegmentReplicationStatsTrackerTests extends OpenSearchTestCase { + + private final IndicesService indicesService = mock(IndicesService.class); + + public void testRejectedCount() { + SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); + + // Verify that total rejection count is 0 on an empty rejectionCount map in statsTracker. + assertTrue(segmentReplicationStatsTracker.getRejectionCount().isEmpty()); + assertEquals(0L, segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount()); + + // Verify that total rejection count is 1 after incrementing rejectionCount. + segmentReplicationStatsTracker.incrementRejectionCount(Mockito.mock(ShardId.class)); + assertEquals(1L, segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount()); + } + +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 710717532ceb4..b7a2baacba611 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -178,6 +178,7 @@ import org.opensearch.gateway.TransportNodesListGatewayStartedShards; import org.opensearch.index.IndexingPressureService; import org.opensearch.index.SegmentReplicationPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.remote.RemoteStorePressureService; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; @@ -2188,6 +2189,7 @@ public void onFailure(final Exception 
e) { clusterService, mock(IndicesService.class), mock(ShardStateAction.class), + mock(SegmentReplicationStatsTracker.class), mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 60a54110fd0b4..2ba4de5e54a67 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -122,6 +122,7 @@ List adjustNodesStats(List nodesStats) { nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), nodeStats.getSearchPipelineStats(), + nodeStats.getSegmentReplicationRejectionStats(), nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 898e125b94954..63d8f069bebea 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2722,6 +2722,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat(