From 0282e64bb25a8485526a1b6bf584ac2b9495f219 Mon Sep 17 00:00:00 2001 From: Daniel Widdis Date: Thu, 18 Apr 2024 23:05:39 -0700 Subject: [PATCH 01/32] Ignore BaseRestHandler unconsumed content check as it's always consumed (#13290) * Ignore BaseRestHandler unconsumed content check as it's always consumed Signed-off-by: Daniel Widdis * Remove comment, continue to ignore content on Force Merge Signed-off-by: Daniel Widdis * Remove no-body test from RestDeletePitActionTests Signed-off-by: Daniel Widdis --------- Signed-off-by: Daniel Widdis --- CHANGELOG.md | 1 + .../org/opensearch/rest/BaseRestHandler.java | 4 - .../forcemerge/RestForceMergeActionTests.java | 22 ------ .../opensearch/rest/BaseRestHandlerTests.java | 79 ------------------- .../search/pit/RestDeletePitActionTests.java | 25 ------ 5 files changed, 1 insertion(+), 130 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5efcfee3d9d9d..6d8af16db72a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Enabled mockTelemetryPlugin for IT and fixed OOM issues ([#13054](https://github.com/opensearch-project/OpenSearch/pull/13054)) - Fix implement mark() and markSupported() in class FilterStreamInput ([#13098](https://github.com/opensearch-project/OpenSearch/pull/13098)) - Fix snapshot _status API to return correct status for partial snapshots ([#12812](https://github.com/opensearch-project/OpenSearch/pull/12812)) +- Ignore BaseRestHandler unconsumed content check as it's always consumed. ([#13290](https://github.com/opensearch-project/OpenSearch/pull/13290)) ### Security diff --git a/server/src/main/java/org/opensearch/rest/BaseRestHandler.java b/server/src/main/java/org/opensearch/rest/BaseRestHandler.java index 3552e32022b2c..fc150405747ec 100644 --- a/server/src/main/java/org/opensearch/rest/BaseRestHandler.java +++ b/server/src/main/java/org/opensearch/rest/BaseRestHandler.java @@ -122,10 +122,6 @@ public final void handleRequest(RestRequest request, RestChannel channel, NodeCl throw new IllegalArgumentException(unrecognized(request, unconsumedParams, candidateParams, "parameter")); } - if (request.hasContent() && request.isContentConsumed() == false) { - throw new IllegalArgumentException("request [" + request.method() + " " + request.path() + "] does not support having a body"); - } - usageCount.increment(); // execute the action action.accept(channel); diff --git a/server/src/test/java/org/opensearch/action/admin/indices/forcemerge/RestForceMergeActionTests.java b/server/src/test/java/org/opensearch/action/admin/indices/forcemerge/RestForceMergeActionTests.java index b09e592922ed9..01d72b78a679e 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/forcemerge/RestForceMergeActionTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/forcemerge/RestForceMergeActionTests.java @@ -32,14 +32,9 @@ package org.opensearch.action.admin.indices.forcemerge; -import org.opensearch.client.node.NodeClient; -import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.core.common.bytes.BytesArray; -import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.rest.RestRequest; import org.opensearch.rest.action.admin.indices.RestForceMergeAction; -import org.opensearch.test.rest.FakeRestChannel; import org.opensearch.test.rest.FakeRestRequest; import org.opensearch.test.rest.RestActionTestCase; import org.junit.Before; @@ -47,9 +42,6 @@ import java.util.HashMap; import java.util.Map; -import static org.hamcrest.Matchers.equalTo; -import static org.mockito.Mockito.mock; - public class RestForceMergeActionTests extends RestActionTestCase { @Before @@ -57,20 +49,6 @@ public void setUpAction() { controller().registerHandler(new RestForceMergeAction()); } - public void testBodyRejection() throws Exception { - final RestForceMergeAction handler = new RestForceMergeAction(); - String json = JsonXContent.contentBuilder().startObject().field("max_num_segments", 1).endObject().toString(); - final FakeRestRequest request = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY).withContent( - new BytesArray(json), - MediaTypeRegistry.JSON - ).withPath("/_forcemerge").build(); - IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - () -> handler.handleRequest(request, new FakeRestChannel(request, randomBoolean(), 1), mock(NodeClient.class)) - ); - assertThat(e.getMessage(), equalTo("request [GET /_forcemerge] does not support having a body")); - } - public void testDeprecationMessage() { final Map params = new HashMap<>(); params.put("only_expunge_deletes", Boolean.TRUE.toString()); diff --git a/server/src/test/java/org/opensearch/rest/BaseRestHandlerTests.java b/server/src/test/java/org/opensearch/rest/BaseRestHandlerTests.java index ce929e64d8960..45653e9d8e4d6 100644 --- a/server/src/test/java/org/opensearch/rest/BaseRestHandlerTests.java +++ b/server/src/test/java/org/opensearch/rest/BaseRestHandlerTests.java @@ -35,10 +35,6 @@ import org.opensearch.client.node.NodeClient; import org.opensearch.common.Table; import org.opensearch.common.settings.Settings; -import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.core.common.bytes.BytesArray; -import org.opensearch.core.xcontent.MediaTypeRegistry; -import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.rest.RestHandler.ReplacedRoute; import org.opensearch.rest.RestHandler.Route; import org.opensearch.rest.RestRequest.Method; @@ -281,81 +277,6 @@ public String getName() { assertTrue(executed.get()); } - public void testConsumedBody() throws Exception { - final AtomicBoolean executed = new AtomicBoolean(); - final BaseRestHandler handler = new BaseRestHandler() { - @Override - protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { - request.content(); - return channel -> executed.set(true); - } - - @Override - public String getName() { - return "test_consumed_body"; - } - }; - - try (XContentBuilder builder = JsonXContent.contentBuilder().startObject().endObject()) { - final RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withContent( - new BytesArray(builder.toString()), - MediaTypeRegistry.JSON - ).build(); - final RestChannel channel = new FakeRestChannel(request, randomBoolean(), 1); - handler.handleRequest(request, channel, mockClient); - assertTrue(executed.get()); - } - } - - public void testUnconsumedNoBody() throws Exception { - final AtomicBoolean executed = new AtomicBoolean(); - final BaseRestHandler handler = new BaseRestHandler() { - @Override - protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { - return channel -> executed.set(true); - } - - @Override - public String getName() { - return "test_unconsumed_body"; - } - }; - - final RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).build(); - final RestChannel channel = new FakeRestChannel(request, randomBoolean(), 1); - handler.handleRequest(request, channel, mockClient); - assertTrue(executed.get()); - } - - public void testUnconsumedBody() throws IOException { - final AtomicBoolean executed = new AtomicBoolean(); - final BaseRestHandler handler = new BaseRestHandler() { - @Override - protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { - return channel -> executed.set(true); - } - - @Override - public String getName() { - return "test_unconsumed_body"; - } - }; - - try (XContentBuilder builder = JsonXContent.contentBuilder().startObject().endObject()) { - final RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withContent( - new BytesArray(builder.toString()), - MediaTypeRegistry.JSON - ).build(); - final RestChannel channel = new FakeRestChannel(request, randomBoolean(), 1); - final IllegalArgumentException e = expectThrows( - IllegalArgumentException.class, - () -> handler.handleRequest(request, channel, mockClient) - ); - assertThat(e, hasToString(containsString("request [GET /] does not support having a body"))); - assertFalse(executed.get()); - } - } - public void testReplaceRoutesMethod() throws Exception { List routes = Arrays.asList(new Route(Method.GET, "/path/test"), new Route(Method.PUT, "/path2/test")); List replacedRoutes = RestHandler.replaceRoutes(routes, "/prefix", "/deprecatedPrefix"); diff --git a/server/src/test/java/org/opensearch/search/pit/RestDeletePitActionTests.java b/server/src/test/java/org/opensearch/search/pit/RestDeletePitActionTests.java index b60541825e3ed..448ba9e5a8cd7 100644 --- a/server/src/test/java/org/opensearch/search/pit/RestDeletePitActionTests.java +++ b/server/src/test/java/org/opensearch/search/pit/RestDeletePitActionTests.java @@ -82,31 +82,6 @@ public void deletePits(DeletePitRequest request, ActionListener pitCalled = new SetOnce<>(); - try (NodeClient nodeClient = new NoOpNodeClient(this.getTestName()) { - @Override - public void deletePits(DeletePitRequest request, ActionListener listener) { - pitCalled.set(true); - assertThat(request.getPitIds(), hasSize(1)); - assertThat(request.getPitIds().get(0), equalTo("_all")); - } - }) { - RestDeletePitAction action = new RestDeletePitAction(); - RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withContent( - new BytesArray("{\"pit_id\": [\"BODY\"]}"), - MediaTypeRegistry.JSON - ).withPath("/_all").build(); - FakeRestChannel channel = new FakeRestChannel(request, false, 0); - - IllegalArgumentException ex = expectThrows( - IllegalArgumentException.class, - () -> action.handleRequest(request, channel, nodeClient) - ); - assertTrue(ex.getMessage().contains("request [GET /_all] does not support having a body")); - } - } - public void testDeletePitQueryStringParamsShouldThrowException() { SetOnce pitCalled = new SetOnce<>(); try (NodeClient nodeClient = new NoOpNodeClient(this.getTestName()) { From b4692c897ec4b19161417147d29b116671616fe1 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 19 Apr 2024 11:03:06 -0400 Subject: [PATCH 02/32] Reconsider the breaking changes check policy to detect breaking changes against released versions (#13292) * Reconsider the breaking changes check policy to detect breaking changes against released versions Signed-off-by: Andriy Redko * Add an ability to provide the target version to compare with for japicmp tasks Signed-off-by: Andriy Redko * Add documentation for japicmp tasks Signed-off-by: Andriy Redko --------- Signed-off-by: Andriy Redko --- CHANGELOG.md | 1 + DEVELOPER_GUIDE.md | 15 +++++++++++ server/build.gradle | 65 ++++++++++++++++++++++++++++----------------- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d8af16db72a7..dadfcbfbd8b05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Improve built-in secure transports support ([#12907](https://github.com/opensearch-project/OpenSearch/pull/12907)) - Update links to documentation in rest-api-spec ([#13043](https://github.com/opensearch-project/OpenSearch/pull/13043)) - Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) +- [BWC and API enforcement] Reconsider the breaking changes check policy to detect breaking changes against released versions ([#13292](https://github.com/opensearch-project/OpenSearch/pull/13292)) ### Deprecated diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index f0851fc58d444..92ef71b92da7e 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -57,6 +57,7 @@ - [Developer API](#developer-api) - [User API](#user-api) - [Experimental Development](#experimental-development) + - [API Compatibility Checks](#api-compatibility-checks) - [Backports](#backports) - [LineLint](#linelint) - [Lucene Snapshots](#lucene-snapshots) @@ -607,6 +608,20 @@ a LTS feature but with additional guard rails and communication mechanisms to si release, or be removed altogether. Any Developer or User APIs implemented along with the experimental feature should be marked with `@ExperimentalApi` (or documented as `@opensearch.experimental`) annotation to signal the implementation is not subject to LTS and does not follow backwards compatibility guidelines. +#### API Compatibility Checks + +The compatibility checks for public APIs are performed using [japicmp](https://siom79.github.io/japicmp/) and are available as separate Gradle tasks (those are run on demand at the moment): + +``` +./gradlew japicmp +``` + +By default, the API compatibility checks are run against the latest released version of the OpenSearch, however the target version to compare to could be provided using system property during the build, fe.: + +``` +./gradlew japicmp -Djapicmp.compare.version=2.14.0-SNAPSHOT +``` + ### Backports The Github workflow in [`backport.yml`](.github/workflows/backport.yml) creates backport PRs automatically when the original PR with an appropriate label `backport ` is merged to main with the backport workflow run successfully on the PR. For example, if a PR on main needs to be backported to `1.x` branch, add a label `backport 1.x` to the PR and make sure the backport workflow runs on the PR along with other checks. Once this PR is merged to main, the workflow will create a backport PR to the `1.x` branch. diff --git a/server/build.gradle b/server/build.gradle index a076a6bee36bf..fc383f940991c 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -173,6 +173,22 @@ tasks.named("testingConventions").configure { } } +// Set to current version by default +def japicmpCompareTarget = System.getProperty("japicmp.compare.version") +if (japicmpCompareTarget == null) { /* use latest released version */ + // Read the list from maven central. + // Fetch the metadata and parse the xml into Version instances, pick the latest one + japicmpCompareTarget = new URL('https://repo1.maven.org/maven2/org/opensearch/opensearch/maven-metadata.xml').openStream().withStream { s -> + new XmlParser().parse(s) + .versioning.versions.version + .collect { it.text() }.findAll { it ==~ /\d+\.\d+\.\d+/ } + .collect { org.opensearch.gradle.Version.fromString(it) } + .toSorted() + .last() + .toString() + } +} + def generateModulesList = tasks.register("generateModulesList") { List modules = project(':modules').subprojects.collect { it.name } File modulesFile = new File(buildDir, 'generated-resources/modules.txt') @@ -380,9 +396,10 @@ tasks.named("sourcesJar").configure { } } -/** Compares the current build against a snapshot build */ +/** Compares the current build against a laltest released version or the version supplied through 'japicmp.compare.version' system property */ tasks.register("japicmp", me.champeau.gradle.japicmp.JapicmpTask) { - oldClasspath.from(files("${buildDir}/snapshot/opensearch-${version}.jar")) + logger.info("Comparing public APIs from ${version} to ${japicmpCompareTarget}") + oldClasspath.from(files("${buildDir}/japicmp-target/opensearch-${japicmpCompareTarget}.jar")) newClasspath.from(tasks.named('jar')) onlyModified = true failOnModification = true @@ -390,50 +407,48 @@ tasks.register("japicmp", me.champeau.gradle.japicmp.JapicmpTask) { annotationIncludes = ['@org.opensearch.common.annotation.PublicApi', '@org.opensearch.common.annotation.DeprecatedApi'] txtOutputFile = layout.buildDirectory.file("reports/java-compatibility/report.txt") htmlOutputFile = layout.buildDirectory.file("reports/java-compatibility/report.html") - dependsOn downloadSnapshot + dependsOn downloadJapicmpCompareTarget } /** If the Java API Comparison task failed, print a hint if the change should be merged from its target branch */ gradle.taskGraph.afterTask { Task task, TaskState state -> if (task.name == 'japicmp' && state.failure != null) { - def sha = getGitShaFromJar("${buildDir}/snapshot/opensearch-${version}.jar") - logger.info("Incompatiable java api from snapshot jar built off of commit ${sha}") - - if (!inHistory(sha)) { - logger.warn('\u001B[33mPlease merge from the target branch and run this task again.\u001B[0m') - } + logger.info("Public APIs changes incompatiable with ${japicmpCompareTarget} target have been detected") } } -/** Downloads latest snapshot from maven repository */ -tasks.register("downloadSnapshot", Copy) { +/** Downloads latest released version from maven repository */ +tasks.register("downloadJapicmpCompareTarget", Copy) { def mavenSnapshotRepoUrl = "https://aws.oss.sonatype.org/content/repositories/snapshots/" def groupId = "org.opensearch" def artifactId = "opensearch" - def repos = project.getRepositories(); - MavenArtifactRepository opensearchRepo = repos.maven(repo -> { - repo.setName("opensearch-snapshots"); - repo.setUrl(mavenSnapshotRepoUrl); - }); - - repos.exclusiveContent(exclusiveRepo -> { - exclusiveRepo.filter(descriptor -> descriptor.includeGroup(groupId)); - exclusiveRepo.forRepositories(opensearchRepo); - }); + // Add repository for snapshot artifacts if japicmp compare target version is snapshot + if (japicmpCompareTarget.endsWith("-SNAPSHOT")) { + def repos = project.getRepositories(); + MavenArtifactRepository opensearchRepo = repos.maven(repo -> { + repo.setName("opensearch-snapshots"); + repo.setUrl(mavenSnapshotRepoUrl); + }); + + repos.exclusiveContent(exclusiveRepo -> { + exclusiveRepo.filter(descriptor -> descriptor.includeGroup(groupId)); + exclusiveRepo.forRepositories(opensearchRepo); + }); + } configurations { - snapshotArtifact { + japicmpCompareTargetArtifact { exclude group: 'org.apache.lucene' } } dependencies { - snapshotArtifact("${groupId}:${artifactId}:${version}:") + japicmpCompareTargetArtifact("${groupId}:${artifactId}:${japicmpCompareTarget}:") } - from configurations.snapshotArtifact - into "$buildDir/snapshot" + from configurations.japicmpCompareTargetArtifact + into "$buildDir/japicmp-target" } /** Check if the sha is in the current history */ From bfbdc9f43ebfcd55c0b2a5a96d9e2a73bd66ff04 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Sun, 21 Apr 2024 12:32:47 -0700 Subject: [PATCH 03/32] Fix the flaky test for derived fields highlighter test (#13313) Signed-off-by: Rishabh Maurya --- .../DerivedFieldFetchAndHighlightTests.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java index 28d97c74d9445..92127da9654aa 100644 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java +++ b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/DerivedFieldFetchAndHighlightTests.java @@ -144,17 +144,17 @@ public void testDerivedFieldFromIndexMapping() throws IOException { // create a fetch context to be used by HighlightPhase processor FetchContext fetchContext = mock(FetchContext.class); - when(fetchContext.mapperService()).thenReturn(mockShardContext.getMapperService()); + when(fetchContext.mapperService()).thenReturn(mapperService); when(fetchContext.getQueryShardContext()).thenReturn(mockShardContext); when(fetchContext.getIndexSettings()).thenReturn(indexService.getIndexSettings()); when(fetchContext.searcher()).thenReturn( new ContextIndexSearcher( - searcher.getIndexReader(), - searcher.getSimilarity(), - searcher.getQueryCache(), - searcher.getQueryCachingPolicy(), + reader, + IndexSearcher.getDefaultSimilarity(), + IndexSearcher.getDefaultQueryCache(), + IndexSearcher.getDefaultQueryCachingPolicy(), true, - searcher.getExecutor(), + null, null ) ); @@ -253,17 +253,17 @@ public void testDerivedFieldFromSearchMapping() throws IOException { // create a fetch context to be used by HighlightPhase processor FetchContext fetchContext = mock(FetchContext.class); - when(fetchContext.mapperService()).thenReturn(mockShardContext.getMapperService()); + when(fetchContext.mapperService()).thenReturn(mapperService); when(fetchContext.getQueryShardContext()).thenReturn(mockShardContext); when(fetchContext.getIndexSettings()).thenReturn(indexService.getIndexSettings()); when(fetchContext.searcher()).thenReturn( new ContextIndexSearcher( - searcher.getIndexReader(), - searcher.getSimilarity(), - searcher.getQueryCache(), - searcher.getQueryCachingPolicy(), + reader, + IndexSearcher.getDefaultSimilarity(), + IndexSearcher.getDefaultQueryCache(), + IndexSearcher.getDefaultQueryCachingPolicy(), true, - searcher.getExecutor(), + null, null ) ); From 98009ed9b83d43377e6b3ffa610db7154ff7d200 Mon Sep 17 00:00:00 2001 From: Srikanth Padakanti Date: Mon, 22 Apr 2024 03:02:30 -0500 Subject: [PATCH 04/32] Improve error messages for `_stats` with closed indices (#13012) * Improve the error messages for _stats with closed indices Signed-off-by: srikanth padakanti * Edit the changelog text Signed-off-by: srikanth padakanti * correct the test cases Signed-off-by: srikanth padakanti * fix to throw appropriate error message Signed-off-by: srikanth padakanti * fix to throw appropriate error message Signed-off-by: srikanth padakanti --------- Signed-off-by: srikanth padakanti Signed-off-by: Srikanth Padakanti --- CHANGELOG.md | 1 + .../index/rankeval/RankEvalRequestIT.java | 2 +- .../metadata/IndexNameExpressionResolver.java | 8 +++++++- .../IndexNameExpressionResolverTests.java | 19 +++++++++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dadfcbfbd8b05..0594cfea2c89d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Enabled mockTelemetryPlugin for IT and fixed OOM issues ([#13054](https://github.com/opensearch-project/OpenSearch/pull/13054)) - Fix implement mark() and markSupported() in class FilterStreamInput ([#13098](https://github.com/opensearch-project/OpenSearch/pull/13098)) - Fix snapshot _status API to return correct status for partial snapshots ([#12812](https://github.com/opensearch-project/OpenSearch/pull/12812)) +- Improve the error messages for _stats with closed indices ([#13012](https://github.com/opensearch-project/OpenSearch/pull/13012)) - Ignore BaseRestHandler unconsumed content check as it's always consumed. ([#13290](https://github.com/opensearch-project/OpenSearch/pull/13290)) ### Security diff --git a/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java b/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java index 488c2e33648e7..0e3db9d1c78b3 100644 --- a/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java +++ b/modules/rank-eval/src/internalClusterTest/java/org/opensearch/index/rankeval/RankEvalRequestIT.java @@ -345,7 +345,7 @@ public void testIndicesOptions() { request.indicesOptions(IndicesOptions.fromParameters("closed", null, null, "false", SearchRequest.DEFAULT_INDICES_OPTIONS)); response = client().execute(RankEvalAction.INSTANCE, request).actionGet(); assertEquals(1, response.getFailures().size()); - assertThat(response.getFailures().get("amsterdam_query"), instanceOf(IndexClosedException.class)); + assertThat(response.getFailures().get("amsterdam_query"), instanceOf(IllegalArgumentException.class)); // test allow_no_indices request = new RankEvalRequest(task, new String[] { "bad*" }); diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java index 9a3b569a7ac3d..24ff83d638d4b 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexNameExpressionResolver.java @@ -380,7 +380,13 @@ private void checkSystemIndexAccess(Context context, Metadata metadata, Set Date: Mon, 22 Apr 2024 18:07:57 +0530 Subject: [PATCH 05/32] [Remote Store] Throw IOException when remote is not in sync (#13282) Signed-off-by: Gaurav Bafna --- .../indices/create/RemoteCloneIndexIT.java | 26 +++++++--- .../RemotePrimaryRelocationIT.java | 51 +++++++++---------- .../opensearch/index/shard/IndexShard.java | 12 +++-- .../opensearch/index/shard/StoreRecovery.java | 15 ------ 4 files changed, 52 insertions(+), 52 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java index f50e8fd0a38cf..98c2a3a1581b8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java @@ -40,13 +40,17 @@ */ import org.opensearch.Version; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; import org.opensearch.action.admin.indices.shrink.ResizeType; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.client.Requests; import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.opensearch.common.settings.Settings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.test.VersionUtils; @@ -156,7 +160,11 @@ public void testCreateCloneIndexFailure() throws ExecutionException, Interrupted client().admin() .cluster() .prepareUpdateSettings() - .setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none")) + .setTransientSettings( + Settings.builder() + .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none") + .put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), "10s") + ) .get(); try { setFailRate(REPOSITORY_NAME, 100); @@ -168,9 +176,14 @@ public void testCreateCloneIndexFailure() throws ExecutionException, Interrupted .setWaitForActiveShards(0) .setSettings(Settings.builder().put("index.number_of_replicas", 0).putNull("index.blocks.write").build()) .get(); - - Thread.sleep(2000); - ensureYellow("target"); + // waiting more than waitForRemoteStoreSync's sleep time of 30 sec to deterministically fail + Thread.sleep(40000); + ensureRed("target"); + ClusterHealthRequest healthRequest = Requests.clusterHealthRequest() + .waitForNoRelocatingShards(true) + .waitForNoInitializingShards(true); + ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet(); + assertEquals(actionGet.getUnassignedShards(), numPrimaryShards); } catch (ExecutionException | InterruptedException e) { throw new RuntimeException(e); @@ -182,11 +195,12 @@ public void testCreateCloneIndexFailure() throws ExecutionException, Interrupted .cluster() .prepareUpdateSettings() .setTransientSettings( - Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + Settings.builder() + .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + .put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), (String) null) ) .get(); } - } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java index b1c429a45a1a1..4a4057def4207 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java @@ -8,9 +8,8 @@ package org.opensearch.remotemigration; -import com.carrotsearch.randomizedtesting.generators.RandomNumbers; - import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesRequest; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; @@ -18,11 +17,13 @@ import org.opensearch.action.delete.DeleteResponse; import org.opensearch.action.index.IndexResponse; import org.opensearch.client.Client; +import org.opensearch.client.Requests; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.opensearch.common.Priority; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.hamcrest.OpenSearchAssertions; @@ -162,12 +163,12 @@ public void testMixedModeRelocation_RemoteSeedingFail() throws Exception { String remoteNode = internalCluster().startNode(); internalCluster().validateClusterFormed(); - // assert repo gets registered - GetRepositoriesRequest gr = new GetRepositoriesRequest(new String[] { REPOSITORY_NAME }); - GetRepositoriesResponse getRepositoriesResponse = client.admin().cluster().getRepositories(gr).actionGet(); - assertEquals(1, getRepositoriesResponse.repositories().size()); - setFailRate(REPOSITORY_NAME, 100); + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), "10s")) + .get(); logger.info("--> relocating from {} to {} ", docRepNode, remoteNode); client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, docRepNode, remoteNode)).execute().actionGet(); @@ -181,29 +182,23 @@ public void testMixedModeRelocation_RemoteSeedingFail() throws Exception { .actionGet(); assertTrue(clusterHealthResponse.getRelocatingShards() == 1); - setFailRate(REPOSITORY_NAME, 0); - Thread.sleep(RandomNumbers.randomIntBetween(random(), 0, 2000)); - clusterHealthResponse = client().admin() - .cluster() - .prepareHealth() - .setTimeout(TimeValue.timeValueSeconds(45)) - .setWaitForEvents(Priority.LANGUID) - .setWaitForNoRelocatingShards(true) - .execute() - .actionGet(); - assertTrue(clusterHealthResponse.getRelocatingShards() == 0); - logger.info("--> remote to remote relocation complete"); + // waiting more than waitForRemoteStoreSync's sleep time of 30 sec to deterministically fail + Thread.sleep(40000); + + ClusterHealthRequest healthRequest = Requests.clusterHealthRequest() + .waitForNoRelocatingShards(true) + .waitForNoInitializingShards(true); + ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet(); + assertEquals(actionGet.getRelocatingShards(), 0); + assertEquals(docRepNode, primaryNodeName("test")); + finished.set(true); indexingThread.join(); - refresh("test"); - OpenSearchAssertions.assertHitCount(client().prepareSearch("test").setTrackTotalHits(true).get(), numAutoGenDocs.get()); - OpenSearchAssertions.assertHitCount( - client().prepareSearch("test") - .setTrackTotalHits(true)// extra paranoia ;) - .setQuery(QueryBuilders.termQuery("auto", true)) - .get(), - numAutoGenDocs.get() - ); + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), (String) null)) + .get(); } private static Thread getIndexingThread(AtomicBoolean finished, AtomicInteger numAutoGenDocs) { diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 26dbbbcdee7c0..72a67096fcd9e 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -2118,15 +2118,16 @@ public boolean isRemoteSegmentStoreInSync() { return false; } - public void waitForRemoteStoreSync() { + public void waitForRemoteStoreSync() throws IOException { waitForRemoteStoreSync(() -> {}); } /* Blocks the calling thread, waiting for the remote store to get synced till internal Remote Upload Timeout Calls onProgress on seeing an increased file count on remote + Throws IOException if the remote store is not synced within the timeout */ - public void waitForRemoteStoreSync(Runnable onProgress) { + public void waitForRemoteStoreSync(Runnable onProgress) throws IOException { assert indexSettings.isAssignedOnRemoteNode(); RemoteSegmentStoreDirectory directory = getRemoteDirectory(); int segmentUploadeCount = 0; @@ -2138,7 +2139,7 @@ public void waitForRemoteStoreSync(Runnable onProgress) { while (System.nanoTime() - startNanos < getRecoverySettings().internalRemoteUploadTimeout().nanos()) { try { if (isRemoteSegmentStoreInSync()) { - break; + return; } else { if (directory.getSegmentsUploadedToRemoteStore().size() > segmentUploadeCount) { onProgress.run(); @@ -2156,6 +2157,11 @@ public void waitForRemoteStoreSync(Runnable onProgress) { return; } } + throw new IOException( + "Failed to upload to remote segment store within remote upload timeout of " + + getRecoverySettings().internalRemoteUploadTimeout().getMinutes() + + " minutes" + ); } public void preRecovery() { diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index f5e342d28fde1..e130f50e105d5 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -193,13 +193,6 @@ void recoverFromLocalShards( indexShard.getEngine().forceMerge(false, -1, false, false, false, UUIDs.randomBase64UUID()); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); - if (indexShard.isRemoteSegmentStoreInSync() == false) { - throw new IndexShardRecoveryException( - indexShard.shardId(), - "failed to upload to remote", - new IOException("Failed to upload to remote segment store") - ); - } } return true; } catch (IOException ex) { @@ -436,10 +429,6 @@ void recoverFromSnapshotAndRemoteStore( indexShard.finalizeRecovery(); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); - if (indexShard.isRemoteSegmentStoreInSync() == false) { - listener.onFailure(new IndexShardRestoreFailedException(shardId, "Failed to upload to remote segment store")); - return; - } } indexShard.postRecovery("restore done"); @@ -722,10 +711,6 @@ private void restore( indexShard.finalizeRecovery(); if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { indexShard.waitForRemoteStoreSync(); - if (indexShard.isRemoteSegmentStoreInSync() == false) { - listener.onFailure(new IndexShardRestoreFailedException(shardId, "Failed to upload to remote segment store")); - return; - } } indexShard.postRecovery("restore done"); listener.onResponse(true); From 4e58cef69c00aa255c569bf23dc1ee552f208a64 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Mon, 22 Apr 2024 18:31:11 +0530 Subject: [PATCH 06/32] Ignoring unavailable shards during search request execution with ignore_available parameter (#13298) Signed-off-by: Ankit Jain --- CHANGELOG.md | 1 + .../search/AbstractSearchAsyncAction.java | 6 ++++-- .../AbstractSearchAsyncActionTests.java | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0594cfea2c89d..98a696c62cfa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) - Improve built-in secure transports support ([#12907](https://github.com/opensearch-project/OpenSearch/pull/12907)) - Update links to documentation in rest-api-spec ([#13043](https://github.com/opensearch-project/OpenSearch/pull/13043)) +- Ignoring unavailable shards during search request execution with ignore_available parameter ([#13298](https://github.com/opensearch-project/OpenSearch/pull/13298)) - Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) - [BWC and API enforcement] Reconsider the breaking changes check policy to detect breaking changes against released versions ([#13292](https://github.com/opensearch-project/OpenSearch/pull/13292)) diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 0520a4a7aecec..9bf4a4b1e18f1 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -425,8 +425,10 @@ public final void executeNextPhase(SearchPhase currentPhase, SearchPhase nextPha currentPhase.getName() ); } - onPhaseFailure(currentPhase, "Partial shards failure (" + discrepancy + " shards unavailable)", null); - return; + if (!request.indicesOptions().ignoreUnavailable()) { + onPhaseFailure(currentPhase, "Partial shards failure (" + discrepancy + " shards unavailable)", null); + return; + } } } if (logger.isTraceEnabled()) { diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 420289d3ff2e5..7dcbf213d6c9d 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -70,6 +70,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -442,6 +443,24 @@ public void testShardNotAvailableWithDisallowPartialFailures() { assertEquals(0, searchPhaseExecutionException.getSuppressed().length); } + public void testShardNotAvailableWithIgnoreUnavailable() { + SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(false) + .indicesOptions(new IndicesOptions(EnumSet.of(IndicesOptions.Option.IGNORE_UNAVAILABLE), IndicesOptions.WildcardStates.NONE)); + AtomicReference exception = new AtomicReference<>(); + ActionListener listener = ActionListener.wrap(response -> {}, exception::set); + int numShards = randomIntBetween(2, 10); + ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(numShards); + AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + // skip one to avoid the "all shards failed" failure. + SearchShardIterator skipIterator = new SearchShardIterator(null, null, Collections.emptyList(), null); + skipIterator.resetAndSkip(); + action.skipShard(skipIterator); + + // Validate no exception is thrown + action.executeNextPhase(action, createFetchSearchPhase()); + action.sendSearchResponse(InternalSearchResponse.empty(), phaseResults.results); + } + private static ArraySearchPhaseResults phaseResults( Set contextIds, List> nodeLookups, From 5bc065b191cf6d20b78218ae57af3b389903b4ea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 09:22:44 -0700 Subject: [PATCH 07/32] Bump com.gradle.enterprise from 3.17.1 to 3.17.2 (#13327) * Bump com.gradle.enterprise from 3.17.1 to 3.17.2 Bumps com.gradle.enterprise from 3.17.1 to 3.17.2. --- updated-dependencies: - dependency-name: com.gradle.enterprise dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 2 +- settings.gradle | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 98a696c62cfa5..2bb98c67256e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.commons:commonscodec` from 1.15 to 1.16.1 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) - Bump `org.apache.commons:commonslang` from 3.13.0 to 3.14.0 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) - Bump Apache Tika from 2.6.0 to 2.9.2 ([#12627](https://github.com/opensearch-project/OpenSearch/pull/12627)) -- Bump `com.gradle.enterprise` from 3.16.2 to 3.17.1 ([#13116](https://github.com/opensearch-project/OpenSearch/pull/13116), [#13191](https://github.com/opensearch-project/OpenSearch/pull/13191)) +- Bump `com.gradle.enterprise` from 3.16.2 to 3.17.2 ([#13116](https://github.com/opensearch-project/OpenSearch/pull/13116), [#13191](https://github.com/opensearch-project/OpenSearch/pull/13191), [#13327](https://github.com/opensearch-project/OpenSearch/pull/13327)) - Bump `gradle/wrapper-validation-action` from 2 to 3 ([#13192](https://github.com/opensearch-project/OpenSearch/pull/13192)) - Bump joda from 2.12.2 to 2.12.7 ([#13193](https://github.com/opensearch-project/OpenSearch/pull/13193)) - Bump bouncycastle from 1.77 to 1.78 ([#13243](https://github.com/opensearch-project/OpenSearch/pull/13243)) diff --git a/settings.gradle b/settings.gradle index e7d3dc56b67b9..8e961b9d4179f 100644 --- a/settings.gradle +++ b/settings.gradle @@ -10,7 +10,7 @@ */ plugins { - id "com.gradle.enterprise" version "3.17.1" + id "com.gradle.enterprise" version "3.17.2" } ext.disableBuildCache = hasProperty('DISABLE_BUILD_CACHE') || System.getenv().containsKey('DISABLE_BUILD_CACHE') From cf58fabf203ade0e87b29cb7c7c8f5b133311ec3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 09:25:22 -0700 Subject: [PATCH 08/32] Bump com.google.apis:google-api-services-compute from v1-rev235-1.25.0 to v1-rev20240407-2.0.0 in /plugins/discovery-gce (#13333) * Bump com.google.apis:google-api-services-compute Bumps com.google.apis:google-api-services-compute from v1-rev235-1.25.0 to v1-rev20240407-2.0.0. --- updated-dependencies: - dependency-name: com.google.apis:google-api-services-compute dependency-type: direct:production ... Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + plugins/discovery-gce/build.gradle | 2 +- .../google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 | 1 + .../google-api-services-compute-v1-rev235-1.25.0.jar.sha1 | 1 - 4 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 plugins/discovery-gce/licenses/google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 delete mode 100644 plugins/discovery-gce/licenses/google-api-services-compute-v1-rev235-1.25.0.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bb98c67256e0..3e8184cb29a4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump joda from 2.12.2 to 2.12.7 ([#13193](https://github.com/opensearch-project/OpenSearch/pull/13193)) - Bump bouncycastle from 1.77 to 1.78 ([#13243](https://github.com/opensearch-project/OpenSearch/pull/13243)) - Update google dependencies in repository-gcs and discovery-gce ([#13213](https://github.com/opensearch-project/OpenSearch/pull/13213)) +- Bump `com.google.apis:google-api-services-compute` from v1-rev235-1.25.0 to v1-rev20240407-2.0.0 ([#13333](https://github.com/opensearch-project/OpenSearch/pull/13333)) ### Changed - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) diff --git a/plugins/discovery-gce/build.gradle b/plugins/discovery-gce/build.gradle index 92cdda59d1c99..80aae03bc0332 100644 --- a/plugins/discovery-gce/build.gradle +++ b/plugins/discovery-gce/build.gradle @@ -18,7 +18,7 @@ opensearchplugin { } dependencies { - api "com.google.apis:google-api-services-compute:v1-rev235-1.25.0" + api "com.google.apis:google-api-services-compute:v1-rev20240407-2.0.0" api "com.google.api-client:google-api-client:1.35.2" api "com.google.oauth-client:google-oauth-client:1.35.0" api "com.google.http-client:google-http-client:${versions.google_http_client}" diff --git a/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 b/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 new file mode 100644 index 0000000000000..834d718641a51 --- /dev/null +++ b/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev20240407-2.0.0.jar.sha1 @@ -0,0 +1 @@ +edf93bc92c9b87fee51aa6c3545b565e58075c05 \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev235-1.25.0.jar.sha1 b/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev235-1.25.0.jar.sha1 deleted file mode 100644 index f79af846281de..0000000000000 --- a/plugins/discovery-gce/licenses/google-api-services-compute-v1-rev235-1.25.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -67bf1ac84286b4f9ea996a90f6e91e36dc648aff \ No newline at end of file From c9f1565939c8328c23f235aa547ab55d0085af73 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:52:18 -0700 Subject: [PATCH 09/32] Bump commons-cli:commons-cli from 1.6.0 to 1.7.0 in /plugins/repository-hdfs (#13331) * Bump commons-cli:commons-cli in /plugins/repository-hdfs Bumps commons-cli:commons-cli from 1.6.0 to 1.7.0. --- updated-dependencies: - dependency-name: commons-cli:commons-cli dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + plugins/repository-hdfs/build.gradle | 2 +- plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 | 1 - plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 create mode 100644 plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e8184cb29a4d..b5db1a9084572 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump bouncycastle from 1.77 to 1.78 ([#13243](https://github.com/opensearch-project/OpenSearch/pull/13243)) - Update google dependencies in repository-gcs and discovery-gce ([#13213](https://github.com/opensearch-project/OpenSearch/pull/13213)) - Bump `com.google.apis:google-api-services-compute` from v1-rev235-1.25.0 to v1-rev20240407-2.0.0 ([#13333](https://github.com/opensearch-project/OpenSearch/pull/13333)) +- Bump `commons-cli:commons-cli` from 1.6.0 to 1.7.0 ([#13331](https://github.com/opensearch-project/OpenSearch/pull/13331)) ### Changed - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) diff --git a/plugins/repository-hdfs/build.gradle b/plugins/repository-hdfs/build.gradle index cd7175e70e607..e019a878dfcf0 100644 --- a/plugins/repository-hdfs/build.gradle +++ b/plugins/repository-hdfs/build.gradle @@ -70,7 +70,7 @@ dependencies { api 'com.google.code.gson:gson:2.10.1' runtimeOnly "com.google.guava:guava:${versions.guava}" api "commons-logging:commons-logging:${versions.commonslogging}" - api 'commons-cli:commons-cli:1.6.0' + api 'commons-cli:commons-cli:1.7.0' api "commons-codec:commons-codec:${versions.commonscodec}" api 'commons-collections:commons-collections:3.2.2' api "org.apache.commons:commons-compress:${versions.commonscompress}" diff --git a/plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 b/plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 deleted file mode 100644 index bb94eda6814ea..0000000000000 --- a/plugins/repository-hdfs/licenses/commons-cli-1.6.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -38166a23afb5bd5520f739b87b3be87f7f0fb96d \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 b/plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 new file mode 100644 index 0000000000000..759bc9275d346 --- /dev/null +++ b/plugins/repository-hdfs/licenses/commons-cli-1.7.0.jar.sha1 @@ -0,0 +1 @@ +6504b3f17e8bc5adc6b6c8deecc90144d0154075 \ No newline at end of file From efa06feeabd336e3d95764d072b116250c01d8f5 Mon Sep 17 00:00:00 2001 From: Lakshya Taragi <157457166+ltaragi@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:59:52 +0530 Subject: [PATCH 10/32] Prefer remote replicas for primary promotion during migration (#13136) Signed-off-by: Lakshya Taragi --- .../RemoteDualReplicationIT.java | 135 +++++++++++++++++ .../cluster/routing/RoutingNodes.java | 32 +++- .../remotestore/RemoteStoreNodeService.java | 14 ++ .../allocation/FailedShardsRoutingTests.java | 140 ++++++++++++++++++ 4 files changed, 316 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java index e316bae5d8ebc..18f07910403d4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java @@ -439,6 +439,141 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { ); } + /* + Scenario: + - Starts 1 docrep backed data node + - Creates an index with 0 replica + - Starts 1 remote backed data node + - Moves primary copy from docrep to remote through _cluster/reroute + - Starts 1 more remote backed data node + - Expands index to 2 replicas, one each on new remote node and docrep node + - Stops remote enabled node hosting the primary + - Ensures remote replica gets promoted to primary + - Ensures doc count is same after failover + - Indexes some more docs to ensure working of failed-over primary + */ + public void testFailoverRemotePrimaryToRemoteReplica() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 1 docrep data node"); + String docrepNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0); + + logger.info("---> Creating index with 0 replica"); + createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build()); + ensureGreen(FAILOVER_REMOTE_TO_REMOTE); + initDocRepToRemoteMigration(); + + logger.info("---> Starting 1 remote enabled data node"); + addRemote = true; + String remoteNodeName1 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + assertEquals( + internalCluster().client() + .admin() + .cluster() + .prepareGetRepositories(REPOSITORY_NAME, REPOSITORY_2_NAME) + .get() + .repositories() + .size(), + 2 + ); + + logger.info("---> Starting doc ingestion in parallel thread"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(FAILOVER_REMOTE_TO_REMOTE); + asyncIndexingService.startIndexing(); + + logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", docrepNodeName, remoteNodeName1); + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, docrepNodeName, remoteNodeName1)) + .get() + ); + ensureGreen(FAILOVER_REMOTE_TO_REMOTE); + assertEquals(primaryNodeName(FAILOVER_REMOTE_TO_REMOTE), remoteNodeName1); + + logger.info("---> Starting 1 more remote enabled data node"); + String remoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + + logger.info("---> Expanding index to 2 replica copies, on docrepNode and remoteNode2"); + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareUpdateSettings() + .setIndices(FAILOVER_REMOTE_TO_REMOTE) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build()) + .get() + ); + ensureGreen(FAILOVER_REMOTE_TO_REMOTE); + + logger.info("---> Stopping indexing thread"); + asyncIndexingService.stopIndexing(); + + refreshAndWaitForReplication(FAILOVER_REMOTE_TO_REMOTE); + Map shardStatsMap = internalCluster().client() + .admin() + .indices() + .prepareStats(FAILOVER_REMOTE_TO_REMOTE) + .setDocs(true) + .get() + .asMap(); + DiscoveryNodes nodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + long initialPrimaryDocCount = 0; + for (ShardRouting shardRouting : shardStatsMap.keySet()) { + if (shardRouting.primary()) { + assertTrue(nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode()); + initialPrimaryDocCount = shardStatsMap.get(shardRouting).getStats().getDocs().getCount(); + } + } + int firstBatch = (int) asyncIndexingService.getIndexedDocs(); + assertReplicaAndPrimaryConsistency(FAILOVER_REMOTE_TO_REMOTE, firstBatch, 0); + + logger.info("---> Stop remote store enabled node hosting the primary"); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName1)); + ensureStableCluster(3); + ensureYellow(FAILOVER_REMOTE_TO_REMOTE); + DiscoveryNodes finalNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + + waitUntil(() -> { + ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); + String nodeId = clusterState.getRoutingTable().index(FAILOVER_REMOTE_TO_REMOTE).shard(0).primaryShard().currentNodeId(); + if (nodeId == null) { + return false; + } else { + assertEquals(finalNodes.get(nodeId).getName(), remoteNodeName2); + return finalNodes.get(nodeId).isRemoteStoreNode(); + } + }); + + shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_REMOTE).setDocs(true).get().asMap(); + long primaryDocCountAfterFailover = 0; + for (ShardRouting shardRouting : shardStatsMap.keySet()) { + if (shardRouting.primary()) { + assertTrue(finalNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode()); + primaryDocCountAfterFailover = shardStatsMap.get(shardRouting).getStats().getDocs().getCount(); + } + } + assertEquals(initialPrimaryDocCount, primaryDocCountAfterFailover); + + logger.info("---> Index some more docs to ensure that the failed over primary is ingesting new docs"); + int secondBatch = randomIntBetween(1, 10); + logger.info("---> Indexing {} more docs", secondBatch); + indexBulk(FAILOVER_REMOTE_TO_REMOTE, secondBatch); + refreshAndWaitForReplication(FAILOVER_REMOTE_TO_REMOTE); + + shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_REMOTE).setDocs(true).get().asMap(); + assertEquals(2, shardStatsMap.size()); + shardStatsMap.forEach( + (shardRouting, shardStats) -> { assertEquals(firstBatch + secondBatch, shardStats.getStats().getDocs().getCount()); } + ); + } + /* Scenario: - Starts 1 docrep backed data node diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java index 938a603c459c9..ab455f52c4195 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java @@ -67,6 +67,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.isMigratingToRemoteStore; + /** * {@link RoutingNodes} represents a copy the routing information contained in the {@link ClusterState cluster state}. * It can be either initialized as mutable or immutable (see {@link #RoutingNodes(ClusterState, boolean)}), allowing @@ -418,6 +420,20 @@ public ShardRouting activeReplicaWithOldestVersion(ShardId shardId) { .orElse(null); } + /** + * Returns one active replica shard on a remote node for the given shard id or null if + * no such replica is found. + *

+ * Since we aim to continue moving forward during remote store migration, replicas already migrated to remote nodes + * are preferred for primary promotion + */ + public ShardRouting activeReplicaOnRemoteNode(ShardId shardId) { + return assignedShards(shardId).stream().filter(shr -> !shr.primary() && shr.active()).filter((shr) -> { + RoutingNode nd = node(shr.currentNodeId()); + return (nd != null && nd.node().isRemoteStoreNode()); + }).findFirst().orElse(null); + } + /** * Returns true iff all replicas are active for the given shard routing. Otherwise false */ @@ -735,11 +751,17 @@ private void unassignPrimaryAndPromoteActiveReplicaIfExists( RoutingChangesObserver routingChangesObserver ) { assert failedShard.primary(); - ShardRouting activeReplica; - if (metadata.isSegmentReplicationEnabled(failedShard.getIndexName())) { - activeReplica = activeReplicaWithOldestVersion(failedShard.shardId()); - } else { - activeReplica = activeReplicaWithHighestVersion(failedShard.shardId()); + ShardRouting activeReplica = null; + if (isMigratingToRemoteStore(metadata)) { + // we might not find any replica on remote node + activeReplica = activeReplicaOnRemoteNode(failedShard.shardId()); + } + if (activeReplica == null) { + if (metadata.isSegmentReplicationEnabled(failedShard.getIndexName())) { + activeReplica = activeReplicaWithOldestVersion(failedShard.shardId()); + } else { + activeReplica = activeReplicaWithHighestVersion(failedShard.shardId()); + } } if (activeReplica == null) { moveToUnassigned(failedShard, unassignedInfo); diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java index 94b11086ba865..adfb751421db7 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -238,4 +239,17 @@ public static boolean isMigratingToRemoteStore(ClusterSettings clusterSettings) return (isMixedMode && isRemoteStoreMigrationDirection); } + + /** + * To check if the cluster is undergoing remote store migration using clusterState metadata + * @return + * true For REMOTE_STORE migration direction and MIXED compatibility mode, + * false otherwise + */ + public static boolean isMigratingToRemoteStore(Metadata metadata) { + boolean isMixedMode = REMOTE_STORE_COMPATIBILITY_MODE_SETTING.get(metadata.settings()).equals(CompatibilityMode.MIXED); + boolean isRemoteStoreMigrationDirection = MIGRATION_DIRECTION_SETTING.get(metadata.settings()).equals(Direction.REMOTE_STORE); + + return (isMixedMode && isRemoteStoreMigrationDirection); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java index db4cedbbbe7b5..04e37e7d958d0 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -40,6 +40,7 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.RoutingTable; @@ -47,13 +48,18 @@ import org.opensearch.cluster.routing.allocation.command.AllocationCommands; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.opensearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider; +import org.opensearch.common.UUIDs; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.index.shard.ShardId; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.test.VersionUtils; import java.util.ArrayList; import java.util.HashSet; +import java.util.List; +import java.util.Map; import java.util.Set; import static org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING; @@ -61,6 +67,10 @@ import static org.opensearch.cluster.routing.ShardRoutingState.RELOCATING; import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; import static org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED; +import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThan; @@ -812,4 +822,134 @@ private void testReplicaIsPromoted(boolean isSegmentReplicationEnabled) { } } } + + public void testPreferReplicaOnRemoteNodeForPrimaryPromotion() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build()); + AllocationService allocation = createAllocationService(Settings.builder().build()); + + // segment replication enabled + Settings.Builder settingsBuilder = settings(Version.CURRENT).put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT); + + // remote store migration metadata settings + Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(settingsBuilder).numberOfShards(1).numberOfReplicas(4)) + .persistentSettings( + Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED.mode) + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE.direction) + .build() + ) + .build(); + + RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build(); + + ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(initialRoutingTable) + .build(); + + ShardId shardId = new ShardId(metadata.index("test").getIndex(), 0); + + // add a remote node and start primary shard + Map remoteStoreNodeAttributes = Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_VALUE" + ); + DiscoveryNode remoteNode1 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(remoteNode1)).build(); + clusterState = ClusterState.builder(clusterState).routingTable(allocation.reroute(clusterState, "reroute").routingTable()).build(); + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4)); + + clusterState = startInitializingShardsAndReroute(allocation, clusterState); + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4)); + + // add remote and non-remote nodes and start replica shards + DiscoveryNode remoteNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNode remoteNode3 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNode nonRemoteNode1 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + DiscoveryNode nonRemoteNode2 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + List replicaShardNodes = List.of(remoteNode2, remoteNode3, nonRemoteNode1, nonRemoteNode2); + + for (int i = 0; i < 4; i++) { + clusterState = ClusterState.builder(clusterState) + .nodes(DiscoveryNodes.builder(clusterState.nodes()).add(replicaShardNodes.get(i))) + .build(); + + clusterState = allocation.reroute(clusterState, "reroute"); + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1 + i)); + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4 - (i + 1))); + + clusterState = startInitializingShardsAndReroute(allocation, clusterState); + assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1 + (i + 1))); + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(4 - (i + 1))); + } + + // fail primary shard + ShardRouting primaryShard0 = clusterState.routingTable().index("test").shard(0).primaryShard(); + ClusterState newState = allocation.applyFailedShard(clusterState, primaryShard0, randomBoolean()); + assertNotEquals(clusterState, newState); + clusterState = newState; + + // verify that promoted replica exists on a remote node + assertEquals(4, clusterState.getRoutingNodes().shardsWithState(STARTED).size()); + ShardRouting primaryShardRouting1 = clusterState.routingTable().index("test").shard(0).primaryShard(); + assertNotEquals(primaryShard0, primaryShardRouting1); + assertTrue( + primaryShardRouting1.currentNodeId().equals(remoteNode2.getId()) + || primaryShardRouting1.currentNodeId().equals(remoteNode3.getId()) + ); + + // fail primary shard again + newState = allocation.applyFailedShard(clusterState, primaryShardRouting1, randomBoolean()); + assertNotEquals(clusterState, newState); + clusterState = newState; + + // verify that promoted replica again exists on a remote node + assertEquals(3, clusterState.getRoutingNodes().shardsWithState(STARTED).size()); + ShardRouting primaryShardRouting2 = clusterState.routingTable().index("test").shard(0).primaryShard(); + assertNotEquals(primaryShardRouting1, primaryShardRouting2); + assertTrue( + primaryShardRouting2.currentNodeId().equals(remoteNode2.getId()) + || primaryShardRouting2.currentNodeId().equals(remoteNode3.getId()) + ); + assertNotEquals(primaryShardRouting1.currentNodeId(), primaryShardRouting2.currentNodeId()); + + ShardRouting expectedCandidateForSegRep = clusterState.getRoutingNodes().activeReplicaWithOldestVersion(shardId); + + // fail primary shard again + newState = allocation.applyFailedShard(clusterState, primaryShardRouting2, randomBoolean()); + assertNotEquals(clusterState, newState); + clusterState = newState; + + // verify that promoted replica exists on a non-remote node + assertEquals(2, clusterState.getRoutingNodes().shardsWithState(STARTED).size()); + ShardRouting primaryShardRouting3 = clusterState.routingTable().index("test").shard(0).primaryShard(); + assertNotEquals(primaryShardRouting2, primaryShardRouting3); + assertTrue( + primaryShardRouting3.currentNodeId().equals(nonRemoteNode1.getId()) + || primaryShardRouting3.currentNodeId().equals(nonRemoteNode2.getId()) + ); + assertEquals(expectedCandidateForSegRep.allocationId(), primaryShardRouting3.allocationId()); + } } From 2aad4998af0b20b28fd7dba009fdba4658bb130e Mon Sep 17 00:00:00 2001 From: Lakshya Taragi <157457166+ltaragi@users.noreply.github.com> Date: Tue, 23 Apr 2024 16:07:25 +0530 Subject: [PATCH 11/32] Prevent version upgrade during remote migration (#13185) Signed-off-by: Lakshya Taragi --- .../coordination/JoinTaskExecutor.java | 20 ++++++- .../coordination/JoinTaskExecutorTests.java | 59 +++++++++++++++++++ 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 5d896e392e6bc..5475470b81b93 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -31,6 +31,7 @@ package org.opensearch.cluster.coordination; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.Version; import org.opensearch.cluster.ClusterState; @@ -57,6 +58,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -66,6 +68,7 @@ import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -78,7 +81,7 @@ public class JoinTaskExecutor implements ClusterStateTaskExecutor remoteDN = existingNodes.stream().filter(DiscoveryNode::isRemoteStoreNode).findFirst(); remoteDN.ifPresent(discoveryNode -> ensureRemoteStoreNodesCompatibility(joiningNode, discoveryNode)); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 5eafe63e63fad..3e343e95f6c4b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -76,6 +76,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; +import static org.opensearch.test.VersionUtils.allOpenSearchVersions; import static org.opensearch.test.VersionUtils.allVersions; import static org.opensearch.test.VersionUtils.maxCompatibleVersion; import static org.opensearch.test.VersionUtils.randomCompatibleVersion; @@ -885,6 +886,64 @@ public void testUpdatesClusterStateWithMultiNodeClusterAndSameRepository() throw validateRepositoryMetadata(result.resultingState, clusterManagerNode, 2); } + public void testNodeJoinInMixedMode() { + Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + + List versions = allOpenSearchVersions(); + assert versions.size() >= 2 : "test requires at least two open search versions"; + Version baseVersion = versions.get(versions.size() - 2); + Version higherVersion = versions.get(versions.size() - 1); + + DiscoveryNode currentNode1 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), baseVersion); + DiscoveryNode currentNode2 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), baseVersion); + DiscoveryNodes currentNodes = DiscoveryNodes.builder() + .add(currentNode1) + .localNodeId(currentNode1.getId()) + .add(currentNode2) + .localNodeId(currentNode2.getId()) + .build(); + + Settings mixedModeCompatibilitySettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED) + .build(); + + Metadata metadata = Metadata.builder().persistentSettings(mixedModeCompatibilitySettings).build(); + + // joining node of a higher version than the current nodes + DiscoveryNode joiningNode1 = new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + higherVersion + ); + final IllegalStateException exception = expectThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode1, currentNodes, metadata) + ); + String reason = String.format( + Locale.ROOT, + "remote migration : a node [%s] of higher version [%s] is not allowed to join a cluster with maximum version [%s]", + joiningNode1, + joiningNode1.getVersion(), + currentNodes.getMaxNodeVersion() + ); + assertEquals(reason, exception.getMessage()); + + // joining node of the same version as the current nodes + DiscoveryNode joiningNode2 = new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + baseVersion + ); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode2, currentNodes, metadata); + } + private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories) throws Exception { From 41a30551d704a991f27ee9eabe5be8cd55910428 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Tue, 23 Apr 2024 19:35:15 +0800 Subject: [PATCH 12/32] Rename ingest processor supports overriding target field if exists (#12990) * Rename ingest processor supports overriding target field if exists Signed-off-by: Gao Binlong * Modify change log Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../ingest/common/RenameProcessor.java | 17 +++++-- .../common/DotExpanderProcessorTests.java | 1 + .../common/RenameProcessorFactoryTests.java | 13 +++++ .../ingest/common/RenameProcessorTests.java | 51 +++++++++++-------- .../test/ingest/280_rename_processor.yml | 39 ++++++++++++++ 6 files changed, 96 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5db1a9084572..e1184d48deb59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow setting KEYSTORE_PASSWORD through env variable ([#12865](https://github.com/opensearch-project/OpenSearch/pull/12865)) - [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697)) - [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954)) +- Rename ingest processor supports overriding target field if exists ([#12990](https://github.com/opensearch-project/OpenSearch/pull/12990)) - [Tiered Caching] Make took time caching policy setting dynamic ([#13063](https://github.com/opensearch-project/OpenSearch/pull/13063)) - Derived fields support to derive field values at query time without indexing ([#12569](https://github.com/opensearch-project/OpenSearch/pull/12569)) - Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java index 7564bbdf95f45..6ec3ebb6ace81 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RenameProcessor.java @@ -52,18 +52,21 @@ public final class RenameProcessor extends AbstractProcessor { private final TemplateScript.Factory field; private final TemplateScript.Factory targetField; private final boolean ignoreMissing; + private final boolean overrideTarget; RenameProcessor( String tag, String description, TemplateScript.Factory field, TemplateScript.Factory targetField, - boolean ignoreMissing + boolean ignoreMissing, + boolean overrideTarget ) { super(tag, description); this.field = field; this.targetField = targetField; this.ignoreMissing = ignoreMissing; + this.overrideTarget = overrideTarget; } TemplateScript.Factory getField() { @@ -78,6 +81,10 @@ boolean isIgnoreMissing() { return ignoreMissing; } + boolean isOverrideTarget() { + return overrideTarget; + } + @Override public IngestDocument execute(IngestDocument document) { String path = document.renderTemplate(field); @@ -94,9 +101,10 @@ public IngestDocument execute(IngestDocument document) { // We fail here if the target field point to an array slot that is out of range. // If we didn't do this then we would fail if we set the value in the target_field // and then on failure processors would not see that value we tried to rename as we already - // removed it. + // removed it. If the target field is out of range, we throw the exception no matter + // what the parameter overrideTarget is. String target = document.renderTemplate(targetField); - if (document.hasField(target, true)) { + if (document.hasField(target, true) && !overrideTarget) { throw new IllegalArgumentException("field [" + target + "] already exists"); } @@ -143,7 +151,8 @@ public RenameProcessor create( scriptService ); boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); - return new RenameProcessor(processorTag, description, fieldTemplate, targetFieldTemplate, ignoreMissing); + boolean overrideTarget = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "override_target", false); + return new RenameProcessor(processorTag, description, fieldTemplate, targetFieldTemplate, ignoreMissing, overrideTarget); } } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java index cd912269a593d..73719b24c74ea 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DotExpanderProcessorTests.java @@ -105,6 +105,7 @@ public void testEscapeFields_valueField() throws Exception { null, new TestTemplateService.MockTemplateScript.Factory("foo"), new TestTemplateService.MockTemplateScript.Factory("foo.bar"), + false, false ); processor.execute(document); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java index ec43be97689ee..8ce9203db43ce 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorFactoryTests.java @@ -77,6 +77,19 @@ public void testCreateWithIgnoreMissing() throws Exception { assertThat(renameProcessor.isIgnoreMissing(), equalTo(true)); } + public void testCreateWithOverrideTarget() throws Exception { + Map config = new HashMap<>(); + config.put("field", "old_field"); + config.put("target_field", "new_field"); + config.put("override_target", true); + String processorTag = randomAlphaOfLength(10); + RenameProcessor renameProcessor = factory.create(null, processorTag, null, config); + assertThat(renameProcessor.getTag(), equalTo(processorTag)); + assertThat(renameProcessor.getField().newInstance(Collections.emptyMap()).execute(), equalTo("old_field")); + assertThat(renameProcessor.getTargetField().newInstance(Collections.emptyMap()).execute(), equalTo("new_field")); + assertThat(renameProcessor.isOverrideTarget(), equalTo(true)); + } + public void testCreateNoFieldPresent() throws Exception { Map config = new HashMap<>(); config.put("target_field", "new_field"); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java index a600464371af8..ad5b46e924278 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RenameProcessorTests.java @@ -59,7 +59,7 @@ public void testRename() throws Exception { do { newFieldName = RandomDocumentPicks.randomFieldName(random()); } while (RandomDocumentPicks.canAddField(newFieldName, ingestDocument) == false || newFieldName.equals(fieldName)); - Processor processor = createRenameProcessor(fieldName, newFieldName, false); + Processor processor = createRenameProcessor(fieldName, newFieldName, false, false); processor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue(newFieldName, Object.class), equalTo(fieldValue)); } @@ -77,7 +77,7 @@ public void testRenameArrayElement() throws Exception { document.put("one", one); IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); - Processor processor = createRenameProcessor("list.0", "item", false); + Processor processor = createRenameProcessor("list.0", "item", false, false); processor.execute(ingestDocument); Object actualObject = ingestDocument.getSourceAndMetadata().get("list"); assertThat(actualObject, instanceOf(List.class)); @@ -90,7 +90,7 @@ public void testRenameArrayElement() throws Exception { assertThat(actualObject, instanceOf(String.class)); assertThat(actualObject, equalTo("item1")); - processor = createRenameProcessor("list.0", "list.3", false); + processor = createRenameProcessor("list.0", "list.3", false, randomBoolean()); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -105,7 +105,7 @@ public void testRenameArrayElement() throws Exception { public void testRenameNonExistingField() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); String fieldName = RandomDocumentPicks.randomFieldName(random()); - Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), false); + Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -114,7 +114,7 @@ public void testRenameNonExistingField() throws Exception { } // when using template snippet, the resolved field path maybe empty - processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), false); + processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -127,30 +127,36 @@ public void testRenameNonExistingFieldWithIgnoreMissing() throws Exception { IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); String fieldName = RandomDocumentPicks.randomFieldName(random()); - Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), true); + Processor processor = createRenameProcessor(fieldName, RandomDocumentPicks.randomFieldName(random()), true, false); processor.execute(ingestDocument); assertIngestDocument(originalIngestDocument, ingestDocument); // when using template snippet, the resolved field path maybe empty - processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), true); + processor = createRenameProcessor("", RandomDocumentPicks.randomFieldName(random()), true, false); processor.execute(ingestDocument); assertIngestDocument(originalIngestDocument, ingestDocument); } public void testRenameNewFieldAlreadyExists() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); - String fieldName = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); - Processor processor = createRenameProcessor( - RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument), - fieldName, - false - ); + String field = RandomDocumentPicks.randomExistingFieldName(random(), ingestDocument); + Object fieldValue = ingestDocument.getFieldValue(field, Object.class); + String targetField = RandomDocumentPicks.addRandomField(random(), ingestDocument, RandomDocumentPicks.randomFieldValue(random())); + + Processor processor = createRenameProcessor(field, targetField, false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); } catch (IllegalArgumentException e) { - assertThat(e.getMessage(), equalTo("field [" + fieldName + "] already exists")); + assertThat(e.getMessage(), equalTo("field [" + targetField + "] already exists")); } + + Processor processorWithOverrideTarget = createRenameProcessor(field, targetField, false, true); + + processorWithOverrideTarget.execute(ingestDocument); + assertThat(ingestDocument.hasField(field), equalTo(false)); + assertThat(ingestDocument.hasField(targetField), equalTo(true)); + assertThat(ingestDocument.getFieldValue(targetField, Object.class), equalTo(fieldValue)); } public void testRenameExistingFieldNullValue() throws Exception { @@ -158,7 +164,7 @@ public void testRenameExistingFieldNullValue() throws Exception { String fieldName = RandomDocumentPicks.randomFieldName(random()); ingestDocument.setFieldValue(fieldName, null); String newFieldName = randomValueOtherThanMany(ingestDocument::hasField, () -> RandomDocumentPicks.randomFieldName(random())); - Processor processor = createRenameProcessor(fieldName, newFieldName, false); + Processor processor = createRenameProcessor(fieldName, newFieldName, false, false); processor.execute(ingestDocument); if (newFieldName.startsWith(fieldName + '.')) { assertThat(ingestDocument.getFieldValue(fieldName, Object.class), instanceOf(Map.class)); @@ -182,7 +188,7 @@ public Object put(String key, Object value) { source.put("list", Collections.singletonList("item")); IngestDocument ingestDocument = new IngestDocument(source, Collections.emptyMap()); - Processor processor = createRenameProcessor("list", "new_field", false); + Processor processor = createRenameProcessor("list", "new_field", false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -206,7 +212,7 @@ public Object remove(Object key) { source.put("list", Collections.singletonList("item")); IngestDocument ingestDocument = new IngestDocument(source, Collections.emptyMap()); - Processor processor = createRenameProcessor("list", "new_field", false); + Processor processor = createRenameProcessor("list", "new_field", false, false); try { processor.execute(ingestDocument); fail("processor execute should have failed"); @@ -221,12 +227,12 @@ public void testRenameLeafIntoBranch() throws Exception { Map source = new HashMap<>(); source.put("foo", "bar"); IngestDocument ingestDocument = new IngestDocument(source, Collections.emptyMap()); - Processor processor1 = createRenameProcessor("foo", "foo.bar", false); + Processor processor1 = createRenameProcessor("foo", "foo.bar", false, false); processor1.execute(ingestDocument); assertThat(ingestDocument.getFieldValue("foo", Map.class), equalTo(Collections.singletonMap("bar", "bar"))); assertThat(ingestDocument.getFieldValue("foo.bar", String.class), equalTo("bar")); - Processor processor2 = createRenameProcessor("foo.bar", "foo.bar.baz", false); + Processor processor2 = createRenameProcessor("foo.bar", "foo.bar.baz", false, false); processor2.execute(ingestDocument); assertThat( ingestDocument.getFieldValue("foo", Map.class), @@ -236,18 +242,19 @@ public void testRenameLeafIntoBranch() throws Exception { assertThat(ingestDocument.getFieldValue("foo.bar.baz", String.class), equalTo("bar")); // for fun lets try to restore it (which don't allow today) - Processor processor3 = createRenameProcessor("foo.bar.baz", "foo", false); + Processor processor3 = createRenameProcessor("foo.bar.baz", "foo", false, false); Exception e = expectThrows(IllegalArgumentException.class, () -> processor3.execute(ingestDocument)); assertThat(e.getMessage(), equalTo("field [foo] already exists")); } - private RenameProcessor createRenameProcessor(String field, String targetField, boolean ignoreMissing) { + private RenameProcessor createRenameProcessor(String field, String targetField, boolean ignoreMissing, boolean overrideTarget) { return new RenameProcessor( randomAlphaOfLength(10), null, new TestTemplateService.MockTemplateScript.Factory(field), new TestTemplateService.MockTemplateScript.Factory(targetField), - ignoreMissing + ignoreMissing, + overrideTarget ); } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml index 96b2256bcc1dc..0ef658896ff0a 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/280_rename_processor.yml @@ -64,3 +64,42 @@ teardown: index: test id: 1 - match: { _source.message: "foo bar baz" } + +--- +"Test rename processor with override_target": + - skip: + version: " - 2.13.99" + reason: "introduced in 2.14.0" + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "rename" : { + "field" : "foo", + "target_field" : "bar", + "override_target" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: { + foo: "foo", + bar: "bar" + } + + - do: + get: + index: test + id: 1 + - match: { _source: { "bar": "foo" } } From ae49fd29ee81d8ba64baa0a38a58bb639a446205 Mon Sep 17 00:00:00 2001 From: Sachin Kale Date: Tue, 23 Apr 2024 17:42:52 +0530 Subject: [PATCH 13/32] Handle OpenSearchRejectedExecutionException while retrying refresh (#13301) Signed-off-by: Sachin Kale --- .../ReleasableRetryableRefreshListener.java | 7 ++++ ...leasableRetryableRefreshListenerTests.java | 42 +++++++++++++++++-- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java index 757275932c5f1..80daefc4482fc 100644 --- a/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/ReleasableRetryableRefreshListener.java @@ -13,6 +13,7 @@ import org.opensearch.common.lease.Releasable; import org.opensearch.common.lease.Releasables; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -129,6 +130,12 @@ private void scheduleRetry(TimeValue interval, String retryThreadPoolName, boole ); scheduled = true; getLogger().info("Scheduled retry with didRefresh={}", didRefresh); + } catch (OpenSearchRejectedExecutionException e) { + if (e.isExecutorShutdown()) { + getLogger().info("Scheduling retry with didRefresh={} failed due to executor shut down", didRefresh); + } else { + throw e; + } } finally { if (scheduled == false) { retryScheduled.set(false); diff --git a/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java index a0641c365a2a1..e0ad09efac367 100644 --- a/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/ReleasableRetryableRefreshListenerTests.java @@ -316,7 +316,32 @@ protected Logger getLogger() { public void testScheduleRetryAfterClose() throws Exception { // This tests that once the listener has been closed, even the retries would not be scheduled. final AtomicLong runCount = new AtomicLong(); - ReleasableRetryableRefreshListener testRefreshListener = new ReleasableRetryableRefreshListener(threadPool) { + ReleasableRetryableRefreshListener testRefreshListener = getRetryableRefreshListener(runCount); + Thread thread1 = new Thread(() -> { + try { + testRefreshListener.afterRefresh(true); + } catch (IOException e) { + throw new AssertionError(e); + } + }); + Thread thread2 = new Thread(() -> { + try { + Thread.sleep(500); + testRefreshListener.drainRefreshes(); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + thread1.start(); + thread2.start(); + thread1.join(); + thread2.join(); + assertBusy(() -> assertEquals(1, runCount.get())); + assertRefreshListenerClosed(testRefreshListener); + } + + private ReleasableRetryableRefreshListener getRetryableRefreshListener(AtomicLong runCount) { + return new ReleasableRetryableRefreshListener(threadPool) { @Override protected boolean performAfterRefreshWithPermit(boolean didRefresh) { try { @@ -341,6 +366,11 @@ protected String getRetryThreadPoolName() { return ThreadPool.Names.REMOTE_REFRESH_RETRY; } + @Override + protected boolean isRetryEnabled() { + return true; + } + @Override protected TimeValue getNextRetryInterval() { try { @@ -351,6 +381,12 @@ protected TimeValue getNextRetryInterval() { return TimeValue.timeValueMillis(100); } }; + } + + public void testScheduleRetryAfterThreadpoolShutdown() throws Exception { + // This tests that once the thread-pool is shut down, the exception is handled. + final AtomicLong runCount = new AtomicLong(); + ReleasableRetryableRefreshListener testRefreshListener = getRetryableRefreshListener(runCount); Thread thread1 = new Thread(() -> { try { testRefreshListener.afterRefresh(true); @@ -361,7 +397,7 @@ protected TimeValue getNextRetryInterval() { Thread thread2 = new Thread(() -> { try { Thread.sleep(500); - testRefreshListener.drainRefreshes(); + threadPool.shutdown(); } catch (InterruptedException e) { throw new AssertionError(e); } @@ -371,7 +407,7 @@ protected TimeValue getNextRetryInterval() { thread1.join(); thread2.join(); assertBusy(() -> assertEquals(1, runCount.get())); - assertRefreshListenerClosed(testRefreshListener); + assertFalse(testRefreshListener.getRetryScheduledStatus()); } public void testConcurrentScheduleRetry() throws Exception { From f41db2c8f78ae2365f428e692770be433a54de8a Mon Sep 17 00:00:00 2001 From: Sai Medhini Reddy Maryada <117196660+saimedhi@users.noreply.github.com> Date: Tue, 23 Apr 2024 07:18:47 -0700 Subject: [PATCH 14/32] Fixed skip version for Search Pipeline integration tests (#13344) Signed-off-by: saimedhi --- .../test/search_pipeline/10_basic.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml index 60c0706415bc2..a4877975a0052 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_pipeline/10_basic.yml @@ -1,8 +1,8 @@ --- "Test basic pipeline crud": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: search_pipeline.put: id: "my_pipeline" @@ -32,8 +32,8 @@ --- "Test Put Versioned Pipeline": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: search_pipeline.put: id: "my_pipeline" @@ -125,8 +125,8 @@ --- "Test Get All Pipelines": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: search_pipeline.put: id: "first_pipeline" @@ -152,8 +152,8 @@ --- "Test invalid config": - skip: - version: " - 2.6.99" - reason: "Added in 2.7.0" + version: " - 2.8.99" + reason: "Added in 2.9.0" - do: catch: /parse_exception/ search_pipeline.put: From ab7e914c3956ac206cf269ca345574882770a283 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Tue, 23 Apr 2024 22:33:28 +0800 Subject: [PATCH 15/32] Fix from and size parameter can be negative when searching (#13047) * Fix from and size parameter can be negative when searching Signed-off-by: Gao Binlong * Add changelog Signed-off-by: Gao Binlong * fix test failure Signed-off-by: Gao Binlong * Fix test failure Signed-off-by: Gao Binlong * Optimize the code Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../test/search/360_from_and_size.yml | 113 ++++++++++++++++++ .../action/search/SearchRequest.java | 2 +- .../rest/action/search/RestSearchAction.java | 12 +- .../search/builder/SearchSourceBuilder.java | 6 +- .../builder/SearchSourceBuilderTests.java | 38 +++++- 6 files changed, 161 insertions(+), 11 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/360_from_and_size.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index e1184d48deb59..c0e966c49506c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix issue with feature flags where default value may not be honored ([#12849](https://github.com/opensearch-project/OpenSearch/pull/12849)) - Fix UOE While building Exists query for nested search_as_you_type field ([#12048](https://github.com/opensearch-project/OpenSearch/pull/12048)) - Client with Java 8 runtime and Apache HttpClient 5 Transport fails with java.lang.NoSuchMethodError: java.nio.ByteBuffer.flip()Ljava/nio/ByteBuffer ([#13100](https://github.com/opensearch-project/opensearch-java/pull/13100)) +- Fix from and size parameter can be negative when searching ([#13047](https://github.com/opensearch-project/OpenSearch/pull/13047)) - Enabled mockTelemetryPlugin for IT and fixed OOM issues ([#13054](https://github.com/opensearch-project/OpenSearch/pull/13054)) - Fix implement mark() and markSupported() in class FilterStreamInput ([#13098](https://github.com/opensearch-project/OpenSearch/pull/13098)) - Fix snapshot _status API to return correct status for partial snapshots ([#12812](https://github.com/opensearch-project/OpenSearch/pull/12812)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/360_from_and_size.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/360_from_and_size.yml new file mode 100644 index 0000000000000..95bcb9e5326cb --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/360_from_and_size.yml @@ -0,0 +1,113 @@ +setup: + - do: + indices.create: + index: test_1 + - do: + index: + index: test_1 + id: 1 + body: { foo: bar } + - do: + index: + index: test_1 + id: 2 + body: { foo: bar } + - do: + index: + index: test_1 + id: 3 + body: { foo: bar } + + - do: + index: + index: test_1 + id: 4 + body: { foo: bar } + - do: + indices.refresh: + index: [test_1] + +--- +teardown: + - do: + indices.delete: + index: test_1 + ignore: 404 + +--- +"Throws exception if from or size query parameter is negative": + - skip: + version: " - 2.99.99" + reason: "fixed in 3.0.0" + - do: + catch: '/\[from\] parameter cannot be negative, found \[-5\]/' + search: + index: test_1 + from: -5 + size: 10 + body: + query: + match: + foo: bar + + - do: + catch: '/\[size\] parameter cannot be negative, found \[-1\]/' + search: + index: test_1 + from: 0 + size: -1 + body: + query: + match: + foo: bar + + - do: + search: + index: test_1 + from: 0 + size: 10 + body: + query: + match: + foo: bar + + - match: {hits.total.value: 4} + +--- +"Throws exception if from or size request body parameter is negative": + - skip: + version: " - 2.99.99" + reason: "fixed in 3.0.0" + - do: + catch: '/\[from\] parameter cannot be negative, found \[-5\]/' + search: + index: test_1 + body: + from: -5 + size: 10 + query: + match: + foo: bar + + - do: + catch: '/\[size\] parameter cannot be negative, found \[-1\]/' + search: + index: test_1 + body: + from: 0 + size: -1 + query: + match: + foo: bar + + - do: + search: + index: test_1 + body: + from: 0 + size: 10 + query: + match: + foo: bar + + - match: {hits.total.value: 4} diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 3b8a6937815aa..f3d9f77e2394c 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -497,7 +497,7 @@ public PointInTimeBuilder pointInTimeBuilder() { } /** - * The tye of search to execute. + * The type of search to execute. */ public SearchType searchType() { return searchType; diff --git a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java index 80dc34c4d5d68..3a6b45013e892 100644 --- a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java +++ b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java @@ -50,6 +50,7 @@ import org.opensearch.rest.action.RestCancellableNodeClient; import org.opensearch.rest.action.RestStatusToXContentListener; import org.opensearch.search.Scroll; +import org.opensearch.search.SearchService; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.StoredFieldsContext; import org.opensearch.search.fetch.subphase.FetchSourceContext; @@ -235,13 +236,12 @@ private static void parseSearchSource(final SearchSourceBuilder searchSourceBuil searchSourceBuilder.query(queryBuilder); } - int from = request.paramAsInt("from", -1); - if (from != -1) { - searchSourceBuilder.from(from); + if (request.hasParam("from")) { + searchSourceBuilder.from(request.paramAsInt("from", SearchService.DEFAULT_FROM)); } - int size = request.paramAsInt("size", -1); - if (size != -1) { - setSize.accept(size); + + if (request.hasParam("size")) { + setSize.accept(request.paramAsInt("size", SearchService.DEFAULT_SIZE)); } if (request.hasParam("explain")) { diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java index 182350c22f697..07248a0719c3a 100644 --- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java @@ -418,7 +418,7 @@ public QueryBuilder postFilter() { */ public SearchSourceBuilder from(int from) { if (from < 0) { - throw new IllegalArgumentException("[from] parameter cannot be negative"); + throw new IllegalArgumentException("[from] parameter cannot be negative, found [" + from + "]"); } this.from = from; return this; @@ -1215,9 +1215,9 @@ public void parseXContent(XContentParser parser, boolean checkTrailingTokens) th currentFieldName = parser.currentName(); } else if (token.isValue()) { if (FROM_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - from = parser.intValue(); + from(parser.intValue()); } else if (SIZE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { - size = parser.intValue(); + size(parser.intValue()); } else if (TIMEOUT_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { timeout = TimeValue.parseTimeValue(parser.text(), null, TIMEOUT_FIELD.getPreferredName()); } else if (TERMINATE_AFTER_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { diff --git a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java index eea7b1829e9b0..5b1035e24185d 100644 --- a/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java +++ b/server/src/test/java/org/opensearch/search/builder/SearchSourceBuilderTests.java @@ -571,7 +571,7 @@ public void testParseIndicesBoost() throws IOException { public void testNegativeFromErrors() { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> new SearchSourceBuilder().from(-2)); - assertEquals("[from] parameter cannot be negative", expected.getMessage()); + assertEquals("[from] parameter cannot be negative, found [-2]", expected.getMessage()); } public void testNegativeSizeErrors() { @@ -582,6 +582,42 @@ public void testNegativeSizeErrors() { assertEquals("[size] parameter cannot be negative, found [-1]", expected.getMessage()); } + public void testParseFromAndSize() throws IOException { + int negativeFrom = randomIntBetween(-100, -1); + String restContent = " { \"from\": \"" + negativeFrom + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + IllegalArgumentException expected = expectThrows( + IllegalArgumentException.class, + () -> SearchSourceBuilder.fromXContent(parser) + ); + assertEquals("[from] parameter cannot be negative, found [" + negativeFrom + "]", expected.getMessage()); + } + + int validFrom = randomIntBetween(0, 100); + restContent = " { \"from\": \"" + validFrom + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); + assertEquals(validFrom, searchSourceBuilder.from()); + } + + int negativeSize = randomIntBetween(-100, -1); + restContent = " { \"size\": \"" + negativeSize + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + IllegalArgumentException expected = expectThrows( + IllegalArgumentException.class, + () -> SearchSourceBuilder.fromXContent(parser) + ); + assertEquals("[size] parameter cannot be negative, found [" + negativeSize + "]", expected.getMessage()); + } + + int validSize = randomIntBetween(0, 100); + restContent = " { \"size\": \"" + validSize + "\"}"; + try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { + SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); + assertEquals(validSize, searchSourceBuilder.size()); + } + } + private void assertIndicesBoostParseErrorMessage(String restContent, String expectedErrorMessage) throws IOException { try (XContentParser parser = createParser(JsonXContent.jsonXContent, restContent)) { ParsingException e = expectThrows(ParsingException.class, () -> SearchSourceBuilder.fromXContent(parser)); From 97e319109b2159e349269e9188aeb94c72165685 Mon Sep 17 00:00:00 2001 From: Craig Perkins Date: Tue, 23 Apr 2024 10:50:15 -0400 Subject: [PATCH 16/32] Bump com.github.spullara.mustache.java:compiler from 0.9.10 to 0.9.11 (#13329) * Bump com.github.spullara.mustache.java:compiler from 0.9.10 to 0.9.11 Signed-off-by: Craig Perkins * Add to CHANGELOG Signed-off-by: Craig Perkins * Fix error in yamlRestTest Signed-off-by: Craig Perkins --------- Signed-off-by: Craig Perkins --- CHANGELOG.md | 1 + modules/lang-mustache/build.gradle | 2 +- modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 | 1 - modules/lang-mustache/licenses/compiler-0.9.11.jar.sha1 | 1 + .../test/lang_mustache/20_render_search_template.yml | 4 ++-- 5 files changed, 5 insertions(+), 4 deletions(-) delete mode 100644 modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 create mode 100644 modules/lang-mustache/licenses/compiler-0.9.11.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index c0e966c49506c..22c3e0d787f58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Update google dependencies in repository-gcs and discovery-gce ([#13213](https://github.com/opensearch-project/OpenSearch/pull/13213)) - Bump `com.google.apis:google-api-services-compute` from v1-rev235-1.25.0 to v1-rev20240407-2.0.0 ([#13333](https://github.com/opensearch-project/OpenSearch/pull/13333)) - Bump `commons-cli:commons-cli` from 1.6.0 to 1.7.0 ([#13331](https://github.com/opensearch-project/OpenSearch/pull/13331)) +- Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.11 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329)) ### Changed - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) diff --git a/modules/lang-mustache/build.gradle b/modules/lang-mustache/build.gradle index 14eafd8d43e13..4bf75b593ebf2 100644 --- a/modules/lang-mustache/build.gradle +++ b/modules/lang-mustache/build.gradle @@ -38,7 +38,7 @@ opensearchplugin { } dependencies { - api "com.github.spullara.mustache.java:compiler:0.9.10" + api "com.github.spullara.mustache.java:compiler:0.9.11" } restResources { diff --git a/modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 b/modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 deleted file mode 100644 index 6336318c2ce1a..0000000000000 --- a/modules/lang-mustache/licenses/compiler-0.9.10.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6111ae24e3be9ecbd75f5fe908583fc14b4f0174 \ No newline at end of file diff --git a/modules/lang-mustache/licenses/compiler-0.9.11.jar.sha1 b/modules/lang-mustache/licenses/compiler-0.9.11.jar.sha1 new file mode 100644 index 0000000000000..a77675488b2e0 --- /dev/null +++ b/modules/lang-mustache/licenses/compiler-0.9.11.jar.sha1 @@ -0,0 +1 @@ +1bce858aca4f0ce93fdb939de8c8474431c06322 \ No newline at end of file diff --git a/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml b/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml index 946b63a65d923..a0f6828e66d79 100644 --- a/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml +++ b/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/20_render_search_template.yml @@ -47,7 +47,7 @@ - match: { template_output.aggs.my_terms.terms.field: "my_other_field" } - do: - catch: /Improperly.closed.variable.in.query-template/ + catch: /Improperly.closed.variable:.my_value.in.query-template/ render_search_template: body: { "source": { "query": { "match": { "text": "{{{my_value}}" } }, "aggs": { "my_terms": { "terms": { "field": "{{my_field}}" } } } }, "params": { "my_value": "bar", "my_field": "field1" } } --- @@ -99,7 +99,7 @@ - match: { template_output.size: 100 } - do: - catch: /Improperly.closed.variable.in.query-template/ + catch: /Improperly.closed.variable:.my_value.in.query-template/ render_search_template: body: { "source": "{ \"query\": { \"match\": { \"text\": \"{{{my_value}}\" } }, \"size\": {{my_size}} }", "params": { "my_value": "bar", "my_size": 100 } } From 3ea0a31f6f613ddd5b7bde0053195d5b212c813d Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Tue, 23 Apr 2024 20:48:28 +0530 Subject: [PATCH 17/32] Upload remote paths during index creation or full cluster upload (#13150) * Upload remote paths during index creation or full cluster upload Signed-off-by: Ashish Singh --- .../RemoteStoreUploadIndexPathIT.java | 108 ++++++ .../transfer/RemoteTransferContainer.java | 10 +- .../remote/IndexMetadataUploadListener.java | 49 +++ .../remote/RemoteClusterStateService.java | 130 ++++++-- .../index/remote/RemoteIndexPath.java | 159 +++++++++ .../index/remote/RemoteIndexPathUploader.java | 281 ++++++++++++++++ .../index/remote/RemoteStorePathStrategy.java | 4 + .../RemoteStorePathStrategyResolver.java | 2 + .../main/java/org/opensearch/node/Node.java | 17 +- .../blobstore/BaseBlobStoreFormat.java | 130 ++++++++ .../blobstore/ChecksumBlobStoreFormat.java | 74 +---- .../blobstore/ConfigBlobStoreFormat.java | 73 ++++ .../RemoteTransferContainerTests.java | 12 +- .../GatewayMetaStatePersistedStateTests.java | 24 +- .../RemoteClusterStateServiceTests.java | 10 +- .../index/remote/RemoteIndexPathTests.java | 140 ++++++++ .../remote/RemoteIndexPathUploaderTests.java | 314 ++++++++++++++++++ 17 files changed, 1409 insertions(+), 128 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreUploadIndexPathIT.java create mode 100644 server/src/main/java/org/opensearch/gateway/remote/IndexMetadataUploadListener.java create mode 100644 server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java create mode 100644 server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java create mode 100644 server/src/main/java/org/opensearch/repositories/blobstore/BaseBlobStoreFormat.java create mode 100644 server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java create mode 100644 server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java create mode 100644 server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreUploadIndexPathIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreUploadIndexPathIT.java new file mode 100644 index 0000000000000..c39cec96aa476 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreUploadIndexPathIT.java @@ -0,0 +1,108 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore; + +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.util.FileSystemUtils; +import org.opensearch.index.remote.RemoteIndexPath; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.io.IOException; +import java.util.Locale; +import java.util.concurrent.ExecutionException; + +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteStoreUploadIndexPathIT extends RemoteStoreBaseIntegTestCase { + + private final String INDEX_NAME = "remote-store-test-idx-1"; + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true).build(); + } + + /** + * Checks that the remote index path file gets created for the intended remote store path type and does not get created + * wherever not required. + */ + public void testRemoteIndexPathFileCreation() throws ExecutionException, InterruptedException, IOException { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + // Case 1 - Hashed_prefix, we would need the remote index path file to be created. + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX) + ) + .get(); + + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); + validateRemoteIndexPathFile(true); + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get()); + FileSystemUtils.deleteSubDirectories(translogRepoPath); + FileSystemUtils.deleteSubDirectories(segmentRepoPath); + + // Case 2 - Hashed_infix, we would not have the remote index path file created here. + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_INFIX) + ) + .get(); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); + validateRemoteIndexPathFile(false); + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get()); + + // Case 3 - fixed, we would not have the remote index path file created here either. + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED)) + .get(); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); + validateRemoteIndexPathFile(false); + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).get()); + + } + + private void validateRemoteIndexPathFile(boolean exists) { + String indexUUID = client().admin() + .indices() + .prepareGetSettings(INDEX_NAME) + .get() + .getSetting(INDEX_NAME, IndexMetadata.SETTING_INDEX_UUID); + + assertEquals(exists, FileSystemUtils.exists(translogRepoPath.resolve(RemoteIndexPath.DIR))); + assertEquals( + exists, + FileSystemUtils.exists( + translogRepoPath.resolve(RemoteIndexPath.DIR) + .resolve(String.format(Locale.ROOT, RemoteIndexPath.FILE_NAME_FORMAT, indexUUID)) + ) + ); + assertEquals(exists, FileSystemUtils.exists(segmentRepoPath.resolve(RemoteIndexPath.DIR))); + assertEquals( + exists, + FileSystemUtils.exists( + segmentRepoPath.resolve(RemoteIndexPath.DIR) + .resolve(String.format(Locale.ROOT, RemoteIndexPath.FILE_NAME_FORMAT, indexUUID)) + ) + ); + } +} diff --git a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java index cd2ef22327ebb..cbd1852202d1c 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java +++ b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java @@ -52,7 +52,7 @@ public class RemoteTransferContainer implements Closeable { private final String remoteFileName; private final boolean failTransferIfFileExists; private final WritePriority writePriority; - private final long expectedChecksum; + private final Long expectedChecksum; private final OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier; private final boolean isRemoteDataIntegritySupported; private final AtomicBoolean readBlock = new AtomicBoolean(); @@ -79,7 +79,7 @@ public RemoteTransferContainer( boolean failTransferIfFileExists, WritePriority writePriority, OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier, - long expectedChecksum, + Long expectedChecksum, boolean isRemoteDataIntegritySupported ) { this( @@ -115,7 +115,7 @@ public RemoteTransferContainer( boolean failTransferIfFileExists, WritePriority writePriority, OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier, - long expectedChecksum, + Long expectedChecksum, boolean isRemoteDataIntegritySupported, Map metadata ) { @@ -230,7 +230,7 @@ private LocalStreamSupplier getMultipartStreamSupplier( } private boolean isRemoteDataIntegrityCheckPossible() { - return isRemoteDataIntegritySupported; + return isRemoteDataIntegritySupported && Objects.nonNull(expectedChecksum); } private void finalizeUpload(boolean uploadSuccessful) throws IOException { @@ -238,7 +238,7 @@ private void finalizeUpload(boolean uploadSuccessful) throws IOException { return; } - if (uploadSuccessful) { + if (uploadSuccessful && Objects.nonNull(expectedChecksum)) { long actualChecksum = getActualChecksum(); if (actualChecksum != expectedChecksum) { throw new CorruptIndexException( diff --git a/server/src/main/java/org/opensearch/gateway/remote/IndexMetadataUploadListener.java b/server/src/main/java/org/opensearch/gateway/remote/IndexMetadataUploadListener.java new file mode 100644 index 0000000000000..f9158c9260747 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/IndexMetadataUploadListener.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.core.action.ActionListener; +import org.opensearch.threadpool.ThreadPool; + +import java.util.List; +import java.util.Objects; +import java.util.concurrent.ExecutorService; + +/** + * Hook for running code that needs to be executed before the upload of index metadata. Here we have introduced a hook + * for index creation (also triggerred after enabling the remote cluster statement for the first time). The listener + * is intended to be run in parallel and async with the index metadata upload. + * + * @opensearch.internal + */ +public abstract class IndexMetadataUploadListener { + + private final ExecutorService executorService; + + public IndexMetadataUploadListener(ThreadPool threadPool, String threadPoolName) { + Objects.requireNonNull(threadPool); + Objects.requireNonNull(threadPoolName); + assert ThreadPool.THREAD_POOL_TYPES.containsKey(threadPoolName) && ThreadPool.Names.SAME.equals(threadPoolName) == false; + this.executorService = threadPool.executor(threadPoolName); + } + + /** + * Runs before the new index upload of index metadata (or first time upload). The caller is expected to trigger + * onSuccess or onFailure of the {@code ActionListener}. + * + * @param indexMetadataList list of index metadata of new indexes (or first time index metadata upload). + * @param actionListener listener to be invoked on success or failure. + */ + public final void onNewIndexUpload(List indexMetadataList, ActionListener actionListener) { + executorService.execute(() -> doOnNewIndexUpload(indexMetadataList, actionListener)); + } + + protected abstract void doOnNewIndexUpload(List indexMetadataList, ActionListener actionListener); +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index c892b475d71da..d2f927c827e5b 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -160,6 +160,7 @@ public class RemoteClusterStateService implements Closeable { private final Settings settings; private final LongSupplier relativeTimeNanosSupplier; private final ThreadPool threadpool; + private final List indexMetadataUploadListeners; private BlobStoreRepository blobStoreRepository; private BlobStoreTransferService blobStoreTransferService; private volatile TimeValue slowWriteLoggingThreshold; @@ -177,6 +178,7 @@ public class RemoteClusterStateService implements Closeable { // ToXContent Params with gateway mode. // We are using gateway context mode to persist all custom metadata. public static final ToXContent.Params FORMAT_PARAMS; + static { Map params = new HashMap<>(1); params.put(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_GATEWAY); @@ -189,7 +191,8 @@ public RemoteClusterStateService( Settings settings, ClusterSettings clusterSettings, LongSupplier relativeTimeNanosSupplier, - ThreadPool threadPool + ThreadPool threadPool, + List indexMetadataUploadListeners ) { assert isRemoteStoreClusterStateEnabled(settings) : "Remote cluster state is not enabled"; this.nodeId = nodeId; @@ -206,6 +209,7 @@ public RemoteClusterStateService( clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout); this.remoteStateStats = new RemotePersistenceStats(); + this.indexMetadataUploadListeners = indexMetadataUploadListeners; } private BlobStoreTransferService getBlobStoreTransferService() { @@ -233,10 +237,12 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri // Write globalMetadata String globalMetadataFile = writeGlobalMetadata(clusterState); + List toUpload = new ArrayList<>(clusterState.metadata().indices().values()); // any validations before/after upload ? final List allUploadedIndexMetadata = writeIndexMetadataParallel( clusterState, - new ArrayList<>(clusterState.metadata().indices().values()) + toUpload, + ClusterState.UNKNOWN_UUID.equals(previousClusterUUID) ? toUpload : Collections.emptyList() ); final ClusterMetadataManifest manifest = uploadManifest( clusterState, @@ -313,7 +319,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( .collect(Collectors.toMap(UploadedIndexMetadata::getIndexName, Function.identity())); List toUpload = new ArrayList<>(); - + List newIndexMetadataList = new ArrayList<>(); for (final IndexMetadata indexMetadata : clusterState.metadata().indices().values()) { final Long previousVersion = previousStateIndexMetadataVersionByName.get(indexMetadata.getIndex().getName()); if (previousVersion == null || indexMetadata.getVersion() != previousVersion) { @@ -329,9 +335,13 @@ public ClusterMetadataManifest writeIncrementalMetadata( numIndicesUnchanged++; } previousStateIndexMetadataVersionByName.remove(indexMetadata.getIndex().getName()); + // Adding the indexMetadata to newIndexMetadataList if there is no previous version present for the index. + if (previousVersion == null) { + newIndexMetadataList.add(indexMetadata); + } } - List uploadedIndexMetadataList = writeIndexMetadataParallel(clusterState, toUpload); + List uploadedIndexMetadataList = writeIndexMetadataParallel(clusterState, toUpload, newIndexMetadataList); uploadedIndexMetadataList.forEach( uploadedIndexMetadata -> allUploadedIndexMetadata.put(uploadedIndexMetadata.getIndexName(), uploadedIndexMetadata) ); @@ -436,13 +446,18 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException * Uploads provided IndexMetadata's to remote store in parallel. The call is blocking so the method waits for upload to finish and then return. * * @param clusterState current ClusterState - * @param toUpload list of IndexMetadata to upload + * @param toUpload list of IndexMetadata to upload * @return {@code List} list of IndexMetadata uploaded to remote */ - private List writeIndexMetadataParallel(ClusterState clusterState, List toUpload) - throws IOException { - List exceptionList = Collections.synchronizedList(new ArrayList<>(toUpload.size())); - final CountDownLatch latch = new CountDownLatch(toUpload.size()); + private List writeIndexMetadataParallel( + ClusterState clusterState, + List toUpload, + List newIndexMetadataList + ) throws IOException { + assert Objects.nonNull(indexMetadataUploadListeners) : "indexMetadataUploadListeners can not be null"; + int latchCount = toUpload.size() + indexMetadataUploadListeners.size(); + List exceptionList = Collections.synchronizedList(new ArrayList<>(latchCount)); + final CountDownLatch latch = new CountDownLatch(latchCount); List result = new ArrayList<>(toUpload.size()); LatchedActionListener latchedActionListener = new LatchedActionListener<>( @@ -467,6 +482,8 @@ private List writeIndexMetadataParallel(ClusterState clus writeIndexMetadataAsync(clusterState, indexMetadata, latchedActionListener); } + invokeIndexMetadataUploadListeners(newIndexMetadataList, latch, exceptionList); + try { if (latch.await(getIndexMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { RemoteStateTransferException ex = new RemoteStateTransferException( @@ -506,11 +523,63 @@ private List writeIndexMetadataParallel(ClusterState clus return result; } + /** + * Invokes the index metadata upload listener but does not wait for the execution to complete. + */ + private void invokeIndexMetadataUploadListeners( + List newIndexMetadataList, + CountDownLatch latch, + List exceptionList + ) { + for (IndexMetadataUploadListener listener : indexMetadataUploadListeners) { + String listenerName = listener.getClass().getSimpleName(); + listener.onNewIndexUpload( + newIndexMetadataList, + getIndexMetadataUploadActionListener(newIndexMetadataList, latch, exceptionList, listenerName) + ); + } + + } + + private ActionListener getIndexMetadataUploadActionListener( + List newIndexMetadataList, + CountDownLatch latch, + List exceptionList, + String listenerName + ) { + long startTime = System.nanoTime(); + return new LatchedActionListener<>( + ActionListener.wrap( + ignored -> logger.trace( + new ParameterizedMessage( + "{} : Invoked listener={} successfully tookTimeNs={}", + listenerName, + newIndexMetadataList, + (System.nanoTime() - startTime) + ) + ), + ex -> { + logger.error( + new ParameterizedMessage( + "{} : Exception during invocation of listener={} tookTimeNs={}", + listenerName, + newIndexMetadataList, + (System.nanoTime() - startTime) + ), + ex + ); + exceptionList.add(ex); + } + ), + latch + ); + } + /** * Allows async Upload of IndexMetadata to remote * - * @param clusterState current ClusterState - * @param indexMetadata {@link IndexMetadata} to upload + * @param clusterState current ClusterState + * @param indexMetadata {@link IndexMetadata} to upload * @param latchedActionListener listener to respond back on after upload finishes */ private void writeIndexMetadataAsync( @@ -659,16 +728,6 @@ private void writeMetadataManifest(String clusterName, String clusterUUID, Clust ); } - private String fetchPreviousClusterUUID(String clusterName, String clusterUUID) { - final Optional latestManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (!latestManifest.isPresent()) { - final String previousClusterUUID = getLastKnownUUIDFromRemote(clusterName); - assert !clusterUUID.equals(previousClusterUUID) : "Last cluster UUID is same current cluster UUID"; - return previousClusterUUID; - } - return latestManifest.get().getPreviousClusterUUID(); - } - private BlobContainer indexMetadataContainer(String clusterName, String clusterUUID, String indexUUID) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index/ftqsCnn9TgOX return blobStoreRepository.blobStore() @@ -737,7 +796,7 @@ static String getManifestFileName(long term, long version, boolean committed) { (committed ? "C" : "P"), // C for committed and P for published RemoteStoreUtils.invertLong(System.currentTimeMillis()), String.valueOf(MANIFEST_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last place to - // determine codec version. + // determine codec version. ); } @@ -750,7 +809,7 @@ static String indexMetadataFileName(IndexMetadata indexMetadata) { RemoteStoreUtils.invertLong(indexMetadata.getVersion()), RemoteStoreUtils.invertLong(System.currentTimeMillis()), String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last - // place to determine codec version. + // place to determine codec version. ); } @@ -772,8 +831,8 @@ private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { /** * Fetch latest index metadata from remote cluster state * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster * @param clusterMetadataManifest manifest file of cluster * @return {@code Map} latest IndexUUID to IndexMetadata map */ @@ -795,8 +854,8 @@ private Map getIndexMetadataMap( /** * Fetch index metadata from remote cluster state * - * @param clusterUUID uuid of cluster state to refer to in remote - * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster * @param uploadedIndexMetadata {@link UploadedIndexMetadata} contains details about remote location of index metadata * @return {@link IndexMetadata} */ @@ -825,7 +884,6 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U * @return {@link IndexMetadata} */ public ClusterState getLatestClusterState(String clusterName, String clusterUUID) { - start(); Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); if (clusterMetadataManifest.isEmpty()) { throw new IllegalStateException( @@ -989,6 +1047,7 @@ private List createClusterChain(final Map trimClusterUUIDs( @@ -1050,7 +1109,7 @@ private boolean isValidClusterUUID(ClusterMetadataManifest manifest) { * * @param clusterUUID uuid of cluster state to refer to in remote * @param clusterName name of the cluster - * @param limit max no of files to fetch + * @param limit max no of files to fetch * @return all manifest file names */ private List getManifestFileNames(String clusterName, String clusterUUID, int limit) throws IllegalStateException { @@ -1123,7 +1182,7 @@ private int getManifestCodecVersion(String fileName) { if (splitName.length == SPLITED_MANIFEST_FILE_LENGTH) { return Integer.parseInt(splitName[splitName.length - 1]); // Last value would be codec version. } else if (splitName.length < SPLITED_MANIFEST_FILE_LENGTH) { // Where codec is not part of file name, i.e. default codec version 0 - // is used. + // is used. return ClusterMetadataManifest.CODEC_V0; } else { throw new IllegalArgumentException("Manifest file name is corrupted"); @@ -1141,7 +1200,7 @@ public void writeMetadataFailed() { /** * Exception for Remote state transfer. */ - static class RemoteStateTransferException extends RuntimeException { + public static class RemoteStateTransferException extends RuntimeException { public RemoteStateTransferException(String errorDesc) { super(errorDesc); @@ -1155,7 +1214,7 @@ public RemoteStateTransferException(String errorDesc, Throwable cause) { /** * Purges all remote cluster state against provided cluster UUIDs * - * @param clusterName name of the cluster + * @param clusterName name of the cluster * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged */ void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { @@ -1188,8 +1247,8 @@ public void onFailure(Exception e) { /** * Deletes older than last {@code versionsToRetain} manifests. Also cleans up unreferenced IndexMetadata associated with older manifests * - * @param clusterName name of the cluster - * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @param clusterUUID uuid of cluster state to refer to in remote * @param manifestsToRetain no of latest manifest files to keep in remote */ // package private for testing @@ -1308,7 +1367,8 @@ private void deleteStalePaths(String clusterName, String clusterUUID, List> TRANSLOG_PATH = Map.of(TRANSLOG, List.of(DATA, METADATA)); + public static final Map> SEGMENT_PATH = Map.of(SEGMENTS, List.of(DataType.values())); + public static final Map> COMBINED_PATH; + + static { + Map> combinedPath = new HashMap<>(); + combinedPath.putAll(TRANSLOG_PATH); + combinedPath.putAll(SEGMENT_PATH); + COMBINED_PATH = Collections.unmodifiableMap(combinedPath); + } + private static final String DEFAULT_VERSION = "1"; + public static final String DIR = "remote-index-path"; + public static final String FILE_NAME_FORMAT = "remote_path_%s"; + static final String KEY_VERSION = "version"; + static final String KEY_INDEX_UUID = "index_uuid"; + static final String KEY_SHARD_COUNT = "shard_count"; + static final String KEY_PATH_CREATION_MAP = "path_creation_map"; + static final String KEY_PATHS = "paths"; + private final String indexUUID; + private final int shardCount; + private final Iterable basePath; + private final PathType pathType; + private final PathHashAlgorithm pathHashAlgorithm; + + /** + * This keeps the map of paths that would be present in the content of the index path file. For eg - It is possible + * that segment and translog repository can be different. For this use case, we have either segment or translog as the + * key, and list of data, metadata, and lock_files (only for segment) as the value. + */ + private final Map> pathCreationMap; + + public RemoteIndexPath( + String indexUUID, + int shardCount, + Iterable basePath, + PathType pathType, + PathHashAlgorithm pathHashAlgorithm, + Map> pathCreationMap + ) { + if (Objects.isNull(pathCreationMap) + || Objects.isNull(pathType) + || isCompatible(pathType, pathHashAlgorithm) == false + || shardCount < 1 + || Objects.isNull(basePath) + || pathCreationMap.isEmpty() + || pathCreationMap.keySet().stream().anyMatch(k -> pathCreationMap.get(k).isEmpty())) { + ParameterizedMessage parameterizedMessage = new ParameterizedMessage( + "Invalid input in RemoteIndexPath constructor indexUUID={} shardCount={} basePath={} pathType={}" + + " pathHashAlgorithm={} pathCreationMap={}", + indexUUID, + shardCount, + basePath, + pathType, + pathHashAlgorithm, + pathCreationMap + ); + throw new IllegalArgumentException(parameterizedMessage.getFormattedMessage()); + } + boolean validMap = pathCreationMap.keySet() + .stream() + .allMatch(k -> pathCreationMap.get(k).stream().allMatch(k::isSupportedDataType)); + if (validMap == false) { + throw new IllegalArgumentException( + new ParameterizedMessage("pathCreationMap={} is having illegal combination of category and type", pathCreationMap) + .getFormattedMessage() + ); + } + this.indexUUID = indexUUID; + this.shardCount = shardCount; + this.basePath = basePath; + this.pathType = pathType; + this.pathHashAlgorithm = pathHashAlgorithm; + this.pathCreationMap = pathCreationMap; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(KEY_VERSION, DEFAULT_VERSION); + builder.field(KEY_INDEX_UUID, indexUUID); + builder.field(KEY_SHARD_COUNT, shardCount); + builder.field(PathType.NAME, pathType.name()); + if (Objects.nonNull(pathHashAlgorithm)) { + builder.field(PathHashAlgorithm.NAME, pathHashAlgorithm.name()); + } + + Map> pathMap = new HashMap<>(); + for (Map.Entry> entry : pathCreationMap.entrySet()) { + pathMap.put(entry.getKey().getName(), entry.getValue().stream().map(DataType::getName).collect(Collectors.toList())); + } + builder.field(KEY_PATH_CREATION_MAP); + builder.map(pathMap); + builder.startArray(KEY_PATHS); + for (Map.Entry> entry : pathCreationMap.entrySet()) { + DataCategory dataCategory = entry.getKey(); + for (DataType type : entry.getValue()) { + for (int shardNo = 0; shardNo < shardCount; shardNo++) { + PathInput pathInput = PathInput.builder() + .basePath(new BlobPath().add(basePath)) + .indexUUID(indexUUID) + .shardId(Integer.toString(shardNo)) + .dataCategory(dataCategory) + .dataType(type) + .build(); + builder.value(pathType.path(pathInput, pathHashAlgorithm).buildAsString()); + } + } + } + builder.endArray(); + return builder; + } + + public static RemoteIndexPath fromXContent(XContentParser ignored) { + throw new UnsupportedOperationException("RemoteIndexPath.fromXContent() is not supported"); + } +} diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java new file mode 100644 index 0000000000000..1ac7e41014d23 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java @@ -0,0 +1,281 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.index.Index; +import org.opensearch.gateway.remote.IndexMetadataUploadListener; +import org.opensearch.gateway.remote.RemoteClusterStateService.RemoteStateTransferException; +import org.opensearch.node.Node; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.ConfigBlobStoreFormat; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING; +import static org.opensearch.index.remote.RemoteIndexPath.COMBINED_PATH; +import static org.opensearch.index.remote.RemoteIndexPath.SEGMENT_PATH; +import static org.opensearch.index.remote.RemoteIndexPath.TRANSLOG_PATH; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteDataAttributePresent; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; + +/** + * Uploads the remote store path for all possible combinations of {@link org.opensearch.index.remote.RemoteStoreEnums.DataCategory} + * and {@link org.opensearch.index.remote.RemoteStoreEnums.DataType} for each shard of an index. + * + * @opensearch.internal + */ +@ExperimentalApi +public class RemoteIndexPathUploader extends IndexMetadataUploadListener { + + public static final ConfigBlobStoreFormat REMOTE_INDEX_PATH_FORMAT = new ConfigBlobStoreFormat<>( + RemoteIndexPath.FILE_NAME_FORMAT + ); + + private static final String TIMEOUT_EXCEPTION_MSG = "Timed out waiting while uploading remote index path file for indexes=%s"; + private static final String UPLOAD_EXCEPTION_MSG = "Exception occurred while uploading remote index paths for indexes=%s"; + static final String TRANSLOG_REPO_NAME_KEY = Node.NODE_ATTRIBUTES.getKey() + + RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; + static final String SEGMENT_REPO_NAME_KEY = Node.NODE_ATTRIBUTES.getKey() + + RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; + + private static final Logger logger = LogManager.getLogger(RemoteIndexPathUploader.class); + + private final Settings settings; + private final boolean isRemoteDataAttributePresent; + private final boolean isTranslogSegmentRepoSame; + private final Supplier repositoriesService; + private volatile TimeValue indexMetadataUploadTimeout; + + private BlobStoreRepository translogRepository; + private BlobStoreRepository segmentRepository; + + public RemoteIndexPathUploader( + ThreadPool threadPool, + Settings settings, + Supplier repositoriesService, + ClusterSettings clusterSettings + ) { + super(threadPool, ThreadPool.Names.GENERIC); + this.settings = Objects.requireNonNull(settings); + this.repositoriesService = Objects.requireNonNull(repositoriesService); + isRemoteDataAttributePresent = isRemoteDataAttributePresent(settings); + // If the remote data attributes are not present, then there is no effect of translog and segment being same or different or null. + isTranslogSegmentRepoSame = isTranslogSegmentRepoSame(); + Objects.requireNonNull(clusterSettings); + indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); + clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); + } + + @Override + protected void doOnNewIndexUpload(List indexMetadataList, ActionListener actionListener) { + if (isRemoteDataAttributePresent == false) { + logger.trace("Skipping beforeNewIndexUpload as there are no remote indexes"); + actionListener.onResponse(null); + return; + } + + long startTime = System.nanoTime(); + boolean success = false; + List eligibleList = indexMetadataList.stream().filter(this::requiresPathUpload).collect(Collectors.toList()); + String indexNames = eligibleList.stream().map(IndexMetadata::getIndex).map(Index::toString).collect(Collectors.joining(",")); + int latchCount = eligibleList.size() * (isTranslogSegmentRepoSame ? 1 : 2); + CountDownLatch latch = new CountDownLatch(latchCount); + List exceptionList = Collections.synchronizedList(new ArrayList<>(latchCount)); + try { + for (IndexMetadata indexMetadata : eligibleList) { + writeIndexPathAsync(indexMetadata, latch, exceptionList); + } + + logger.trace(new ParameterizedMessage("Remote index path upload started for {}", indexNames)); + + try { + if (latch.await(indexMetadataUploadTimeout.millis(), TimeUnit.MILLISECONDS) == false) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, TIMEOUT_EXCEPTION_MSG, indexNames) + ); + exceptionList.forEach(ex::addSuppressed); + actionListener.onFailure(ex); + return; + } + } catch (InterruptedException exception) { + exceptionList.forEach(exception::addSuppressed); + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, TIMEOUT_EXCEPTION_MSG, indexNames), + exception + ); + actionListener.onFailure(ex); + return; + } + if (exceptionList.size() > 0) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, indexNames) + ); + exceptionList.forEach(ex::addSuppressed); + actionListener.onFailure(ex); + return; + } + success = true; + actionListener.onResponse(null); + } catch (Exception exception) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, indexNames), + exception + ); + exceptionList.forEach(ex::addSuppressed); + actionListener.onFailure(ex); + } finally { + long tookTimeNs = System.nanoTime() - startTime; + logger.trace(new ParameterizedMessage("executed beforeNewIndexUpload status={} tookTimeNs={}", success, tookTimeNs)); + } + + } + + private void writeIndexPathAsync(IndexMetadata idxMD, CountDownLatch latch, List exceptionList) { + if (isTranslogSegmentRepoSame) { + // If the repositories are same, then we need to upload a single file containing paths for both translog and segments. + writePathToRemoteStore(idxMD, translogRepository, latch, exceptionList, COMBINED_PATH); + } else { + // If the repositories are different, then we need to upload one file per segment and translog containing their individual + // paths. + writePathToRemoteStore(idxMD, translogRepository, latch, exceptionList, TRANSLOG_PATH); + writePathToRemoteStore(idxMD, segmentRepository, latch, exceptionList, SEGMENT_PATH); + } + } + + private void writePathToRemoteStore( + IndexMetadata idxMD, + BlobStoreRepository repository, + CountDownLatch latch, + List exceptionList, + Map> pathCreationMap + ) { + Map remoteCustomData = idxMD.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY); + RemoteStoreEnums.PathType pathType = RemoteStoreEnums.PathType.valueOf(remoteCustomData.get(RemoteStoreEnums.PathType.NAME)); + RemoteStoreEnums.PathHashAlgorithm hashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.valueOf( + remoteCustomData.get(RemoteStoreEnums.PathHashAlgorithm.NAME) + ); + String indexUUID = idxMD.getIndexUUID(); + int shardCount = idxMD.getNumberOfShards(); + BlobPath basePath = repository.basePath(); + BlobContainer blobContainer = repository.blobStore().blobContainer(basePath.add(RemoteIndexPath.DIR)); + ActionListener actionListener = getUploadPathLatchedActionListener(idxMD, latch, exceptionList, pathCreationMap); + try { + REMOTE_INDEX_PATH_FORMAT.writeAsyncWithUrgentPriority( + new RemoteIndexPath(indexUUID, shardCount, basePath, pathType, hashAlgorithm, pathCreationMap), + blobContainer, + indexUUID, + actionListener + ); + } catch (IOException ioException) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, UPLOAD_EXCEPTION_MSG, List.of(idxMD.getIndex().getName())) + ); + actionListener.onFailure(ioException); + } + } + + private Repository validateAndGetRepository(String repoSetting) { + final String repo = settings.get(repoSetting); + assert repo != null : "Remote " + repoSetting + " repository is not configured"; + final Repository repository = repositoriesService.get().repository(repo); + assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository"; + return repository; + } + + public void start() { + assert isRemoteStoreClusterStateEnabled(settings) == true : "Remote cluster state is not enabled"; + if (isRemoteDataAttributePresent == false) { + // If remote store data attributes are not present than we skip this. + return; + } + translogRepository = (BlobStoreRepository) validateAndGetRepository(TRANSLOG_REPO_NAME_KEY); + segmentRepository = (BlobStoreRepository) validateAndGetRepository(SEGMENT_REPO_NAME_KEY); + } + + private boolean isTranslogSegmentRepoSame() { + String translogRepoName = settings.get(TRANSLOG_REPO_NAME_KEY); + String segmentRepoName = settings.get(SEGMENT_REPO_NAME_KEY); + return Objects.equals(translogRepoName, segmentRepoName); + } + + private LatchedActionListener getUploadPathLatchedActionListener( + IndexMetadata indexMetadata, + CountDownLatch latch, + List exceptionList, + Map> pathCreationMap + ) { + return new LatchedActionListener<>( + ActionListener.wrap( + resp -> logger.trace( + new ParameterizedMessage("Index path uploaded for {} indexMetadata={}", pathCreationMap, indexMetadata) + ), + ex -> { + logger.error( + new ParameterizedMessage( + "Exception during Index path upload for {} indexMetadata={}", + pathCreationMap, + indexMetadata + ), + ex + ); + exceptionList.add(ex); + } + ), + latch + ); + } + + /** + * This method checks if the index metadata has attributes that calls for uploading the index path for remote store + * uploads. It checks if the remote store path type is {@code HASHED_PREFIX} and returns true if so. + */ + private boolean requiresPathUpload(IndexMetadata indexMetadata) { + // A cluster will have remote custom metadata only if the cluster is remote store enabled from data side. + Map remoteCustomData = indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY); + if (Objects.isNull(remoteCustomData) || remoteCustomData.isEmpty()) { + return false; + } + String pathTypeStr = remoteCustomData.get(RemoteStoreEnums.PathType.NAME); + if (Objects.isNull(pathTypeStr)) { + return false; + } + // We need to upload the path only if the path type for an index is hashed_prefix + return RemoteStoreEnums.PathType.HASHED_PREFIX == RemoteStoreEnums.PathType.parseString(pathTypeStr); + } + + private void setIndexMetadataUploadTimeout(TimeValue newIndexMetadataUploadTimeout) { + this.indexMetadataUploadTimeout = newIndexMetadataUploadTimeout; + } +} diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java index 775f8fe19e4ef..c58f6c3faac84 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.common.Nullable; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.index.remote.RemoteStoreEnums.DataCategory; @@ -25,6 +26,7 @@ * @opensearch.internal */ @PublicApi(since = "2.14.0") +@ExperimentalApi public class RemoteStorePathStrategy { private final PathType type; @@ -74,6 +76,7 @@ public BlobPath generatePath(PathInput pathInput) { * @opensearch.internal */ @PublicApi(since = "2.14.0") + @ExperimentalApi public static class PathInput { private final BlobPath basePath; private final String indexUUID; @@ -122,6 +125,7 @@ public static Builder builder() { * @opensearch.internal */ @PublicApi(since = "2.14.0") + @ExperimentalApi public static class Builder { private BlobPath basePath; private String indexUUID; diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java index a33f7522daaae..5cd69dfa679a5 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java @@ -9,6 +9,7 @@ package org.opensearch.index.remote; import org.opensearch.Version; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.indices.RemoteStoreSettings; @@ -20,6 +21,7 @@ * * @opensearch.internal */ +@ExperimentalApi public class RemoteStorePathStrategyResolver { private final RemoteStoreSettings remoteStoreSettings; diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index a33fd71e21896..47f128af438a6 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -146,6 +146,7 @@ import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; +import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.remote.filecache.FileCache; @@ -726,17 +727,26 @@ protected Node( threadPool::relativeTimeInMillis ); final RemoteClusterStateService remoteClusterStateService; + final RemoteIndexPathUploader remoteIndexPathUploader; if (isRemoteStoreClusterStateEnabled(settings)) { + remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceReference::get, + clusterService.getClusterSettings() + ); remoteClusterStateService = new RemoteClusterStateService( nodeEnvironment.nodeId(), repositoriesServiceReference::get, settings, clusterService.getClusterSettings(), threadPool::preciseRelativeTimeInNanos, - threadPool + threadPool, + List.of(remoteIndexPathUploader) ); } else { remoteClusterStateService = null; + remoteIndexPathUploader = null; } // collect engine factory providers from plugins @@ -1313,6 +1323,7 @@ protected Node( b.bind(SearchRequestSlowLog.class).toInstance(searchRequestSlowLog); b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); + b.bind(RemoteIndexPathUploader.class).toProvider(() -> remoteIndexPathUploader); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); @@ -1462,6 +1473,10 @@ public Node start() throws NodeValidationException { if (remoteClusterStateService != null) { remoteClusterStateService.start(); } + final RemoteIndexPathUploader remoteIndexPathUploader = injector.getInstance(RemoteIndexPathUploader.class); + if (remoteIndexPathUploader != null) { + remoteIndexPathUploader.start(); + } // Load (and maybe upgrade) the metadata stored on disk final GatewayMetaState gatewayMetaState = injector.getInstance(GatewayMetaState.class); gatewayMetaState.start( diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BaseBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/BaseBlobStoreFormat.java new file mode 100644 index 0000000000000..262a32fa1e74d --- /dev/null +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BaseBlobStoreFormat.java @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.blobstore; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.OutputStreamIndexOutput; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.lucene.store.IndexOutputOutputStream; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.compress.Compressor; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Locale; +import java.util.Objects; + +/** + * Provides common methods, variables that can be used by the implementors. + * + * @opensearch.internal + */ +public class BaseBlobStoreFormat { + + private static final int BUFFER_SIZE = 4096; + + private final String blobNameFormat; + + private final boolean skipHeaderFooter; + + /** + * @param blobNameFormat format of the blobname in {@link String#format} format + */ + public BaseBlobStoreFormat(String blobNameFormat, boolean skipHeaderFooter) { + this.blobNameFormat = blobNameFormat; + this.skipHeaderFooter = skipHeaderFooter; + } + + protected String blobName(String name) { + return String.format(Locale.ROOT, blobNameFormat, name); + } + + /** + * Writes blob with resolving the blob name using {@link #blobName} method. + *

+ * The blob will optionally by compressed. + * + * @param obj object to be serialized + * @param blobContainer blob container + * @param name blob name + * @param compressor whether to use compression + * @param params ToXContent params + * @param codec codec used + * @param version version used + */ + protected void write( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + final ToXContent.Params params, + XContentType xContentType, + String codec, + Integer version + ) throws IOException { + final String blobName = blobName(name); + final BytesReference bytes = serialize(obj, blobName, compressor, params, xContentType, codec, version); + blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); + } + + public BytesReference serialize( + final T obj, + final String blobName, + final Compressor compressor, + final ToXContent.Params params, + XContentType xContentType, + String codec, + Integer version + ) throws IOException { + assert skipHeaderFooter || (Objects.nonNull(codec) && Objects.nonNull(version)); + try (BytesStreamOutput outputStream = new BytesStreamOutput()) { + try ( + OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( + "ChecksumBlobStoreFormat.writeBlob(blob=\"" + blobName + "\")", + blobName, + outputStream, + BUFFER_SIZE + ) + ) { + if (skipHeaderFooter == false) { + CodecUtil.writeHeader(indexOutput, codec, version); + } + try (OutputStream indexOutputOutputStream = new IndexOutputOutputStream(indexOutput) { + @Override + public void close() { + // this is important since some of the XContentBuilders write bytes on close. + // in order to write the footer we need to prevent closing the actual index input. + } + }; + XContentBuilder builder = MediaTypeRegistry.contentBuilder( + xContentType, + compressor.threadLocalOutputStream(indexOutputOutputStream) + ) + ) { + builder.startObject(); + obj.toXContent(builder, params); + builder.endObject(); + } + if (skipHeaderFooter == false) { + CodecUtil.writeFooter(indexOutput); + } + } + return outputStream.bytes(); + } + } + + protected String getBlobNameFormat() { + return blobNameFormat; + } +} diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 3e6052a5ef820..e567e1b626c5a 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -38,7 +38,6 @@ import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.ByteBuffersIndexInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.OutputStreamIndexOutput; import org.apache.lucene.util.BytesRef; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.CheckedFunction; @@ -48,26 +47,21 @@ import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; import org.opensearch.common.io.Streams; -import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.lucene.store.ByteArrayIndexInput; -import org.opensearch.common.lucene.store.IndexOutputOutputStream; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.compress.Compressor; -import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.ToXContent; -import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.gateway.CorruptStateException; import org.opensearch.index.store.exception.ChecksumCombinationException; import org.opensearch.snapshots.SnapshotInfo; import java.io.IOException; -import java.io.OutputStream; import java.util.Arrays; import java.util.HashMap; import java.util.Locale; @@ -80,7 +74,7 @@ * * @opensearch.internal */ -public final class ChecksumBlobStoreFormat { +public final class ChecksumBlobStoreFormat extends BaseBlobStoreFormat { // Serialization parameters to specify correct context for metadata serialization public static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; @@ -98,12 +92,8 @@ public final class ChecksumBlobStoreFormat { // The format version public static final int VERSION = 1; - private static final int BUFFER_SIZE = 4096; - private final String codec; - private final String blobNameFormat; - private final CheckedFunction reader; /** @@ -112,8 +102,8 @@ public final class ChecksumBlobStoreFormat { * @param reader prototype object that can deserialize T from XContent */ public ChecksumBlobStoreFormat(String codec, String blobNameFormat, CheckedFunction reader) { + super(blobNameFormat, false); this.reader = reader; - this.blobNameFormat = blobNameFormat; this.codec = codec; } @@ -130,7 +120,7 @@ public T read(BlobContainer blobContainer, String name, NamedXContentRegistry na } public String blobName(String name) { - return String.format(Locale.ROOT, blobNameFormat, name); + return String.format(Locale.ROOT, getBlobNameFormat(), name); } public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistry, BytesReference bytes) throws IOException { @@ -170,30 +160,7 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr * @param compressor whether to use compression */ public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { - write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); - } - - /** - * Writes blob with resolving the blob name using {@link #blobName} method. - *

- * The blob will optionally by compressed. - * - * @param obj object to be serialized - * @param blobContainer blob container - * @param name blob name - * @param compressor whether to use compression - * @param params ToXContent params - */ - public void write( - final T obj, - final BlobContainer blobContainer, - final String name, - final Compressor compressor, - final ToXContent.Params params - ) throws IOException { - final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor, params); - blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); + write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS, XContentType.SMILE, codec, VERSION); } /** @@ -251,7 +218,7 @@ private void writeAsyncWithPriority( final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { - write(obj, blobContainer, name, compressor, params); + write(obj, blobContainer, name, compressor, params, XContentType.SMILE, codec, VERSION); listener.onResponse(null); return; } @@ -290,35 +257,6 @@ private void writeAsyncWithPriority( public BytesReference serialize(final T obj, final String blobName, final Compressor compressor, final ToXContent.Params params) throws IOException { - try (BytesStreamOutput outputStream = new BytesStreamOutput()) { - try ( - OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( - "ChecksumBlobStoreFormat.writeBlob(blob=\"" + blobName + "\")", - blobName, - outputStream, - BUFFER_SIZE - ) - ) { - CodecUtil.writeHeader(indexOutput, codec, VERSION); - try (OutputStream indexOutputOutputStream = new IndexOutputOutputStream(indexOutput) { - @Override - public void close() throws IOException { - // this is important since some of the XContentBuilders write bytes on close. - // in order to write the footer we need to prevent closing the actual index input. - } - }; - XContentBuilder builder = MediaTypeRegistry.contentBuilder( - XContentType.SMILE, - compressor.threadLocalOutputStream(indexOutputOutputStream) - ) - ) { - builder.startObject(); - obj.toXContent(builder, params); - builder.endObject(); - } - CodecUtil.writeFooter(indexOutput); - } - return outputStream.bytes(); - } + return serialize(obj, blobName, compressor, params, XContentType.SMILE, codec, VERSION); } } diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java new file mode 100644 index 0000000000000..18c718ca2110e --- /dev/null +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java @@ -0,0 +1,73 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories.blobstore; + +import org.apache.lucene.store.IndexInput; +import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; +import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.compress.NoneCompressor; +import org.opensearch.core.xcontent.ToXContent; + +import java.io.IOException; + +/** + * Format for writing short configurations to remote. Read interface does not exist as it not yet required. This format + * should be used for writing data from in-memory to remote store where there is no need for checksum and the client + * library for the remote store has inbuilt checksum capabilities while upload and download both. This format would + * serialise the data in Json format and store it on remote store as is. This does not support compression yet (this + * can be changed as required). In comparison to {@link ChecksumBlobStoreFormat}, this format does not add any additional + * metadata (like header and footer) to the content. Hence, this format does not depend on {@code CodecUtil} from + * Lucene library. + * + * @opensearch.internal + */ +public class ConfigBlobStoreFormat extends BaseBlobStoreFormat { + + /** + * @param blobNameFormat format of the blobname in {@link String#format} format + */ + public ConfigBlobStoreFormat(String blobNameFormat) { + super(blobNameFormat, true); + } + + public void writeAsyncWithUrgentPriority(T obj, BlobContainer blobContainer, String name, ActionListener listener) + throws IOException { + if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { + write(obj, blobContainer, name, new NoneCompressor(), ToXContent.EMPTY_PARAMS, XContentType.JSON, null, null); + listener.onResponse(null); + return; + } + String blobName = blobName(name); + BytesReference bytes = serialize(obj, blobName, new NoneCompressor(), ToXContent.EMPTY_PARAMS, XContentType.JSON, null, null); + String resourceDescription = "BlobStoreFormat.writeAsyncWithPriority(blob=\"" + blobName + "\")"; + try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { + try ( + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + blobName, + blobName, + bytes.length(), + true, + WritePriority.URGENT, + (size, position) -> new OffsetRangeIndexInputStream(input, size, position), + null, + false + ) + ) { + ((AsyncMultiStreamBlobContainer) blobContainer).asyncBlobUpload(remoteTransferContainer.createWriteContext(), listener); + } + } + } +} diff --git a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java index 074f659850c7b..44f6a0e11251c 100644 --- a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java +++ b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java @@ -67,7 +67,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); } }, - 0, + 0L, false ) ) { @@ -89,7 +89,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); } }, - 0, + 0L, false ) ) { @@ -155,7 +155,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); } }, - 0, + 0L, false ) ) { @@ -223,7 +223,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new OffsetRangeFileInputStream(testFile, size, position); } }, - 0, + 0L, isRemoteDataIntegritySupported ) ) { @@ -286,7 +286,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new RateLimitingOffsetRangeInputStream(offsetRangeIndexInputStream, rateLimiterSupplier, null); } }, - 0, + 0L, true ) ) { @@ -347,7 +347,7 @@ public OffsetRangeInputStream get(long size, long position) throws IOException { return new RateLimitingOffsetRangeInputStream(offsetRangeIndexInputStream, rateLimiterSupplier, null); } }, - 0, + 0L, true ) ) { diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 74bae7b5eb7cf..3ba98c44f8d3e 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -71,6 +71,7 @@ import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; +import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.node.Node; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.fs.FsRepository; @@ -473,20 +474,23 @@ public void testDataOnlyNodePersistence() throws Exception { ); Supplier remoteClusterStateServiceSupplier = () -> { if (isRemoteStoreClusterStateEnabled(settings)) { + Supplier repositoriesServiceSupplier = () -> new RepositoriesService( + settings, + clusterService, + transportService, + Collections.emptyMap(), + Collections.emptyMap(), + transportService.getThreadPool() + ); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); return new RemoteClusterStateService( nodeEnvironment.nodeId(), - () -> new RepositoriesService( - settings, - clusterService, - transportService, - Collections.emptyMap(), - Collections.emptyMap(), - transportService.getThreadPool() - ), + repositoriesServiceSupplier, settings, - new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + clusterSettings, () -> 0L, - threadPool + threadPool, + List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) ); } else { return null; diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 65477051cdb30..9f321cd62847c 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -38,6 +38,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; +import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.indices.IndicesModule; import org.opensearch.repositories.FilterRepository; @@ -154,7 +155,8 @@ public void setup() { settings, clusterSettings, () -> 0L, - threadPool + threadPool, + List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) ); } @@ -173,15 +175,17 @@ public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException public void testFailInitializationWhenRemoteStateDisabled() { final Settings settings = Settings.builder().build(); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); assertThrows( AssertionError.class, () -> new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, settings, - new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + clusterSettings, () -> 0L, - threadPool + threadPool, + List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)) ) ); } diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java new file mode 100644 index 0000000000000..8ddbd383756e7 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.mockito.Mockito; + +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.LOCK_FILES; + +public class RemoteIndexPathTests extends OpenSearchTestCase { + + /** + * This checks that the remote path contains paths only for segment and data/metadata/lock_files combination. + */ + public void testToXContentWithSegmentRepo() throws IOException { + RemoteIndexPath indexPath = new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + RemoteIndexPath.SEGMENT_PATH + ); + XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + String expected = + "{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\"9BmBinD5HYs/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/data/\",\"ExCNOD8_5ew/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/data/\",\"z8wtf0yr2l4/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/metadata/\",\"VheHVwFlExE/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/metadata/\",\"IgFKbsDeUpQ/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/lock_files/\",\"pA3gy_GZtns/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/lock_files/\"]}"; + assertEquals(expected, xContentBuilder.toString()); + } + + /** + * This checks that the remote path contains paths only for translog and data/metadata combination. + */ + public void testToXContentForTranslogRepoOnly() throws IOException { + RemoteIndexPath indexPath = new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + RemoteIndexPath.TRANSLOG_PATH + ); + XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + String expected = + "{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"]},\"paths\":[\"2EaVODaKBck/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/data/\",\"dTS2VqEOUNo/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/data/\",\"PVNKNGonmZw/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/metadata/\",\"NXmt0Y6NjA8/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/metadata/\"]}"; + assertEquals(expected, xContentBuilder.toString()); + } + + /** + * This checks that the remote path contains paths only for translog and data/metadata combination. + */ + public void testToXContentForBothRepos() throws IOException { + Map> pathCreationMap = new TreeMap<>(); + pathCreationMap.putAll(RemoteIndexPath.TRANSLOG_PATH); + pathCreationMap.putAll(RemoteIndexPath.SEGMENT_PATH); + RemoteIndexPath indexPath = new RemoteIndexPath( + "csbdqiu8a7sdnjdks", + 3, + new BlobPath().add("nxf9yv0").add("c3ejoi"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + pathCreationMap + ); + XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + String expected = + "{\"version\":\"1\",\"index_uuid\":\"csbdqiu8a7sdnjdks\",\"shard_count\":3,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"],\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\"Cjo0F6kNjYk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/data/\",\"kpayyhxct1I/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/data/\",\"p2RlgnHeIgc/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/data/\",\"gkPIurBtB1w/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/metadata/\",\"Y4YhlbxAB1c/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/metadata/\",\"HYc8fyVPouI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/metadata/\",\"igzyZCz1ysI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/lock_files/\",\"uEluEiYmptk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/lock_files/\",\"TfAD8f06_7A/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/lock_files/\",\"QqKEpasbEGs/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/data/\",\"sNyoimoe1Bw/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/data/\",\"d4YQtONfq50/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/data/\",\"zLr4UXjK8T4/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/metadata/\",\"_s8i7ZmlXGE/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/metadata/\",\"tvtD3-k5ISg/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/metadata/\"]}"; + assertEquals(expected, xContentBuilder.toString()); + } + + public void testRemoteIndexPathWithInvalidPathCreationMap() throws IOException { + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + new HashMap<>() + ) + ); + assertEquals( + "Invalid input in RemoteIndexPath constructor indexUUID=djjsid73he8yd7usduh shardCount=2 " + + "basePath=[djsd878ndjh][hcs87cj8] pathType=HASHED_PREFIX pathHashAlgorithm=FNV_1A_BASE64 pathCreationMap={}", + ex.getMessage() + ); + } + + public void testFromXContent() { + UnsupportedOperationException ex = assertThrows( + UnsupportedOperationException.class, + () -> RemoteIndexPath.fromXContent(Mockito.mock(XContentParser.class)) + ); + assertEquals("RemoteIndexPath.fromXContent() is not supported", ex.getMessage()); + } + + public void testInvalidPathCreationMap() { + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + Map.of(TRANSLOG, List.of(LOCK_FILES)) + ) + ); + assertEquals("pathCreationMap={TRANSLOG=[LOCK_FILES]} is having illegal combination of category and type", ex.getMessage()); + } +} diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java new file mode 100644 index 0000000000000..2e4dd15ccb581 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java @@ -0,0 +1,314 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.opensearch.Version; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.SetOnce; +import org.opensearch.common.UUIDs; +import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.compress.DeflateCompressor; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemoteClusterStateService.RemoteStateTransferException; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.node.Node; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import org.mockito.Mockito; + +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.FIXED; +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_INFIX; +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class RemoteIndexPathUploaderTests extends OpenSearchTestCase { + + private static final String CLUSTER_STATE_REPO_KEY = Node.NODE_ATTRIBUTES.getKey() + + RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; + + private static final String TRANSLOG_REPO_NAME = "translog-repo"; + private static final String SEGMENT_REPO_NAME = "segment-repo"; + + private final ThreadPool threadPool = new TestThreadPool(getTestName()); + private Settings settings; + private ClusterSettings clusterSettings; + private RepositoriesService repositoriesService; + private BlobStoreRepository repository; + private BlobStore blobStore; + private BlobContainer blobContainer; + private BlobPath basePath; + private List indexMetadataList; + private final AtomicLong successCount = new AtomicLong(); + private final AtomicLong failureCount = new AtomicLong(); + + @Before + public void setup() { + settings = Settings.builder() + .put(RemoteIndexPathUploader.TRANSLOG_REPO_NAME_KEY, TRANSLOG_REPO_NAME) + .put(RemoteIndexPathUploader.SEGMENT_REPO_NAME_KEY, TRANSLOG_REPO_NAME) + .put(CLUSTER_STATE_REPO_KEY, TRANSLOG_REPO_NAME) + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .build(); + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + basePath = BlobPath.cleanPath().add("test"); + repositoriesService = mock(RepositoriesService.class); + repository = mock(BlobStoreRepository.class); + when(repositoriesService.repository(anyString())).thenReturn(repository); + blobStore = mock(BlobStore.class); + when(repository.blobStore()).thenReturn(blobStore); + when(repositoriesService.repository(TRANSLOG_REPO_NAME)).thenReturn(repository); + when(repository.basePath()).thenReturn(basePath); + when(repository.getCompressor()).thenReturn(new DeflateCompressor()); + blobContainer = mock(BlobContainer.class); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); + + Map remoteCustomData = Map.of( + PathType.NAME, + HASHED_PREFIX.name(), + RemoteStoreEnums.PathHashAlgorithm.NAME, + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64.name() + ); + Settings idxSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) + .build(); + IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings(idxSettings) + .numberOfShards(1) + .numberOfReplicas(0) + .putCustom(IndexMetadata.REMOTE_STORE_CUSTOM_KEY, remoteCustomData) + .build(); + indexMetadataList = List.of(indexMetadata); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + terminate(threadPool); + } + + public void testInterceptWithNoRemoteDataAttributes() { + Settings settings = Settings.Builder.EMPTY_SETTINGS; + clusterSettings.applySettings(settings); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + List indexMetadataList = Mockito.mock(List.class); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + verify(indexMetadataList, times(0)).stream(); + } + + public void testInterceptWithEmptyIndexMetadataList() { + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnNewIndexUpload(Collections.emptyList(), actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + } + + public void testInterceptWithEmptyEligibleIndexMetadataList() { + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + + // Case 1 - Null remoteCustomData + List indexMetadataList = new ArrayList<>(); + IndexMetadata indexMetadata = mock(IndexMetadata.class); + indexMetadataList.add(indexMetadata); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + + // Case 2 - Empty remoteCustomData + when(indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)).thenReturn(new HashMap<>()); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(2, successCount.get()); + assertEquals(0, failureCount.get()); + + // Case 3 - RemoteStoreEnums.PathType.NAME not in remoteCustomData map + Map remoteCustomData = Map.of("test", "test"); + when(indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)).thenReturn(remoteCustomData); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(3, successCount.get()); + assertEquals(0, failureCount.get()); + + // Case 4 - RemoteStoreEnums.PathType.NAME is not HASHED_PREFIX + remoteCustomData = Map.of(PathType.NAME, randomFrom(FIXED, HASHED_INFIX).name()); + when(indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)).thenReturn(remoteCustomData); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(4, successCount.get()); + assertEquals(0, failureCount.get()); + } + + public void testInterceptWithSameRepo() throws IOException { + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + verify(blobContainer, times(1)).writeBlob(anyString(), any(InputStream.class), anyLong(), anyBoolean()); + } + + public void testInterceptWithDifferentRepo() throws IOException { + Settings settings = Settings.builder() + .put(this.settings) + .put(RemoteIndexPathUploader.SEGMENT_REPO_NAME_KEY, SEGMENT_REPO_NAME) + .build(); + when(repositoriesService.repository(SEGMENT_REPO_NAME)).thenReturn(repository); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + ActionListener actionListener = ActionListener.wrap( + res -> successCount.incrementAndGet(), + ex -> failureCount.incrementAndGet() + ); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(1, successCount.get()); + assertEquals(0, failureCount.get()); + verify(blobContainer, times(2)).writeBlob(anyString(), any(InputStream.class), anyLong(), anyBoolean()); + } + + public void testInterceptWithLatchAwaitTimeout() throws IOException { + blobContainer = mock(AsyncMultiStreamBlobContainer.class); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + + Settings settings = Settings.builder() + .put(this.settings) + .put(RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING.getKey(), TimeValue.ZERO) + .build(); + clusterSettings.applySettings(settings); + SetOnce exceptionSetOnce = new SetOnce<>(); + ActionListener actionListener = ActionListener.wrap(res -> successCount.incrementAndGet(), ex -> { + failureCount.incrementAndGet(); + exceptionSetOnce.set(ex); + }); + remoteIndexPathUploader.doOnNewIndexUpload(indexMetadataList, actionListener); + assertEquals(0, successCount.get()); + assertEquals(1, failureCount.get()); + assertTrue(exceptionSetOnce.get() instanceof RemoteStateTransferException); + assertTrue( + exceptionSetOnce.get().getMessage().contains("Timed out waiting while uploading remote index path file for indexes=[test/") + ); + verify(blobContainer, times(0)).writeBlob(anyString(), any(InputStream.class), anyLong(), anyBoolean()); + } + + public void testInterceptWithInterruptedExceptionDuringLatchAwait() throws Exception { + AsyncMultiStreamBlobContainer asyncMultiStreamBlobContainer = mock(AsyncMultiStreamBlobContainer.class); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(asyncMultiStreamBlobContainer); + RemoteIndexPathUploader remoteIndexPathUploader = new RemoteIndexPathUploader( + threadPool, + settings, + () -> repositoriesService, + clusterSettings + ); + remoteIndexPathUploader.start(); + Settings settings = Settings.builder() + .put(this.settings) + .put(RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING.getKey(), TimeValue.timeValueSeconds(1)) + .build(); + clusterSettings.applySettings(settings); + SetOnce exceptionSetOnce = new SetOnce<>(); + ActionListener actionListener = ActionListener.wrap(res -> successCount.incrementAndGet(), ex -> { + failureCount.incrementAndGet(); + exceptionSetOnce.set(ex); + }); + Thread thread = new Thread(() -> { + try { + remoteIndexPathUploader.onNewIndexUpload(indexMetadataList, actionListener); + } catch (Exception e) { + assertTrue(e instanceof InterruptedException); + assertEquals("sleep interrupted", e.getMessage()); + } + }); + thread.start(); + Thread.sleep(10); + thread.interrupt(); + + assertBusy(() -> { + assertEquals(0, successCount.get()); + assertEquals(1, failureCount.get()); + }); + } + +} From 7c6127a1c67021a63b4356fe78e4223f5662f9f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 13:52:23 -0700 Subject: [PATCH 18/32] Bump jakarta.enterprise:jakarta.enterprise.cdi-api from 4.0.1 to 4.1.0 in /qa/wildfly (#13328) * Bump jakarta.enterprise:jakarta.enterprise.cdi-api in /qa/wildfly Bumps [jakarta.enterprise:jakarta.enterprise.cdi-api](https://github.com/cdi-spec/cdi) from 4.0.1 to 4.1.0. - [Release notes](https://github.com/cdi-spec/cdi/releases) - [Commits](https://github.com/cdi-spec/cdi/compare/4.0.1...4.1.0) --- updated-dependencies: - dependency-name: jakarta.enterprise:jakarta.enterprise.cdi-api dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + qa/wildfly/build.gradle | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22c3e0d787f58..5fbb705059b07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.google.apis:google-api-services-compute` from v1-rev235-1.25.0 to v1-rev20240407-2.0.0 ([#13333](https://github.com/opensearch-project/OpenSearch/pull/13333)) - Bump `commons-cli:commons-cli` from 1.6.0 to 1.7.0 ([#13331](https://github.com/opensearch-project/OpenSearch/pull/13331)) - Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.11 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329)) +- Bump `jakarta.enterprise:jakarta.enterprise.cdi-api` from 4.0.1 to 4.1.0 ([#13328](https://github.com/opensearch-project/OpenSearch/pull/13328)) ### Changed - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) diff --git a/qa/wildfly/build.gradle b/qa/wildfly/build.gradle index 5d37be47e782e..abf033fff378a 100644 --- a/qa/wildfly/build.gradle +++ b/qa/wildfly/build.gradle @@ -40,7 +40,7 @@ apply plugin: 'opensearch.internal-distribution-download' testFixtures.useFixture() dependencies { - providedCompile('jakarta.enterprise:jakarta.enterprise.cdi-api:4.0.1') { + providedCompile('jakarta.enterprise:jakarta.enterprise.cdi-api:4.1.0') { exclude module: 'jakarta.annotation-api' } providedCompile 'jakarta.ws.rs:jakarta.ws.rs-api:3.1.0' From 9e62ccf545d253516737b43edb30d79dcb9aed03 Mon Sep 17 00:00:00 2001 From: Naomichi Yamakita Date: Wed, 24 Apr 2024 07:42:12 +0900 Subject: [PATCH 19/32] Fix flat_object parsing error when an object field name is too long (#13259) --------- Signed-off-by: naomichi-y --- CHANGELOG.md | 1 + .../xcontent/JsonToStringXContentParser.java | 9 ++-- .../mapper/FlatObjectFieldDataTests.java | 43 +++++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fbb705059b07..bb128bf5cfd77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix snapshot _status API to return correct status for partial snapshots ([#12812](https://github.com/opensearch-project/OpenSearch/pull/12812)) - Improve the error messages for _stats with closed indices ([#13012](https://github.com/opensearch-project/OpenSearch/pull/13012)) - Ignore BaseRestHandler unconsumed content check as it's always consumed. ([#13290](https://github.com/opensearch-project/OpenSearch/pull/13290)) +- Fix mapper_parsing_exception when using flat_object fields with names longer than 11 characters ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13259)) ### Security diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 9b2bd06a88e2e..998122d9e5c43 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -106,8 +106,9 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx // skip } else if (this.parser.currentToken() == Token.START_OBJECT) { parseToken(path, currentFieldName); - int dotIndex = path.lastIndexOf(DOT_SYMBOL); - if (dotIndex != -1) { + int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); + + if (dotIndex != -1 && path.length() > currentFieldName.length()) { path.setLength(path.length() - currentFieldName.length() - 1); } } else { @@ -117,8 +118,8 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx parseValue(parsedFields); this.valueList.add(parsedFields.toString()); this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); - int dotIndex = path.lastIndexOf(DOT_SYMBOL); - if (dotIndex != -1) { + int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); + if (dotIndex != -1 && path.length() > currentFieldName.length()) { path.setLength(path.length() - currentFieldName.length() - 1); } } diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java index e318ca5e953a3..03ce1eb7252de 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java @@ -54,6 +54,49 @@ public void testDocValue() throws Exception { assertEquals(1, valueReaders.size()); } + public void testLongFieldNameWithHashArray() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("test") + .startObject("properties") + .startObject("field") + .field("type", FIELD_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + final DocumentMapper mapper = mapperService.documentMapperParser().parse("test", new CompressedXContent(mapping)); + + XContentBuilder json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .startObject("detail") + .startArray("fooooooooooo") + .startObject() + .field("name", "baz") + .endObject() + .startObject() + .field("name", "baz") + .endObject() + .endArray() + .endObject() + .endObject() + .endObject(); + + ParsedDocument d = mapper.parse(new SourceToParse("test", "1", BytesReference.bytes(json), MediaTypeRegistry.JSON)); + writer.addDocument(d.rootDoc()); + writer.commit(); + + IndexFieldData fieldData = getForField("field"); + List readers = refreshReader(); + assertEquals(1, readers.size()); + + IndexFieldData valueFieldData = getForField("field._value"); + List valueReaders = refreshReader(); + assertEquals(1, valueReaders.size()); + } + @Override protected String getFieldDataType() { return FIELD_TYPE; From 76e4c86af61de81b5d54d0440e7210913a8c2d8f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:25:36 -0700 Subject: [PATCH 20/32] Bump com.google.api.grpc:proto-google-iam-v1 from 0.12.0 to 1.33.0 in /plugins/repository-gcs (#13332) * Bump com.google.api.grpc:proto-google-iam-v1 in /plugins/repository-gcs Bumps [com.google.api.grpc:proto-google-iam-v1](https://github.com/googleapis/sdk-platform-java) from 0.12.0 to 1.33.0. - [Release notes](https://github.com/googleapis/sdk-platform-java/releases) - [Changelog](https://github.com/googleapis/sdk-platform-java/blob/main/CHANGELOG.md) - [Commits](https://github.com/googleapis/sdk-platform-java/commits) --- updated-dependencies: - dependency-name: com.google.api.grpc:proto-google-iam-v1 dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + plugins/repository-gcs/build.gradle | 2 +- .../repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 | 1 - .../repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 plugins/repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 create mode 100644 plugins/repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index bb128bf5cfd77..db73c0e8caf21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `commons-cli:commons-cli` from 1.6.0 to 1.7.0 ([#13331](https://github.com/opensearch-project/OpenSearch/pull/13331)) - Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.11 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329)) - Bump `jakarta.enterprise:jakarta.enterprise.cdi-api` from 4.0.1 to 4.1.0 ([#13328](https://github.com/opensearch-project/OpenSearch/pull/13328)) +- Bump `com.google.api.grpc:proto-google-iam-v1` from 0.12.0 to 1.33.0 ([#13332](https://github.com/opensearch-project/OpenSearch/pull/13332)) ### Changed - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) diff --git a/plugins/repository-gcs/build.gradle b/plugins/repository-gcs/build.gradle index c4b1ab8d6875e..110df89f25de8 100644 --- a/plugins/repository-gcs/build.gradle +++ b/plugins/repository-gcs/build.gradle @@ -61,7 +61,7 @@ dependencies { api 'com.google.api-client:google-api-client:2.2.0' api 'com.google.api.grpc:proto-google-common-protos:2.37.1' - api 'com.google.api.grpc:proto-google-iam-v1:0.12.0' + api 'com.google.api.grpc:proto-google-iam-v1:1.33.0' api "com.google.auth:google-auth-library-credentials:${versions.google_auth}" api "com.google.auth:google-auth-library-oauth2-http:${versions.google_auth}" diff --git a/plugins/repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 b/plugins/repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 deleted file mode 100644 index 2bfae3456d499..0000000000000 --- a/plugins/repository-gcs/licenses/proto-google-iam-v1-0.12.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ea312c0250a5d0a7cdd1b20bc2c3259938b79855 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 b/plugins/repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 new file mode 100644 index 0000000000000..ba04056c54697 --- /dev/null +++ b/plugins/repository-gcs/licenses/proto-google-iam-v1-1.33.0.jar.sha1 @@ -0,0 +1 @@ +4766da92d1f36c8b612c1c142d5f3ace3774f098 \ No newline at end of file From db361ecead1d3c945371c84f4e0ea915c7abcc57 Mon Sep 17 00:00:00 2001 From: Kiran Prakash Date: Wed, 24 Apr 2024 14:50:39 -0700 Subject: [PATCH 21/32] [Tiered Caching] Bug fix for IndicesRequestCache StaleKey management (#13070) * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * revert Signed-off-by: Kiran Prakash * revert Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * code comments only Signed-off-by: Kiran Prakash * docs changes Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * revert catching AlreadyClosedException Signed-off-by: Kiran Prakash * assert Signed-off-by: Kiran Prakash * conflicts Signed-off-by: Kiran Prakash * Update IndicesRequestCacheTests.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * address comments Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * address conflicts Signed-off-by: Kiran Prakash * spotless apply Signed-off-by: Kiran Prakash * address comments Signed-off-by: Kiran Prakash * update code comments Signed-off-by: Kiran Prakash * address bug & add tests Signed-off-by: Kiran Prakash --------- Signed-off-by: Kiran Prakash --- CHANGELOG.md | 1 + .../indices/IndicesRequestCache.java | 92 +- .../indices/IndicesRequestCacheTests.java | 908 ++++++++---------- 3 files changed, 486 insertions(+), 515 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db73c0e8caf21..b80741f24e275 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix from and size parameter can be negative when searching ([#13047](https://github.com/opensearch-project/OpenSearch/pull/13047)) - Enabled mockTelemetryPlugin for IT and fixed OOM issues ([#13054](https://github.com/opensearch-project/OpenSearch/pull/13054)) - Fix implement mark() and markSupported() in class FilterStreamInput ([#13098](https://github.com/opensearch-project/OpenSearch/pull/13098)) +- Fix IndicesRequestCache Stale calculation ([#13070](https://github.com/opensearch-project/OpenSearch/pull/13070)] - Fix snapshot _status API to return correct status for partial snapshots ([#12812](https://github.com/opensearch-project/OpenSearch/pull/12812)) - Improve the error messages for _stats with closed indices ([#13012](https://github.com/opensearch-project/OpenSearch/pull/13012)) - Ignore BaseRestHandler unconsumed content check as it's always consumed. ([#13290](https://github.com/opensearch-project/OpenSearch/pull/13290)) diff --git a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java index eab772cda3213..039e14a031f3f 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java +++ b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java @@ -47,6 +47,7 @@ import org.opensearch.common.cache.LoadAwareCacheLoader; import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.RemovalNotification; +import org.opensearch.common.cache.RemovalReason; import org.opensearch.common.cache.policy.CachedQueryResult; import org.opensearch.common.cache.serializer.BytesReferenceSerializer; import org.opensearch.common.cache.service.CacheService; @@ -216,7 +217,6 @@ void clear(CacheEntity entity) { public void onRemoval(RemovalNotification, BytesReference> notification) { // In case this event happens for an old shard, we can safely ignore this as we don't keep track for old // shards as part of request cache. - // Pass a new removal notification containing Key rather than ICacheKey to the CacheEntity for backwards compatibility. Key key = notification.getKey().key; RemovalNotification newNotification = new RemovalNotification<>( @@ -224,11 +224,9 @@ public void onRemoval(RemovalNotification, BytesReference> notifi notification.getValue(), notification.getRemovalReason() ); - cacheEntityLookup.apply(key.shardId).ifPresent(entity -> entity.onRemoval(newNotification)); - cacheCleanupManager.updateCleanupKeyToCountMapOnCacheEviction( - new CleanupKey(cacheEntityLookup.apply(key.shardId).orElse(null), key.readerCacheKeyId) - ); + CleanupKey cleanupKey = new CleanupKey(cacheEntityLookup.apply(key.shardId).orElse(null), key.readerCacheKeyId); + cacheCleanupManager.updateStaleCountOnEntryRemoval(cleanupKey, newNotification); } private ICacheKey getICacheKey(Key key) { @@ -272,10 +270,11 @@ BytesReference getOrCompute( OpenSearchDirectoryReader.addReaderCloseListener(reader, cleanupKey); } } - cacheCleanupManager.updateCleanupKeyToCountMapOnCacheInsertion(cleanupKey); + cacheCleanupManager.updateStaleCountOnCacheInsert(cleanupKey); } else { cacheEntity.onHit(); } + return value; } @@ -508,7 +507,7 @@ void enqueueCleanupKey(CleanupKey cleanupKey) { * * @param cleanupKey the CleanupKey to be updated in the map */ - private void updateCleanupKeyToCountMapOnCacheInsertion(CleanupKey cleanupKey) { + private void updateStaleCountOnCacheInsert(CleanupKey cleanupKey) { if (stalenessThreshold == 0.0 || cleanupKey.entity == null) { return; } @@ -524,8 +523,29 @@ private void updateCleanupKeyToCountMapOnCacheInsertion(CleanupKey cleanupKey) { cleanupKeyToCountMap.computeIfAbsent(shardId, k -> new HashMap<>()).merge(cleanupKey.readerCacheKeyId, 1, Integer::sum); } - private void updateCleanupKeyToCountMapOnCacheEviction(CleanupKey cleanupKey) { - if (stalenessThreshold == 0.0 || cleanupKey.entity == null) { + /** + * Handles the eviction of a cache entry. + * + *

This method is called when an entry is evicted from the cache. + * We consider all removal notifications except with the reason Replaced + * {@link #incrementStaleKeysCount} would have removed the entries from the map and increment the {@link #staleKeysCount} + * Hence we decrement {@link #staleKeysCount} if we do not find the shardId or readerCacheKeyId in the map. + * Skip decrementing staleKeysCount if we find the shardId or readerCacheKeyId in the map since it would have not been accounted for in the staleKeysCount in + * + * @param cleanupKey the CleanupKey that has been evicted from the cache + * @param notification RemovalNotification of the cache entry evicted + */ + private void updateStaleCountOnEntryRemoval(CleanupKey cleanupKey, RemovalNotification notification) { + if (notification.getRemovalReason() == RemovalReason.REPLACED) { + // The reason of the notification is REPLACED when a cache entry's value is updated, since replacing an entry + // does not affect the staleness count, we skip such notifications. + return; + } + if (cleanupKey.entity == null) { + // entity will only be null when the shard is closed/deleted + // we would have accounted this in staleKeysCount when the closing/deletion of shard would have closed the associated + // readers + staleKeysCount.decrementAndGet(); return; } IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); @@ -535,15 +555,33 @@ private void updateCleanupKeyToCountMapOnCacheEviction(CleanupKey cleanupKey) { } ShardId shardId = indexShard.shardId(); - cleanupKeyToCountMap.computeIfPresent(shardId, (shard, keyCountMap) -> { - keyCountMap.computeIfPresent(cleanupKey.readerCacheKeyId, (key, currentValue) -> { - // decrement the stale key count + cleanupKeyToCountMap.compute(shardId, (key, readerCacheKeyMap) -> { + if (readerCacheKeyMap == null || !readerCacheKeyMap.containsKey(cleanupKey.readerCacheKeyId)) { + // If ShardId is not present or readerCacheKeyId is not present + // it should have already been accounted for and hence been removed from this map + // so decrement staleKeysCount staleKeysCount.decrementAndGet(); - int newValue = currentValue - 1; - // Remove the key if the new value is zero by returning null; otherwise, update with the new value. - return newValue == 0 ? null : newValue; - }); - return keyCountMap; + // Return the current map + return readerCacheKeyMap; + } else { + // If it is in the map, it is not stale yet. + // Proceed to adjust the count for the readerCacheKeyId in the map + // but do not decrement the staleKeysCount + Integer count = readerCacheKeyMap.get(cleanupKey.readerCacheKeyId); + // this should never be null + assert (count != null && count >= 0); + // Reduce the count by 1 + int newCount = count - 1; + if (newCount > 0) { + // Update the map with the new count + readerCacheKeyMap.put(cleanupKey.readerCacheKeyId, newCount); + } else { + // Remove the readerCacheKeyId entry if new count is zero + readerCacheKeyMap.remove(cleanupKey.readerCacheKeyId); + } + // If after modification, the readerCacheKeyMap is empty, we return null to remove the ShardId entry + return readerCacheKeyMap.isEmpty() ? null : readerCacheKeyMap; + } }); } @@ -551,7 +589,7 @@ private void updateCleanupKeyToCountMapOnCacheEviction(CleanupKey cleanupKey) { * Updates the count of stale keys in the cache. * This method is called when a CleanupKey is added to the keysToClean set. * - * It increments the staleKeysCount by the count of the CleanupKey in the cleanupKeyToCountMap. + *

It increments the staleKeysCount by the count of the CleanupKey in the cleanupKeyToCountMap. * If the CleanupKey's readerCacheKeyId is null or the CleanupKey's entity is not open, it increments the staleKeysCount * by the total count of keys associated with the CleanupKey's ShardId in the cleanupKeyToCountMap and removes the ShardId from the map. * @@ -569,7 +607,7 @@ private void incrementStaleKeysCount(CleanupKey cleanupKey) { ShardId shardId = indexShard.shardId(); // Using computeIfPresent to atomically operate on the countMap for a given shardId - cleanupKeyToCountMap.computeIfPresent(shardId, (key, countMap) -> { + cleanupKeyToCountMap.computeIfPresent(shardId, (currentShardId, countMap) -> { if (cleanupKey.readerCacheKeyId == null) { // Aggregate and add to staleKeysCount atomically if readerCacheKeyId is null int totalSum = countMap.values().stream().mapToInt(Integer::intValue).sum(); @@ -578,18 +616,19 @@ private void incrementStaleKeysCount(CleanupKey cleanupKey) { return null; } else { // Update staleKeysCount based on specific readerCacheKeyId, then remove it from the countMap - countMap.computeIfPresent(cleanupKey.readerCacheKeyId, (k, v) -> { - staleKeysCount.addAndGet(v); + countMap.computeIfPresent(cleanupKey.readerCacheKeyId, (readerCacheKey, count) -> { + staleKeysCount.addAndGet(count); // Return null to remove the key after updating staleKeysCount return null; }); - // Check if countMap is empty after removal to decide if we need to remove the shardId entry if (countMap.isEmpty()) { - return null; // Returning null removes the entry for shardId + // Returning null removes the entry for shardId + return null; } } - return countMap; // Return the modified countMap to keep the mapping + // Return the modified countMap to retain updates + return countMap; }); } @@ -715,6 +754,11 @@ public void close() { this.cacheCleaner.close(); } + // for testing + ConcurrentMap> getCleanupKeyToCountMap() { + return cleanupKeyToCountMap; + } + private final class IndicesRequestCacheCleaner implements Runnable, Releasable { private final IndicesRequestCacheCleanupManager cacheCleanupManager; diff --git a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java index e3dca1b7bfda2..051acfe9d085a 100644 --- a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java +++ b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java @@ -38,7 +38,6 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; @@ -51,7 +50,6 @@ import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.RemovalReason; import org.opensearch.common.cache.module.CacheModule; -import org.opensearch.common.cache.service.CacheService; import org.opensearch.common.cache.stats.ImmutableCacheStats; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; @@ -77,46 +75,58 @@ import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; import static org.opensearch.indices.IndicesRequestCache.INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class IndicesRequestCacheTests extends OpenSearchSingleNodeTestCase { + private ThreadPool threadPool; + private IndexWriter writer; + private Directory dir; + private IndicesRequestCache cache; + private IndexShard indexShard; + private ThreadPool getThreadPool() { return new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "default tracer tests").build()); } - public void testBasicOperationsCache() throws Exception { - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache( - Settings.EMPTY, - (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + @Before + public void setup() throws IOException { + dir = newDirectory(); + writer = new IndexWriter(dir, newIndexWriterConfig()); + indexShard = createIndex("test").getShard(0); + } + @After + public void cleanup() throws IOException { + IOUtils.close(writer, dir, cache); + terminate(threadPool); + } + + public void testBasicOperationsCache() throws Exception { + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); ShardRequestCache requestCacheStats = indexShard.requestCache(); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -128,7 +138,7 @@ public void testBasicOperationsCache() throws Exception { // cache hit entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = indexShard.requestCache(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -154,34 +164,21 @@ public void testBasicOperationsCache() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(reader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader); assertEquals(0, cache.numRegisteredCloseListeners()); } public void testBasicOperationsCacheWithFeatureFlag() throws Exception { - IndexShard indexShard = createIndex("test").getShard(0); - CacheService cacheService = new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache( - Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.PLUGGABLE_CACHE, "true").build(), - (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), - cacheService, - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.PLUGGABLE_CACHE, "true").build(); + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); ShardRequestCache requestCacheStats = indexShard.requestCache(); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -193,7 +190,7 @@ public void testBasicOperationsCacheWithFeatureFlag() throws Exception { // cache hit entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = indexShard.requestCache(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -219,47 +216,28 @@ public void testBasicOperationsCacheWithFeatureFlag() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(reader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader); assertEquals(0, cache.numRegisteredCloseListeners()); } public void testCacheDifferentReaders() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + if (randomBoolean()) { writer.flush(); IOUtils.close(writer); writer = new IndexWriter(dir, newIndexWriterConfig()); } writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); ShardRequestCache requestCacheStats = entity.stats(); assertEquals("foo", value.streamInput().readString()); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -274,7 +252,7 @@ public void testCacheDifferentReaders() throws Exception { // cache the second IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(secondReader, 0); - value = cache.getOrCompute(entity, loader, secondReader, termBytes); + value = cache.getOrCompute(entity, loader, secondReader, getTermBytes()); requestCacheStats = entity.stats(); assertEquals("bar", value.streamInput().readString()); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -287,7 +265,7 @@ public void testCacheDifferentReaders() throws Exception { secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(secondReader, 0); - value = cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + value = cache.getOrCompute(secondEntity, loader, secondReader, getTermBytes()); requestCacheStats = entity.stats(); assertEquals("bar", value.streamInput().readString()); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -298,7 +276,7 @@ public void testCacheDifferentReaders() throws Exception { entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = entity.stats(); assertEquals(2, requestCacheStats.stats().getHitCount()); @@ -331,8 +309,7 @@ public void testCacheDifferentReaders() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); assertEquals(0, cache.numRegisteredCloseListeners()); } @@ -359,55 +336,20 @@ public void testCacheCleanupThresholdSettingValidator_Invalid_Percentage() { assertThrows(IllegalArgumentException.class, () -> { IndicesRequestCache.validateStalenessSetting("500%"); }); } + // when staleness threshold is zero, stale keys should be cleaned up every time cache cleaner is invoked. public void testCacheCleanupBasedOnZeroThreshold() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); + threadPool = getThreadPool(); Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0%").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup @@ -419,209 +361,148 @@ public void testCacheCleanupBasedOnZeroThreshold() throws Exception { cache.cacheCleanupManager.cleanCache(); // cleanup should remove the stale-key assertEquals(1, cache.count()); - - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } - public void testCacheCleanupBasedOnStaleThreshold_StalenessEqualToThreshold() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.5").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + // when staleness count is higher than stale threshold, stale keys should be cleaned up. + public void testCacheCleanupBasedOnStaleThreshold_StalenessHigherThanThreshold() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.49").build(); + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + assertEquals(2, cache.count()); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + // no stale keys so far + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // Close the reader, to be enqueued for cleanup + reader.close(); + // 1 out of 2 keys ie 50% are now stale. + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); + // cache count should not be affected + assertEquals(2, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + // clean cache with 49% staleness threshold + cache.cacheCleanupManager.cleanCache(); + // cleanup should have taken effect with 49% threshold + assertEquals(1, cache.count()); + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + + IOUtils.close(secondReader); + } + + // when staleness count equal to stale threshold, stale keys should be cleaned up. + public void testCacheCleanupBasedOnStaleThreshold_StalenessEqualToThreshold() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.5").build(); + cache = getIndicesRequestCache(settings); + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); + + // Get 2 entries into the cache + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); + + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup - // 1 out of 2 keys ie 50% are now stale. reader.close(); + // 1 out of 2 keys ie 50% are now stale. + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); // cache count should not be affected assertEquals(2, cache.count()); // clean cache with 50% staleness threshold cache.cacheCleanupManager.cleanCache(); // cleanup should have taken effect + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); assertEquals(1, cache.count()); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } + // when a cache entry that is Stale is evicted for any reason, we have to deduct the count from our staleness count public void testStaleCount_OnRemovalNotificationOfStaleKey_DecrementsStaleCount() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); + threadPool = getThreadPool(); Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - - // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + ShardId shardId = indexShard.shardId(); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + // Get 2 entries into the cache from 2 different readers + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); - // Close the reader, to be enqueued for cleanup + // assert no stale keys are accounted so far + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // Close the reader, this should create a stale key reader.close(); - AtomicInteger staleKeysCount = cache.cacheCleanupManager.getStaleKeysCount(); // 1 out of 2 keys ie 50% are now stale. - assertEquals(1, staleKeysCount.get()); + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); // cache count should not be affected assertEquals(2, cache.count()); - OpenSearchDirectoryReader.DelegatingCacheHelper delegatingCacheHelper = - (OpenSearchDirectoryReader.DelegatingCacheHelper) secondReader.getReaderCacheHelper(); - String readerCacheKeyId = delegatingCacheHelper.getDelegatingCacheKey().getId(); - IndicesRequestCache.Key key = new IndicesRequestCache.Key( - ((IndexShard) secondEntity.getCacheIdentity()).shardId(), - termBytes, - readerCacheKeyId - ); + IndicesRequestCache.Key key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(reader)); + // test the mapping + ConcurrentMap> cleanupKeyToCountMap = cache.cacheCleanupManager.getCleanupKeyToCountMap(); + // shard id should exist + assertTrue(cleanupKeyToCountMap.containsKey(shardId)); + // reader CacheKeyId should NOT exist + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // secondReader CacheKeyId should exist + assertTrue(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(secondReader))); cache.onRemoval( new RemovalNotification, BytesReference>( new ICacheKey<>(key), - termBytes, + getTermBytes(), RemovalReason.EVICTED ) ); - staleKeysCount = cache.cacheCleanupManager.getStaleKeysCount(); + + // test the mapping, it should stay the same + // shard id should exist + assertTrue(cleanupKeyToCountMap.containsKey(shardId)); + // reader CacheKeyId should NOT exist + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // secondReader CacheKeyId should exist + assertTrue(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(secondReader))); // eviction of previous stale key from the cache should decrement staleKeysCount in iRC - assertEquals(0, staleKeysCount.get()); + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } - public void testStaleCount_OnRemovalNotificationOfStaleKey_DoesNotDecrementsStaleCount() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); + // when a cache entry that is NOT Stale is evicted for any reason, staleness count should NOT be deducted + public void testStaleCount_OnRemovalNotificationOfNonStaleKey_DoesNotDecrementsStaleCount() throws Exception { + threadPool = getThreadPool(); Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + ShardId shardId = indexShard.shardId(); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup @@ -632,102 +513,251 @@ public void testStaleCount_OnRemovalNotificationOfStaleKey_DoesNotDecrementsStal // cache count should not be affected assertEquals(2, cache.count()); - OpenSearchDirectoryReader.DelegatingCacheHelper delegatingCacheHelper = (OpenSearchDirectoryReader.DelegatingCacheHelper) reader - .getReaderCacheHelper(); - String readerCacheKeyId = delegatingCacheHelper.getDelegatingCacheKey().getId(); - IndicesRequestCache.Key key = new IndicesRequestCache.Key( - ((IndexShard) secondEntity.getCacheIdentity()).shardId(), - termBytes, - readerCacheKeyId - ); + // evict entry from second reader (this reader is not closed) + IndicesRequestCache.Key key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(secondReader)); + + // test the mapping + ConcurrentMap> cleanupKeyToCountMap = cache.cacheCleanupManager.getCleanupKeyToCountMap(); + // shard id should exist + assertTrue(cleanupKeyToCountMap.containsKey(shardId)); + // reader CacheKeyId should NOT exist + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // secondReader CacheKeyId should exist + assertTrue(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(secondReader))); cache.onRemoval( new RemovalNotification, BytesReference>( new ICacheKey<>(key), - termBytes, + getTermBytes(), RemovalReason.EVICTED ) ); + + // test the mapping, shardId entry should be cleaned up + // shard id should NOT exist + assertFalse(cleanupKeyToCountMap.containsKey(shardId)); + staleKeysCount = cache.cacheCleanupManager.getStaleKeysCount(); // eviction of NON-stale key from the cache should NOT decrement staleKeysCount in iRC assertEquals(1, staleKeysCount.get()); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } - public void testCacheCleanupBasedOnStaleThreshold_StalenessGreaterThanThreshold() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.49").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), settings).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) + // when a cache entry that is NOT Stale is evicted WITHOUT its reader closing, we should NOT deduct it from staleness count + public void testStaleCount_WithoutReaderClosing_DecrementsStaleCount() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); + cache = getIndicesRequestCache(settings); + + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); + + // Get 2 entries into the cache from 2 different readers + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); + + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + assertEquals(2, cache.count()); + + // no keys are stale + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // create notification for removal of non-stale entry + IndicesRequestCache.Key key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(reader)); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); + // stale keys count should stay zero + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + + IOUtils.close(reader, secondReader); + } + + // test staleness count based on removal notifications + public void testStaleCount_OnRemovalNotifications() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); + cache = getIndicesRequestCache(settings); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + + // Get 5 entries into the cache + int totalKeys = 5; + IndicesService.IndexShardCacheEntity entity = null; + TermQueryBuilder termQuery = null; + BytesReference termBytes = null; + for (int i = 1; i <= totalKeys; i++) { + termQuery = new TermQueryBuilder("id", "" + i); + termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + entity = new IndicesService.IndexShardCacheEntity(indexShard); + Loader loader = new Loader(reader, 0); + cache.getOrCompute(entity, loader, reader, termBytes); + assertEquals(i, cache.count()); } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + // no keys are stale yet + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // closing the reader should make all keys stale + reader.close(); + assertEquals(totalKeys, cache.cacheCleanupManager.getStaleKeysCount().get()); - // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + String readerCacheKeyId = getReaderCacheKeyId(reader); + IndicesRequestCache.Key key = new IndicesRequestCache.Key( + ((IndexShard) entity.getCacheIdentity()).shardId(), + termBytes, + readerCacheKeyId + ); - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + int staleCount = cache.cacheCleanupManager.getStaleKeysCount().get(); + // Notification for Replaced should not deduct the staleCount + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.REPLACED + ) + ); + // stale keys count should stay the same + assertEquals(staleCount, cache.cacheCleanupManager.getStaleKeysCount().get()); + + // Notification for all but Replaced should deduct the staleCount + RemovalReason[] reasons = { RemovalReason.INVALIDATED, RemovalReason.EVICTED, RemovalReason.EXPLICIT, RemovalReason.CAPACITY }; + for (RemovalReason reason : reasons) { + cache.onRemoval( + new RemovalNotification, BytesReference>(new ICacheKey<>(key), getTermBytes(), reason) + ); + assertEquals(--staleCount, cache.cacheCleanupManager.getStaleKeysCount().get()); + } + } - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + // when staleness count less than the stale threshold, stale keys should NOT be cleaned up. + public void testCacheCleanupBasedOnStaleThreshold_StalenessLesserThanThreshold() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "51%").build(); + cache = getIndicesRequestCache(settings); - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = getReader(writer, indexShard.shardId()); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); + + // Get 2 entries into the cache + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + assertEquals(1, cache.count()); + + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals(2, cache.count()); // Close the reader, to be enqueued for cleanup - // 1 out of 2 keys ie 50% are now stale. reader.close(); + // 1 out of 2 keys ie 50% are now stale. + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); // cache count should not be affected assertEquals(2, cache.count()); - // clean cache with 49% staleness threshold + // clean cache with 51% staleness threshold cache.cacheCleanupManager.cleanCache(); - // cleanup should have taken effect with 49% threshold + // cleanup should have been ignored + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); + assertEquals(2, cache.count()); + + IOUtils.close(secondReader); + } + + // test the cleanupKeyToCountMap are set appropriately when both readers are closed + public void testCleanupKeyToCountMapAreSetAppropriately() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.51").build(); + cache = getIndicesRequestCache(settings); + + writer.addDocument(newDoc(0, "foo")); + ShardId shardId = indexShard.shardId(); + DirectoryReader reader = getReader(writer, shardId); + DirectoryReader secondReader = getReader(writer, shardId); + + // Get 2 entries into the cache from 2 different readers + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); assertEquals(1, cache.count()); + // test the mappings + ConcurrentMap> cleanupKeyToCountMap = cache.cacheCleanupManager.getCleanupKeyToCountMap(); + assertEquals(1, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(reader))); - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + // test the mapping + assertEquals(2, cache.count()); + assertEquals(1, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + // create another entry for the second reader + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes("id", "1")); + // test the mapping + assertEquals(3, cache.count()); + assertEquals(2, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + + // Close the reader, to create stale entries + reader.close(); + // cache count should not be affected + assertEquals(3, cache.count()); + // test the mapping, first reader's entry should be removed from the mapping and accounted for in the staleKeysCount + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + assertEquals(1, cache.cacheCleanupManager.getStaleKeysCount().get()); + // second reader's mapping should not be affected + assertEquals(2, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + // send removal notification for first reader + IndicesRequestCache.Key key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(reader)); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) + ); + // test the mapping, it should stay the same + assertFalse(cleanupKeyToCountMap.get(shardId).containsKey(getReaderCacheKeyId(reader))); + // staleKeysCount should be decremented + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // second reader's mapping should not be affected + assertEquals(2, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + + // Without closing the secondReader send removal notification of one of its key + key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(secondReader)); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) + ); + // staleKeysCount should be the same as before + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // secondReader's readerCacheKeyId count should be decremented by 1 + assertEquals(1, (int) cleanupKeyToCountMap.get(shardId).get(getReaderCacheKeyId(secondReader))); + // Without closing the secondReader send removal notification of its last key + key = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), getReaderCacheKeyId(secondReader)); + cache.onRemoval( + new RemovalNotification, BytesReference>( + new ICacheKey<>(key), + getTermBytes(), + RemovalReason.EVICTED + ) + ); + // staleKeysCount should be the same as before + assertEquals(0, cache.cacheCleanupManager.getStaleKeysCount().get()); + // since all the readers of this shard is closed, the cleanupKeyToCountMap should have no entries + assertEquals(0, cleanupKeyToCountMap.size()); + + IOUtils.close(secondReader); } - public void testCacheCleanupBasedOnStaleThreshold_StalenessLesserThanThreshold() throws Exception { + private DirectoryReader getReader(IndexWriter writer, ShardId shardId) throws IOException { + return OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), shardId); + } + + private IndicesRequestCache getIndicesRequestCache(Settings settings) { IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "51%").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { + return new IndicesRequestCache(settings, (shardId -> { IndexService indexService = null; try { indexService = indicesService.indexServiceSafe(shardId.getIndex()); @@ -740,52 +770,30 @@ public void testCacheCleanupBasedOnStaleThreshold_StalenessLesserThanThreshold() threadPool, ClusterServiceUtils.createClusterService(threadPool) ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - - writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - if (randomBoolean()) { - writer.flush(); - IOUtils.close(writer); - writer = new IndexWriter(dir, newIndexWriterConfig()); - } - writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - - // Get 2 entries into the cache - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); - - entity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(reader, 0); - cache.getOrCompute(entity, loader, reader, termBytes); + } - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(entity, loader, secondReader, termBytes); + private Loader getLoader(DirectoryReader reader) { + return new Loader(reader, 0); + } - secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - loader = new Loader(secondReader, 0); - cache.getOrCompute(secondEntity, loader, secondReader, termBytes); - assertEquals(2, cache.count()); + private IndicesService.IndexShardCacheEntity getEntity(IndexShard indexShard) { + return new IndicesService.IndexShardCacheEntity(indexShard); + } - // Close the reader, to be enqueued for cleanup - // 1 out of 2 keys ie 50% are now stale. - reader.close(); - // cache count should not be affected - assertEquals(2, cache.count()); + private BytesReference getTermBytes() throws IOException { + TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); + return XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + } - // clean cache with 51% staleness threshold - cache.cacheCleanupManager.cleanCache(); - // cleanup should have been ignored - assertEquals(2, cache.count()); + private BytesReference getTermBytes(String fieldName, String value) throws IOException { + TermQueryBuilder termQuery = new TermQueryBuilder(fieldName, value); + return XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + } - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + private String getReaderCacheKeyId(DirectoryReader reader) { + OpenSearchDirectoryReader.DelegatingCacheHelper delegatingCacheHelper = (OpenSearchDirectoryReader.DelegatingCacheHelper) reader + .getReaderCacheHelper(); + return delegatingCacheHelper.getDelegatingCacheKey().getId(); } public void testClosingIndexWipesStats() throws Exception { @@ -795,6 +803,8 @@ public void testClosingIndexWipesStats() throws Exception { Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards).build(); String indexToKeepName = "test"; String indexToCloseName = "test2"; + // delete all indices if already + assertAcked(client().admin().indices().prepareDelete("_all").get()); IndexService indexToKeep = createIndex(indexToKeepName, indexSettings); IndexService indexToClose = createIndex(indexToCloseName, indexSettings); for (int i = 0; i < numShards; i++) { @@ -802,9 +812,9 @@ public void testClosingIndexWipesStats() throws Exception { assertNotNull(indexToKeep.getShard(i)); assertNotNull(indexToClose.getShard(i)); } - ThreadPool threadPool = getThreadPool(); + threadPool = getThreadPool(); Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "0.001%").build(); - IndicesRequestCache cache = new IndicesRequestCache(settings, (shardId -> { + cache = new IndicesRequestCache(settings, (shardId -> { IndexService indexService = null; try { indexService = indicesService.indexServiceSafe(shardId.getIndex()); @@ -821,8 +831,6 @@ public void testClosingIndexWipesStats() throws Exception { threadPool, ClusterServiceUtils.createClusterService(threadPool) ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); writer.addDocument(newDoc(0, "foo")); TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); @@ -895,47 +903,27 @@ public void testClosingIndexWipesStats() throws Exception { for (DirectoryReader reader : readersToKeep) { IOUtils.close(reader); } - IOUtils.close(secondReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(secondReader); } public void testEviction() throws Exception { final ByteSizeValue size; { - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache( - Settings.EMPTY, - (shardId -> Optional.of(new IndicesService.IndexShardCacheEntity(indexShard))), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - + DirectoryReader reader = getReader(writer, indexShard.shardId()); writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader secondLoader = new Loader(secondReader, 0); - BytesReference value1 = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value1 = cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); assertEquals("foo", value1.streamInput().readString()); - BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, termBytes); + BytesReference value2 = cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals("bar", value2.streamInput().readString()); size = new ByteSizeValue(cache.getSizeInBytes()); IOUtils.close(reader, secondReader, writer, dir, cache); - terminate(threadPool); } - IndexShard indexShard = createIndex("test1").getShard(0); - ThreadPool threadPool = getThreadPool(); + indexShard = createIndex("test1").getShard(0); IndicesRequestCache cache = new IndicesRequestCache( // Add 5 instead of 1; the key size now depends on the length of dimension names and values so there's more variation Settings.builder().put(IndicesRequestCache.INDICES_CACHE_QUERY_SIZE.getKey(), size.getBytes() + 5 + "b").build(), @@ -944,83 +932,52 @@ public void testEviction() throws Exception { threadPool, ClusterServiceUtils.createClusterService(threadPool) ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + dir = newDirectory(); + writer = new IndexWriter(dir, newIndexWriterConfig()); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader loader = new Loader(reader, 0); - + DirectoryReader reader = getReader(writer, indexShard.shardId()); writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader secondLoader = new Loader(secondReader, 0); - + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); writer.updateDocument(new Term("id", "0"), newDoc(0, "baz")); DirectoryReader thirdReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - IndicesService.IndexShardCacheEntity thirddEntity = new IndicesService.IndexShardCacheEntity(indexShard); - Loader thirdLoader = new Loader(thirdReader, 0); - BytesReference value1 = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value1 = cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); assertEquals("foo", value1.streamInput().readString()); - BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, termBytes); + BytesReference value2 = cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); assertEquals("bar", value2.streamInput().readString()); logger.info("Memory size: {}", indexShard.requestCache().stats().getMemorySize()); - BytesReference value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, termBytes); + BytesReference value3 = cache.getOrCompute(getEntity(indexShard), getLoader(thirdReader), thirdReader, getTermBytes()); assertEquals("baz", value3.streamInput().readString()); assertEquals(2, cache.count()); assertEquals(1, indexShard.requestCache().stats().getEvictions()); - IOUtils.close(reader, secondReader, thirdReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader, secondReader, thirdReader); } public void testClearAllEntityIdentity() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); writer.updateDocument(new Term("id", "0"), newDoc(0, "bar")); - DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader secondReader = getReader(writer, indexShard.shardId()); IndicesService.IndexShardCacheEntity secondEntity = new IndicesService.IndexShardCacheEntity(indexShard); Loader secondLoader = new Loader(secondReader, 0); writer.updateDocument(new Term("id", "0"), newDoc(0, "baz")); - DirectoryReader thirdReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader thirdReader = getReader(writer, indexShard.shardId()); + ; IndicesService.IndexShardCacheEntity thirddEntity = new IndicesService.IndexShardCacheEntity(createIndex("test1").getShard(0)); Loader thirdLoader = new Loader(thirdReader, 0); - BytesReference value1 = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value1 = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value1.streamInput().readString()); - BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, termBytes); + BytesReference value2 = cache.getOrCompute(secondEntity, secondLoader, secondReader, getTermBytes()); assertEquals("bar", value2.streamInput().readString()); logger.info("Memory size: {}", indexShard.requestCache().stats().getMemorySize()); - BytesReference value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, termBytes); + BytesReference value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, getTermBytes()); assertEquals("baz", value3.streamInput().readString()); assertEquals(3, cache.count()); RequestCacheStats requestCacheStats = entity.stats().stats(); @@ -1031,14 +988,13 @@ public void testClearAllEntityIdentity() throws Exception { cache.cacheCleanupManager.cleanCache(); assertEquals(1, cache.count()); // third has not been validated since it's a different identity - value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, termBytes); + value3 = cache.getOrCompute(thirddEntity, thirdLoader, thirdReader, getTermBytes()); requestCacheStats = entity.stats().stats(); requestCacheStats.add(thirddEntity.stats().stats()); assertEquals(hitCount + 1, requestCacheStats.getHitCount()); assertEquals("baz", value3.streamInput().readString()); - IOUtils.close(reader, secondReader, thirdReader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader, secondReader, thirdReader); } public Iterable newDoc(int id, String value) { @@ -1050,7 +1006,7 @@ public Iterable newDoc(int id, String value) { private static class Loader implements CheckedSupplier { - private final DirectoryReader reader; + final DirectoryReader reader; private final int id; public boolean loadedFromCache = true; @@ -1074,38 +1030,18 @@ public BytesReference get() { throw new RuntimeException(e); } } - } public void testInvalidate() throws Exception { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - IndexShard indexShard = createIndex("test").getShard(0); - ThreadPool threadPool = getThreadPool(); - IndicesRequestCache cache = new IndicesRequestCache(Settings.EMPTY, (shardId -> { - IndexService indexService = null; - try { - indexService = indicesService.indexServiceSafe(shardId.getIndex()); - } catch (IndexNotFoundException ex) { - return Optional.empty(); - } - return Optional.of(new IndicesService.IndexShardCacheEntity(indexService.getShard(shardId.id()))); - }), - new CacheModule(new ArrayList<>(), Settings.EMPTY).getCacheService(), - threadPool, - ClusterServiceUtils.createClusterService(threadPool) - ); - Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); - + threadPool = getThreadPool(); + IndicesRequestCache cache = getIndicesRequestCache(Settings.EMPTY); writer.addDocument(newDoc(0, "foo")); - DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); + DirectoryReader reader = getReader(writer, indexShard.shardId()); // initial cache IndicesService.IndexShardCacheEntity entity = new IndicesService.IndexShardCacheEntity(indexShard); Loader loader = new Loader(reader, 0); - BytesReference value = cache.getOrCompute(entity, loader, reader, termBytes); + BytesReference value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); ShardRequestCache requestCacheStats = entity.stats(); assertEquals(0, requestCacheStats.stats().getHitCount()); @@ -1117,7 +1053,7 @@ public void testInvalidate() throws Exception { // cache hit entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - value = cache.getOrCompute(entity, loader, reader, termBytes); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = entity.stats(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -1131,8 +1067,8 @@ public void testInvalidate() throws Exception { // load again after invalidate entity = new IndicesService.IndexShardCacheEntity(indexShard); loader = new Loader(reader, 0); - cache.invalidate(entity, reader, termBytes); - value = cache.getOrCompute(entity, loader, reader, termBytes); + cache.invalidate(entity, reader, getTermBytes()); + value = cache.getOrCompute(entity, loader, reader, getTermBytes()); assertEquals("foo", value.streamInput().readString()); requestCacheStats = entity.stats(); assertEquals(1, requestCacheStats.stats().getHitCount()); @@ -1157,16 +1093,11 @@ public void testInvalidate() throws Exception { assertEquals(0, cache.count()); assertEquals(0, requestCacheStats.stats().getMemorySize().bytesAsInt()); - IOUtils.close(reader, writer, dir, cache); - terminate(threadPool); + IOUtils.close(reader); assertEquals(0, cache.numRegisteredCloseListeners()); } public void testEqualsKey() throws IOException { - IndicesService indicesService = getInstanceFromNode(IndicesService.class); - Directory dir = newDirectory(); - IndexWriterConfig config = newIndexWriterConfig(); - IndexWriter writer = new IndexWriter(dir, config); ShardId shardId = new ShardId("foo", "bar", 1); ShardId shardId1 = new ShardId("foo1", "bar1", 2); IndexReader reader1 = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), shardId); @@ -1193,13 +1124,9 @@ public void testEqualsKey() throws IOException { } public void testSerializationDeserializationOfCacheKey() throws Exception { - TermQueryBuilder termQuery = new TermQueryBuilder("id", "0"); - BytesReference termBytes = XContentHelper.toXContent(termQuery, MediaTypeRegistry.JSON, false); - IndexService indexService = createIndex("test"); - IndexShard indexShard = indexService.getShard(0); IndicesService.IndexShardCacheEntity shardCacheEntity = new IndicesService.IndexShardCacheEntity(indexShard); String readerCacheKeyId = UUID.randomUUID().toString(); - IndicesRequestCache.Key key1 = new IndicesRequestCache.Key(indexShard.shardId(), termBytes, readerCacheKeyId); + IndicesRequestCache.Key key1 = new IndicesRequestCache.Key(indexShard.shardId(), getTermBytes(), readerCacheKeyId); BytesReference bytesReference = null; try (BytesStreamOutput out = new BytesStreamOutput()) { key1.writeTo(out); @@ -1211,8 +1138,7 @@ public void testSerializationDeserializationOfCacheKey() throws Exception { assertEquals(readerCacheKeyId, key2.readerCacheKeyId); assertEquals(((IndexShard) shardCacheEntity.getCacheIdentity()).shardId(), key2.shardId); - assertEquals(termBytes, key2.value); - + assertEquals(getTermBytes(), key2.value); } private class TestBytesReference extends AbstractBytesReference { From c055a3d6b7721b983b245c22f3bba335a547b2a3 Mon Sep 17 00:00:00 2001 From: Pranshu Shukla <55992439+Pranshu-S@users.noreply.github.com> Date: Thu, 25 Apr 2024 06:18:53 +0530 Subject: [PATCH 22/32] Fix Flaky Test PersistentTasksExecutorFullRestartIT.testFullClusterRestart (#13350) Signed-off-by: Pranshu Shukla --- .../persistent/PersistentTasksExecutorFullRestartIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java b/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java index 708388b3328f0..151a207d2c191 100644 --- a/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/persistent/PersistentTasksExecutorFullRestartIT.java @@ -43,6 +43,7 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; @@ -129,7 +130,7 @@ public void testFullClusterRestart() throws Exception { .custom(PersistentTasksCustomMetadata.TYPE)).tasks(), empty() ); - }); + }, 20, TimeUnit.SECONDS); } } From 9e9ab6b3c406be2dcc701934ff03c0e56911819a Mon Sep 17 00:00:00 2001 From: Shubh Sahu Date: Thu, 25 Apr 2024 07:19:52 +0530 Subject: [PATCH 23/32] [Remote Store] Add capability of doing flush as determined by the translog (#12992) Signed-off-by: Shubh Sahu --- CHANGELOG.md | 1 + .../opensearch/remotestore/RemoteStoreIT.java | 43 +++++++++++++++++++ .../common/settings/ClusterSettings.java | 3 +- .../opensearch/index/shard/IndexShard.java | 3 +- .../translog/InternalTranslogManager.java | 6 +++ .../index/translog/RemoteFsTranslog.java | 11 +++++ .../opensearch/index/translog/Translog.java | 9 ++++ .../transfer/TranslogTransferManager.java | 4 ++ .../indices/RemoteStoreSettings.java | 23 ++++++++++ ...RemoteStoreSettingsDynamicUpdateTests.java | 20 +++++++++ 10 files changed, 121 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b80741f24e275..f2fbf895dafcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add an individual setting of rate limiter for segment replication ([#12959](https://github.com/opensearch-project/OpenSearch/pull/12959)) - [Streaming Indexing] Ensure support of the new transport by security plugin ([#13174](https://github.com/opensearch-project/OpenSearch/pull/13174)) - Add cluster setting to dynamically configure the buckets for filter rewrite optimization. ([#13179](https://github.com/opensearch-project/OpenSearch/pull/13179)) +- [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992)) ### Dependencies - Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896)) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 78441f74f6b4f..ca0ae3ca9a700 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -30,6 +30,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardClosedException; +import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.Translog.Durability; import org.opensearch.indices.IndicesService; import org.opensearch.indices.RemoteStoreSettings; @@ -63,6 +64,7 @@ import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; +import static org.opensearch.index.shard.IndexShardTestCase.getTranslog; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.test.OpenSearchTestCase.getShardLevelBlobPath; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -859,4 +861,45 @@ public void testLocalOnlyTranslogCleanupOnNodeRestart() throws Exception { refresh(INDEX_NAME); assertHitCount(client(dataNode).prepareSearch(INDEX_NAME).setSize(0).get(), searchableDocs + 15); } + + public void testFlushOnTooManyRemoteTranslogFiles() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + String datanode = internalCluster().startDataOnlyNodes(1).get(0); + createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 10000L, -1)); + ensureGreen(INDEX_NAME); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "100") + ); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + IndexShard indexShard = getIndexShard(datanode, INDEX_NAME); + Path translogLocation = getTranslog(indexShard).location(); + assertFalse(indexShard.shouldPeriodicallyFlush()); + + try (Stream files = Files.list(translogLocation)) { + long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count(); + assertEquals(totalFiles, 1L); + } + + // indexing 100 documents (100 bulk requests), no flush will be triggered yet + for (int i = 0; i < 100; i++) { + indexBulk(INDEX_NAME, 1); + } + + try (Stream files = Files.list(translogLocation)) { + long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count(); + assertEquals(totalFiles, 101L); + } + // Will flush and trim the translog readers + indexBulk(INDEX_NAME, 1); + + assertBusy(() -> { + try (Stream files = Files.list(translogLocation)) { + long totalFiles = files.filter(f -> f.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX)).count(); + assertEquals(totalFiles, 1L); + } + }, 30, TimeUnit.SECONDS); + } } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index dab0f6bcf1c85..c70f22be518f2 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -732,7 +732,8 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_TRANSFER_TIMEOUT_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING, - RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING + RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, + RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS ) ) ); diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 72a67096fcd9e..cbf10c9e4e552 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -2917,7 +2917,7 @@ public void restoreFromRepository(Repository repository, ActionListener * * @return {@code true} if the engine should be flushed */ - boolean shouldPeriodicallyFlush() { + public boolean shouldPeriodicallyFlush() { final Engine engine = getEngineOrNull(); if (engine != null) { try { @@ -4493,6 +4493,7 @@ public Durability getTranslogDurability() { /** * Schedules a flush or translog generation roll if needed but will not schedule more than one concurrently. The operation will be * executed asynchronously on the flush thread pool. + * Can also schedule a flush if decided by translog manager */ public void afterWriteOperation() { if (shouldPeriodicallyFlush() || shouldRollTranslogGeneration()) { diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index a22c538286a88..e2210217672ef 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -437,6 +437,12 @@ public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { + /* + * This can trigger flush depending upon translog's implementation + */ + if (translog.shouldFlush()) { + return true; + } // This is the minimum seqNo that is referred in translog and considered for calculating translog size long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1); final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration; diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index da905b9605dfd..8c01791af2c9e 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -675,4 +675,15 @@ public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommi int availablePermits() { return syncPermit.availablePermits(); } + + /** + * Checks whether or not the shard should be flushed based on translog files. + * This checks if number of translog files breaches the threshold count determined by + * {@code cluster.remote_store.translog.max_readers} setting + * @return {@code true} if the shard should be flushed + */ + @Override + protected boolean shouldFlush() { + return readers.size() >= translogTransferManager.getMaxRemoteTranslogReadersSettings(); + } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index c653605f8fa10..842e9c77d2350 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -2082,4 +2082,13 @@ public static String createEmptyTranslog( public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { return minUnrefCheckpointInLastCommit; } + + /** + * Checks whether or not the shard should be flushed based on translog files. + * each translog type can have it's own decider + * @return {@code true} if the shard should be flushed + */ + protected boolean shouldFlush() { + return false; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 1087244623b87..47638f44fd6fc 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -585,4 +585,8 @@ public void onFailure(Exception e) { throw e; } } + + public int getMaxRemoteTranslogReadersSettings() { + return this.remoteStoreSettings.getMaxRemoteTranslogReaders(); + } } diff --git a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java index e0a9f7a9e05c1..0bd4c7aedfc03 100644 --- a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java +++ b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java @@ -94,11 +94,23 @@ public class RemoteStoreSettings { Property.Dynamic ); + /** + * Controls the maximum referenced remote translog files. If breached the shard will be flushed. + */ + public static final Setting CLUSTER_REMOTE_MAX_TRANSLOG_READERS = Setting.intSetting( + "cluster.remote_store.translog.max_readers", + 1000, + 100, + Property.Dynamic, + Property.NodeScope + ); + private volatile TimeValue clusterRemoteTranslogBufferInterval; private volatile int minRemoteSegmentMetadataFiles; private volatile TimeValue clusterRemoteTranslogTransferTimeout; private volatile RemoteStoreEnums.PathType pathType; private volatile RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm; + private volatile int maxRemoteTranslogReaders; public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) { clusterRemoteTranslogBufferInterval = CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(settings); @@ -124,6 +136,9 @@ public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) { pathHashAlgorithm = clusterSettings.get(CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING); clusterSettings.addSettingsUpdateConsumer(CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING, this::setPathHashAlgorithm); + + maxRemoteTranslogReaders = CLUSTER_REMOTE_MAX_TRANSLOG_READERS.get(settings); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_REMOTE_MAX_TRANSLOG_READERS, this::setMaxRemoteTranslogReaders); } public TimeValue getClusterRemoteTranslogBufferInterval() { @@ -167,4 +182,12 @@ private void setPathType(RemoteStoreEnums.PathType pathType) { private void setPathHashAlgorithm(RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm) { this.pathHashAlgorithm = pathHashAlgorithm; } + + public int getMaxRemoteTranslogReaders() { + return maxRemoteTranslogReaders; + } + + private void setMaxRemoteTranslogReaders(int maxRemoteTranslogReaders) { + this.maxRemoteTranslogReaders = maxRemoteTranslogReaders; + } } diff --git a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java index 8a77d97f88d67..f89fd3df6e340 100644 --- a/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java +++ b/server/src/test/java/org/opensearch/indices/RemoteStoreSettingsDynamicUpdateTests.java @@ -96,4 +96,24 @@ public void testClusterRemoteTranslogTransferTimeout() { ); assertEquals(TimeValue.timeValueSeconds(40), remoteStoreSettings.getClusterRemoteTranslogTransferTimeout()); } + + public void testMaxRemoteReferencedTranslogFiles() { + // Test default value + assertEquals(1000, remoteStoreSettings.getMaxRemoteTranslogReaders()); + + // Test override with valid value + clusterSettings.applySettings( + Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "500").build() + ); + assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders()); + + // Test override with value less than minimum + assertThrows( + IllegalArgumentException.class, + () -> clusterSettings.applySettings( + Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_MAX_TRANSLOG_READERS.getKey(), "99").build() + ) + ); + assertEquals(500, remoteStoreSettings.getMaxRemoteTranslogReaders()); + } } From a8008e2f37ec7dca061fcb7b5f84c944dd4f1108 Mon Sep 17 00:00:00 2001 From: Bhumika Saini Date: Thu, 25 Apr 2024 12:27:29 +0530 Subject: [PATCH 24/32] Integrate local recovery with remote store seeding during migration (#12922) Signed-off-by: Bhumika Saini --- .../RemotePrimaryLocalRecoveryIT.java | 179 ++++++++++++++++++ .../org/opensearch/index/IndexService.java | 3 +- .../index/seqno/ReplicationTracker.java | 11 +- .../opensearch/index/shard/IndexShard.java | 5 +- .../opensearch/index/shard/StoreRecovery.java | 8 + .../index/translog/RemoteFsTranslog.java | 19 +- .../index/translog/TranslogConfig.java | 21 +- .../translog/TruncateTranslogAction.java | 3 +- .../index/engine/InternalEngineTests.java | 8 +- .../index/shard/RefreshListenersTests.java | 3 +- .../InternalTranslogManagerTests.java | 14 +- .../index/translog/LocalTranslogTests.java | 8 +- .../index/translog/RemoteFsTranslogTests.java | 20 +- .../translog/TranslogManagerTestCase.java | 9 +- .../index/engine/EngineTestCase.java | 21 +- 15 files changed, 285 insertions(+), 47 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java new file mode 100644 index 0000000000000..024fc68602a19 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java @@ -0,0 +1,179 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotemigration; + +import org.opensearch.action.admin.indices.stats.ShardStats; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.util.FileSystemUtils; +import org.opensearch.index.remote.RemoteSegmentStats; +import org.opensearch.index.translog.RemoteTranslogStats; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.SEGMENTS; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; +import static org.opensearch.index.store.RemoteSegmentStoreDirectory.SEGMENT_NAME_UUID_SEPARATOR; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemotePrimaryLocalRecoveryIT extends MigrationBaseTestCase { + String indexName = "idx1"; + int numOfNodes = randomIntBetween(6, 9); + + /** + * Tests local recovery sanity in the happy path flow + */ + public void testLocalRecoveryRollingRestart() throws Exception { + triggerRollingRestartForRemoteMigration(0); + internalCluster().stopAllNodes(); + } + + /** + * Tests local recovery sanity during remote migration with a node restart in between + */ + public void testLocalRecoveryRollingRestartAndNodeFailure() throws Exception { + triggerRollingRestartForRemoteMigration(0); + + DiscoveryNodes discoveryNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + DiscoveryNode nodeToRestart = (DiscoveryNode) discoveryNodes.getDataNodes().values().toArray()[randomIntBetween(0, numOfNodes - 4)]; + internalCluster().restartNode(nodeToRestart.getName()); + + Map shardStatsMap = internalCluster().client().admin().indices().prepareStats(indexName).get().asMap(); + for (Map.Entry entry : shardStatsMap.entrySet()) { + ShardRouting shardRouting = entry.getKey(); + ShardStats shardStats = entry.getValue(); + if (nodeToRestart.equals(shardRouting.currentNodeId())) { + RemoteSegmentStats remoteSegmentStats = shardStats.getStats().getSegments().getRemoteSegmentStats(); + assertTrue(remoteSegmentStats.getTotalUploadTime() > 0); + assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0); + } + + assertBusy(() -> { + String shardPath = getShardLevelBlobPath( + client(), + indexName, + new BlobPath(), + String.valueOf(shardRouting.getId()), + SEGMENTS, + DATA + ).buildAsString(); + Path segmentDataRepoPath = segmentRepoPath.resolve(shardPath); + List segmentsNFilesInRepo = Arrays.stream(FileSystemUtils.files(segmentDataRepoPath)) + .filter(path -> path.getFileName().toString().contains("segments_")) + .map(path -> path.getFileName().toString()) + .collect(Collectors.toList()); + Set expectedUniqueSegmentsNFiles = segmentsNFilesInRepo.stream() + .map(fileName -> fileName.split(SEGMENT_NAME_UUID_SEPARATOR)[0]) + .collect(Collectors.toSet()); + assertEquals( + "Expected no duplicate segments_N files in remote but duplicates were found " + segmentsNFilesInRepo, + expectedUniqueSegmentsNFiles.size(), + segmentsNFilesInRepo.size() + ); + }, 90, TimeUnit.SECONDS); + } + + internalCluster().stopAllNodes(); + } + + /** + * Tests local recovery flow sanity in the happy path flow with replicas in place + */ + public void testLocalRecoveryFlowWithReplicas() throws Exception { + triggerRollingRestartForRemoteMigration(randomIntBetween(1, 2)); + internalCluster().stopAllNodes(); + } + + /** + * Helper method to run a rolling restart for migration to remote backed cluster + */ + private void triggerRollingRestartForRemoteMigration(int replicaCount) throws Exception { + internalCluster().startClusterManagerOnlyNodes(3); + internalCluster().startNodes(numOfNodes - 3); + + // create index + Settings indexSettings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, replicaCount) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 10)) + .build(); + createIndex(indexName, indexSettings); + ensureGreen(indexName); + indexBulk(indexName, randomIntBetween(100, 10000)); + refresh(indexName); + indexBulk(indexName, randomIntBetween(100, 10000)); + + initDocRepToRemoteMigration(); + + // rolling restart + final Settings remoteNodeAttributes = remoteStoreClusterSettings( + REPOSITORY_NAME, + segmentRepoPath, + REPOSITORY_2_NAME, + translogRepoPath + ); + internalCluster().rollingRestart(new InternalTestCluster.RestartCallback() { + // Update remote attributes + @Override + public Settings onNodeStopped(String nodeName) { + return remoteNodeAttributes; + } + }); + ensureStableCluster(numOfNodes); + ensureGreen(TimeValue.timeValueSeconds(90), indexName); + assertEquals(internalCluster().size(), numOfNodes); + + // Assert on remote uploads + Map shardStatsMap = internalCluster().client().admin().indices().prepareStats(indexName).get().asMap(); + DiscoveryNodes discoveryNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); + shardStatsMap.forEach((shardRouting, shardStats) -> { + if (discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode() && shardRouting.primary()) { + RemoteSegmentStats remoteSegmentStats = shardStats.getStats().getSegments().getRemoteSegmentStats(); + assertTrue(remoteSegmentStats.getTotalUploadTime() > 0); + assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0); + } + }); + + // Assert on new remote uploads after seeding + indexBulk(indexName, randomIntBetween(100, 10000)); + refresh(indexName); + indexBulk(indexName, randomIntBetween(100, 10000)); + Map newShardStatsMap = internalCluster().client().admin().indices().prepareStats(indexName).get().asMap(); + newShardStatsMap.forEach((shardRouting, shardStats) -> { + if (discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode() && shardRouting.primary()) { + RemoteSegmentStats prevRemoteSegmentStats = shardStatsMap.get(shardRouting) + .getStats() + .getSegments() + .getRemoteSegmentStats(); + RemoteSegmentStats newRemoteSegmentStats = shardStats.getStats().getSegments().getRemoteSegmentStats(); + assertTrue(newRemoteSegmentStats.getTotalUploadTime() > prevRemoteSegmentStats.getTotalUploadTime()); + assertTrue(newRemoteSegmentStats.getUploadBytesSucceeded() > prevRemoteSegmentStats.getUploadBytesSucceeded()); + + RemoteTranslogStats prevRemoteTranslogStats = shardStatsMap.get(shardRouting) + .getStats() + .getTranslog() + .getRemoteTranslogStats(); + RemoteTranslogStats newRemoteTranslogStats = shardStats.getStats().getTranslog().getRemoteTranslogStats(); + assertTrue(newRemoteTranslogStats.getUploadBytesSucceeded() > prevRemoteTranslogStats.getUploadBytesSucceeded()); + } + }); + } +} diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index a7b29314210df..14c6cecb1f847 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -500,13 +500,14 @@ public synchronized IndexShard createShard( if (this.indexSettings.isRemoteStoreEnabled()) { remoteDirectory = remoteDirectoryFactory.newDirectory(this.indexSettings, path); } else { - if (sourceNode != null && sourceNode.isRemoteStoreNode() == false) { + if (sourceNode == null || sourceNode.isRemoteStoreNode() == false) { if (routing.primary() == false) { throw new IllegalStateException("Can't migrate a remote shard to replica before primary " + routing.shardId()); } logger.info("DocRep shard {} is migrating to remote", shardId); seedRemote = true; } + remoteDirectory = ((RemoteSegmentStoreDirectoryFactory) remoteDirectoryFactory).newDirectory( RemoteStoreNodeAttribute.getRemoteStoreSegmentRepo(this.indexSettings.getNodeSettings()), this.indexSettings.getUUID(), diff --git a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java index 39b1cc130d6a5..6697991aef90b 100644 --- a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java @@ -1106,12 +1106,11 @@ private ReplicationGroup calculateReplicationGroup() { } else { newVersion = replicationGroup.getVersion() + 1; } - assert indexSettings.isRemoteTranslogStoreEnabled() - // Handle migration cases. Ignore assertion if any of the shard copies in the replication group is assigned to a remote node - || (replicationGroup != null - && replicationGroup.getReplicationTargets() - .stream() - .anyMatch(shardRouting -> isShardOnRemoteEnabledNode.apply(shardRouting.currentNodeId()))) + assert newVersion == 0 || indexSettings.isRemoteTranslogStoreEnabled() + // Handle migration cases. Ignore assertion if any of the shard copies in the replication group is assigned to a remote node + || replicationGroup.getReplicationTargets() + .stream() + .anyMatch(shardRouting -> isShardOnRemoteEnabledNode.apply(shardRouting.currentNodeId())) || checkpoints.entrySet().stream().filter(e -> e.getValue().tracked).allMatch(e -> e.getValue().replicated) : "In absence of remote translog store, all tracked shards must have replication mode as LOGICAL_REPLICATION"; diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index cbf10c9e4e552..18d4a2ca6d639 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -436,7 +436,7 @@ public IndexShard( logger.debug("state: [CREATED]"); this.checkIndexOnStartup = indexSettings.getValue(IndexSettings.INDEX_CHECK_ON_STARTUP); - this.translogConfig = new TranslogConfig(shardId, shardPath().resolveTranslog(), indexSettings, bigArrays, nodeId); + this.translogConfig = new TranslogConfig(shardId, shardPath().resolveTranslog(), indexSettings, bigArrays, nodeId, seedRemote); final String aId = shardRouting.allocationId().getId(); final long primaryTerm = indexSettings.getIndexMetadata().primaryTerm(shardId.id()); this.pendingPrimaryTerm = primaryTerm; @@ -5000,7 +5000,8 @@ public void syncTranslogFilesFromRemoteTranslog() throws IOException { shardPath().resolveTranslog(), indexSettings.getRemoteStorePathStrategy(), remoteStoreSettings, - logger + logger, + shouldSeedRemoteStore() ); } diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index e130f50e105d5..8d689e8769728 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -650,6 +650,14 @@ private void internalRecoverFromStore(IndexShard indexShard) throws IndexShardRe indexShard.recoveryState().getIndex().setFileDetailsComplete(); } indexShard.openEngineAndRecoverFromTranslog(); + if (indexShard.shouldSeedRemoteStore()) { + indexShard.getThreadPool().executor(ThreadPool.Names.GENERIC).execute(() -> { + logger.info("Attempting to seed Remote Store via local recovery for {}", indexShard.shardId()); + indexShard.refresh("remote store migration"); + }); + indexShard.waitForRemoteStoreSync(); + logger.info("Remote Store is now seeded via local recovery for {}", indexShard.shardId()); + } indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); indexShard.postRecovery("post recovery from shard_store"); diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 8c01791af2c9e..c7f756957076c 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -120,7 +120,7 @@ public RemoteFsTranslog( remoteStoreSettings ); try { - download(translogTransferManager, location, logger); + download(translogTransferManager, location, logger, config.shouldSeedRemote()); Checkpoint checkpoint = readCheckpoint(location); logger.info("Downloaded data from remote translog till maxSeqNo = {}", checkpoint.maxSeqNo); this.readers.addAll(recoverFromFiles(checkpoint)); @@ -168,7 +168,8 @@ public static void download( Path location, RemoteStorePathStrategy pathStrategy, RemoteStoreSettings remoteStoreSettings, - Logger logger + Logger logger, + boolean seedRemote ) throws IOException { assert repository instanceof BlobStoreRepository : String.format( Locale.ROOT, @@ -189,11 +190,12 @@ public static void download( pathStrategy, remoteStoreSettings ); - RemoteFsTranslog.download(translogTransferManager, location, logger); + RemoteFsTranslog.download(translogTransferManager, location, logger, seedRemote); logger.trace(remoteTranslogTransferTracker.toString()); } - static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger) throws IOException { + static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote) + throws IOException { /* In Primary to Primary relocation , there can be concurrent upload and download of translog. While translog files are getting downloaded by new primary, it might hence be deleted by the primary @@ -206,7 +208,7 @@ static void download(TranslogTransferManager translogTransferManager, Path locat boolean success = false; long startTimeMs = System.currentTimeMillis(); try { - downloadOnce(translogTransferManager, location, logger); + downloadOnce(translogTransferManager, location, logger, seedRemote); success = true; return; } catch (FileNotFoundException | NoSuchFileException e) { @@ -220,7 +222,8 @@ static void download(TranslogTransferManager translogTransferManager, Path locat throw ex; } - private static void downloadOnce(TranslogTransferManager translogTransferManager, Path location, Logger logger) throws IOException { + private static void downloadOnce(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote) + throws IOException { logger.debug("Downloading translog files from remote"); RemoteTranslogTransferTracker statsTracker = translogTransferManager.getRemoteTranslogTransferTracker(); long prevDownloadBytesSucceeded = statsTracker.getDownloadBytesSucceeded(); @@ -262,7 +265,9 @@ private static void downloadOnce(TranslogTransferManager translogTransferManager logger.debug("No translog files found on remote, checking local filesystem for cleanup"); if (FileSystemUtils.exists(location.resolve(CHECKPOINT_FILE_NAME))) { final Checkpoint checkpoint = readCheckpoint(location); - if (isEmptyTranslog(checkpoint) == false) { + if (seedRemote) { + logger.debug("Remote migration ongoing. Retaining the translog on local, skipping clean-up"); + } else if (isEmptyTranslog(checkpoint) == false) { logger.debug("Translog files exist on local without any metadata in remote, cleaning up these files"); // Creating empty translog will cleanup the older un-referenced tranlog files, we don't have to explicitly delete Translog.createEmptyTranslog(location, translogTransferManager.getShardId(), checkpoint); diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java index 2f00773075d41..f720f041b287c 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java @@ -59,6 +59,7 @@ public final class TranslogConfig { private final Path translogPath; private final ByteSizeValue bufferSize; private final String nodeId; + private final boolean seedRemote; /** * Creates a new TranslogConfig instance @@ -66,9 +67,17 @@ public final class TranslogConfig { * @param translogPath the path to use for the transaction log files * @param indexSettings the index settings used to set internal variables * @param bigArrays a bigArrays instance used for temporarily allocating write operations + * @param seedRemote boolean denoting whether remote store needs to be seeded as part of remote migration */ - public TranslogConfig(ShardId shardId, Path translogPath, IndexSettings indexSettings, BigArrays bigArrays, String nodeId) { - this(shardId, translogPath, indexSettings, bigArrays, DEFAULT_BUFFER_SIZE, nodeId); + public TranslogConfig( + ShardId shardId, + Path translogPath, + IndexSettings indexSettings, + BigArrays bigArrays, + String nodeId, + boolean seedRemote + ) { + this(shardId, translogPath, indexSettings, bigArrays, DEFAULT_BUFFER_SIZE, nodeId, seedRemote); } TranslogConfig( @@ -77,7 +86,8 @@ public TranslogConfig(ShardId shardId, Path translogPath, IndexSettings indexSet IndexSettings indexSettings, BigArrays bigArrays, ByteSizeValue bufferSize, - String nodeId + String nodeId, + boolean seedRemote ) { this.bufferSize = bufferSize; this.indexSettings = indexSettings; @@ -85,6 +95,7 @@ public TranslogConfig(ShardId shardId, Path translogPath, IndexSettings indexSet this.translogPath = translogPath; this.bigArrays = bigArrays; this.nodeId = nodeId; + this.seedRemote = seedRemote; } /** @@ -125,4 +136,8 @@ public ByteSizeValue getBufferSize() { public String getNodeId() { return nodeId; } + + public boolean shouldSeedRemote() { + return seedRemote; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java b/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java index 25fcdc614172a..cf6c9087777fe 100644 --- a/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java +++ b/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java @@ -195,7 +195,8 @@ private boolean isTranslogClean(ShardPath shardPath, ClusterState clusterState, translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); long primaryTerm = indexSettings.getIndexMetadata().primaryTerm(shardPath.getShardId().id()); // We open translog to check for corruption, do not clean anything. diff --git a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java index cc927a19fd01a..74aef54987842 100644 --- a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java @@ -4002,7 +4002,7 @@ public void testRecoverFromForeignTranslog() throws IOException { final Path badTranslogLog = createTempDir(); final String badUUID = Translog.createEmptyTranslog(badTranslogLog, SequenceNumbers.NO_OPS_PERFORMED, shardId, primaryTerm.get()); Translog translog = new LocalTranslog( - new TranslogConfig(shardId, badTranslogLog, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, badTranslogLog, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), badUUID, createTranslogDeletionPolicy(INDEX_SETTINGS), () -> SequenceNumbers.NO_OPS_PERFORMED, @@ -4020,7 +4020,8 @@ public void testRecoverFromForeignTranslog() throws IOException { translog.location(), config.getIndexSettings(), BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); EngineConfig brokenConfig = new EngineConfig.Builder().shardId(shardId) @@ -7714,7 +7715,8 @@ public void testNotWarmUpSearcherInEngineCtor() throws Exception { createTempDir(), config.getTranslogConfig().getIndexSettings(), config.getTranslogConfig().getBigArrays(), - "" + "", + false ); EngineConfig configWithWarmer = new EngineConfig.Builder().shardId(config.getShardId()) .threadPool(config.getThreadPool()) diff --git a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java index a45b25f04060b..8a77fbca2915d 100644 --- a/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RefreshListenersTests.java @@ -134,7 +134,8 @@ public void setupListeners() throws Exception { createTempDir("translog"), indexSettings, BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); Engine.EventListener eventListener = new Engine.EventListener() { @Override diff --git a/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java b/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java index c098d11a3487f..c27d0367abf68 100644 --- a/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/InternalTranslogManagerTests.java @@ -38,7 +38,7 @@ public void testRecoveryFromTranslog() throws IOException { LocalCheckpointTracker tracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED); try { translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -68,7 +68,7 @@ public void testRecoveryFromTranslog() throws IOException { translogManager.syncTranslog(); translogManager.close(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -117,7 +117,7 @@ public void testTranslogRollsGeneration() throws IOException { LocalCheckpointTracker tracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED); try { translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -147,7 +147,7 @@ public void testTranslogRollsGeneration() throws IOException { translogManager.syncTranslog(); translogManager.close(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -182,7 +182,7 @@ public void testTrimOperationsFromTranslog() throws IOException { LocalCheckpointTracker tracker = new LocalCheckpointTracker(NO_OPS_PERFORMED, NO_OPS_PERFORMED); try { translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -214,7 +214,7 @@ public void testTrimOperationsFromTranslog() throws IOException { translogManager.close(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), @@ -253,7 +253,7 @@ public void testTranslogSync() throws IOException { ParsedDocument doc = testParsedDocument("1", null, testDocumentWithTextField(), B_1, null); AtomicReference translogManagerAtomicReference = new AtomicReference<>(); translogManager = new InternalTranslogManager( - new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""), + new TranslogConfig(shardId, primaryTranslogDir, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, "", false), primaryTerm, globalCheckpoint::get, createTranslogDeletionPolicy(INDEX_SETTINGS), diff --git a/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java index 4997067b75198..cae27d5b259c4 100644 --- a/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java @@ -291,7 +291,7 @@ private TranslogConfig getTranslogConfig(final Path path, final Settings setting ); final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(shardId.getIndex(), settings); - return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, ""); + return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, "", false); } private Location addToTranslogAndList(Translog translog, List list, Translog.Operation op) throws IOException { @@ -1453,7 +1453,8 @@ public void testTranslogWriterCanFlushInAddOrReadCall() throws IOException { temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); final Set persistedSeqNos = new HashSet<>(); @@ -1552,7 +1553,8 @@ public void testTranslogWriterFsyncedWithLocalTranslog() throws IOException { temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); final Set persistedSeqNos = new HashSet<>(); diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java index 28979a3dc4f28..6bf35cc1eac9b 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java @@ -224,7 +224,7 @@ private TranslogConfig getTranslogConfig(final Path path, final Settings setting // To simulate that the node is remote backed Settings nodeSettings = Settings.builder().put("node.attr.remote_store.translog.repository", "my-repo-1").build(); final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(shardId.getIndex(), settings, nodeSettings); - return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, ""); + return new TranslogConfig(shardId, path, indexSettings, NON_RECYCLING_INSTANCE, bufferSize, "", false); } private BlobStoreRepository createRepository() { @@ -399,7 +399,8 @@ private TranslogConfig getConfig(int gensToKeep) { temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); return config; } @@ -1561,7 +1562,8 @@ public void testTranslogWriterFsyncDisabledInRemoteFsTranslog() throws IOExcepti temp.getIndexSettings(), temp.getBigArrays(), new ByteSizeValue(1, ByteSizeUnit.KB), - "" + "", + false ); final Set persistedSeqNos = new HashSet<>(); @@ -1692,7 +1694,7 @@ public void testDownloadWithRetries() throws IOException { // Always File not found when(mockTransfer.downloadTranslog(any(), any(), any())).thenThrow(new NoSuchFileException("File not found")); TranslogTransferManager finalMockTransfer = mockTransfer; - assertThrows(NoSuchFileException.class, () -> RemoteFsTranslog.download(finalMockTransfer, location, logger)); + assertThrows(NoSuchFileException.class, () -> RemoteFsTranslog.download(finalMockTransfer, location, logger, false)); // File not found in first attempt . File found in second attempt. mockTransfer = mock(TranslogTransferManager.class); @@ -1713,7 +1715,7 @@ public void testDownloadWithRetries() throws IOException { }).when(mockTransfer).downloadTranslog(any(), any(), any()); // no exception thrown - RemoteFsTranslog.download(mockTransfer, location, logger); + RemoteFsTranslog.download(mockTransfer, location, logger, false); } // No translog data in local as well as remote, we skip creating empty translog @@ -1726,7 +1728,7 @@ public void testDownloadWithNoTranslogInLocalAndRemote() throws IOException { when(mockTransfer.getRemoteTranslogTransferTracker()).thenReturn(remoteTranslogTransferTracker); Path[] filesBeforeDownload = FileSystemUtils.files(location); - RemoteFsTranslog.download(mockTransfer, location, logger); + RemoteFsTranslog.download(mockTransfer, location, logger, false); assertEquals(filesBeforeDownload, FileSystemUtils.files(location)); } @@ -1746,7 +1748,7 @@ public void testDownloadWithTranslogOnlyInLocal() throws IOException { Checkpoint existingCheckpoint = Translog.readCheckpoint(location); TranslogTransferManager finalMockTransfer = mockTransfer; - RemoteFsTranslog.download(finalMockTransfer, location, logger); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false); Path[] filesPostDownload = FileSystemUtils.files(location); assertEquals(2, filesPostDownload.length); @@ -1782,11 +1784,11 @@ public void testDownloadWithEmptyTranslogOnlyInLocal() throws IOException { TranslogTransferManager finalMockTransfer = mockTransfer; // download first time will ensure creating empty translog - RemoteFsTranslog.download(finalMockTransfer, location, logger); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false); Path[] filesPostFirstDownload = FileSystemUtils.files(location); // download on empty translog should be a no-op - RemoteFsTranslog.download(finalMockTransfer, location, logger); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false); Path[] filesPostSecondDownload = FileSystemUtils.files(location); assertArrayEquals(filesPostFirstDownload, filesPostSecondDownload); diff --git a/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java b/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java index e17d2770f014a..a16e607bbf37a 100644 --- a/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java +++ b/server/src/test/java/org/opensearch/index/translog/TranslogManagerTestCase.java @@ -74,7 +74,14 @@ protected Translog createTranslog(LongSupplier primaryTermSupplier) throws IOExc } protected Translog createTranslog(Path translogPath, LongSupplier primaryTermSupplier) throws IOException { - TranslogConfig translogConfig = new TranslogConfig(shardId, translogPath, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""); + TranslogConfig translogConfig = new TranslogConfig( + shardId, + translogPath, + INDEX_SETTINGS, + BigArrays.NON_RECYCLING_INSTANCE, + "", + false + ); String translogUUID = Translog.createEmptyTranslog( translogPath, SequenceNumbers.NO_OPS_PERFORMED, diff --git a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java index 1cb5501810c5d..3403425d89254 100644 --- a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java @@ -527,7 +527,14 @@ protected Translog createTranslog(LongSupplier primaryTermSupplier) throws IOExc } protected Translog createTranslog(Path translogPath, LongSupplier primaryTermSupplier) throws IOException { - TranslogConfig translogConfig = new TranslogConfig(shardId, translogPath, INDEX_SETTINGS, BigArrays.NON_RECYCLING_INSTANCE, ""); + TranslogConfig translogConfig = new TranslogConfig( + shardId, + translogPath, + INDEX_SETTINGS, + BigArrays.NON_RECYCLING_INSTANCE, + "", + false + ); String translogUUID = Translog.createEmptyTranslog( translogPath, SequenceNumbers.NO_OPS_PERFORMED, @@ -877,7 +884,8 @@ public EngineConfig config( translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, - "" + "", + false ); final List extRefreshListenerList = externalRefreshListener == null ? emptyList() @@ -946,7 +954,14 @@ protected EngineConfig config( .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true) .build() ); - TranslogConfig translogConfig = new TranslogConfig(shardId, translogPath, indexSettings, BigArrays.NON_RECYCLING_INSTANCE, ""); + TranslogConfig translogConfig = new TranslogConfig( + shardId, + translogPath, + indexSettings, + BigArrays.NON_RECYCLING_INSTANCE, + "", + false + ); return new EngineConfig.Builder().shardId(config.getShardId()) .threadPool(config.getThreadPool()) .indexSettings(indexSettings) From 42d46a9d048b8f8ac07d88b71ede32f6c0d7a261 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Thu, 25 Apr 2024 14:07:19 +0530 Subject: [PATCH 25/32] Allow index & cluster default refresh interval setting value to be -1 (#11411) Signed-off-by: Ashish Singh --- .../ClusterIndexRefreshIntervalIT.java | 169 +++++++++++++++--- .../metadata/MetadataCreateIndexService.java | 7 +- .../opensearch/indices/IndicesService.java | 7 +- 3 files changed, 159 insertions(+), 24 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java index 25fa7ae7eb8eb..f936b53f52a7b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/ClusterIndexRefreshIntervalIT.java @@ -235,33 +235,19 @@ public void testDefaultRefreshIntervalWithUpdateClusterAndIndexSettings() throws } public void testRefreshIntervalDisabled() throws ExecutionException, InterruptedException { - TimeValue clusterMinimumRefreshInterval = client().settings() - .getAsTime(IndicesService.CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.MINUS_ONE); - boolean createIndexSuccess = clusterMinimumRefreshInterval.equals(TimeValue.MINUS_ONE); String clusterManagerName = internalCluster().getClusterManagerName(); List dataNodes = new ArrayList<>(internalCluster().getDataNodeNames()); Settings settings = Settings.builder() .put(indexSettings()) .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), IndexSettings.MINIMUM_REFRESH_INTERVAL) .build(); - if (createIndexSuccess) { - createIndex(INDEX_NAME, settings); - ensureYellowAndNoInitializingShards(INDEX_NAME); - ensureGreen(INDEX_NAME); - GetIndexResponse getIndexResponse = client(clusterManagerName).admin().indices().getIndex(new GetIndexRequest()).get(); - IndicesService indicesService = internalCluster().getInstance(IndicesService.class, randomFrom(dataNodes)); - String uuid = getIndexResponse.getSettings().get(INDEX_NAME).get(IndexMetadata.SETTING_INDEX_UUID); - IndexService indexService = indicesService.indexService(new Index(INDEX_NAME, uuid)); - assertEquals(IndexSettings.MINIMUM_REFRESH_INTERVAL, indexService.getRefreshTaskInterval()); - } else { - IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> createIndex(INDEX_NAME, settings)); - assertEquals( - "invalid index.refresh_interval [-1]: cannot be smaller than cluster.minimum.index.refresh_interval [" - + getMinRefreshIntervalForRefreshDisabled() - + "]", - exception.getMessage() - ); - } + createIndex(INDEX_NAME, settings); + ensureGreen(INDEX_NAME); + GetIndexResponse getIndexResponse = client(clusterManagerName).admin().indices().getIndex(new GetIndexRequest()).get(); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, randomFrom(dataNodes)); + String uuid = getIndexResponse.getSettings().get(INDEX_NAME).get(IndexMetadata.SETTING_INDEX_UUID); + IndexService indexService = indicesService.indexService(new Index(INDEX_NAME, uuid)); + assertEquals(IndexSettings.MINIMUM_REFRESH_INTERVAL, indexService.getRefreshTaskInterval()); } protected TimeValue getMinRefreshIntervalForRefreshDisabled() { @@ -366,6 +352,147 @@ public void testClusterMinimumChangeOnIndexWithCustomRefreshInterval() throws Ex assertEquals(customRefreshInterval, indexService.getRefreshTaskInterval()); } + public void testClusterMinimumRefreshIntervalOfMinusOneFails() { + // This test checks that we can not set cluster minimum refresh interval as -1 (or -1ms). + String clusterManagerName = internalCluster().getClusterManagerName(); + String refreshInterval = randomFrom("-1", "-1ms"); + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), refreshInterval)) + .get() + ); + assertEquals( + "failed to parse value [" + refreshInterval + "] for setting [cluster.minimum.index.refresh_interval], must be >= [0ms]", + ex.getMessage() + ); + } + + public void testClusterMinimumRefreshIntervalOfZero() { + // This test checks that we can set the cluster minimum refresh interval as 0. + String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "0")) + .get(); + } + + public void testDefaultRefreshIntervalOfMinusOneIrrespectiveOfMinimum() { + // This test checks that we are able to set the cluster default refresh interval to one regardless of what the + // minimum is set to. -1 corresponds to no period background refreshes. + String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder() + .put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("0", "1ms", "1s", "10s")) + .put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms")) + ) + .get(); + } + + public void testCreateIndexWithMinusOneRefreshInterval() throws ExecutionException, InterruptedException { + // This test checks that we are able to create index with -1 refresh interval using index settings and default interval both. + String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder() + .put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + .put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + ) + .get(); + + Settings indexSettings = Settings.builder() + .put(indexSettings()) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms")) + .build(); + createIndex(INDEX_NAME, indexSettings); + ensureGreen(INDEX_NAME); + + IndexService indexService = getIndexServiceFromRandomDataNode(INDEX_NAME); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms"))) + .get(); + createIndex(OTHER_INDEX_NAME); + ensureGreen(OTHER_INDEX_NAME); + indexService = getIndexServiceFromRandomDataNode(OTHER_INDEX_NAME); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + } + + public void testUpdateIndexWithMinusOneRefreshInterval() throws ExecutionException, InterruptedException { + // This test checks that we are able to update index with -1 refresh interval using index settings and default interval both. + String clusterManagerName = internalCluster().getClusterManagerName(); + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder() + .put(CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + .put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") + ) + .get(); + + createIndex(INDEX_NAME); + ensureGreen(INDEX_NAME); + IndexService indexService = getIndexServiceFromRandomDataNode(INDEX_NAME); + assertEquals(10, indexService.getRefreshTaskInterval().seconds()); + + client(clusterManagerName).admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(INDEX_NAME).settings( + Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms")) + ) + ) + .actionGet(); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + + client(clusterManagerName).admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(INDEX_NAME).settings( + Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "100s") + ) + ) + .actionGet(); + assertEquals(100, indexService.getRefreshTaskInterval().seconds()); + + client(clusterManagerName).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomFrom("-1", "-1ms"))) + .get(); + + client(clusterManagerName).admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(INDEX_NAME).settings( + Settings.builder().putNull(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()) + ) + ) + .actionGet(); + assertEquals(-1, indexService.getRefreshTaskInterval().millis()); + } + + private IndexService getIndexServiceFromRandomDataNode(String indexName) throws ExecutionException, InterruptedException { + String clusterManagerName = internalCluster().getClusterManagerName(); + List dataNodes = new ArrayList<>(internalCluster().getDataNodeNames()); + GetIndexResponse getIndexResponse = client(clusterManagerName).admin().indices().getIndex(new GetIndexRequest()).get(); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, randomFrom(dataNodes)); + String uuid = getIndexResponse.getSettings().get(indexName).get(IndexMetadata.SETTING_INDEX_UUID); + return indicesService.indexService(new Index(indexName, uuid)); + } + protected TimeValue getDefaultRefreshInterval() { return IndexSettings.DEFAULT_REFRESH_INTERVAL; } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index 0eba4d241f0fd..b31985a260361 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -1636,10 +1636,15 @@ public static void validateTranslogRetentionSettings(Settings indexSettings) { * @param clusterSettings cluster setting */ public static void validateRefreshIntervalSettings(Settings requestSettings, ClusterSettings clusterSettings) { - if (IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.exists(requestSettings) == false) { + if (IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.exists(requestSettings) == false + || requestSettings.get(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey()) == null) { return; } TimeValue requestRefreshInterval = IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.get(requestSettings); + // If the refresh interval supplied is -1, we allow the index to be created because -1 means no periodic refresh. + if (requestRefreshInterval.millis() == -1) { + return; + } TimeValue clusterMinimumRefreshInterval = clusterSettings.get(IndicesService.CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING); if (requestRefreshInterval.millis() < clusterMinimumRefreshInterval.millis()) { throw new IllegalArgumentException( diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 0187a9fb3b8ba..fd7d897a0e99c 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -273,8 +273,8 @@ public class IndicesService extends AbstractLifecycleComponent */ public static final Setting CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING = Setting.timeSetting( "cluster.minimum.index.refresh_interval", - IndexSettings.MINIMUM_REFRESH_INTERVAL, - IndexSettings.MINIMUM_REFRESH_INTERVAL, + TimeValue.ZERO, + TimeValue.ZERO, new ClusterMinimumRefreshIntervalValidator(), Property.NodeScope, Property.Dynamic @@ -2009,6 +2009,9 @@ public Iterator> settings() { * @param defaultRefreshInterval value of cluster default index refresh interval setting */ private static void validateRefreshIntervalSettings(TimeValue minimumRefreshInterval, TimeValue defaultRefreshInterval) { + if (defaultRefreshInterval.millis() < 0) { + return; + } if (minimumRefreshInterval.compareTo(defaultRefreshInterval) > 0) { throw new IllegalArgumentException( "cluster minimum index refresh interval [" From 5bf522cce3dded6ecf6167ce53f1f56e3771edd5 Mon Sep 17 00:00:00 2001 From: Lakshya Taragi <157457166+ltaragi@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:21:16 +0530 Subject: [PATCH 26/32] Take shallow copy snapshot when remote store migration is in progress (#13309) Signed-off-by: Lakshya Taragi --- .../RemoteStoreMigrationSettingsUpdateIT.java | 81 ++-------------- ...eMigrationShardAllocationBaseTestCase.java | 96 +++++++++++++++++++ .../RemoteStoreMigrationTestCase.java | 51 ++++++++++ .../snapshots/SnapshotsService.java | 9 ++ 4 files changed, 165 insertions(+), 72 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java index c3720e6fbbd09..b71f7d7cf7e4a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java @@ -8,27 +8,15 @@ package org.opensearch.remotemigration; -import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.opensearch.client.Client; -import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; -import org.opensearch.core.rest.RestStatus; -import org.opensearch.index.IndexSettings; -import org.opensearch.indices.replication.common.ReplicationType; -import org.opensearch.snapshots.SnapshotInfo; -import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.nio.file.Path; import java.util.Optional; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; -import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; -import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; @@ -92,13 +80,7 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix assertNodeInCluster(remoteNodeName); logger.info("Create a non remote-backed index"); - client.admin() - .indices() - .prepareCreate(TEST_INDEX) - .setSettings( - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() - ) - .get(); + createIndex(TEST_INDEX, 0); logger.info("Verify that non remote stored backed index is created"); assertNonRemoteStoreBackedIndex(TEST_INDEX); @@ -115,21 +97,12 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix logger.info("Create snapshot of non remote stored backed index"); - SnapshotInfo snapshotInfo = client().admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName) - .setIndices(TEST_INDEX) - .setWaitForCompletion(true) - .get() - .getSnapshotInfo(); - - assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); - assertTrue(snapshotInfo.successfulShards() > 0); - assertEquals(0, snapshotInfo.failedShards()); + createSnapshot(snapshotRepoName, snapshotName, TEST_INDEX); logger.info("Restore index from snapshot under NONE direction"); String restoredIndexName1 = TEST_INDEX + "-restored1"; restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName1); + ensureGreen(restoredIndexName1); logger.info("Verify that restored index is non remote-backed"); assertNonRemoteStoreBackedIndex(restoredIndexName1); @@ -138,6 +111,7 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix setDirection(REMOTE_STORE.direction); String restoredIndexName2 = TEST_INDEX + "-restored2"; restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName2); + ensureGreen(restoredIndexName2); logger.info("Verify that restored index is non remote-backed"); assertRemoteStoreBackedIndex(restoredIndexName2); @@ -146,10 +120,10 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix // compatibility mode setting test public void testSwitchToStrictMode() throws Exception { - logger.info(" --> initialize cluster"); + logger.info("Initialize cluster"); initializeCluster(false); - logger.info(" --> create a mixed mode cluster"); + logger.info("Create a mixed mode cluster"); setClusterMode(MIXED.mode); addRemote = true; String remoteNodeName = internalCluster().startNode(); @@ -159,58 +133,21 @@ public void testSwitchToStrictMode() throws Exception { assertNodeInCluster(remoteNodeName); assertNodeInCluster(nonRemoteNodeName); - logger.info(" --> attempt switching to strict mode"); + logger.info("Attempt switching to strict mode"); SettingsException exception = assertThrows(SettingsException.class, () -> setClusterMode(STRICT.mode)); assertEquals( "can not switch to STRICT compatibility mode when the cluster contains both remote and non-remote nodes", exception.getMessage() ); - logger.info(" --> stop remote node so that cluster had only non-remote nodes"); + logger.info("Stop remote node so that cluster had only non-remote nodes"); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName)); ensureStableCluster(2); - logger.info(" --> attempt switching to strict mode"); + logger.info("Attempt switching to strict mode"); setClusterMode(STRICT.mode); } - // restore indices from a snapshot - private void restoreSnapshot(String snapshotRepoName, String snapshotName, String restoredIndexName) { - RestoreSnapshotResponse restoreSnapshotResponse = client.admin() - .cluster() - .prepareRestoreSnapshot(snapshotRepoName, snapshotName) - .setWaitForCompletion(false) - .setIndices(TEST_INDEX) - .setRenamePattern(TEST_INDEX) - .setRenameReplacement(restoredIndexName) - .get(); - - assertEquals(restoreSnapshotResponse.status(), RestStatus.ACCEPTED); - ensureGreen(restoredIndexName); - } - - // verify that the created index is not remote store backed - private void assertNonRemoteStoreBackedIndex(String indexName) { - Settings indexSettings = client.admin().indices().prepareGetIndex().execute().actionGet().getSettings().get(indexName); - assertEquals(ReplicationType.DOCUMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); - assertNull(indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); - assertNull(indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); - assertNull(indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); - } - - // verify that the created index is remote store backed - private void assertRemoteStoreBackedIndex(String indexName) { - Settings indexSettings = client.admin().indices().prepareGetIndex().execute().actionGet().getSettings().get(indexName); - assertEquals(ReplicationType.SEGMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); - assertEquals("true", indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); - assertEquals(REPOSITORY_NAME, indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); - assertEquals(REPOSITORY_2_NAME, indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); - assertEquals( - IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL, - INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(indexSettings) - ); - } - // bootstrap a cluster private void initializeCluster(boolean remoteClusterManager) { addRemote = remoteClusterManager; diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java index ad2302d1ab2e1..ffcab9483485d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java @@ -9,15 +9,27 @@ package org.opensearch.remotemigration; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.settings.Settings; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexSettings; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.snapshots.SnapshotState; import java.util.Map; import java.util.Optional; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -98,4 +110,88 @@ protected ShardRouting getShardRouting(boolean isPrimary) { return (isPrimary ? table.primaryShard() : table.replicaShards().get(0)); } + // create a snapshot + public static SnapshotInfo createSnapshot(String snapshotRepoName, String snapshotName, String... indices) { + SnapshotInfo snapshotInfo = internalCluster().client() + .admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName) + .setIndices(indices) + .setWaitForCompletion(true) + .get() + .getSnapshotInfo(); + + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertTrue(snapshotInfo.successfulShards() > 0); + assertEquals(0, snapshotInfo.failedShards()); + return snapshotInfo; + } + + // create new index + public static void createIndex(String indexName, int replicaCount) { + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, replicaCount) + .build() + ) + .get() + ); + } + + // restore indices from a snapshot + public static RestoreSnapshotResponse restoreSnapshot(String snapshotRepoName, String snapshotName, String restoredIndexName) { + RestoreSnapshotResponse restoreSnapshotResponse = internalCluster().client() + .admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName) + .setWaitForCompletion(false) + .setIndices(TEST_INDEX) + .setRenamePattern(TEST_INDEX) + .setRenameReplacement(restoredIndexName) + .get(); + assertEquals(restoreSnapshotResponse.status(), RestStatus.ACCEPTED); + return restoreSnapshotResponse; + } + + // verify that the created index is not remote store backed + public static void assertNonRemoteStoreBackedIndex(String indexName) { + Settings indexSettings = internalCluster().client() + .admin() + .indices() + .prepareGetIndex() + .execute() + .actionGet() + .getSettings() + .get(indexName); + assertEquals(ReplicationType.DOCUMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); + assertNull(indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); + assertNull(indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); + assertNull(indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); + } + + // verify that the created index is remote store backed + public static void assertRemoteStoreBackedIndex(String indexName) { + Settings indexSettings = internalCluster().client() + .admin() + .indices() + .prepareGetIndex() + .execute() + .actionGet() + .getSettings() + .get(indexName); + assertEquals(ReplicationType.SEGMENT.toString(), indexSettings.get(SETTING_REPLICATION_TYPE)); + assertEquals("true", indexSettings.get(SETTING_REMOTE_STORE_ENABLED)); + assertEquals(REPOSITORY_NAME, indexSettings.get(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY)); + assertEquals(REPOSITORY_2_NAME, indexSettings.get(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY)); + assertEquals( + IndexSettings.DEFAULT_REMOTE_TRANSLOG_BUFFER_INTERVAL, + INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(indexSettings) + ); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java index 640b83f194c1c..7d816a5e18698 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java @@ -13,8 +13,11 @@ import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.client.Client; import org.opensearch.common.settings.Settings; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.test.OpenSearchIntegTestCase; +import java.nio.file.Path; import java.util.List; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -70,4 +73,52 @@ public void testMigrationDirections() { updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "random")); assertThrows(IllegalArgumentException.class, () -> client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); } + + public void testNoShallowSnapshotInMixedMode() throws Exception { + logger.info("Initialize remote cluster"); + addRemote = true; + internalCluster().setBootstrapClusterManagerNodeIndex(0); + List cmNodes = internalCluster().startNodes(1); + Client client = internalCluster().client(cmNodes.get(0)); + + logger.info("Add remote node"); + internalCluster().startNode(); + internalCluster().validateClusterFormed(); + + logger.info("Create remote backed index"); + RemoteStoreMigrationShardAllocationBaseTestCase.createIndex("test", 0); + RemoteStoreMigrationShardAllocationBaseTestCase.assertRemoteStoreBackedIndex("test"); + + logger.info("Create shallow snapshot setting enabled repo"); + String shallowSnapshotRepoName = "shallow-snapshot-repo-name"; + Path shallowSnapshotRepoPath = randomRepoPath(); + assertAcked( + clusterAdmin().preparePutRepository(shallowSnapshotRepoName) + .setType("fs") + .setSettings( + Settings.builder() + .put("location", shallowSnapshotRepoPath) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), Boolean.TRUE) + ) + ); + + logger.info("Verify shallow snapshot creation"); + final String snapshot1 = "snapshot1"; + SnapshotInfo snapshotInfo1 = RemoteStoreMigrationShardAllocationBaseTestCase.createSnapshot( + shallowSnapshotRepoName, + snapshot1, + "test" + ); + assertEquals(snapshotInfo1.isRemoteStoreIndexShallowCopyEnabled(), true); + + logger.info("Set MIXED compatibility mode"); + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + logger.info("Verify that new snapshot is not shallow"); + final String snapshot2 = "snapshot2"; + SnapshotInfo snapshotInfo2 = RemoteStoreMigrationShardAllocationBaseTestCase.createSnapshot(shallowSnapshotRepoName, snapshot2); + assertEquals(snapshotInfo2.isRemoteStoreIndexShallowCopyEnabled(), false); + } } diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index 71918bc73b55a..22b640963e896 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -131,6 +131,8 @@ import static java.util.Collections.emptySet; import static java.util.Collections.unmodifiableList; import static org.opensearch.cluster.SnapshotsInProgress.completed; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.repositories.blobstore.BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY; import static org.opensearch.snapshots.SnapshotUtils.validateSnapshotsBackingAnyIndex; @@ -343,6 +345,13 @@ public ClusterState execute(ClusterState currentState) { } boolean remoteStoreIndexShallowCopy = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings()); + logger.debug("remote_store_index_shallow_copy setting is set as [{}]", remoteStoreIndexShallowCopy); + if (remoteStoreIndexShallowCopy + && clusterService.getClusterSettings().get(REMOTE_STORE_COMPATIBILITY_MODE_SETTING).equals(CompatibilityMode.MIXED)) { + // don't allow shallow snapshots if compatibility mode is not strict + logger.warn("Shallow snapshots are not supported during migration. Falling back to full snapshot."); + remoteStoreIndexShallowCopy = false; + } newEntry = SnapshotsInProgress.startedEntry( new Snapshot(repositoryName, snapshotId), request.includeGlobalState(), From 18aa5ad084b3ba0bc0603a84988699e71c461ae8 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 25 Apr 2024 14:05:51 -0400 Subject: [PATCH 27/32] OpenJDK Update (April 2024 Patch releases) (#13389) Signed-off-by: Andriy Redko --- CHANGELOG.md | 1 + .../java/org/opensearch/gradle/test/DistroTestPlugin.java | 4 ++-- buildSrc/version.properties | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2fbf895dafcf..f6aae22496ab8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.11 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329)) - Bump `jakarta.enterprise:jakarta.enterprise.cdi-api` from 4.0.1 to 4.1.0 ([#13328](https://github.com/opensearch-project/OpenSearch/pull/13328)) - Bump `com.google.api.grpc:proto-google-iam-v1` from 0.12.0 to 1.33.0 ([#13332](https://github.com/opensearch-project/OpenSearch/pull/13332)) +- OpenJDK Update (April 2024 Patch releases), update to Eclipse Temurin 21.0.3+9 ([#13389](https://github.com/opensearch-project/OpenSearch/pull/13389)) ### Changed - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) diff --git a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java index bc44f81a81aff..b2b3e3003e572 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java @@ -77,9 +77,9 @@ import java.util.stream.Stream; public class DistroTestPlugin implements Plugin { - private static final String SYSTEM_JDK_VERSION = "21.0.2+13"; + private static final String SYSTEM_JDK_VERSION = "21.0.3+9"; private static final String SYSTEM_JDK_VENDOR = "adoptium"; - private static final String GRADLE_JDK_VERSION = "21.0.2+13"; + private static final String GRADLE_JDK_VERSION = "21.0.3+9"; private static final String GRADLE_JDK_VENDOR = "adoptium"; // all distributions used by distro tests. this is temporary until tests are per distribution diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 6c6138ac9b7f6..d0aaea546803b 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -2,7 +2,7 @@ opensearch = 3.0.0 lucene = 9.11.0-snapshot-fb97840 bundled_jdk_vendor = adoptium -bundled_jdk = 21.0.2+13 +bundled_jdk = 21.0.3+9 # optional dependencies spatial4j = 0.7 From 28a8fc51b48c1fbc838e48687843b051de5075ee Mon Sep 17 00:00:00 2001 From: rajiv-kv <157019998+rajiv-kv@users.noreply.github.com> Date: Fri, 26 Apr 2024 00:21:16 +0530 Subject: [PATCH 28/32] flaky test fix - assert cluster stats on isolated node as stats is local to the node (#13383) Signed-off-by: Rajiv Kumar Vaidyanathan --- .../discovery/ClusterManagerDisruptionIT.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java index 79f6ba6dfa642..65ca8eab278ef 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java @@ -198,11 +198,15 @@ public void testIsolateClusterManagerAndVerifyClusterStateConsensus() throws Exc + nodeState ); } - } - ClusterStateStats clusterStateStats = internalCluster().clusterService().getClusterManagerService().getClusterStateStats(); - assertTrue(clusterStateStats.getUpdateFailed() > 0); + }); + + ClusterStateStats clusterStateStats = internalCluster().clusterService(isolatedNode) + .getClusterManagerService() + .getClusterStateStats(); + assertTrue(clusterStateStats.getUpdateFailed() > 0); + } /** From db694a9a486ba6d8f39bf11136da0c9220edaf0d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 12:31:16 -0700 Subject: [PATCH 29/32] Bump com.squareup.okio:okio from 3.8.0 to 3.9.0 in /test/fixtures/hdfs-fixture (#12997) * Bump com.squareup.okio:okio in /test/fixtures/hdfs-fixture Bumps [com.squareup.okio:okio](https://github.com/square/okio) from 3.8.0 to 3.9.0. - [Changelog](https://github.com/square/okio/blob/master/CHANGELOG.md) - [Commits](https://github.com/square/okio/compare/parent-3.8.0...parent-3.9.0) --- updated-dependencies: - dependency-name: com.squareup.okio:okio dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + test/fixtures/hdfs-fixture/build.gradle | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6aae22496ab8..cc493a4f4618c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `jakarta.enterprise:jakarta.enterprise.cdi-api` from 4.0.1 to 4.1.0 ([#13328](https://github.com/opensearch-project/OpenSearch/pull/13328)) - Bump `com.google.api.grpc:proto-google-iam-v1` from 0.12.0 to 1.33.0 ([#13332](https://github.com/opensearch-project/OpenSearch/pull/13332)) - OpenJDK Update (April 2024 Patch releases), update to Eclipse Temurin 21.0.3+9 ([#13389](https://github.com/opensearch-project/OpenSearch/pull/13389)) +- Bump `com.squareup.okio:okio` from 3.8.0 to 3.9.0 ([#12997](https://github.com/opensearch-project/OpenSearch/pull/12997)) ### Changed - [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872)) diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index a6275f200217a..3eb026b19e20c 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -78,6 +78,6 @@ dependencies { runtimeOnly("com.squareup.okhttp3:okhttp:4.12.0") { exclude group: "com.squareup.okio" } - runtimeOnly "com.squareup.okio:okio:3.8.0" + runtimeOnly "com.squareup.okio:okio:3.9.0" runtimeOnly "org.xerial.snappy:snappy-java:1.1.10.5" } From a69cd0864a673579a9cb2190da765bc1ebfd0a22 Mon Sep 17 00:00:00 2001 From: Gaurav Chandani Date: Fri, 26 Apr 2024 08:46:21 +0530 Subject: [PATCH 30/32] Async Batch shards changes for GatewayAllocator (#8746) Changes for create/update/delete batches for batch mode for async fetch for both primary & replica. It also added the node scope setting to enable/ disable batch mode. Signed-off-by: Gaurav Chandani Signed-off-by: Shivansh Arora Signed-off-by: Aman Khare --- CHANGELOG.md | 1 + .../gateway/RecoveryFromGatewayIT.java | 331 ++++++++ .../org/opensearch/cluster/ClusterModule.java | 14 +- .../cluster/routing/RoutingNode.java | 4 + .../routing/allocation/AllocationService.java | 39 + .../allocation/ExistingShardsAllocator.java | 40 +- .../common/settings/ClusterSettings.java | 4 + .../gateway/BaseGatewayShardAllocator.java | 2 + .../gateway/ReplicaShardBatchAllocator.java | 2 +- .../gateway/ShardsBatchGatewayAllocator.java | 721 ++++++++++++++++++ ...sportNodesListShardStoreMetadataBatch.java | 19 +- .../main/java/org/opensearch/node/Node.java | 10 +- .../cluster/ClusterModuleTests.java | 11 +- .../gateway/GatewayAllocatorTests.java | 360 +++++++++ .../gateway/ShardBatchCacheTests.java | 13 +- .../test/gateway/TestGatewayAllocator.java | 1 + .../TestShardBatchGatewayAllocator.java | 144 ++++ 17 files changed, 1696 insertions(+), 20 deletions(-) create mode 100644 server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java create mode 100644 server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java create mode 100644 test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java diff --git a/CHANGELOG.md b/CHANGELOG.md index cc493a4f4618c..7adf9a752c846 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Streaming Indexing] Ensure support of the new transport by security plugin ([#13174](https://github.com/opensearch-project/OpenSearch/pull/13174)) - Add cluster setting to dynamically configure the buckets for filter rewrite optimization. ([#13179](https://github.com/opensearch-project/OpenSearch/pull/13179)) - [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992)) +- Batch mode for async fetching shard information in GatewayAllocator for unassigned shards ([#8746](https://github.com/opensearch-project/OpenSearch/pull/8746)) ### Dependencies - Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896)) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index ba03532a9aa2f..bc0557ddc2afa 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -32,24 +32,31 @@ package org.opensearch.gateway; +import org.apache.lucene.index.CorruptIndexException; import org.opensearch.Version; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.reroute.ClusterRerouteResponse; import org.opensearch.action.admin.cluster.shards.ClusterSearchShardsGroup; import org.opensearch.action.admin.cluster.shards.ClusterSearchShardsResponse; +import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse; import org.opensearch.action.admin.indices.recovery.RecoveryResponse; import org.opensearch.action.admin.indices.stats.IndexStats; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.action.admin.indices.stats.ShardStats; import org.opensearch.action.support.ActionTestUtils; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Requests; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.ElectionSchedulerFactory; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentFactory; @@ -62,6 +69,7 @@ import org.opensearch.index.MergePolicyProvider; import org.opensearch.index.engine.Engine; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardPath; import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.RecoveryState; @@ -71,6 +79,7 @@ import org.opensearch.indices.store.TransportNodesListShardStoreMetadataHelper; import org.opensearch.plugins.Plugin; import org.opensearch.test.InternalSettingsPlugin; +import org.opensearch.test.InternalTestCluster; import org.opensearch.test.InternalTestCluster.RestartCallback; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; @@ -94,6 +103,8 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; import static org.opensearch.cluster.coordination.ClusterBootstrapService.INITIAL_CLUSTER_MANAGER_NODES_SETTING; +import static org.opensearch.cluster.health.ClusterHealthStatus.GREEN; +import static org.opensearch.cluster.health.ClusterHealthStatus.RED; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; @@ -750,6 +761,276 @@ public void testMessyElectionsStillMakeClusterGoGreen() throws Exception { ensureGreen("test"); } + public void testBatchModeEnabled() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(2); + createIndex( + "test", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + ensureGreen("test"); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + ensureRed("test"); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + assertEquals(1, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(1, gatewayAllocator.getNumberOfStoreShardBatches()); + + // Now start both data nodes and ensure batch mode is working + logger.info("--> restarting the stopped nodes"); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + ensureStableCluster(3); + ensureGreen("test"); + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfInFlightFetches()); + } + + public void testBatchModeDisabled() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), false).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(2); + createIndex( + "test", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + + ensureGreen("test"); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + ensureRed("test"); + + assertFalse(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + + // assert no batches created + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + + logger.info("--> restarting the stopped nodes"); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + ensureStableCluster(3); + ensureGreen("test"); + } + + public void testNBatchesCreationAndAssignment() throws Exception { + // we will reduce batch size to 5 to make sure we have enough batches to test assignment + // Total number of primary shards = 50 (50 indices*1) + // Total number of replica shards = 50 (50 indices*1) + // Total batches creation for primaries and replicas will be 10 each + + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(2); + createNIndices(50, "test"); + ensureStableCluster(3); + IndicesStatsResponse indicesStats = dataNodeClient().admin().indices().prepareStats().get(); + assertThat(indicesStats.getSuccessfulShards(), equalTo(100)); + ClusterHealthResponse health = client().admin() + .cluster() + .health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("1m")) + .actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(GREEN, health.getStatus()); + + String clusterManagerName = internalCluster().getClusterManagerName(); + Settings clusterManagerDataPathSettings = internalCluster().dataPathSettings(clusterManagerName); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + + internalCluster().stopCurrentClusterManagerNode(); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + + // Now start cluster manager node and post that verify batches created + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder() + .put("node.name", clusterManagerName) + .put(clusterManagerDataPathSettings) + .put(ShardsBatchGatewayAllocator.GATEWAY_ALLOCATOR_BATCH_SIZE.getKey(), 5) + .put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true) + .build() + ); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); // to avoid any race condition in test + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + assertEquals(10, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(10, gatewayAllocator.getNumberOfStoreShardBatches()); + health = client(internalCluster().getClusterManagerName()).admin().cluster().health(Requests.clusterHealthRequest()).actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(RED, health.getStatus()); + assertEquals(100, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(0, health.getActiveShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(0, health.getNumberOfDataNodes()); + + // Now start both data nodes and ensure batch mode is working + logger.info("--> restarting the stopped nodes"); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + ensureStableCluster(3); + + // wait for cluster to turn green + health = client().admin().cluster().health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("5m")).actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(GREEN, health.getStatus()); + assertEquals(0, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(100, health.getActiveShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(2, health.getNumberOfDataNodes()); + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + } + + public void testCulpritShardInBatch() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes(3); + createNIndices(4, "test"); + ensureStableCluster(4); + ClusterHealthResponse health = client().admin() + .cluster() + .health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("5m")) + .actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(GREEN, health.getStatus()); + assertEquals(8, health.getActiveShards()); + + String culpritShardIndexName = "test0"; + Index idx = resolveIndex(culpritShardIndexName); + for (String node : internalCluster().nodesInclude(culpritShardIndexName)) { + IndicesService indexServices = internalCluster().getInstance(IndicesService.class, node); + IndexService indexShards = indexServices.indexServiceSafe(idx); + Integer shardId = 0; + IndexShard shard = indexShards.getShard(0); + logger.debug("--> failing shard [{}] on node [{}]", shardId, node); + shard.failShard("test", new CorruptIndexException("test corrupted", "")); + logger.debug("--> failed shard [{}] on node [{}]", shardId, node); + } + + String clusterManagerName = internalCluster().getClusterManagerName(); + Settings clusterManagerDataPathSettings = internalCluster().dataPathSettings(clusterManagerName); + Settings node0DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(0)); + Settings node1DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(1)); + Settings node2DataPathSettings = internalCluster().dataPathSettings(dataOnlyNodes.get(2)); + + internalCluster().stopCurrentClusterManagerNode(); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(0))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(1))); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataOnlyNodes.get(2))); + + // Now start cluster manager node and post that verify batches created + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder() + .put("node.name", clusterManagerName) + .put(clusterManagerDataPathSettings) + .put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true) + .build() + ); + ensureStableCluster(1); + + logger.info("--> Now do a protective reroute"); // to avoid any race condition in test + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + assertEquals(1, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(1, gatewayAllocator.getNumberOfStoreShardBatches()); + assertTrue(clusterRerouteResponse.isAcknowledged()); + health = client(internalCluster().getClusterManagerName()).admin().cluster().health(Requests.clusterHealthRequest()).actionGet(); + assertFalse(health.isTimedOut()); + assertEquals(RED, health.getStatus()); + assertEquals(8, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(0, health.getActiveShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(0, health.getNumberOfDataNodes()); + + logger.info("--> restarting the stopped nodes"); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); + internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(2)).put(node2DataPathSettings).build()); + ensureStableCluster(4); + + health = client().admin().cluster().health(Requests.clusterHealthRequest().waitForGreenStatus().timeout("1m")).actionGet(); + + assertEquals(RED, health.getStatus()); + assertTrue(health.isTimedOut()); + assertEquals(0, health.getNumberOfPendingTasks()); + assertEquals(0, health.getNumberOfInFlightFetch()); + assertEquals(6, health.getActiveShards()); + assertEquals(2, health.getUnassignedShards()); + assertEquals(0, health.getInitializingShards()); + assertEquals(0, health.getRelocatingShards()); + assertEquals(3, health.getNumberOfDataNodes()); + } + + private void createNIndices(int n, String prefix) { + + for (int i = 0; i < n; i++) { + createIndex( + prefix + i, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + // index doc2 + client().prepareIndex(prefix + i).setId("1").setSource("foo", "bar").get(); + + // index doc 2 + client().prepareIndex(prefix + i).setId("2").setSource("foo2", "bar2").get(); + ensureGreen(prefix + i); + } + } + public void testSingleShardFetchUsingBatchAction() { String indexName = "test"; int numOfShards = 1; @@ -909,6 +1190,56 @@ public void testShardStoreFetchCorruptedIndexUsingBatchAction() throws Exception assertNodeStoreFilesMetadataSuccessCase(nodeStoreFilesMetadata.get(shardId2), shardId2); } + public void testDeleteRedIndexInBatchMode() throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + List dataOnlyNodes = internalCluster().startDataOnlyNodes( + 2, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true).build() + ); + createIndex( + "test", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + createIndex( + "test2", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + createIndex( + "testg", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build() + ); + + ensureGreen("test", "test1", "test2", "testg"); + internalCluster().stopRandomDataNode(); + ensureStableCluster(2); + + ShardsBatchGatewayAllocator gatewayAllocator = internalCluster().getInstance( + ShardsBatchGatewayAllocator.class, + internalCluster().getClusterManagerName() + ); + ensureRed("test", "test1", "test2"); + + assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); + + logger.info("--> Now do a reroute so batches are created"); // to avoid any race condition in test + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + AcknowledgedResponse deleteIndexResponse = client().admin().indices().prepareDelete("test").get(); + assertTrue(deleteIndexResponse.isAcknowledged()); + + ensureYellow("testg"); + IndicesExistsResponse indexExistResponse = client().admin().indices().prepareExists("test").get(); + assertFalse(indexExistResponse.isExists()); + } + private void prepareIndices(String[] indices, int numberOfPrimaryShards, int numberOfReplicaShards) { for (String index : indices) { createIndex( diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index b846d382db89d..aa9101090b6d5 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -93,6 +93,7 @@ import org.opensearch.core.common.io.stream.Writeable.Reader; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.GatewayAllocator; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.ingest.IngestMetadata; import org.opensearch.persistent.PersistentTasksCustomMetadata; import org.opensearch.persistent.PersistentTasksNodeService; @@ -153,7 +154,13 @@ public ClusterModule( this.shardsAllocator = createShardsAllocator(settings, clusterService.getClusterSettings(), clusterPlugins); this.clusterService = clusterService; this.indexNameExpressionResolver = new IndexNameExpressionResolver(threadContext); - this.allocationService = new AllocationService(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService); + this.allocationService = new AllocationService( + allocationDeciders, + shardsAllocator, + clusterInfoService, + snapshotsInfoService, + settings + ); } public static List getNamedWriteables() { @@ -423,6 +430,7 @@ public AllocationService getAllocationService() { @Override protected void configure() { bind(GatewayAllocator.class).asEagerSingleton(); + bind(ShardsBatchGatewayAllocator.class).asEagerSingleton(); bind(AllocationService.class).toInstance(allocationService); bind(ClusterService.class).toInstance(clusterService); bind(NodeConnectionsService.class).asEagerSingleton(); @@ -442,10 +450,10 @@ protected void configure() { bind(ShardsAllocator.class).toInstance(shardsAllocator); } - public void setExistingShardsAllocators(GatewayAllocator gatewayAllocator) { + public void setExistingShardsAllocators(GatewayAllocator gatewayAllocator, ShardsBatchGatewayAllocator shardsBatchGatewayAllocator) { final Map existingShardsAllocators = new HashMap<>(); existingShardsAllocators.put(GatewayAllocator.ALLOCATOR_NAME, gatewayAllocator); - + existingShardsAllocators.put(ShardsBatchGatewayAllocator.ALLOCATOR_NAME, shardsBatchGatewayAllocator); for (ClusterPlugin clusterPlugin : clusterPlugins) { for (Map.Entry existingShardsAllocatorEntry : clusterPlugin.getExistingShardsAllocators() .entrySet()) { diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java index 15ec41d5c3fbb..24c3077960444 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java @@ -204,6 +204,10 @@ public int size() { return shards.size(); } + public Collection getInitializingShards() { + return initializingShards; + } + /** * Add a new shard to this node * @param shard Shard to create on this Node diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java index a5e2175f05c51..d6b364887b560 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java @@ -35,6 +35,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.Version; import org.opensearch.cluster.ClusterInfoService; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.RestoreInProgress; @@ -54,8 +55,10 @@ import org.opensearch.cluster.routing.allocation.command.AllocationCommands; import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; import org.opensearch.cluster.routing.allocation.decider.Decision; +import org.opensearch.common.settings.Settings; import org.opensearch.gateway.GatewayAllocator; import org.opensearch.gateway.PriorityComparator; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.snapshots.SnapshotsInfoService; import java.util.ArrayList; @@ -73,6 +76,7 @@ import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING; +import static org.opensearch.cluster.routing.allocation.ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE; /** * This service manages the node allocation of a cluster. For this reason the @@ -87,6 +91,7 @@ public class AllocationService { private static final Logger logger = LogManager.getLogger(AllocationService.class); private final AllocationDeciders allocationDeciders; + private Settings settings; private Map existingShardsAllocators; private final ShardsAllocator shardsAllocator; private final ClusterInfoService clusterInfoService; @@ -109,11 +114,23 @@ public AllocationService( ShardsAllocator shardsAllocator, ClusterInfoService clusterInfoService, SnapshotsInfoService snapshotsInfoService + ) { + this(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService, Settings.EMPTY); + } + + public AllocationService( + AllocationDeciders allocationDeciders, + ShardsAllocator shardsAllocator, + ClusterInfoService clusterInfoService, + SnapshotsInfoService snapshotsInfoService, + Settings settings + ) { this.allocationDeciders = allocationDeciders; this.shardsAllocator = shardsAllocator; this.clusterInfoService = clusterInfoService; this.snapshotsInfoService = snapshotsInfoService; + this.settings = settings; } /** @@ -548,6 +565,20 @@ private void allocateExistingUnassignedShards(RoutingAllocation allocation) { existingShardsAllocator.beforeAllocation(allocation); } + /* + Use batch mode if enabled and there is no custom allocator set for Allocation service + */ + Boolean batchModeEnabled = EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(settings); + if (batchModeEnabled && allocation.nodes().getMinNodeVersion().onOrAfter(Version.V_3_0_0) && existingShardsAllocators.size() == 2) { + /* + If we do not have any custom allocator set then we will be using ShardsBatchGatewayAllocator + Currently AllocationService will not run any custom Allocator that implements allocateAllUnassignedShards + */ + allocateAllUnassignedShards(allocation); + return; + } + logger.warn("Falling back to single shard assignment since batch mode disable or multiple custom allocators set"); + final RoutingNodes.UnassignedShards.UnassignedIterator primaryIterator = allocation.routingNodes().unassigned().iterator(); while (primaryIterator.hasNext()) { final ShardRouting shardRouting = primaryIterator.next(); @@ -569,6 +600,14 @@ private void allocateExistingUnassignedShards(RoutingAllocation allocation) { } } + private void allocateAllUnassignedShards(RoutingAllocation allocation) { + ExistingShardsAllocator allocator = existingShardsAllocators.get(ShardsBatchGatewayAllocator.ALLOCATOR_NAME); + allocator.allocateAllUnassignedShards(allocation, true); + allocator.afterPrimariesBeforeReplicas(allocation); + // Replicas Assignment + allocator.allocateAllUnassignedShards(allocation, false); + } + private void disassociateDeadNodes(RoutingAllocation allocation) { for (Iterator it = allocation.routingNodes().mutableIterator(); it.hasNext();) { RoutingNode node = it.next(); diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java index f1889cdf780d4..fb2a37237f8b6 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/ExistingShardsAllocator.java @@ -39,12 +39,13 @@ import org.opensearch.common.Nullable; import org.opensearch.common.settings.Setting; import org.opensearch.gateway.GatewayAllocator; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import java.util.List; /** * Searches for, and allocates, shards for which there is an existing on-disk copy somewhere in the cluster. The default implementation is - * {@link GatewayAllocator}, but plugins can supply their own implementations too. + * {@link GatewayAllocator} and {@link ShardsBatchGatewayAllocator}, but plugins can supply their own implementations too. * * @opensearch.internal */ @@ -60,6 +61,26 @@ public interface ExistingShardsAllocator { Setting.Property.PrivateIndex ); + /** + * Boolean setting to enable/disable batch allocation of unassigned shards already existing on disk. + * This will allow sending all Unassigned Shards to the ExistingShard Allocator to make decision to allocate + * in one or more go. + * + * Enable this setting if your ExistingShardAllocator is implementing the + * {@link ExistingShardsAllocator#allocateAllUnassignedShards(RoutingAllocation, boolean)} method. + * The default implementation of this method is not optimized and assigns shards one by one. + * + * If no plugin overrides {@link ExistingShardsAllocator} then default implementation will be use for it , i.e, + * {@link ShardsBatchGatewayAllocator}. + * + * This setting is experimental at this point. + */ + Setting EXISTING_SHARDS_ALLOCATOR_BATCH_MODE = Setting.boolSetting( + "cluster.allocator.existing_shards_allocator.batch_enabled", + false, + Setting.Property.NodeScope + ); + /** * Called before starting a round of allocation, allowing the allocator to invalidate some caches if appropriate. */ @@ -80,6 +101,23 @@ void allocateUnassigned( UnassignedAllocationHandler unassignedAllocationHandler ); + /** + * Allocate all unassigned shards in the given {@link RoutingAllocation} for which this {@link ExistingShardsAllocator} is responsible. + * Default implementation calls {@link #allocateUnassigned(ShardRouting, RoutingAllocation, UnassignedAllocationHandler)} for each Unassigned shard + * and is kept here for backward compatibility. + * + * Allocation service will currently run the default implementation of it implemented by {@link ShardsBatchGatewayAllocator} + */ + default void allocateAllUnassignedShards(RoutingAllocation allocation, boolean primary) { + RoutingNodes.UnassignedShards.UnassignedIterator iterator = allocation.routingNodes().unassigned().iterator(); + while (iterator.hasNext()) { + ShardRouting shardRouting = iterator.next(); + if (shardRouting.primary() == primary) { + allocateUnassigned(shardRouting, allocation, iterator); + } + } + } + /** * Returns an explanation for a single unassigned shard. */ diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index c70f22be518f2..ded844b3a7f18 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -64,6 +64,7 @@ import org.opensearch.cluster.routing.OperationRouting; import org.opensearch.cluster.routing.allocation.AwarenessReplicaBalance; import org.opensearch.cluster.routing.allocation.DiskThresholdSettings; +import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider; @@ -102,6 +103,7 @@ import org.opensearch.gateway.DanglingIndicesState; import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.PersistedClusterStateService; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpTransportSettings; import org.opensearch.index.IndexModule; @@ -268,6 +270,7 @@ public void apply(Settings value, Settings current, Settings previous) { DanglingIndicesState.AUTO_IMPORT_DANGLING_INDICES_SETTING, EnableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ENABLE_SETTING, EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING, + ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE, FilterAllocationDecider.CLUSTER_ROUTING_INCLUDE_GROUP_SETTING, FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING, FilterAllocationDecider.CLUSTER_ROUTING_REQUIRE_GROUP_SETTING, @@ -330,6 +333,7 @@ public void apply(Settings value, Settings current, Settings previous) { GatewayService.RECOVER_AFTER_MASTER_NODES_SETTING, GatewayService.RECOVER_AFTER_NODES_SETTING, GatewayService.RECOVER_AFTER_TIME_SETTING, + ShardsBatchGatewayAllocator.GATEWAY_ALLOCATOR_BATCH_SIZE, PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD, NetworkModule.HTTP_DEFAULT_TYPE_SETTING, NetworkModule.TRANSPORT_DEFAULT_TYPE_SETTING, diff --git a/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java b/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java index e0831293fc7e1..eed5de65258fc 100644 --- a/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/BaseGatewayShardAllocator.java @@ -135,6 +135,8 @@ private void executeDecision( } } + public void allocateUnassignedBatch(String batchId, RoutingAllocation allocation) {} + protected long getExpectedShardSize(ShardRouting shardRouting, RoutingAllocation allocation) { if (shardRouting.primary()) { if (shardRouting.recoverySource().getType() == RecoverySource.Type.SNAPSHOT) { diff --git a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java index 3459f1591b633..be7867b7823f6 100644 --- a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java @@ -56,7 +56,7 @@ public void processExistingRecoveries(RoutingAllocation allocation, List GATEWAY_ALLOCATOR_BATCH_SIZE = Setting.longSetting( + "cluster.allocator.gateway.batch_size", + DEFAULT_SHARD_BATCH_SIZE, + 1, + 10000, + Setting.Property.NodeScope + ); + + private final RerouteService rerouteService; + private final PrimaryShardBatchAllocator primaryShardBatchAllocator; + private final ReplicaShardBatchAllocator replicaShardBatchAllocator; + private Set lastSeenEphemeralIds = Collections.emptySet(); + + // visible for testing + protected final ConcurrentMap batchIdToStartedShardBatch = ConcurrentCollections.newConcurrentMap(); + + // visible for testing + protected final ConcurrentMap batchIdToStoreShardBatch = ConcurrentCollections.newConcurrentMap(); + private final TransportNodesListGatewayStartedShardsBatch batchStartedAction; + private final TransportNodesListShardStoreMetadataBatch batchStoreAction; + + @Inject + public ShardsBatchGatewayAllocator( + RerouteService rerouteService, + TransportNodesListGatewayStartedShardsBatch batchStartedAction, + TransportNodesListShardStoreMetadataBatch batchStoreAction, + Settings settings + ) { + this.rerouteService = rerouteService; + this.primaryShardBatchAllocator = new InternalPrimaryBatchShardAllocator(); + this.replicaShardBatchAllocator = new InternalReplicaBatchShardAllocator(); + this.batchStartedAction = batchStartedAction; + this.batchStoreAction = batchStoreAction; + this.maxBatchSize = GATEWAY_ALLOCATOR_BATCH_SIZE.get(settings); + } + + @Override + public void cleanCaches() { + Stream.of(batchIdToStartedShardBatch, batchIdToStoreShardBatch).forEach(b -> { + Releasables.close(b.values().stream().map(shardsBatch -> shardsBatch.asyncBatch).collect(Collectors.toList())); + b.clear(); + }); + } + + // for tests + protected ShardsBatchGatewayAllocator() { + this.rerouteService = null; + this.batchStartedAction = null; + this.primaryShardBatchAllocator = null; + this.batchStoreAction = null; + this.replicaShardBatchAllocator = null; + this.maxBatchSize = DEFAULT_SHARD_BATCH_SIZE; + } + + // for tests + + @Override + public int getNumberOfInFlightFetches() { + int count = 0; + for (ShardsBatch batch : batchIdToStartedShardBatch.values()) { + count += (batch.getNumberOfInFlightFetches() * batch.getBatchedShards().size()); + } + for (ShardsBatch batch : batchIdToStoreShardBatch.values()) { + count += (batch.getNumberOfInFlightFetches() * batch.getBatchedShards().size()); + } + + return count; + } + + @Override + public void applyStartedShards(final List startedShards, final RoutingAllocation allocation) { + for (ShardRouting startedShard : startedShards) { + safelyRemoveShardFromBothBatch(startedShard); + } + } + + @Override + public void applyFailedShards(final List failedShards, final RoutingAllocation allocation) { + for (FailedShard failedShard : failedShards) { + safelyRemoveShardFromBothBatch(failedShard.getRoutingEntry()); + } + } + + @Override + public void beforeAllocation(final RoutingAllocation allocation) { + assert primaryShardBatchAllocator != null; + assert replicaShardBatchAllocator != null; + ensureAsyncFetchStorePrimaryRecency(allocation); + } + + @Override + public void afterPrimariesBeforeReplicas(RoutingAllocation allocation) { + assert replicaShardBatchAllocator != null; + List> storedShardBatches = batchIdToStoreShardBatch.values() + .stream() + .map(ShardsBatch::getBatchedShardRoutings) + .collect(Collectors.toList()); + if (allocation.routingNodes().hasInactiveShards()) { + // cancel existing recoveries if we have a better match + replicaShardBatchAllocator.processExistingRecoveries(allocation, storedShardBatches); + } + } + + @Override + public void allocateUnassigned( + ShardRouting shardRouting, + RoutingAllocation allocation, + UnassignedAllocationHandler unassignedAllocationHandler + ) { + throw new UnsupportedOperationException("ShardsBatchGatewayAllocator does not support allocating unassigned shards"); + } + + @Override + public void allocateAllUnassignedShards(final RoutingAllocation allocation, boolean primary) { + + assert primaryShardBatchAllocator != null; + assert replicaShardBatchAllocator != null; + innerAllocateUnassignedBatch(allocation, primaryShardBatchAllocator, replicaShardBatchAllocator, primary); + } + + protected void innerAllocateUnassignedBatch( + RoutingAllocation allocation, + PrimaryShardBatchAllocator primaryBatchShardAllocator, + ReplicaShardBatchAllocator replicaBatchShardAllocator, + boolean primary + ) { + // create batches for unassigned shards + Set batchesToAssign = createAndUpdateBatches(allocation, primary); + if (batchesToAssign.isEmpty()) { + return; + } + if (primary) { + batchIdToStartedShardBatch.values() + .stream() + .filter(batch -> batchesToAssign.contains(batch.batchId)) + .forEach( + shardsBatch -> primaryBatchShardAllocator.allocateUnassignedBatch(shardsBatch.getBatchedShardRoutings(), allocation) + ); + } else { + batchIdToStoreShardBatch.values() + .stream() + .filter(batch -> batchesToAssign.contains(batch.batchId)) + .forEach(batch -> replicaBatchShardAllocator.allocateUnassignedBatch(batch.getBatchedShardRoutings(), allocation)); + } + } + + // visible for testing + protected Set createAndUpdateBatches(RoutingAllocation allocation, boolean primary) { + Set batchesToBeAssigned = new HashSet<>(); + RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned(); + ConcurrentMap currentBatches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + // get all batched shards + Map currentBatchedShards = new HashMap<>(); + for (Map.Entry batchEntry : currentBatches.entrySet()) { + batchEntry.getValue().getBatchedShards().forEach(shardId -> currentBatchedShards.put(shardId, batchEntry.getKey())); + } + + Set newShardsToBatch = Sets.newHashSet(); + Set batchedShardsToAssign = Sets.newHashSet(); + // add all unassigned shards to the batch if they are not already in a batch + unassigned.forEach(shardRouting -> { + if ((currentBatchedShards.containsKey(shardRouting.shardId()) == false) && (shardRouting.primary() == primary)) { + assert shardRouting.unassigned(); + newShardsToBatch.add(shardRouting); + } + // if shard is already batched update to latest shardRouting information in the batches + // Replica shard assignment can be cancelled if we get a better match. These ShardRouting objects also + // store other information like relocating node, targetRelocatingShard etc. And it can be updated after + // batches are created. If we don't update the ShardRouting object, stale data would be passed from the + // batch. This stale data can end up creating a same decision which has already been taken, and we'll see + // failure in executeDecision of BaseGatewayShardAllocator. Previous non-batch mode flow also used to + // pass ShardRouting object directly from unassignedIterator, so we're following the same behaviour. + else if (shardRouting.primary() == primary) { + String batchId = currentBatchedShards.get(shardRouting.shardId()); + batchesToBeAssigned.add(batchId); + currentBatches.get(batchId).batchInfo.get(shardRouting.shardId()).setShardRouting(shardRouting); + batchedShardsToAssign.add(shardRouting.shardId()); + } + }); + + allocation.routingNodes().forEach(routingNode -> routingNode.getInitializingShards().forEach(shardRouting -> { + if (currentBatchedShards.containsKey(shardRouting.shardId()) && shardRouting.primary() == primary) { + batchedShardsToAssign.add(shardRouting.shardId()); + // Set updated shard routing in batch if it already exists + String batchId = currentBatchedShards.get(shardRouting.shardId()); + currentBatches.get(batchId).batchInfo.get(shardRouting.shardId()).setShardRouting(shardRouting); + } + })); + + refreshShardBatches(currentBatches, batchedShardsToAssign, primary); + + Iterator iterator = newShardsToBatch.iterator(); + assert maxBatchSize > 0 : "Shards batch size must be greater than 0"; + + long batchSize = maxBatchSize; + Map perBatchShards = new HashMap<>(); + while (iterator.hasNext()) { + ShardRouting currentShard = iterator.next(); + ShardEntry shardEntry = new ShardEntry( + new ShardAttributes( + IndexMetadata.INDEX_DATA_PATH_SETTING.get(allocation.metadata().index(currentShard.index()).getSettings()) + ), + currentShard + ); + perBatchShards.put(currentShard.shardId(), shardEntry); + batchSize--; + iterator.remove(); + // add to batch if batch size full or last shard in unassigned list + if (batchSize == 0 || iterator.hasNext() == false) { + String batchUUId = UUIDs.base64UUID(); + ShardsBatch shardsBatch = new ShardsBatch(batchUUId, perBatchShards, primary); + // add the batch to list of current batches + addBatch(shardsBatch, primary); + batchesToBeAssigned.add(batchUUId); + perBatchShards.clear(); + batchSize = maxBatchSize; + } + } + return batchesToBeAssigned; + } + + private void refreshShardBatches( + ConcurrentMap currentBatches, + Set batchedShardsToAssign, + boolean primary + ) { + // cleanup shard from batches if they are not present in unassigned list from allocation object. This is + // needed as AllocationService.reroute can also be called directly by API flows for example DeleteIndices. + // So, as part of calling reroute, those shards will be removed from allocation object. It'll handle the + // scenarios where shards can be removed from unassigned list without "start" or "failed" event. + for (Map.Entry batchEntry : currentBatches.entrySet()) { + Iterator shardIdIterator = batchEntry.getValue().getBatchedShards().iterator(); + while (shardIdIterator.hasNext()) { + ShardId shardId = shardIdIterator.next(); + if (batchedShardsToAssign.contains(shardId) == false) { + shardIdIterator.remove(); + batchEntry.getValue().clearShardFromCache(shardId); + } + } + ConcurrentMap batches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + deleteBatchIfEmpty(batches, batchEntry.getValue().getBatchId()); + } + } + + private void addBatch(ShardsBatch shardsBatch, boolean primary) { + ConcurrentMap batches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + if (batches.containsKey(shardsBatch.getBatchId())) { + throw new IllegalStateException("Batch already exists. BatchId = " + shardsBatch.getBatchId()); + } + batches.put(shardsBatch.getBatchId(), shardsBatch); + } + + /** + * Safely remove a shard from the appropriate batch depending on if it is primary or replica + * If the shard is not in a batch, this is a no-op. + * Cleans the batch if it is empty after removing the shard. + * This method should be called when removing the shard from the batch instead {@link ShardsBatch#removeFromBatch(ShardRouting)} + * so that we can clean up the batch if it is empty and release the fetching resources + * + * @param shardRouting shard to be removed + * @param primary from which batch shard needs to be removed + */ + protected void safelyRemoveShardFromBatch(ShardRouting shardRouting, boolean primary) { + String batchId = primary ? getBatchId(shardRouting, true) : getBatchId(shardRouting, false); + if (batchId == null) { + logger.debug("Shard[{}] is not batched", shardRouting); + return; + } + ConcurrentMap batches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + ShardsBatch batch = batches.get(batchId); + batch.removeFromBatch(shardRouting); + deleteBatchIfEmpty(batches, batchId); + } + + /** + * Safely remove shard from both the batches irrespective of its primary or replica, + * For the corresponding shardId. The method intends to clean up the batch if it is empty + * after removing the shard + * @param shardRouting shard to remove + */ + protected void safelyRemoveShardFromBothBatch(ShardRouting shardRouting) { + safelyRemoveShardFromBatch(shardRouting, true); + safelyRemoveShardFromBatch(shardRouting, false); + } + + private void deleteBatchIfEmpty(ConcurrentMap batches, String batchId) { + if (batches.containsKey(batchId)) { + ShardsBatch batch = batches.get(batchId); + if (batch.getBatchedShards().isEmpty()) { + Releasables.close(batch.getAsyncFetcher()); + batches.remove(batchId); + } + } + } + + protected String getBatchId(ShardRouting shardRouting, boolean primary) { + ConcurrentMap batches = primary ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + + return batches.entrySet() + .stream() + .filter(entry -> entry.getValue().getBatchedShards().contains(shardRouting.shardId())) + .findFirst() + .map(Map.Entry::getKey) + .orElse(null); + } + + @Override + public AllocateUnassignedDecision explainUnassignedShardAllocation(ShardRouting unassignedShard, RoutingAllocation routingAllocation) { + assert unassignedShard.unassigned(); + assert routingAllocation.debugDecision(); + if (getBatchId(unassignedShard, unassignedShard.primary()) == null) { + createAndUpdateBatches(routingAllocation, unassignedShard.primary()); + } + assert getBatchId(unassignedShard, unassignedShard.primary()) != null; + if (unassignedShard.primary()) { + assert primaryShardBatchAllocator != null; + return primaryShardBatchAllocator.makeAllocationDecision(unassignedShard, routingAllocation, logger); + } else { + assert replicaShardBatchAllocator != null; + return replicaShardBatchAllocator.makeAllocationDecision(unassignedShard, routingAllocation, logger); + } + } + + /** + * Clear the fetched data for the primary to ensure we do not cancel recoveries based on excessively stale data. + */ + private void ensureAsyncFetchStorePrimaryRecency(RoutingAllocation allocation) { + DiscoveryNodes nodes = allocation.nodes(); + if (hasNewNodes(nodes)) { + final Set newEphemeralIds = StreamSupport.stream(Spliterators.spliterator(nodes.getDataNodes().entrySet(), 0), false) + .map(node -> node.getValue().getEphemeralId()) + .collect(Collectors.toSet()); + // Invalidate the cache if a data node has been added to the cluster. This ensures that we do not cancel a recovery if a node + // drops out, we fetch the shard data, then some indexing happens and then the node rejoins the cluster again. There are other + // ways we could decide to cancel a recovery based on stale data (e.g. changing allocation filters or a primary failure) but + // making the wrong decision here is not catastrophic so we only need to cover the common case. + + logger.trace( + () -> new ParameterizedMessage( + "new nodes {} found, clearing primary async-fetch-store cache", + Sets.difference(newEphemeralIds, lastSeenEphemeralIds) + ) + ); + batchIdToStoreShardBatch.values().forEach(batch -> clearCacheForBatchPrimary(batch, allocation)); + + // recalc to also (lazily) clear out old nodes. + this.lastSeenEphemeralIds = newEphemeralIds; + } + } + + private static void clearCacheForBatchPrimary(ShardsBatch batch, RoutingAllocation allocation) { + // We need to clear the cache for the primary shard to ensure we do not cancel recoveries based on excessively + // stale data. We do this by clearing the cache of nodes for all the active primaries of replicas in the current batch. + // Although this flow can be optimized by only clearing the cache for the primary shard but currently + // when we want to fetch data we do for complete node, for doing this a new fetch flow will also handle just + // fetching the data for a single shard on the node and fill that up in our cache + // Opened issue #13352 - to track the improvement + List primaries = batch.getBatchedShards() + .stream() + .map(allocation.routingNodes()::activePrimary) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + AsyncShardBatchFetch fetch = batch.getAsyncFetcher(); + primaries.forEach(shardRouting -> fetch.clearCacheForNode(shardRouting.currentNodeId())); + } + + private boolean hasNewNodes(DiscoveryNodes nodes) { + for (final DiscoveryNode node : nodes.getDataNodes().values()) { + if (lastSeenEphemeralIds.contains(node.getEphemeralId()) == false) { + return true; + } + } + return false; + } + + class InternalBatchAsyncFetch extends AsyncShardBatchFetch { + InternalBatchAsyncFetch( + Logger logger, + String type, + Map map, + AsyncShardFetch.Lister, T> action, + String batchUUId, + Class clazz, + V emptyShardResponse, + Predicate emptyShardResponsePredicate, + ShardBatchResponseFactory responseFactory + ) { + super(logger, type, map, action, batchUUId, clazz, emptyShardResponse, emptyShardResponsePredicate, responseFactory); + } + + @Override + protected void reroute(String reroutingKey, String reason) { + logger.trace("{} scheduling reroute for {}", reroutingKey, reason); + assert rerouteService != null; + rerouteService.reroute( + "async_shard_batch_fetch", + Priority.HIGH, + ActionListener.wrap( + r -> logger.trace("{} scheduled reroute completed for {}", reroutingKey, reason), + e -> logger.debug(new ParameterizedMessage("{} scheduled reroute failed for {}", reroutingKey, reason), e) + ) + ); + } + } + + class InternalPrimaryBatchShardAllocator extends PrimaryShardBatchAllocator { + + @Override + @SuppressWarnings("unchecked") + protected AsyncShardFetch.FetchResult fetchData( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + return (AsyncShardFetch.FetchResult< + TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch>) fetchDataAndCleanIneligibleShards( + eligibleShards, + inEligibleShards, + allocation + ); + } + + } + + class InternalReplicaBatchShardAllocator extends ReplicaShardBatchAllocator { + @Override + @SuppressWarnings("unchecked") + protected AsyncShardFetch.FetchResult fetchData( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + return (AsyncShardFetch.FetchResult< + TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadataBatch>) fetchDataAndCleanIneligibleShards( + eligibleShards, + inEligibleShards, + allocation + ); + } + + @Override + protected boolean hasInitiatedFetching(ShardRouting shard) { + String batchId = getBatchId(shard, shard.primary()); + return batchId != null; + } + } + + AsyncShardFetch.FetchResult fetchDataAndCleanIneligibleShards( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + // get batch id for anyone given shard. We are assuming all shards will have same batchId + ShardRouting shardRouting = eligibleShards.iterator().hasNext() ? eligibleShards.iterator().next() : null; + shardRouting = shardRouting == null && inEligibleShards.iterator().hasNext() ? inEligibleShards.iterator().next() : shardRouting; + if (shardRouting == null) { + return new AsyncShardFetch.FetchResult<>(null, Collections.emptyMap()); + } + String batchId = getBatchId(shardRouting, shardRouting.primary()); + if (batchId == null) { + logger.debug("Shard {} has no batch id", shardRouting); + throw new IllegalStateException("Shard " + shardRouting + " has no batch id. Shard should batched before fetching"); + } + ConcurrentMap batches = shardRouting.primary() ? batchIdToStartedShardBatch : batchIdToStoreShardBatch; + if (batches.containsKey(batchId) == false) { + logger.debug("Batch {} has no shards batch", batchId); + throw new IllegalStateException("Batch " + batchId + " has no shards batch"); + } + + ShardsBatch shardsBatch = batches.get(batchId); + // remove in eligible shards which allocator is not responsible for + inEligibleShards.forEach(sr -> safelyRemoveShardFromBatch(sr, sr.primary())); + + if (shardsBatch.getBatchedShards().isEmpty() && eligibleShards.isEmpty()) { + logger.debug("Batch {} is empty", batchId); + return new AsyncShardFetch.FetchResult<>(null, Collections.emptyMap()); + } + Map> shardToIgnoreNodes = new HashMap<>(); + for (ShardId shardId : shardsBatch.asyncBatch.shardAttributesMap.keySet()) { + shardToIgnoreNodes.put(shardId, allocation.getIgnoreNodes(shardId)); + } + AsyncShardBatchFetch asyncFetcher = shardsBatch.getAsyncFetcher(); + AsyncShardFetch.FetchResult fetchResult = asyncFetcher.fetchData( + allocation.nodes(), + shardToIgnoreNodes + ); + if (fetchResult.hasData()) { + fetchResult.processAllocation(allocation); + } + + return fetchResult; + } + + /** + * Holds information about a batch of shards to be allocated. + * Async fetcher is used to fetch the data for the batch. + *

+ * Visible for testing + */ + public class ShardsBatch { + private final String batchId; + private final boolean primary; + + private final InternalBatchAsyncFetch asyncBatch; + + private final Map batchInfo; + + public ShardsBatch(String batchId, Map shardsWithInfo, boolean primary) { + this.batchId = batchId; + this.batchInfo = new HashMap<>(shardsWithInfo); + // create a ShardId -> customDataPath map for async fetch + Map shardIdsMap = batchInfo.entrySet() + .stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().getShardAttributes())); + this.primary = primary; + if (this.primary) { + asyncBatch = new InternalBatchAsyncFetch<>( + logger, + "batch_shards_started", + shardIdsMap, + batchStartedAction, + batchId, + GatewayStartedShard.class, + new GatewayStartedShard(null, false, null, null), + GatewayStartedShard::isEmpty, + new ShardBatchResponseFactory<>(true) + ); + } else { + asyncBatch = new InternalBatchAsyncFetch<>( + logger, + "batch_shards_store", + shardIdsMap, + batchStoreAction, + batchId, + NodeStoreFilesMetadata.class, + new NodeStoreFilesMetadata(new StoreFilesMetadata(null, Store.MetadataSnapshot.EMPTY, Collections.emptyList()), null), + NodeStoreFilesMetadata::isEmpty, + new ShardBatchResponseFactory<>(false) + ); + } + } + + protected void removeShard(ShardId shardId) { + this.batchInfo.remove(shardId); + } + + private TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata buildEmptyReplicaShardResponse() { + return new TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata( + new TransportNodesListShardStoreMetadataHelper.StoreFilesMetadata( + null, + Store.MetadataSnapshot.EMPTY, + Collections.emptyList() + ), + null + ); + } + + private void removeFromBatch(ShardRouting shard) { + removeShard(shard.shardId()); + clearShardFromCache(shard.shardId()); + // assert that fetcher and shards are the same as batched shards + assert batchInfo.size() == asyncBatch.shardAttributesMap.size() : "Shards size is not equal to fetcher size"; + } + + private void clearShardFromCache(ShardId shardId) { + asyncBatch.clearShard(shardId); + } + + public List getBatchedShardRoutings() { + return batchInfo.values().stream().map(ShardEntry::getShardRouting).collect(Collectors.toList()); + } + + public Set getBatchedShards() { + return batchInfo.keySet(); + } + + public String getBatchId() { + return batchId; + } + + public AsyncShardBatchFetch getAsyncFetcher() { + return asyncBatch; + } + + public int getNumberOfInFlightFetches() { + return asyncBatch.getNumberOfInFlightFetches(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o instanceof ShardsBatch == false) { + return false; + } + ShardsBatch shardsBatch = (ShardsBatch) o; + return batchId.equals(shardsBatch.getBatchId()) && batchInfo.keySet().equals(shardsBatch.getBatchedShards()); + } + + @Override + public int hashCode() { + return Objects.hash(batchId); + } + + @Override + public String toString() { + return "batchId: " + batchId; + } + + } + + /** + * Holds information about a shard to be allocated in a batch. + */ + static class ShardEntry { + + private final ShardAttributes shardAttributes; + + private ShardRouting shardRouting; + + public ShardEntry(ShardAttributes shardAttributes, ShardRouting shardRouting) { + this.shardAttributes = shardAttributes; + this.shardRouting = shardRouting; + } + + public ShardRouting getShardRouting() { + return shardRouting; + } + + public ShardAttributes getShardAttributes() { + return shardAttributes; + } + + public ShardEntry setShardRouting(ShardRouting shardRouting) { + this.shardRouting = shardRouting; + return this; + } + } + + public int getNumberOfStartedShardBatches() { + return batchIdToStartedShardBatch.size(); + } + + public int getNumberOfStoreShardBatches() { + return batchIdToStoreShardBatch.size(); + } +} diff --git a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java index 85d5bff4677ef..22b03539cca74 100644 --- a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java +++ b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java @@ -276,7 +276,7 @@ public void writeTo(StreamOutput out) throws IOException { } } - boolean isEmpty(NodeStoreFilesMetadata response) { + public static boolean isEmpty(NodeStoreFilesMetadata response) { return response.storeFilesMetadata() == null || response.storeFilesMetadata().isEmpty() && response.getStoreFileFetchException() == null; } @@ -329,7 +329,13 @@ public static class NodeStoreFilesMetadataBatch extends BaseNodeResponse { protected NodeStoreFilesMetadataBatch(StreamInput in) throws IOException { super(in); - this.nodeStoreFilesMetadataBatch = in.readMap(ShardId::new, NodeStoreFilesMetadata::new); + this.nodeStoreFilesMetadataBatch = in.readMap(ShardId::new, i -> { + if (i.readBoolean()) { + return new NodeStoreFilesMetadata(i); + } else { + return null; + } + }); } public NodeStoreFilesMetadataBatch(DiscoveryNode node, Map nodeStoreFilesMetadataBatch) { @@ -344,7 +350,14 @@ public Map getNodeStoreFilesMetadataBatch() { @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeMap(nodeStoreFilesMetadataBatch, (o, k) -> k.writeTo(o), (o, v) -> v.writeTo(o)); + out.writeMap(nodeStoreFilesMetadataBatch, (o, k) -> k.writeTo(o), (o, v) -> { + if (v != null) { + o.writeBoolean(true); + v.writeTo(o); + } else { + o.writeBoolean(false); + } + }); } } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 47f128af438a6..b4f2a303c7024 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -136,6 +136,7 @@ import org.opensearch.gateway.GatewayService; import org.opensearch.gateway.MetaStateService; import org.opensearch.gateway.PersistedClusterStateService; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.http.HttpServerTransport; import org.opensearch.identity.IdentityService; @@ -1333,9 +1334,12 @@ protected Node( // We allocate copies of existing shards by looking for a viable copy of the shard in the cluster and assigning the shard there. // The search for viable copies is triggered by an allocation attempt (i.e. a reroute) and is performed asynchronously. When it // completes we trigger another reroute to try the allocation again. This means there is a circular dependency: the allocation - // service needs access to the existing shards allocators (e.g. the GatewayAllocator) which need to be able to trigger a - // reroute, which needs to call into the allocation service. We close the loop here: - clusterModule.setExistingShardsAllocators(injector.getInstance(GatewayAllocator.class)); + // service needs access to the existing shards allocators (e.g. the GatewayAllocator, ShardsBatchGatewayAllocator) which + // need to be able to trigger a reroute, which needs to call into the allocation service. We close the loop here: + clusterModule.setExistingShardsAllocators( + injector.getInstance(GatewayAllocator.class), + injector.getInstance(ShardsBatchGatewayAllocator.class) + ); List pluginLifecycleComponents = pluginComponents.stream() .filter(p -> p instanceof LifecycleComponent) diff --git a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java index b30ebaf183084..557e4dc2ca8c5 100644 --- a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java @@ -73,6 +73,7 @@ import org.opensearch.gateway.GatewayAllocator; import org.opensearch.plugins.ClusterPlugin; import org.opensearch.test.gateway.TestGatewayAllocator; +import org.opensearch.test.gateway.TestShardBatchGatewayAllocator; import java.util.Arrays; import java.util.Collection; @@ -296,7 +297,10 @@ public void testRejectsReservedExistingShardsAllocatorName() { null, threadContext ); - expectThrows(IllegalArgumentException.class, () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator())); + expectThrows( + IllegalArgumentException.class, + () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator(), new TestShardBatchGatewayAllocator()) + ); } public void testRejectsDuplicateExistingShardsAllocatorName() { @@ -308,7 +312,10 @@ public void testRejectsDuplicateExistingShardsAllocatorName() { null, threadContext ); - expectThrows(IllegalArgumentException.class, () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator())); + expectThrows( + IllegalArgumentException.class, + () -> clusterModule.setExistingShardsAllocators(new TestGatewayAllocator(), new TestShardBatchGatewayAllocator()) + ); } private static ClusterPlugin existingShardsAllocatorPlugin(final String allocatorName) { diff --git a/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java new file mode 100644 index 0000000000000..bb59a5792ec8c --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java @@ -0,0 +1,360 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.Version; +import org.opensearch.action.support.nodes.BaseNodeResponse; +import org.opensearch.cluster.ClusterInfo; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.snapshots.SnapshotShardSizeInfo; +import org.opensearch.test.gateway.TestShardBatchGatewayAllocator; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class GatewayAllocatorTests extends OpenSearchAllocationTestCase { + + private final Logger logger = LogManager.getLogger(GatewayAllocatorTests.class); + TestShardBatchGatewayAllocator testShardsBatchGatewayAllocator = null; + ClusterState clusterState = null; + RoutingAllocation testAllocation = null; + String indexPrefix = "TEST"; + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(); + } + + public void testSingleBatchCreation() { + createIndexAndUpdateClusterState(1, 3, 1); + createBatchesAndAssert(1); + } + + public void testTwoBatchCreation() { + createIndexAndUpdateClusterState(2, 1020, 1); + createBatchesAndAssert(2); + + List listOfBatches = new ArrayList<>( + testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().values() + ); + assertNotEquals(listOfBatches.get(0), listOfBatches.get(1)); + + // test for replicas + listOfBatches = new ArrayList<>(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().values()); + assertNotEquals(listOfBatches.get(0), listOfBatches.get(1)); + } + + public void testNonDuplicationOfBatch() { + createIndexAndUpdateClusterState(1, 3, 1); + Tuple, Set> batches = createBatchesAndAssert(1); + assertEquals(1, batches.v1().size()); + assertEquals(1, batches.v2().size()); + + // again try to create batch and verify no new batch is created since shard is already batched and no new unassigned shard + assertEquals(batches.v1(), testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, true)); + assertEquals(batches.v2(), testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, false)); + } + + public void testCorrectnessOfBatch() { + createIndexAndUpdateClusterState(2, 1020, 1); + createBatchesAndAssert(2); + Set shardsSet1 = clusterState.routingTable() + .index(indexPrefix + 0) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShardId) + .collect(Collectors.toSet()); + Set shardsSet2 = clusterState.routingTable() + .index(indexPrefix + 1) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShardId) + .collect(Collectors.toSet()); + shardsSet1.addAll(shardsSet2); + + Set shardsInAllbatches = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShards) + .flatMap(Set::stream) + .collect(Collectors.toSet()); + assertEquals(shardsInAllbatches, shardsSet1); + shardsInAllbatches = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShards) + .flatMap(Set::stream) + .collect(Collectors.toSet()); + assertEquals(shardsInAllbatches, shardsSet1); + + Set primariesInAllBatches = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShardRoutings) + .flatMap(List::stream) + .collect(Collectors.toSet()); + primariesInAllBatches.forEach(shardRouting -> assertTrue(shardRouting.unassigned() && shardRouting.primary() == true)); + + Set replicasInAllBatches = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .values() + .stream() + .map(ShardsBatchGatewayAllocator.ShardsBatch::getBatchedShardRoutings) + .flatMap(List::stream) + .collect(Collectors.toSet()); + + replicasInAllBatches.forEach(shardRouting -> assertTrue(shardRouting.unassigned() && shardRouting.primary() == false)); + } + + public void testAsyncFetcherCreationInBatch() { + createIndexAndUpdateClusterState(1, 3, 1); + Tuple, Set> batchesTuple = createBatchesAndAssert(1); + Set primaryBatches = batchesTuple.v1(); + Set replicaBatches = batchesTuple.v2(); + + ShardsBatchGatewayAllocator.ShardsBatch shardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .get(primaryBatches.iterator().next()); + AsyncShardFetch asyncFetcher = shardsBatch.getAsyncFetcher(); + // assert asyncFetcher is not null + assertNotNull(asyncFetcher); + shardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().get(replicaBatches.iterator().next()); + asyncFetcher = shardsBatch.getAsyncFetcher(); + assertNotNull(asyncFetcher); + } + + public void testSafelyRemoveShardFromBatch() { + createIndexAndUpdateClusterState(2, 1023, 1); + + Tuple, Set> batchesTuple = createBatchesAndAssert(2); + Set primaryBatches = batchesTuple.v1(); + Set replicaBatches = batchesTuple.v2(); + + ShardsBatchGatewayAllocator.ShardsBatch primaryShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .get(primaryBatches.iterator().next()); + ShardRouting primaryShardRouting = primaryShardsBatch.getBatchedShardRoutings().iterator().next(); + assertEquals(2, replicaBatches.size()); + ShardsBatchGatewayAllocator.ShardsBatch replicaShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .get(replicaBatches.iterator().next()); + ShardRouting replicaShardRouting = replicaShardsBatch.getBatchedShardRoutings().iterator().next(); + + // delete 1 shard routing from each batch + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(primaryShardRouting); + + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(replicaShardRouting); + // verify that shard routing is removed from both batches + assertFalse(primaryShardsBatch.getBatchedShards().contains(primaryShardRouting.shardId())); + assertFalse(replicaShardsBatch.getBatchedShards().contains(replicaShardRouting.shardId())); + + // try to remove that shard again to see if its no op and doent result in exception + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(primaryShardRouting); + testShardsBatchGatewayAllocator.safelyRemoveShardFromBatch(replicaShardRouting); + + // now remove all shard routings to verify that batch only gets deleted + primaryShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBatch); + replicaShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBatch); + + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().containsKey(primaryShardsBatch.getBatchId())); + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().containsKey(replicaShardsBatch.getBatchId())); + assertEquals(1, testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().size()); + assertEquals(1, testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().size()); + } + + public void testSafelyRemoveShardFromBothBatch() { + createIndexAndUpdateClusterState(1, 3, 1); + createBatchesAndAssert(1); + ShardsBatchGatewayAllocator.ShardsBatch primaryShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .values() + .iterator() + .next(); + ShardsBatchGatewayAllocator.ShardsBatch replicaShardsBatch = testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .values() + .iterator() + .next(); + + ShardRouting anyPrimary = primaryShardsBatch.getBatchedShardRoutings().iterator().next(); + // remove first shard routing from both batches + testShardsBatchGatewayAllocator.safelyRemoveShardFromBothBatch(anyPrimary); + + // verify that shard routing is removed from both batches + assertFalse(primaryShardsBatch.getBatchedShards().contains(anyPrimary.shardId())); + assertFalse(replicaShardsBatch.getBatchedShards().contains(anyPrimary.shardId())); + + // try to remove that shard again to see if its no op and doesnt result in exception + testShardsBatchGatewayAllocator.safelyRemoveShardFromBothBatch(anyPrimary); + + // now remove all shard routings to verify that batch gets deleted + primaryShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBothBatch); + replicaShardsBatch.getBatchedShardRoutings().forEach(testShardsBatchGatewayAllocator::safelyRemoveShardFromBothBatch); + + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().containsKey(primaryShardsBatch.getBatchId())); + assertFalse(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().containsKey(replicaShardsBatch.getBatchId())); + assertEquals(0, testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().size()); + assertEquals(0, testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().size()); + } + + public void testGetBatchIdExisting() { + createIndexAndUpdateClusterState(2, 1020, 1); + // get all shardsRoutings for test index + List allShardRoutings1 = clusterState.routingTable() + .index(indexPrefix + 0) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShards) + .flatMap(List::stream) + .collect(Collectors.toList()); + List allShardRouting2 = clusterState.routingTable() + .index(indexPrefix + 1) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShards) + .flatMap(List::stream) + .collect(Collectors.toList()); + + Tuple, Set> batchesTuple = createBatchesAndAssert(2); + Set primaryBatches = batchesTuple.v1(); + Set replicaBatches = batchesTuple.v2(); + + // create a map of shards to batch id for primaries + + Map shardIdToBatchIdForStartedShards = new HashMap<>(); + allShardRoutings1.addAll(allShardRouting2); + assertEquals(4080, allShardRoutings1.size()); + for (ShardRouting shardRouting : allShardRoutings1) { + for (String batchId : primaryBatches) { + if (shardRouting.primary() == true + && testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch() + .get(batchId) + .getBatchedShards() + .contains(shardRouting.shardId())) { + if (shardIdToBatchIdForStartedShards.containsKey(shardRouting.shardId())) { + fail("found duplicate shard routing for shard. One shard cant be in multiple batches " + shardRouting.shardId()); + } + assertTrue(shardRouting.primary()); + shardIdToBatchIdForStartedShards.put(shardRouting.shardId(), batchId); + } + } + } + Map shardIdToBatchIdForStoreShards = new HashMap<>(); + + for (ShardRouting shardRouting : allShardRoutings1) { + for (String batchId : replicaBatches) { + if (shardRouting.primary() == false + && testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch() + .get(batchId) + .getBatchedShards() + .contains(shardRouting.shardId())) { + if (shardIdToBatchIdForStoreShards.containsKey(shardRouting.shardId())) { + fail("found duplicate shard routing for shard. One shard cant be in multiple batches " + shardRouting.shardId()); + } + assertFalse(shardRouting.primary()); + shardIdToBatchIdForStoreShards.put(shardRouting.shardId(), batchId); + } + } + } + + assertEquals(4080, shardIdToBatchIdForStartedShards.size() + shardIdToBatchIdForStoreShards.size()); + // now compare the maps with getBatchId() call + for (ShardRouting shardRouting : allShardRoutings1) { + if (shardRouting.primary()) { + assertEquals( + shardIdToBatchIdForStartedShards.get(shardRouting.shardId()), + testShardsBatchGatewayAllocator.getBatchId(shardRouting, true) + ); + } else { + assertEquals( + shardIdToBatchIdForStoreShards.get(shardRouting.shardId()), + testShardsBatchGatewayAllocator.getBatchId(shardRouting, false) + ); + } + } + } + + public void testGetBatchIdNonExisting() { + createIndexAndUpdateClusterState(1, 1, 1); + List allShardRoutings = clusterState.routingTable() + .index(indexPrefix + 0) + .getShards() + .values() + .stream() + .map(IndexShardRoutingTable::getShards) + .flatMap(List::stream) + .collect(Collectors.toList()); + allShardRoutings.forEach(shard -> assertNull(testShardsBatchGatewayAllocator.getBatchId(shard, shard.primary()))); + } + + private void createIndexAndUpdateClusterState(int count, int numberOfShards, int numberOfReplicas) { + if (count == 0) return; + Metadata.Builder metadata = Metadata.builder(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + for (int i = 0; i < count; i++) { + String indexName = indexPrefix + i; + metadata.put( + IndexMetadata.builder(indexName) + .settings(settings(Version.CURRENT)) + .numberOfShards(numberOfShards) + .numberOfReplicas(numberOfReplicas) + ); + } + for (int i = 0; i < count; i++) { + String indexName = indexPrefix + i; + routingTableBuilder = routingTableBuilder.addAsNew(metadata.build().index(indexName)); + } + clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata.build()) + .routingTable(routingTableBuilder.build()) + .build(); + testAllocation = new RoutingAllocation( + new AllocationDeciders(Collections.emptyList()), + new RoutingNodes(clusterState, false), + clusterState, + ClusterInfo.EMPTY, + SnapshotShardSizeInfo.EMPTY, + System.nanoTime() + ); + } + + // call this after index creation and update cluster state + private Tuple, Set> createBatchesAndAssert(int expectedBatchSize) { + Set primaryBatches = testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, true); + Set replicaBatches = testShardsBatchGatewayAllocator.createAndUpdateBatches(testAllocation, false); + assertEquals(expectedBatchSize, primaryBatches.size()); + assertEquals(expectedBatchSize, replicaBatches.size()); + assertEquals(expectedBatchSize, testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().size()); + assertEquals(expectedBatchSize, testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().size()); + assertEquals(testShardsBatchGatewayAllocator.getBatchIdToStartedShardBatch().keySet(), primaryBatches); + assertEquals(testShardsBatchGatewayAllocator.getBatchIdToStoreShardBatch().keySet(), replicaBatches); + return new Tuple<>(primaryBatches, replicaBatches); + } +} diff --git a/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java b/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java index 1b42a31a4fd84..12030ad41d508 100644 --- a/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java +++ b/server/src/test/java/org/opensearch/gateway/ShardBatchCacheTests.java @@ -29,8 +29,7 @@ public class ShardBatchCacheTests extends OpenSearchAllocationTestCase { private static final String BATCH_ID = "b1"; private final DiscoveryNode node1 = newNode("node1"); private final DiscoveryNode node2 = newNode("node2"); - // Needs to be enabled once ShardsBatchGatewayAllocator is pushed - // private final Map batchInfo = new HashMap<>(); + private final Map batchInfo = new HashMap<>(); private AsyncShardBatchFetch.ShardBatchCache shardCache; private List shardsInBatch = new ArrayList<>(); private static final int NUMBER_OF_SHARDS_DEFAULT = 10; @@ -162,7 +161,7 @@ public void testShardsDataWithException() { null ); - // assertEquals(5, batchInfo.size()); + assertEquals(10, batchInfo.size()); assertEquals(2, fetchData.size()); assertEquals(10, fetchData.get(node1).getNodeGatewayStartedShardsBatch().size()); assertTrue(fetchData.get(node2).getNodeGatewayStartedShardsBatch().isEmpty()); @@ -210,10 +209,10 @@ private void fillShards(Map shardAttributesMap, int nu for (ShardId shardId : shardsInBatch) { ShardAttributes attr = new ShardAttributes(""); shardAttributesMap.put(shardId, attr); - // batchInfo.put( - // shardId, - // new ShardsBatchGatewayAllocator.ShardEntry(attr, randomShardRouting(shardId.getIndexName(), shardId.id())) - // ); + batchInfo.put( + shardId, + new ShardsBatchGatewayAllocator.ShardEntry(attr, randomShardRouting(shardId.getIndexName(), shardId.id())) + ); } } diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java index b1695ff00e0cc..b9f52a62f823a 100644 --- a/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestGatewayAllocator.java @@ -183,4 +183,5 @@ public String getReplicationCheckPointKey(ShardId shardId, String nodeName) { public void addReplicationCheckpoint(ShardId shardId, String nodeName, ReplicationCheckpoint replicationCheckpoint) { shardIdNodeToReplicationCheckPointMap.putIfAbsent(getReplicationCheckPointKey(shardId, nodeName), replicationCheckpoint); } + } diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java new file mode 100644 index 0000000000000..53a4e90adb976 --- /dev/null +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java @@ -0,0 +1,144 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.test.gateway; + +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.gateway.AsyncShardFetch; +import org.opensearch.gateway.PrimaryShardBatchAllocator; +import org.opensearch.gateway.ReplicaShardBatchAllocator; +import org.opensearch.gateway.ShardsBatchGatewayAllocator; +import org.opensearch.gateway.TransportNodesGatewayStartedShardHelper; +import org.opensearch.gateway.TransportNodesListGatewayStartedShardsBatch; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class TestShardBatchGatewayAllocator extends ShardsBatchGatewayAllocator { + + Map> knownAllocations = new HashMap<>(); + DiscoveryNodes currentNodes = DiscoveryNodes.EMPTY_NODES; + Map shardIdNodeToReplicationCheckPointMap = new HashMap<>(); + + PrimaryShardBatchAllocator primaryBatchShardAllocator = new PrimaryShardBatchAllocator() { + @Override + protected AsyncShardFetch.FetchResult fetchData( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + Map foundShards = new HashMap<>(); + HashMap> shardsToIgnoreNodes = new HashMap<>(); + for (Map.Entry> entry : knownAllocations.entrySet()) { + String nodeId = entry.getKey(); + Map shardsOnNode = entry.getValue(); + HashMap adaptedResponse = new HashMap<>(); + + for (ShardRouting shardRouting : eligibleShards) { + ShardId shardId = shardRouting.shardId(); + Set ignoreNodes = allocation.getIgnoreNodes(shardId); + + if (shardsOnNode.containsKey(shardId) && ignoreNodes.contains(nodeId) == false && currentNodes.nodeExists(nodeId)) { + TransportNodesGatewayStartedShardHelper.GatewayStartedShard nodeShard = + new TransportNodesGatewayStartedShardHelper.GatewayStartedShard( + shardsOnNode.get(shardId).allocationId().getId(), + shardsOnNode.get(shardId).primary(), + getReplicationCheckpoint(shardId, nodeId) + ); + adaptedResponse.put(shardId, nodeShard); + shardsToIgnoreNodes.put(shardId, ignoreNodes); + } + foundShards.put( + currentNodes.get(nodeId), + new TransportNodesListGatewayStartedShardsBatch.NodeGatewayStartedShardsBatch( + currentNodes.get(nodeId), + adaptedResponse + ) + ); + } + } + return new AsyncShardFetch.FetchResult<>(foundShards, shardsToIgnoreNodes); + } + }; + + ReplicaShardBatchAllocator replicaBatchShardAllocator = new ReplicaShardBatchAllocator() { + + @Override + protected AsyncShardFetch.FetchResult fetchData( + List eligibleShards, + List inEligibleShards, + RoutingAllocation allocation + ) { + return new AsyncShardFetch.FetchResult<>(Collections.emptyMap(), Collections.emptyMap()); + } + + @Override + protected boolean hasInitiatedFetching(ShardRouting shard) { + return true; + } + }; + + @Override + public void allocateAllUnassignedShards(RoutingAllocation allocation, boolean primary) { + currentNodes = allocation.nodes(); + innerAllocateUnassignedBatch(allocation, primaryBatchShardAllocator, replicaBatchShardAllocator, primary); + } + + @Override + public void beforeAllocation(RoutingAllocation allocation) {} + + @Override + public void afterPrimariesBeforeReplicas(RoutingAllocation allocation) {} + + public Set createAndUpdateBatches(RoutingAllocation allocation, boolean primary) { + return super.createAndUpdateBatches(allocation, primary); + } + + public void safelyRemoveShardFromBatch(ShardRouting shard) { + super.safelyRemoveShardFromBatch(shard, shard.primary()); + } + + public void safelyRemoveShardFromBothBatch(ShardRouting shardRouting) { + super.safelyRemoveShardFromBothBatch(shardRouting); + } + + public String getBatchId(ShardRouting shard, boolean primary) { + return super.getBatchId(shard, primary); + } + + public Map getBatchIdToStartedShardBatch() { + return batchIdToStartedShardBatch; + } + + public Map getBatchIdToStoreShardBatch() { + return batchIdToStoreShardBatch; + } + + @Override + public AllocateUnassignedDecision explainUnassignedShardAllocation(ShardRouting unassignedShard, RoutingAllocation routingAllocation) { + return super.explainUnassignedShardAllocation(unassignedShard, routingAllocation); + } + + protected ReplicationCheckpoint getReplicationCheckpoint(ShardId shardId, String nodeName) { + return shardIdNodeToReplicationCheckPointMap.getOrDefault(getReplicationCheckPointKey(shardId, nodeName), null); + } + + public String getReplicationCheckPointKey(ShardId shardId, String nodeName) { + return shardId.toString() + "_" + nodeName; + } +} From 81707dcc74fd8a66349c80c0c47254de110b9f5d Mon Sep 17 00:00:00 2001 From: Sagar <99425694+sgup432@users.noreply.github.com> Date: Thu, 25 Apr 2024 22:09:42 -0700 Subject: [PATCH 31/32] [Tiered Caching] Expose a dynamic setting to disable/enable disk cache (#13373) * [Tiered Caching] Expose a dynamic setting to disable/enable disk cache Signed-off-by: Sagar Upadhyaya * Putting tiered cache settings behind feature flag Signed-off-by: Sagar Upadhyaya * Adding a changelog Signed-off-by: Sagar Upadhyaya * Addressing Sorabh's comments Signed-off-by: Sagar Upadhyaya * Putting new setting behind feature flag Signed-off-by: Sagar Upadhyaya --------- Signed-off-by: Sagar Upadhyaya Signed-off-by: Sagar <99425694+sgup432@users.noreply.github.com> --- CHANGELOG.md | 1 + .../TieredSpilloverCacheIT.java | 115 ++++++++++++++++++ .../common/tier/TieredSpilloverCache.java | 74 +++++++---- .../tier/TieredSpilloverCachePlugin.java | 13 +- .../tier/TieredSpilloverCacheSettings.java | 22 +++- .../tier/TieredSpilloverCachePluginTests.java | 11 +- .../tier/TieredSpilloverCacheTests.java | 115 ++++++++++++++++++ 7 files changed, 323 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7adf9a752c846..ee2bb839db324 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add an individual setting of rate limiter for segment replication ([#12959](https://github.com/opensearch-project/OpenSearch/pull/12959)) - [Streaming Indexing] Ensure support of the new transport by security plugin ([#13174](https://github.com/opensearch-project/OpenSearch/pull/13174)) - Add cluster setting to dynamically configure the buckets for filter rewrite optimization. ([#13179](https://github.com/opensearch-project/OpenSearch/pull/13179)) +- [Tiered Caching] Add a dynamic setting to disable/enable disk cache. ([#13373](https://github.com/opensearch-project/OpenSearch/pull/13373)) - [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992)) - Batch mode for async fetching shard information in GatewayAllocator for unassigned shards ([#8746](https://github.com/opensearch-project/OpenSearch/pull/8746)) diff --git a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java b/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java index 977a66c53b7e8..cbe16a690c104 100644 --- a/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java +++ b/modules/cache-common/src/internalClusterTest/java/org.opensearch.cache.common.tier/TieredSpilloverCacheIT.java @@ -425,6 +425,121 @@ public void testWithExplicitCacheClear() throws Exception { }, 1, TimeUnit.SECONDS); } + public void testWithDynamicDiskCacheSetting() throws Exception { + int onHeapCacheSizeInBytes = 10; // Keep it low so that all items are cached onto disk. + internalCluster().startNode( + Settings.builder() + .put(defaultSettings(onHeapCacheSizeInBytes + "b")) + .put(INDICES_CACHE_CLEAN_INTERVAL_SETTING.getKey(), new TimeValue(1)) + .build() + ); + Client client = client(); + assertAcked( + client.admin() + .indices() + .prepareCreate("index") + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.refresh_interval", -1) + ) + .get() + ); + // Update took time policy to zero so that all entries are eligible to be cached on disk. + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put( + TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), + new TimeValue(0, TimeUnit.MILLISECONDS) + ) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + int numberOfIndexedItems = randomIntBetween(5, 10); + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + indexRandom(true, client.prepareIndex("index").setSource("k" + iterator, "hello" + iterator)); + } + ensureSearchable("index"); + refreshAndWaitForReplication(); + // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); + long perQuerySizeInCacheInBytes = -1; + // Step 1: Hit some queries. We will see misses and queries will be cached(onto disk cache) for subsequent + // requests. + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + if (perQuerySizeInCacheInBytes == -1) { + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + perQuerySizeInCacheInBytes = requestCacheStats.getMemorySizeInBytes(); + } + assertSearchResponse(resp); + } + + RequestCacheStats requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(0, requestCacheStats.getHitCount()); + assertEquals(0, requestCacheStats.getEvictions()); + + // Step 2: Hit same queries again. We will see hits now. + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); + assertEquals(numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(numberOfIndexedItems, requestCacheStats.getHitCount()); + assertEquals(0, requestCacheStats.getEvictions()); + long lastKnownHitCount = requestCacheStats.getHitCount(); + long lastKnownMissCount = requestCacheStats.getMissCount(); + + // Step 3: Turn off disk cache now. And hit same queries again. We should not see hits now as all queries + // were cached onto disk cache. + updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings( + Settings.builder() + .put(TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), false) + .build() + ); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).get()); + + for (int iterator = 0; iterator < numberOfIndexedItems; iterator++) { + SearchResponse resp = client.prepareSearch("index") + .setRequestCache(true) + .setQuery(QueryBuilders.termQuery("k" + iterator, "hello" + iterator)) + .get(); + assertSearchResponse(resp); + } + requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(numberOfIndexedItems * perQuerySizeInCacheInBytes, requestCacheStats.getMemorySizeInBytes()); // + // Still shows disk cache entries as explicit clear or invalidation is required to clean up disk cache. + assertEquals(lastKnownMissCount + numberOfIndexedItems, requestCacheStats.getMissCount()); + assertEquals(0, lastKnownHitCount - requestCacheStats.getHitCount()); // No new hits being seen. + lastKnownMissCount = requestCacheStats.getMissCount(); + lastKnownHitCount = requestCacheStats.getHitCount(); + + // Step 4: Invalidate entries via refresh. + // Explicit refresh would invalidate cache entries. + refreshAndWaitForReplication(); + assertBusy(() -> { + // Explicit refresh should clear up cache entries + assertTrue(getRequestCacheStats(client, "index").getMemorySizeInBytes() == 0); + }, 1, TimeUnit.SECONDS); + requestCacheStats = getRequestCacheStats(client, "index"); + assertEquals(0, lastKnownMissCount - requestCacheStats.getMissCount()); + assertEquals(0, lastKnownHitCount - requestCacheStats.getHitCount()); + } + private RequestCacheStats getRequestCacheStats(Client client, String indexName) { return client.admin().indices().prepareStats(indexName).setRequestCache(true).get().getTotal().getRequestCache(); } diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java index ae3d9f1dbcf62..34f17df751d7a 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java @@ -27,8 +27,9 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; @@ -38,6 +39,8 @@ import java.util.function.Function; import java.util.function.Predicate; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP; + /** * This cache spillover the evicted items from heap tier to disk tier. All the new items are first cached on heap * and the items evicted from on heap cache are moved to disk based cache. If disk based cache also gets full, @@ -67,12 +70,14 @@ public class TieredSpilloverCache implements ICache { /** * Maintains caching tiers in ascending order of cache latency. */ - private final List> cacheList; + private final Map, Boolean> caches; private final List> policies; TieredSpilloverCache(Builder builder) { Objects.requireNonNull(builder.onHeapCacheFactory, "onHeap cache builder can't be null"); Objects.requireNonNull(builder.diskCacheFactory, "disk cache builder can't be null"); + Objects.requireNonNull(builder.cacheConfig, "cache config can't be null"); + Objects.requireNonNull(builder.cacheConfig.getClusterSettings(), "cluster settings can't be null"); this.removalListener = Objects.requireNonNull(builder.removalListener, "Removal listener can't be null"); this.onHeapCache = builder.onHeapCacheFactory.create( @@ -80,7 +85,8 @@ public class TieredSpilloverCache implements ICache { @Override public void onRemoval(RemovalNotification, V> notification) { try (ReleasableLock ignore = writeLock.acquire()) { - if (SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()) + if (caches.get(diskCache) + && SPILLOVER_REMOVAL_REASONS.contains(notification.getRemovalReason()) && evaluatePolicies(notification.getValue())) { diskCache.put(notification.getKey(), notification.getValue()); } else { @@ -103,9 +109,15 @@ && evaluatePolicies(notification.getValue())) { ); this.diskCache = builder.diskCacheFactory.create(builder.cacheConfig, builder.cacheType, builder.cacheFactories); - this.cacheList = Arrays.asList(onHeapCache, diskCache); + Boolean isDiskCacheEnabled = DISK_CACHE_ENABLED_SETTING_MAP.get(builder.cacheType).get(builder.cacheConfig.getSettings()); + LinkedHashMap, Boolean> cacheListMap = new LinkedHashMap<>(); + cacheListMap.put(onHeapCache, true); + cacheListMap.put(diskCache, isDiskCacheEnabled); + this.caches = Collections.synchronizedMap(cacheListMap); this.dimensionNames = builder.cacheConfig.getDimensionNames(); this.policies = builder.policies; // Will never be null; builder initializes it to an empty list + builder.cacheConfig.getClusterSettings() + .addSettingsUpdateConsumer(DISK_CACHE_ENABLED_SETTING_MAP.get(builder.cacheType), this::enableDisableDiskCache); } // Package private for testing @@ -118,6 +130,13 @@ ICache getDiskCache() { return diskCache; } + // Package private for testing. + void enableDisableDiskCache(Boolean isDiskCacheEnabled) { + // When disk cache is disabled, we are not clearing up the disk cache entries yet as that should be part of + // separate cache/clear API. + this.caches.put(diskCache, isDiskCacheEnabled); + } + @Override public V get(ICacheKey key) { return getValueFromTieredCache().apply(key); @@ -132,7 +151,6 @@ public void put(ICacheKey key, V value) { @Override public V computeIfAbsent(ICacheKey key, LoadAwareCacheLoader, V> loader) throws Exception { - V cacheValue = getValueFromTieredCache().apply(key); if (cacheValue == null) { // Add the value to the onHeap cache. We are calling computeIfAbsent which does another get inside. @@ -151,10 +169,10 @@ public V computeIfAbsent(ICacheKey key, LoadAwareCacheLoader, V> public void invalidate(ICacheKey key) { // We are trying to invalidate the key from all caches though it would be present in only of them. // Doing this as we don't know where it is located. We could do a get from both and check that, but what will - // also trigger a hit/miss listener event, so ignoring it for now. + // also count hits/misses stats, so ignoring it for now. try (ReleasableLock ignore = writeLock.acquire()) { - for (ICache cache : cacheList) { - cache.invalidate(key); + for (Map.Entry, Boolean> cacheEntry : caches.entrySet()) { + cacheEntry.getKey().invalidate(key); } } } @@ -162,8 +180,8 @@ public void invalidate(ICacheKey key) { @Override public void invalidateAll() { try (ReleasableLock ignore = writeLock.acquire()) { - for (ICache cache : cacheList) { - cache.invalidateAll(); + for (Map.Entry, Boolean> cacheEntry : caches.entrySet()) { + cacheEntry.getKey().invalidateAll(); } } } @@ -175,15 +193,21 @@ public void invalidateAll() { @SuppressWarnings({ "unchecked" }) @Override public Iterable> keys() { - Iterable>[] iterables = (Iterable>[]) new Iterable[] { onHeapCache.keys(), diskCache.keys() }; - return new ConcatenatedIterables>(iterables); + List>> iterableList = new ArrayList<>(); + for (Map.Entry, Boolean> cacheEntry : caches.entrySet()) { + iterableList.add(cacheEntry.getKey().keys()); + } + Iterable>[] iterables = (Iterable>[]) iterableList.toArray(new Iterable[0]); + return new ConcatenatedIterables<>(iterables); } @Override public long count() { long count = 0; - for (ICache cache : cacheList) { - count += cache.count(); + for (Map.Entry, Boolean> cacheEntry : caches.entrySet()) { + // Count for all the tiers irrespective of whether they are enabled or not. As eventually + // this will turn to zero once cache is cleared up either via invalidation or manually. + count += cacheEntry.getKey().count(); } return count; } @@ -191,16 +215,17 @@ public long count() { @Override public void refresh() { try (ReleasableLock ignore = writeLock.acquire()) { - for (ICache cache : cacheList) { - cache.refresh(); + for (Map.Entry, Boolean> cacheEntry : caches.entrySet()) { + cacheEntry.getKey().refresh(); } } } @Override public void close() throws IOException { - for (ICache cache : cacheList) { - cache.close(); + for (Map.Entry, Boolean> cacheEntry : caches.entrySet()) { + // Close all the caches here irrespective of whether they are enabled or not. + cacheEntry.getKey().close(); } } @@ -212,13 +237,12 @@ public ImmutableCacheStatsHolder stats() { private Function, V> getValueFromTieredCache() { return key -> { try (ReleasableLock ignore = readLock.acquire()) { - for (ICache cache : cacheList) { - V value = cache.get(key); - if (value != null) { - // update hit stats - return value; - } else { - // update miss stats + for (Map.Entry, Boolean> cacheEntry : caches.entrySet()) { + if (cacheEntry.getValue()) { + V value = cacheEntry.getKey().get(key); + if (value != null) { + return value; + } } } } diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java index dfd40199d859e..1c10e51630460 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCachePlugin.java @@ -11,6 +11,8 @@ import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.ICache; import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.plugins.CachePlugin; import org.opensearch.plugins.Plugin; @@ -18,6 +20,7 @@ import java.util.List; import java.util.Map; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP; import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; /** @@ -30,10 +33,15 @@ public class TieredSpilloverCachePlugin extends Plugin implements CachePlugin { */ public static final String TIERED_CACHE_SPILLOVER_PLUGIN_NAME = "tieredSpilloverCachePlugin"; + private final Settings settings; + /** * Default constructor + * @param settings settings */ - public TieredSpilloverCachePlugin() {} + public TieredSpilloverCachePlugin(Settings settings) { + this.settings = settings; + } @Override public Map getCacheFactoryMap() { @@ -54,6 +62,9 @@ public List> getSettings() { TieredSpilloverCacheSettings.TIERED_SPILLOVER_DISK_STORE_NAME.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) ); settingList.add(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(cacheType)); + if (FeatureFlags.PLUGGABLE_CACHE_SETTING.get(settings)) { + settingList.add(DISK_CACHE_ENABLED_SETTING_MAP.get(cacheType)); + } } return settingList; } diff --git a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java index b89e8c517a351..e8e441d6bd3a6 100644 --- a/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java +++ b/modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheSettings.java @@ -42,6 +42,14 @@ public class TieredSpilloverCacheSettings { (key) -> Setting.simpleString(key, "", NodeScope) ); + /** + * Setting to disable/enable disk cache dynamically. + */ + public static final Setting.AffixSetting TIERED_SPILLOVER_DISK_CACHE_SETTING = Setting.suffixKeySetting( + TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_CACHE_NAME + ".disk.store.enabled", + (key) -> Setting.boolSetting(key, true, NodeScope, Setting.Property.Dynamic) + ); + /** * Setting defining the minimum took time for a query to be allowed into the disk cache. */ @@ -63,17 +71,29 @@ public class TieredSpilloverCacheSettings { public static final Map> TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; /** - * Fetches concrete took time policy settings. + * Stores disk cache enabled settings for various cache types as these are dynamic so that can be registered and + * retrieved accordingly. + */ + public static final Map> DISK_CACHE_ENABLED_SETTING_MAP; + + /** + * Fetches concrete took time policy and disk cache settings. */ static { Map> concreteTookTimePolicySettingMap = new HashMap<>(); + Map> diskCacheSettingMap = new HashMap<>(); for (CacheType cacheType : CacheType.values()) { concreteTookTimePolicySettingMap.put( cacheType, TIERED_SPILLOVER_DISK_TOOK_TIME_THRESHOLD.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) ); + diskCacheSettingMap.put( + cacheType, + TIERED_SPILLOVER_DISK_CACHE_SETTING.getConcreteSettingForNamespace(cacheType.getSettingPrefix()) + ); } TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP = concreteTookTimePolicySettingMap; + DISK_CACHE_ENABLED_SETTING_MAP = diskCacheSettingMap; } /** diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java index 1172a48e97c6a..4a96ffe2069ec 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCachePluginTests.java @@ -9,6 +9,8 @@ package org.opensearch.cache.common.tier; import org.opensearch.common.cache.ICache; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.test.OpenSearchTestCase; import java.util.Map; @@ -16,9 +18,16 @@ public class TieredSpilloverCachePluginTests extends OpenSearchTestCase { public void testGetCacheFactoryMap() { - TieredSpilloverCachePlugin tieredSpilloverCachePlugin = new TieredSpilloverCachePlugin(); + TieredSpilloverCachePlugin tieredSpilloverCachePlugin = new TieredSpilloverCachePlugin(Settings.EMPTY); Map map = tieredSpilloverCachePlugin.getCacheFactoryMap(); assertNotNull(map.get(TieredSpilloverCache.TieredSpilloverCacheFactory.TIERED_SPILLOVER_CACHE_NAME)); assertEquals(TieredSpilloverCachePlugin.TIERED_CACHE_SPILLOVER_PLUGIN_NAME, tieredSpilloverCachePlugin.getName()); } + + public void testGetSettingsWithFeatureFlagOn() { + TieredSpilloverCachePlugin tieredSpilloverCachePlugin = new TieredSpilloverCachePlugin( + Settings.builder().put(FeatureFlags.PLUGGABLE_CACHE_SETTING.getKey(), true).build() + ); + assertFalse(tieredSpilloverCachePlugin.getSettings().isEmpty()); + } } diff --git a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java index bf9f8fd22d793..1ecb63414dc68 100644 --- a/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java +++ b/modules/cache-common/src/test/java/org/opensearch/cache/common/tier/TieredSpilloverCacheTests.java @@ -42,6 +42,7 @@ import java.util.function.Function; import java.util.function.Predicate; +import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.DISK_CACHE_ENABLED_SETTING_MAP; import static org.opensearch.cache.common.tier.TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP; import static org.opensearch.common.cache.store.settings.OpenSearchOnHeapCacheSettings.MAXIMUM_SIZE_IN_BYTES_KEY; @@ -56,6 +57,7 @@ public void setup() { Settings settings = Settings.EMPTY; clusterSettings = new ClusterSettings(settings, new HashSet<>()); clusterSettings.registerSetting(TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE)); + clusterSettings.registerSetting(DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE)); } public void testComputeIfAbsentWithoutAnyOnHeapCacheEviction() throws Exception { @@ -302,6 +304,7 @@ public void testComputeIfAbsentWithEvictionsFromOnHeapCache() throws Exception { ) .build() ) + .setClusterSettings(clusterSettings) .build(); ICache.Factory mockDiskCacheFactory = new MockDiskCache.MockDiskCacheFactory(0, diskCacheSize); @@ -777,6 +780,7 @@ public void testConcurrencyForEvictionFlowFromOnHeapToDiskTier() throws Exceptio ) .build() ) + .setClusterSettings(clusterSettings) .setDimensionNames(dimensionNames) .build(); TieredSpilloverCache tieredSpilloverCache = new TieredSpilloverCache.Builder() @@ -1008,6 +1012,116 @@ public void testMinimumThresholdSettingValue() throws Exception { assertEquals(validDuration, concreteSetting.get(validSettings)); } + public void testPutWithDiskCacheDisabledSetting() throws Exception { + int onHeapCacheSize = randomIntBetween(10, 30); + int diskCacheSize = randomIntBetween(onHeapCacheSize + 1, 100); + int keyValueSize = 50; + int totalSize = onHeapCacheSize + diskCacheSize; + + MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); + TieredSpilloverCache tieredSpilloverCache = initializeTieredSpilloverCache( + keyValueSize, + diskCacheSize, + removalListener, + Settings.builder() + .put( + OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(MAXIMUM_SIZE_IN_BYTES_KEY) + .getKey(), + onHeapCacheSize * keyValueSize + "b" + ) + .put(DISK_CACHE_ENABLED_SETTING_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(), false) + .build(), + 0 + ); + + int numOfItems1 = randomIntBetween(onHeapCacheSize + 1, totalSize); // Create more items than onHeap cache. + for (int iter = 0; iter < numOfItems1; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + LoadAwareCacheLoader, String> loadAwareCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, loadAwareCacheLoader); + } + ICache onHeapCache = tieredSpilloverCache.getOnHeapCache(); + ICache diskCache = tieredSpilloverCache.getDiskCache(); + assertEquals(onHeapCacheSize, onHeapCache.count()); + assertEquals(0, diskCache.count()); // Disk cache shouldn't have anything considering it is disabled. + assertEquals(numOfItems1 - onHeapCacheSize, removalListener.evictionsMetric.count()); + } + + public void testGetPutAndInvalidateWithDiskCacheDisabled() throws Exception { + int onHeapCacheSize = randomIntBetween(10, 30); + int diskCacheSize = randomIntBetween(onHeapCacheSize + 1, 100); + int keyValueSize = 50; + int totalSize = onHeapCacheSize + diskCacheSize; + + MockCacheRemovalListener removalListener = new MockCacheRemovalListener<>(); + TieredSpilloverCache tieredSpilloverCache = initializeTieredSpilloverCache( + keyValueSize, + diskCacheSize, + removalListener, + Settings.builder() + .put( + OpenSearchOnHeapCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(MAXIMUM_SIZE_IN_BYTES_KEY) + .getKey(), + onHeapCacheSize * keyValueSize + "b" + ) + .build(), + 0 + ); + + int numOfItems1 = randomIntBetween(onHeapCacheSize + 1, totalSize - 1); // Create more items than onHeap + // cache to cause spillover. + for (int iter = 0; iter < numOfItems1; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + LoadAwareCacheLoader, String> loadAwareCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, loadAwareCacheLoader); + } + ICache onHeapCache = tieredSpilloverCache.getOnHeapCache(); + ICache diskCache = tieredSpilloverCache.getDiskCache(); + List> diskCacheKeys = new ArrayList<>(); + tieredSpilloverCache.getDiskCache().keys().forEach(diskCacheKeys::add); + long actualDiskCacheCount = diskCache.count(); + long actualTieredCacheCount = tieredSpilloverCache.count(); + assertEquals(onHeapCacheSize, onHeapCache.count()); + assertEquals(numOfItems1 - onHeapCacheSize, actualDiskCacheCount); + assertEquals(0, removalListener.evictionsMetric.count()); + assertEquals(numOfItems1, actualTieredCacheCount); + for (ICacheKey diskKey : diskCacheKeys) { + assertNotNull(tieredSpilloverCache.get(diskKey)); + } + + tieredSpilloverCache.enableDisableDiskCache(false); // Disable disk cache now. + int numOfItems2 = totalSize - numOfItems1; + for (int iter = 0; iter < numOfItems2; iter++) { + ICacheKey key = getICacheKey(UUID.randomUUID().toString()); + LoadAwareCacheLoader, String> loadAwareCacheLoader = getLoadAwareCacheLoader(); + tieredSpilloverCache.computeIfAbsent(key, loadAwareCacheLoader); + } + for (ICacheKey diskKey : diskCacheKeys) { + assertNull(tieredSpilloverCache.get(diskKey)); // Considering disk cache is disabled, we shouldn't find + // these keys. + } + assertEquals(onHeapCacheSize, onHeapCache.count()); // Should remain same. + assertEquals(0, diskCache.count() - actualDiskCacheCount); // Considering it is disabled now, shouldn't cache + // any more items. + assertEquals(numOfItems2, removalListener.evictionsMetric.count()); // Considering onHeap cache was already + // full, we should all existing onHeap entries being evicted. + assertEquals(0, tieredSpilloverCache.count() - actualTieredCacheCount); // Count still returns disk cache + // entries count as they haven't been cleared yet. + long lastKnownTieredCacheEntriesCount = tieredSpilloverCache.count(); + + // Clear up disk cache keys. + for (ICacheKey diskKey : diskCacheKeys) { + tieredSpilloverCache.invalidate(diskKey); + } + assertEquals(0, diskCache.count()); + assertEquals(lastKnownTieredCacheEntriesCount - diskCacheKeys.size(), tieredSpilloverCache.count()); + + tieredSpilloverCache.invalidateAll(); // Clear up all the keys. + assertEquals(0, tieredSpilloverCache.count()); + } + private List getMockDimensions() { List dims = new ArrayList<>(); for (String dimensionName : dimensionNames) { @@ -1121,6 +1235,7 @@ private TieredSpilloverCache intializeTieredSpilloverCache( .put(settings) .build() ) + .setClusterSettings(clusterSettings) .build(); ICache.Factory mockDiskCacheFactory = new MockDiskCache.MockDiskCacheFactory(diskDeliberateDelay, diskCacheSize); From f7984974963d745ca3cb88460251792acd7b326d Mon Sep 17 00:00:00 2001 From: Kiran Prakash Date: Fri, 26 Apr 2024 11:52:31 -0700 Subject: [PATCH 32/32] [Tiered Caching] Make Indices Request Cache Stale Key Mgmt Threshold setting dynamic (#12941) * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update ClusterSettings.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * spotless Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * some refactoring Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * address existing tests Signed-off-by: Kiran Prakash * UTs Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * ITs Signed-off-by: Kiran Prakash * spotless Signed-off-by: Kiran Prakash * refactor Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * Update CHANGELOG.md Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * resolve conflicts Signed-off-by: Kiran Prakash * address code comments Signed-off-by: Kiran Prakash * address code comments Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * rename tests Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash * resolve conflicts Signed-off-by: Kiran Prakash * Update IndicesRequestCache.java Signed-off-by: Kiran Prakash * code comments Signed-off-by: Kiran Prakash * Update IndicesRequestCacheIT.java Signed-off-by: Kiran Prakash --------- Signed-off-by: Kiran Prakash --- CHANGELOG.md | 1 + .../indices/IndicesRequestCacheIT.java | 844 +++++++++++++++--- .../common/settings/ClusterSettings.java | 2 + .../indices/IndicesRequestCache.java | 41 +- .../opensearch/indices/IndicesService.java | 3 +- .../indices/IndicesRequestCacheTests.java | 34 + 6 files changed, 785 insertions(+), 140 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee2bb839db324..0a351a1061e15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add cluster setting to dynamically configure the buckets for filter rewrite optimization. ([#13179](https://github.com/opensearch-project/OpenSearch/pull/13179)) - [Tiered Caching] Add a dynamic setting to disable/enable disk cache. ([#13373](https://github.com/opensearch-project/OpenSearch/pull/13373)) - [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992)) +- [Tiered caching] Make Indices Request Cache Stale Key Mgmt Threshold setting dynamic ([#12941](https://github.com/opensearch-project/OpenSearch/pull/12941)) - Batch mode for async fetching shard information in GatewayAllocator for unassigned shards ([#8746](https://github.com/opensearch-project/OpenSearch/pull/8746)) ### Dependencies diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java index ec5637cec6485..ea064a3a3212d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java @@ -34,7 +34,11 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.opensearch.action.admin.cluster.node.stats.NodeStats; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.indices.alias.Alias; +import org.opensearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest; import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse; import org.opensearch.action.search.SearchResponse; import org.opensearch.action.search.SearchType; @@ -42,13 +46,16 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.cache.request.RequestCacheStats; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.aggregations.bucket.global.GlobalAggregationBuilder; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.bucket.histogram.Histogram; import org.opensearch.search.aggregations.bucket.histogram.Histogram.Bucket; +import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase; import org.opensearch.test.hamcrest.OpenSearchAssertions; @@ -59,7 +66,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.concurrent.TimeUnit; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; +import static org.opensearch.indices.IndicesRequestCache.INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING; +import static org.opensearch.indices.IndicesService.INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY; import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; import static org.opensearch.search.aggregations.AggregationBuilders.dateRange; @@ -69,6 +81,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, supportsDedicatedMasters = false) public class IndicesRequestCacheIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase { public IndicesRequestCacheIT(Settings settings) { super(settings); @@ -92,25 +105,31 @@ protected boolean useRandomReplicationStrategy() { // One of the primary purposes of the query cache is to cache aggs results public void testCacheAggs() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("f", "type=date") - .setSettings(Settings.builder().put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true)) + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + ) .get() ); indexRandom( true, - client.prepareIndex("index").setSource("f", "2014-03-10T00:00:00.000Z"), - client.prepareIndex("index").setSource("f", "2014-05-13T00:00:00.000Z") + client.prepareIndex(index).setSource("f", "2014-03-10T00:00:00.000Z"), + client.prepareIndex(index).setSource("f", "2014-05-13T00:00:00.000Z") ); - ensureSearchable("index"); + ensureSearchable(index); // This is not a random example: serialization with time zones writes shared strings // which used to not work well with the query cache because of the handles stream output // see #9500 - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSize(0) .setSearchType(SearchType.QUERY_THEN_FETCH) .addAggregation( @@ -124,12 +143,12 @@ public void testCacheAggs() throws Exception { // The cached is actually used assertThat( - client.admin().indices().prepareStats("index").setRequestCache(true).get().getTotal().getRequestCache().getMemorySizeInBytes(), + client.admin().indices().prepareStats(index).setRequestCache(true).get().getTotal().getRequestCache().getMemorySizeInBytes(), greaterThan(0L) ); for (int i = 0; i < 10; ++i) { - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSize(0) .setSearchType(SearchType.QUERY_THEN_FETCH) .addAggregation( @@ -156,10 +175,11 @@ public void testCacheAggs() throws Exception { public void testQueryRewrite() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("s", "type=date") .setSettings( Settings.builder() @@ -172,28 +192,28 @@ public void testQueryRewrite() throws Exception { ); indexRandom( true, - client.prepareIndex("index").setId("1").setRouting("1").setSource("s", "2016-03-19"), - client.prepareIndex("index").setId("2").setRouting("1").setSource("s", "2016-03-20"), - client.prepareIndex("index").setId("3").setRouting("1").setSource("s", "2016-03-21"), - client.prepareIndex("index").setId("4").setRouting("2").setSource("s", "2016-03-22"), - client.prepareIndex("index").setId("5").setRouting("2").setSource("s", "2016-03-23"), - client.prepareIndex("index").setId("6").setRouting("2").setSource("s", "2016-03-24"), - client.prepareIndex("index").setId("7").setRouting("3").setSource("s", "2016-03-25"), - client.prepareIndex("index").setId("8").setRouting("3").setSource("s", "2016-03-26"), - client.prepareIndex("index").setId("9").setRouting("3").setSource("s", "2016-03-27") + client.prepareIndex(index).setId("1").setRouting("1").setSource("s", "2016-03-19"), + client.prepareIndex(index).setId("2").setRouting("1").setSource("s", "2016-03-20"), + client.prepareIndex(index).setId("3").setRouting("1").setSource("s", "2016-03-21"), + client.prepareIndex(index).setId("4").setRouting("2").setSource("s", "2016-03-22"), + client.prepareIndex(index).setId("5").setRouting("2").setSource("s", "2016-03-23"), + client.prepareIndex(index).setId("6").setRouting("2").setSource("s", "2016-03-24"), + client.prepareIndex(index).setId("7").setRouting("3").setSource("s", "2016-03-25"), + client.prepareIndex(index).setId("8").setRouting("3").setSource("s", "2016-03-26"), + client.prepareIndex(index).setId("9").setRouting("3").setSource("s", "2016-03-27") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - ensureSearchable("index"); + ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-25")) @@ -202,9 +222,9 @@ public void testQueryRewrite() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 5); + assertCacheState(client, index, 0, 5); - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-20").lte("2016-03-26")) @@ -212,9 +232,9 @@ public void testQueryRewrite() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 3, 7); + assertCacheState(client, index, 3, 7); - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-21").lte("2016-03-27")) @@ -222,15 +242,16 @@ public void testQueryRewrite() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 6, 9); + assertCacheState(client, index, 6, 9); } public void testQueryRewriteMissingValues() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("s", "type=date") .setSettings( Settings.builder() @@ -242,61 +263,62 @@ public void testQueryRewriteMissingValues() throws Exception { ); indexRandom( true, - client.prepareIndex("index").setId("1").setSource("s", "2016-03-19"), - client.prepareIndex("index").setId("2").setSource("s", "2016-03-20"), - client.prepareIndex("index").setId("3").setSource("s", "2016-03-21"), - client.prepareIndex("index").setId("4").setSource("s", "2016-03-22"), - client.prepareIndex("index").setId("5").setSource("s", "2016-03-23"), - client.prepareIndex("index").setId("6").setSource("s", "2016-03-24"), - client.prepareIndex("index").setId("7").setSource("other", "value"), - client.prepareIndex("index").setId("8").setSource("s", "2016-03-26"), - client.prepareIndex("index").setId("9").setSource("s", "2016-03-27") + client.prepareIndex(index).setId("1").setSource("s", "2016-03-19"), + client.prepareIndex(index).setId("2").setSource("s", "2016-03-20"), + client.prepareIndex(index).setId("3").setSource("s", "2016-03-21"), + client.prepareIndex(index).setId("4").setSource("s", "2016-03-22"), + client.prepareIndex(index).setId("5").setSource("s", "2016-03-23"), + client.prepareIndex(index).setId("6").setSource("s", "2016-03-24"), + client.prepareIndex(index).setId("7").setSource("other", "value"), + client.prepareIndex(index).setId("8").setSource("s", "2016-03-26"), + client.prepareIndex(index).setId("9").setSource("s", "2016-03-27") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - ensureSearchable("index"); + ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-28")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(8L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-28")) .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(8L)); - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-28")) .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(8L)); - assertCacheState(client, "index", 2, 1); + assertCacheState(client, index, 2, 1); } public void testQueryRewriteDates() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("d", "type=date") .setSettings( Settings.builder() @@ -308,28 +330,28 @@ public void testQueryRewriteDates() throws Exception { ); indexRandom( true, - client.prepareIndex("index").setId("1").setSource("d", "2014-01-01T00:00:00"), - client.prepareIndex("index").setId("2").setSource("d", "2014-02-01T00:00:00"), - client.prepareIndex("index").setId("3").setSource("d", "2014-03-01T00:00:00"), - client.prepareIndex("index").setId("4").setSource("d", "2014-04-01T00:00:00"), - client.prepareIndex("index").setId("5").setSource("d", "2014-05-01T00:00:00"), - client.prepareIndex("index").setId("6").setSource("d", "2014-06-01T00:00:00"), - client.prepareIndex("index").setId("7").setSource("d", "2014-07-01T00:00:00"), - client.prepareIndex("index").setId("8").setSource("d", "2014-08-01T00:00:00"), - client.prepareIndex("index").setId("9").setSource("d", "2014-09-01T00:00:00") + client.prepareIndex(index).setId("1").setSource("d", "2014-01-01T00:00:00"), + client.prepareIndex(index).setId("2").setSource("d", "2014-02-01T00:00:00"), + client.prepareIndex(index).setId("3").setSource("d", "2014-03-01T00:00:00"), + client.prepareIndex(index).setId("4").setSource("d", "2014-04-01T00:00:00"), + client.prepareIndex(index).setId("5").setSource("d", "2014-05-01T00:00:00"), + client.prepareIndex(index).setId("6").setSource("d", "2014-06-01T00:00:00"), + client.prepareIndex(index).setId("7").setSource("d", "2014-07-01T00:00:00"), + client.prepareIndex(index).setId("8").setSource("d", "2014-08-01T00:00:00"), + client.prepareIndex(index).setId("9").setSource("d", "2014-09-01T00:00:00") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - ensureSearchable("index"); + ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("d").gte("2013-01-01T00:00:00").lte("now")) @@ -338,9 +360,9 @@ public void testQueryRewriteDates() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(9L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("d").gte("2013-01-01T00:00:00").lte("now")) @@ -348,9 +370,9 @@ public void testQueryRewriteDates() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(9L)); - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("d").gte("2013-01-01T00:00:00").lte("now")) @@ -358,7 +380,7 @@ public void testQueryRewriteDates() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(9L)); - assertCacheState(client, "index", 2, 1); + assertCacheState(client, index, 2, 1); } public void testQueryRewriteDatesWithNow() throws Exception { @@ -449,53 +471,54 @@ public void testCanCache() throws Exception { .put("index.number_of_routing_shards", 2) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) .build(); - assertAcked(client.admin().indices().prepareCreate("index").setMapping("s", "type=date").setSettings(settings).get()); + String index = "index"; + assertAcked(client.admin().indices().prepareCreate(index).setMapping("s", "type=date").setSettings(settings).get()); indexRandom( true, - client.prepareIndex("index").setId("1").setRouting("1").setSource("s", "2016-03-19"), - client.prepareIndex("index").setId("2").setRouting("1").setSource("s", "2016-03-20"), - client.prepareIndex("index").setId("3").setRouting("1").setSource("s", "2016-03-21"), - client.prepareIndex("index").setId("4").setRouting("2").setSource("s", "2016-03-22"), - client.prepareIndex("index").setId("5").setRouting("2").setSource("s", "2016-03-23"), - client.prepareIndex("index").setId("6").setRouting("2").setSource("s", "2016-03-24"), - client.prepareIndex("index").setId("7").setRouting("3").setSource("s", "2016-03-25"), - client.prepareIndex("index").setId("8").setRouting("3").setSource("s", "2016-03-26"), - client.prepareIndex("index").setId("9").setRouting("3").setSource("s", "2016-03-27") + client.prepareIndex(index).setId("1").setRouting("1").setSource("s", "2016-03-19"), + client.prepareIndex(index).setId("2").setRouting("1").setSource("s", "2016-03-20"), + client.prepareIndex(index).setId("3").setRouting("1").setSource("s", "2016-03-21"), + client.prepareIndex(index).setId("4").setRouting("2").setSource("s", "2016-03-22"), + client.prepareIndex(index).setId("5").setRouting("2").setSource("s", "2016-03-23"), + client.prepareIndex(index).setId("6").setRouting("2").setSource("s", "2016-03-24"), + client.prepareIndex(index).setId("7").setRouting("3").setSource("s", "2016-03-25"), + client.prepareIndex(index).setId("8").setRouting("3").setSource("s", "2016-03-26"), + client.prepareIndex(index).setId("9").setRouting("3").setSource("s", "2016-03-27") ); - ensureSearchable("index"); - assertCacheState(client, "index", 0, 0); + ensureSearchable(index); + assertCacheState(client, index, 0, 0); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - ensureSearchable("index"); + ensureSearchable(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If size > 0 we should no cache by default - final SearchResponse r1 = client.prepareSearch("index") + final SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(1) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-19").lte("2016-03-25")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If search type is DFS_QUERY_THEN_FETCH we should not cache - final SearchResponse r2 = client.prepareSearch("index") + final SearchResponse r2 = client.prepareSearch(index) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("s").gte("2016-03-20").lte("2016-03-26")) .get(); OpenSearchAssertions.assertAllSuccessful(r2); assertThat(r2.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If search type is DFS_QUERY_THEN_FETCH we should not cache even if // the cache flag is explicitly set on the request - final SearchResponse r3 = client.prepareSearch("index") + final SearchResponse r3 = client.prepareSearch(index) .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setSize(0) .setRequestCache(true) @@ -503,10 +526,10 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r3); assertThat(r3.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If the request has an non-filter aggregation containing now we should not cache - final SearchResponse r5 = client.prepareSearch("index") + final SearchResponse r5 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setRequestCache(true) @@ -515,10 +538,10 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r5); assertThat(r5.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); // If size > 1 and cache flag is set on the request we should cache - final SearchResponse r6 = client.prepareSearch("index") + final SearchResponse r6 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(1) .setRequestCache(true) @@ -526,10 +549,10 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r6); assertThat(r6.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 2); + assertCacheState(client, index, 0, 2); // If the request has a filter aggregation containing now we should cache since it gets rewritten - final SearchResponse r4 = client.prepareSearch("index") + final SearchResponse r4 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setRequestCache(true) @@ -538,7 +561,7 @@ public void testCanCache() throws Exception { .get(); OpenSearchAssertions.assertAllSuccessful(r4); assertThat(r4.getHits().getTotalHits().value, equalTo(7L)); - assertCacheState(client, "index", 0, 4); + assertCacheState(client, index, 0, 4); } public void testCacheWithFilteredAlias() throws InterruptedException { @@ -548,61 +571,63 @@ public void testCacheWithFilteredAlias() throws InterruptedException { .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) .build(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("created_at", "type=date") .setSettings(settings) .addAlias(new Alias("last_week").filter(QueryBuilders.rangeQuery("created_at").gte("now-7d/d"))) .get() ); ZonedDateTime now = ZonedDateTime.now(ZoneOffset.UTC); - client.prepareIndex("index").setId("1").setRouting("1").setSource("created_at", DateTimeFormatter.ISO_LOCAL_DATE.format(now)).get(); + client.prepareIndex(index).setId("1").setRouting("1").setSource("created_at", DateTimeFormatter.ISO_LOCAL_DATE.format(now)).get(); // Force merge the index to ensure there can be no background merges during the subsequent searches that would invalidate the cache - ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge("index").setFlush(true).get(); + ForceMergeResponse forceMergeResponse = client.admin().indices().prepareForceMerge(index).setFlush(true).get(); OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refreshAndWaitForReplication(); - indexRandomForConcurrentSearch("index"); + indexRandomForConcurrentSearch(index); - assertCacheState(client, "index", 0, 0); + assertCacheState(client, index, 0, 0); - SearchResponse r1 = client.prepareSearch("index") + SearchResponse r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("created_at").gte("now-7d/d")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); - r1 = client.prepareSearch("index") + r1 = client.prepareSearch(index) .setSearchType(SearchType.QUERY_THEN_FETCH) .setSize(0) .setQuery(QueryBuilders.rangeQuery("created_at").gte("now-7d/d")) .get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); r1 = client.prepareSearch("last_week").setSearchType(SearchType.QUERY_THEN_FETCH).setSize(0).get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 1, 2); + assertCacheState(client, index, 1, 2); r1 = client.prepareSearch("last_week").setSearchType(SearchType.QUERY_THEN_FETCH).setSize(0).get(); OpenSearchAssertions.assertAllSuccessful(r1); assertThat(r1.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 2, 2); + assertCacheState(client, index, 2, 2); } public void testProfileDisableCache() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("k", "type=keyword") .setSettings( Settings.builder() @@ -612,14 +637,14 @@ public void testProfileDisableCache() throws Exception { ) .get() ); - indexRandom(true, client.prepareIndex("index").setSource("k", "hello")); - ensureSearchable("index"); + indexRandom(true, client.prepareIndex(index).setSource("k", "hello")); + ensureSearchable(index); int expectedHits = 0; int expectedMisses = 0; for (int i = 0; i < 5; i++) { boolean profile = i % 2 == 0; - SearchResponse resp = client.prepareSearch("index") + SearchResponse resp = client.prepareSearch(index) .setRequestCache(true) .setProfile(profile) .setQuery(QueryBuilders.termQuery("k", "hello")) @@ -634,16 +659,17 @@ public void testProfileDisableCache() throws Exception { expectedHits++; } } - assertCacheState(client, "index", expectedHits, expectedMisses); + assertCacheState(client, index, expectedHits, expectedMisses); } } public void testCacheWithInvalidation() throws Exception { Client client = client(); + String index = "index"; assertAcked( client.admin() .indices() - .prepareCreate("index") + .prepareCreate(index) .setMapping("k", "type=keyword") .setSettings( Settings.builder() @@ -654,38 +680,581 @@ public void testCacheWithInvalidation() throws Exception { ) .get() ); - indexRandom(true, client.prepareIndex("index").setSource("k", "hello")); - ensureSearchable("index"); - SearchResponse resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); + indexRandom(true, client.prepareIndex(index).setSource("k", "hello")); + ensureSearchable(index); + SearchResponse resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); assertSearchResponse(resp); OpenSearchAssertions.assertAllSuccessful(resp); assertThat(resp.getHits().getTotalHits().value, equalTo(1L)); - assertCacheState(client, "index", 0, 1); + assertCacheState(client, index, 0, 1); // Index but don't refresh - indexRandom(false, client.prepareIndex("index").setSource("k", "hello2")); - resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); + indexRandom(false, client.prepareIndex(index).setSource("k", "hello2")); + resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); assertSearchResponse(resp); // Should expect hit as here as refresh didn't happen - assertCacheState(client, "index", 1, 1); + assertCacheState(client, index, 1, 1); // Explicit refresh would invalidate cache refreshAndWaitForReplication(); // Hit same query again - resp = client.prepareSearch("index").setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); + resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", "hello")).get(); assertSearchResponse(resp); // Should expect miss as key has changed due to change in IndexReader.CacheKey (due to refresh) - assertCacheState(client, "index", 1, 2); + assertCacheState(client, index, 1, 2); + } + + // calling cache clear api, when staleness threshold is lower than staleness, it should clean the stale keys from cache + public void testCacheClearAPIRemovesStaleKeysWhenStalenessThresholdIsLow() throws Exception { + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + // setting intentionally high to avoid cache cleaner interfering + TimeValue.timeValueMillis(300) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + ClearIndicesCacheRequest clearIndicesCacheRequest = new ClearIndicesCacheRequest(index2); + client.admin().indices().clearCache(clearIndicesCacheRequest).actionGet(); + + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + } + + // when staleness threshold is lower than staleness, it should clean the stale keys from cache + public void testStaleKeysCleanupWithLowThreshold() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + } + + // when staleness threshold is equal to staleness, it should clean the stale keys from cache + public void testCacheCleanupOnEqualStalenessAndThreshold() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.33) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is higher than staleness, it should NOT clean the cache + public void testCacheCleanupSkipsWithHighStalenessThreshold() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.90) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should NOT have cleaned up the stale key from index 2 + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is explicitly set to 0, cache cleaner regularly cleans up stale keys. + public void testCacheCleanupOnZeroStalenessThreshold() throws Exception { + int cacheCleanIntervalInMillis = 50; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create 10 index1 cache entries + for (int i = 1; i <= 10; i++) { + long cacheSizeBefore = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + createCacheEntry(client, index1, "hello" + i); + assertCacheState(client, index1, 0, i); + long cacheSizeAfter = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(cacheSizeAfter > cacheSizeBefore); + } + + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is not explicitly set, cache cleaner regularly cleans up stale keys + public void testStaleKeysRemovalWithoutExplicitThreshold() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + String index1 = "index1"; + String index2 = "index2"; + Client client = client(node); + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when cache cleaner interval setting is not set, cache cleaner is configured appropriately with the fall-back setting + public void testCacheCleanupWithDefaultSettings() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder().put(INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMillis(cacheCleanIntervalInMillis)) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // staleness threshold updates flows through to the cache cleaner + public void testDynamicStalenessThresholdUpdate() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.90) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + assertTrue(getRequestCacheStats(client, index1).getMemorySizeInBytes() > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > 0); + + // force refresh so that it creates 1 stale key + flushAndRefresh(index2); + assertBusy(() -> { + // cache cleaner should NOT have cleaned up the stale key from index 2 + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + + // Update indices.requests.cache.cleanup.staleness_threshold to "10%" + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), 0.10)); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should NOT have cleaned from index 1 + assertEquals(finalMemorySizeForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // staleness threshold dynamic updates should throw exceptions on invalid input + public void testInvalidStalenessThresholdUpdateThrowsException() throws Exception { + int cacheCleanIntervalInMillis = 1; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.90) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + setupIndex(client, index1); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + assertTrue(getRequestCacheStats(client, index1).getMemorySizeInBytes() > 0); + + // Update indices.requests.cache.cleanup.staleness_threshold to "10%" with illegal argument + assertThrows("Ratio should be in [0-1.0]", IllegalArgumentException.class, () -> { + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder().put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 10) + ); + client().admin().cluster().updateSettings(updateSettingsRequest).actionGet(); + }); + + // everything else should continue to work fine later on. + // force refresh so that it creates 1 stale key + flushAndRefresh(index1); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should NOT have cleaned from index 1 + assertEquals(0, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // closing the Index after caching will clean up from Indices Request Cache + public void testCacheClearanceAfterIndexClosure() throws Exception { + int cacheCleanIntervalInMillis = 100; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index = "index"; + setupIndex(client, index); + + // create first cache entry in index + createCacheEntry(client, index, "hello"); + assertCacheState(client, index, 0, 1); + assertTrue(getRequestCacheStats(client, index).getMemorySizeInBytes() > 0); + assertTrue(getNodeCacheStats(client).getMemorySizeInBytes() > 0); + + // close index + assertAcked(client.admin().indices().prepareClose(index)); + // request cache stats cannot be access since Index should be closed + try { + getRequestCacheStats(client, index); + } catch (Exception e) { + assert (e instanceof IndexClosedException); + } + // sleep until cache cleaner would have cleaned up the stale key from index + assertBusy(() -> { + // cache cleaner should have cleaned up the stale keys from index + assertFalse(getNodeCacheStats(client).getMemorySizeInBytes() > 0); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // deleting the Index after caching will clean up from Indices Request Cache + public void testCacheCleanupAfterIndexDeletion() throws Exception { + int cacheCleanIntervalInMillis = 100; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index = "index"; + setupIndex(client, index); + + // create first cache entry in index + createCacheEntry(client, index, "hello"); + assertCacheState(client, index, 0, 1); + assertTrue(getRequestCacheStats(client, index).getMemorySizeInBytes() > 0); + assertTrue(getNodeCacheStats(client).getMemorySizeInBytes() > 0); + + // delete index + assertAcked(client.admin().indices().prepareDelete(index)); + // request cache stats cannot be access since Index should be deleted + try { + getRequestCacheStats(client, index); + } catch (Exception e) { + assert (e instanceof IndexNotFoundException); + } + + // sleep until cache cleaner would have cleaned up the stale key from index + assertBusy(() -> { + // cache cleaner should have cleaned up the stale keys from index + assertFalse(getNodeCacheStats(client).getMemorySizeInBytes() > 0); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + // when staleness threshold is lower than staleness, it should clean the cache from all indices having stale keys + public void testStaleKeysCleanupWithMultipleIndices() throws Exception { + int cacheCleanIntervalInMillis = 300; + String node = internalCluster().startNode( + Settings.builder() + .put(IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, 0.10) + .put( + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, + TimeValue.timeValueMillis(cacheCleanIntervalInMillis) + ) + ); + Client client = client(node); + String index1 = "index1"; + String index2 = "index2"; + setupIndex(client, index1); + setupIndex(client, index2); + + // create first cache entry in index1 + createCacheEntry(client, index1, "hello"); + assertCacheState(client, index1, 0, 1); + long memorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(memorySizeForIndex1 > 0); + + // create second cache entry in index1 + createCacheEntry(client, index1, "there"); + assertCacheState(client, index1, 0, 2); + long finalMemorySizeForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes(); + assertTrue(finalMemorySizeForIndex1 > memorySizeForIndex1); + + // create first cache entry in index2 + createCacheEntry(client, index2, "hello"); + assertCacheState(client, index2, 0, 1); + assertTrue(getRequestCacheStats(client, index2).getMemorySizeInBytes() > 0); + + // force refresh index 1 so that it creates 2 stale keys + flushAndRefresh(index1); + // create another cache entry in index 1, this should not be cleaned up. + createCacheEntry(client, index1, "hello"); + // record the size of this entry + long memorySizeOfLatestEntryForIndex1 = getRequestCacheStats(client, index1).getMemorySizeInBytes() - finalMemorySizeForIndex1; + // force refresh index 2 so that it creates 1 stale key + flushAndRefresh(index2); + // sleep until cache cleaner would have cleaned up the stale key from index 2 + assertBusy(() -> { + // cache cleaner should have cleaned up the stale key from index 2 + assertEquals(0, getRequestCacheStats(client, index2).getMemorySizeInBytes()); + // cache cleaner should have only cleaned up the stale entities + assertEquals(memorySizeOfLatestEntryForIndex1, getRequestCacheStats(client, index1).getMemorySizeInBytes()); + }, cacheCleanIntervalInMillis * 2, TimeUnit.MILLISECONDS); + } + + private void setupIndex(Client client, String index) throws Exception { + assertAcked( + client.admin() + .indices() + .prepareCreate(index) + .setMapping("k", "type=keyword") + .setSettings( + Settings.builder() + .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .get() + ); + indexRandom(true, client.prepareIndex(index).setSource("k", "hello")); + indexRandom(true, client.prepareIndex(index).setSource("k", "there")); + ensureSearchable(index); + } + + private void createCacheEntry(Client client, String index, String value) { + SearchResponse resp = client.prepareSearch(index).setRequestCache(true).setQuery(QueryBuilders.termQuery("k", value)).get(); + assertSearchResponse(resp); + OpenSearchAssertions.assertAllSuccessful(resp); } private static void assertCacheState(Client client, String index, long expectedHits, long expectedMisses) { - RequestCacheStats requestCacheStats = client.admin() - .indices() - .prepareStats(index) - .setRequestCache(true) - .get() - .getTotal() - .getRequestCache(); + RequestCacheStats requestCacheStats = getRequestCacheStats(client, index); // Check the hit count and miss count together so if they are not // correct we can see both values assertEquals( @@ -695,4 +1264,17 @@ private static void assertCacheState(Client client, String index, long expectedH } + private static RequestCacheStats getRequestCacheStats(Client client, String index) { + return client.admin().indices().prepareStats(index).setRequestCache(true).get().getTotal().getRequestCache(); + } + + private static RequestCacheStats getNodeCacheStats(Client client) { + NodesStatsResponse stats = client.admin().cluster().prepareNodesStats().execute().actionGet(); + for (NodeStats stat : stats.getNodes()) { + if (stat.getNode().isDataNode()) { + return stat.getIndices().getRequestCache(); + } + } + return null; + } } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index ded844b3a7f18..4a5a45eb1a17a 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -497,6 +497,8 @@ public void apply(Settings value, Settings current, Settings previous) { IndicesFieldDataCache.INDICES_FIELDDATA_CACHE_SIZE_KEY, IndicesRequestCache.INDICES_CACHE_QUERY_SIZE, IndicesRequestCache.INDICES_CACHE_QUERY_EXPIRE, + IndicesRequestCache.INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING, + IndicesRequestCache.INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING, HunspellService.HUNSPELL_LAZY_LOAD, HunspellService.HUNSPELL_IGNORE_CASE, HunspellService.HUNSPELL_DICTIONARY_OPTIONS, diff --git a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java index 039e14a031f3f..105239b2c351b 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java +++ b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java @@ -112,6 +112,10 @@ public final class IndicesRequestCache implements RemovalListener INDEX_CACHE_REQUEST_ENABLED_SETTING = Setting.boolSetting( "index.requests.cache.enable", true, @@ -128,15 +132,16 @@ public final class IndicesRequestCache implements RemovalListener INDICES_REQUEST_CACHE_CLEAN_INTERVAL_SETTING = Setting.positiveTimeSetting( - "indices.requests.cache.cleanup.interval", + public static final Setting INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING = Setting.positiveTimeSetting( + INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING_KEY, INDICES_CACHE_CLEAN_INTERVAL_SETTING, Property.NodeScope ); public static final Setting INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING = new Setting<>( - "indices.requests.cache.cleanup.staleness_threshold", + INDICES_REQUEST_CACHE_CLEANUP_STALENESS_THRESHOLD_SETTING_KEY, "0%", IndicesRequestCache::validateStalenessSetting, + Property.Dynamic, Property.NodeScope ); @@ -146,6 +151,7 @@ public final class IndicesRequestCache implements RemovalListener cache; + private final ClusterService clusterService; private final Function> cacheEntityLookup; // pkg-private for testing final IndicesRequestCacheCleanupManager cacheCleanupManager; @@ -167,10 +173,13 @@ public final class IndicesRequestCache implements RemovalListener, BytesReference> weigher = (k, v) -> k.ramBytesUsed(k.key.ramBytesUsed()) + v.ramBytesUsed(); this.cacheCleanupManager = new IndicesRequestCacheCleanupManager( threadPool, - INDICES_REQUEST_CACHE_CLEAN_INTERVAL_SETTING.get(settings), + INDICES_REQUEST_CACHE_CLEANUP_INTERVAL_SETTING.get(settings), getStalenessThreshold(settings) ); this.cacheEntityLookup = cacheEntityFunction; + this.clusterService = clusterService; + this.clusterService.getClusterSettings() + .addSettingsUpdateConsumer(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING, this::setStalenessThreshold); this.cache = cacheService.createCache( new CacheConfig.Builder().setSettings(settings) .setWeigher(weigher) @@ -208,6 +217,11 @@ private double getStalenessThreshold(Settings settings) { return RatioValue.parseRatioValue(threshold).getAsRatio(); } + // pkg-private for testing + void setStalenessThreshold(String threshold) { + this.cacheCleanupManager.updateStalenessThreshold(RatioValue.parseRatioValue(threshold).getAsRatio()); + } + void clear(CacheEntity entity) { cacheCleanupManager.enqueueCleanupKey(new CleanupKey(entity, null)); cacheCleanupManager.forceCleanCache(); @@ -274,7 +288,6 @@ BytesReference getOrCompute( } else { cacheEntity.onHit(); } - return value; } @@ -473,7 +486,7 @@ class IndicesRequestCacheCleanupManager implements Closeable { private final Set keysToClean; private final ConcurrentMap> cleanupKeyToCountMap; private final AtomicInteger staleKeysCount; - private final double stalenessThreshold; + private volatile double stalenessThreshold; private final IndicesRequestCacheCleaner cacheCleaner; IndicesRequestCacheCleanupManager(ThreadPool threadpool, TimeValue cleanInterval, double stalenessThreshold) { @@ -485,6 +498,18 @@ class IndicesRequestCacheCleanupManager implements Closeable { threadpool.schedule(cacheCleaner, cleanInterval, ThreadPool.Names.SAME); } + void updateStalenessThreshold(double stalenessThreshold) { + double oldStalenessThreshold = this.stalenessThreshold; + this.stalenessThreshold = stalenessThreshold; + if (logger.isDebugEnabled()) { + logger.debug( + "Staleness threshold for indices request cache changed to {} from {}", + this.stalenessThreshold, + oldStalenessThreshold + ); + } + } + /** * Enqueue cleanup key. * @@ -508,7 +533,7 @@ void enqueueCleanupKey(CleanupKey cleanupKey) { * @param cleanupKey the CleanupKey to be updated in the map */ private void updateStaleCountOnCacheInsert(CleanupKey cleanupKey) { - if (stalenessThreshold == 0.0 || cleanupKey.entity == null) { + if (cleanupKey.entity == null) { return; } IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); @@ -596,7 +621,7 @@ private void updateStaleCountOnEntryRemoval(CleanupKey cleanupKey, RemovalNotifi * @param cleanupKey the CleanupKey that has been marked for cleanup */ private void incrementStaleKeysCount(CleanupKey cleanupKey) { - if (stalenessThreshold == 0.0 || cleanupKey.entity == null) { + if (cleanupKey.entity == null) { return; } IndexShard indexShard = (IndexShard) cleanupKey.entity.getCacheIdentity(); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index fd7d897a0e99c..251be8a990055 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -216,10 +216,11 @@ public class IndicesService extends AbstractLifecycleComponent IndicesClusterStateService.AllocatedIndices, IndexService.ShardStoreDeleter { private static final Logger logger = LogManager.getLogger(IndicesService.class); + public static final String INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY = "indices.cache.cleanup_interval"; public static final String INDICES_SHARDS_CLOSED_TIMEOUT = "indices.shards_closed_timeout"; public static final Setting INDICES_CACHE_CLEAN_INTERVAL_SETTING = Setting.positiveTimeSetting( - "indices.cache.cleanup_interval", + INDICES_CACHE_CLEANUP_INTERVAL_SETTING_KEY, TimeValue.timeValueMinutes(1), Property.NodeScope ); diff --git a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java index 051acfe9d085a..71eb30fca2385 100644 --- a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java +++ b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java @@ -906,6 +906,40 @@ public void testClosingIndexWipesStats() throws Exception { IOUtils.close(secondReader); } + public void testCacheCleanupBasedOnStaleThreshold_thresholdUpdate() throws Exception { + threadPool = getThreadPool(); + Settings settings = Settings.builder().put(INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING.getKey(), "51%").build(); + cache = getIndicesRequestCache(settings); + + writer.addDocument(newDoc(0, "foo")); + DirectoryReader reader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + DirectoryReader secondReader = OpenSearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId("foo", "bar", 1)); + + // Get 2 entries into the cache + cache.getOrCompute(getEntity(indexShard), getLoader(reader), reader, getTermBytes()); + cache.getOrCompute(getEntity(indexShard), getLoader(secondReader), secondReader, getTermBytes()); + assertEquals(2, cache.count()); + + // Close the reader, to be enqueued for cleanup + // 1 out of 2 keys ie 50% are now stale. + reader.close(); + // cache count should not be affected + assertEquals(2, cache.count()); + + // clean cache with 51% staleness threshold + cache.cacheCleanupManager.cleanCache(); + // cleanup should have been ignored + assertEquals(2, cache.count()); + + cache.setStalenessThreshold("49%"); + // clean cache with 49% staleness threshold + cache.cacheCleanupManager.cleanCache(); + // cleanup should NOT have been ignored + assertEquals(1, cache.count()); + + IOUtils.close(secondReader); + } + public void testEviction() throws Exception { final ByteSizeValue size; {