diff --git a/.github/workflows/rfs_pr_e2e_test.yml b/.github/workflows/rfs_pr_e2e_test.yml
new file mode 100644
index 000000000..abbb61b3d
--- /dev/null
+++ b/.github/workflows/rfs_pr_e2e_test.yml
@@ -0,0 +1,40 @@
+name: Jenkins
+
+on:
+  push:
+    branches-ignore:
+      - 'backport/**'
+      - 'dependabot/**'
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+env:
+  python-version: '3.11'
+
+permissions:
+  contents: read # to fetch code (actions/checkout)
+
+jobs:
+  rfs-e2e-aws-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Determine Github repository and branch
+        id: determine-repo-vars
+        run: |
+          if [[ "${GITHUB_EVENT_NAME}" == "pull_request_target" ]]; then
+            branch_name="${{ github.event.pull_request.head.ref }}"
+            pr_repo_url="https://github.com/${{ github.event.pull_request.head.repo.full_name }}.git"
+          else
+            branch_name="${{ github.ref_name }}"
+            pr_repo_url="https://github.com/${{ github.repository }}.git"
+          fi
+          echo "Running jenkins test on repo: $pr_repo_url and branch: $branch_name"
+          echo "branch_name=$branch_name" >> $GITHUB_OUTPUT
+          echo "pr_repo_url=$pr_repo_url" >> $GITHUB_OUTPUT
+      - name: Jenkins Job Trigger and Monitor
+        uses: lewijacn/jenkins-trigger@1.0.4
+        with:
+          jenkins_url: "https://migrations.ci.opensearch.org"
+          job_name: "rfs-default-e2e-test"
+          api_token: "${{ secrets.JENKINS_MIGRATIONS_GENERIC_WEBHOOK_TOKEN }}"
+          job_params: "GIT_REPO_URL=${{ steps.determine-repo-vars.outputs.pr_repo_url }},GIT_BRANCH=${{ steps.determine-repo-vars.outputs.branch_name }}"
diff --git a/MetadataMigration/build.gradle b/MetadataMigration/build.gradle
index 155ac1f56..7a80f19d3 100644
--- a/MetadataMigration/build.gradle
+++ b/MetadataMigration/build.gradle
@@ -18,7 +18,8 @@ dependencies {
     implementation group: 'org.slf4j', name: 'slf4j-api'
     implementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j2-impl'
 
-    testImplementation testFixtures(project(path: ':RFS'))
+    testImplementation testFixtures(project(':RFS'))
+    testImplementation testFixtures(project(':testHelperFixtures'))
     testImplementation group: 'org.apache.logging.log4j', name: 'log4j-core'
     testImplementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j2-impl'
     testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api'
diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/MetadataMigration.java b/MetadataMigration/src/main/java/org/opensearch/migrations/MetadataMigration.java
index ad9bc9128..7823e1e04 100644
--- a/MetadataMigration/src/main/java/org/opensearch/migrations/MetadataMigration.java
+++ b/MetadataMigration/src/main/java/org/opensearch/migrations/MetadataMigration.java
@@ -77,7 +77,7 @@ public static void main(String[] args) throws Exception {
                 result = meta.evaluate(evaluateArgs).execute(context);
                 break;
         }
-        log.info(result.toString());
+        log.atInfo().setMessage("{}").addArgument(result::asCliOutput).log();
         System.exit(result.getExitCode());
     }
 
@@ -94,18 +94,20 @@ public Migrate migrate(MigrateOrEvaluateArgs arguments) {
     }
 
     private static void printTopLevelHelp(JCommander commander) {
-        log.info("Usage: [options] [command] [commandOptions]");
-        log.info("Options:");
+        var sb = new StringBuilder();
+        sb.append("Usage: [options] [command] [commandOptions]");
+        sb.append("Options:");
         for (var parameter : commander.getParameters()) {
-            log.info(" " + parameter.getNames());
-            log.info(" " + parameter.getDescription());
+            sb.append(" " + parameter.getNames());
+            sb.append(" " + parameter.getDescription());
         }
-        log.info("Commands:");
+        sb.append("Commands:");
         for
(var command : commander.getCommands().entrySet()) { - log.info(" " + command.getKey()); + sb.append(" " + command.getKey()); } - log.info("\nUse --help with a specific command for more information."); + sb.append("\nUse --help with a specific command for more information."); + log.info(sb.toString()); } private static void printCommandUsage(JCommander jCommander) { diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/cli/ClusterReaderExtractor.java b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/ClusterReaderExtractor.java index 59a3f00fd..00ef9a4ce 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/cli/ClusterReaderExtractor.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/ClusterReaderExtractor.java @@ -3,6 +3,7 @@ import java.nio.file.Path; import org.opensearch.migrations.MigrateOrEvaluateArgs; +import org.opensearch.migrations.Version; import org.opensearch.migrations.cluster.ClusterProviderRegistry; import org.opensearch.migrations.cluster.ClusterReader; @@ -11,14 +12,15 @@ import com.rfs.common.S3Repo; import com.rfs.common.S3Uri; import com.rfs.common.SourceRepo; +import com.rfs.common.http.ConnectionContext; import lombok.AllArgsConstructor; @AllArgsConstructor public class ClusterReaderExtractor { - final MigrateOrEvaluateArgs arguments; + private final MigrateOrEvaluateArgs arguments; - public ClusterReader extractClusterReader() { - if (arguments.fileSystemRepoPath != null && arguments.s3RepoUri != null && arguments.sourceArgs.host != null) { + public ClusterReader extractClusterReader() { + if (arguments.fileSystemRepoPath == null && arguments.s3RepoUri == null && arguments.sourceArgs.host == null) { throw new ParameterException("No details on the source cluster found, please supply a connection details or a snapshot"); } if ((arguments.s3RepoUri != null) && (arguments.s3Region == null || arguments.s3LocalDirPath == null)) { @@ -26,7 +28,7 @@ public ClusterReader extractClusterReader() { } if (arguments.sourceArgs != null && arguments.sourceArgs.host != null) { - return ClusterProviderRegistry.getRemoteReader(arguments.sourceArgs.toConnectionContext()); + return getRemoteReader(arguments.sourceArgs.toConnectionContext()); } SourceRepo repo = null; @@ -38,6 +40,14 @@ public ClusterReader extractClusterReader() { throw new ParameterException("Unable to find valid resource provider"); } - return ClusterProviderRegistry.getSnapshotReader(arguments.sourceVersion, repo); + return getSnapshotReader(arguments.sourceVersion, repo); + } + + ClusterReader getRemoteReader(ConnectionContext connection) { + return ClusterProviderRegistry.getRemoteReader(connection); + } + + ClusterReader getSnapshotReader(Version sourceVersion, SourceRepo repo) { + return ClusterProviderRegistry.getSnapshotReader(sourceVersion, repo); } } diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Clusters.java b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Clusters.java index b967c0775..d29812cd6 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Clusters.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Clusters.java @@ -14,17 +14,17 @@ public class Clusters { private ClusterReader source; private ClusterWriter target; - public String toString() { + public String asCliOutput() { var sb = new StringBuilder(); sb.append("Clusters:" + System.lineSeparator()); if (getSource() != null) { - sb.append(" Source:" + System.lineSeparator()); - sb.append(" " + 
getSource() + System.lineSeparator()); + sb.append(Format.indentToLevel(1) + "Source:" + System.lineSeparator()); + sb.append(Format.indentToLevel(2) + getSource() + System.lineSeparator()); sb.append(System.lineSeparator()); } if (getTarget() != null) { - sb.append(" Target:" + System.lineSeparator()); - sb.append(" " + getTarget() + System.lineSeparator()); + sb.append(Format.indentToLevel(1) + "Target:" + System.lineSeparator()); + sb.append(Format.indentToLevel(2) + getTarget() + System.lineSeparator()); sb.append(System.lineSeparator()); } return sb.toString(); diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Format.java b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Format.java new file mode 100644 index 000000000..b4eb989a6 --- /dev/null +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Format.java @@ -0,0 +1,15 @@ +package org.opensearch.migrations.cli; + +import lombok.experimental.UtilityClass; + +/** Shared formatting for command line interface components */ +@UtilityClass +public class Format { + + private static final String INDENT = " "; + + /** Indents to a given level for printing to the console */ + public static String indentToLevel(final int level) { + return INDENT.repeat(level); + } +} diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Items.java b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Items.java index 3d073f772..b2a12c459 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Items.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/cli/Items.java @@ -5,6 +5,7 @@ import lombok.Builder; import lombok.Data; +import lombok.NonNull; /** * Either items that are candidates for migration or have been migrated; @@ -12,37 +13,42 @@ @Builder @Data public class Items { - public boolean dryRun; - public List indexTemplates; - public List componentTemplates; - public List indexes; - public List aliases; + static final String NONE_FOUND_MARKER = ""; + private final boolean dryRun; + @NonNull + private final List indexTemplates; + @NonNull + private final List componentTemplates; + @NonNull + private final List indexes; + @NonNull + private final List aliases; - public String toString() { + public String asCliOutput() { var sb = new StringBuilder(); if (isDryRun()) { sb.append("Migration Candidates:" + System.lineSeparator()); } else { sb.append("Migrated Items:" + System.lineSeparator()); } - sb.append(" Index Templates:" + System.lineSeparator()); - sb.append(" " + getPrintableList(getIndexTemplates()) + System.lineSeparator()); + sb.append(Format.indentToLevel(1) + "Index Templates:" + System.lineSeparator()); + sb.append(Format.indentToLevel(2) + getPrintableList(getIndexTemplates()) + System.lineSeparator()); sb.append(System.lineSeparator()); - sb.append(" Component Templates:" + System.lineSeparator()); - sb.append(" " + getPrintableList(getComponentTemplates()) + System.lineSeparator()); + sb.append(Format.indentToLevel(1) + "Component Templates:" + System.lineSeparator()); + sb.append(Format.indentToLevel(2) +getPrintableList(getComponentTemplates()) + System.lineSeparator()); sb.append(System.lineSeparator()); - sb.append(" Indexes:" + System.lineSeparator()); - sb.append(" " + getPrintableList(getIndexes()) + System.lineSeparator()); + sb.append(Format.indentToLevel(1) + "Indexes:" + System.lineSeparator()); + sb.append(Format.indentToLevel(2) + getPrintableList(getIndexes()) + System.lineSeparator()); 
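// Editorial aside (not part of this diff): each section of this report follows the same pattern,
// a label written at Format.indentToLevel(1) followed by its values at Format.indentToLevel(2),
// so a populated Items block renders roughly as:
//     Migrated Items:
//        Index Templates:
//           it1, it2
//        ...
// The exact indent width depends on the INDENT constant in the new Format utility class.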
sb.append(System.lineSeparator()); - sb.append(" Aliases:" + System.lineSeparator()); - sb.append(" " + getPrintableList(getAliases()) + System.lineSeparator()); + sb.append(Format.indentToLevel(1) + "Aliases:" + System.lineSeparator()); + sb.append(Format.indentToLevel(2) +getPrintableList(getAliases()) + System.lineSeparator()); sb.append(System.lineSeparator()); return sb.toString(); } private String getPrintableList(List list) { if (list == null || list.isEmpty()) { - return ""; + return NONE_FOUND_MARKER; } return list.stream().sorted().collect(Collectors.joining(", ")); } diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Configure.java b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Configure.java index 9aef0d9d9..b958c864e 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Configure.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Configure.java @@ -6,7 +6,8 @@ public class Configure { public ConfigureResult execute() { - log.atError().setMessage("configure is not supported").log(); - return new ConfigureResult(9999); + var message = "configure is not supported"; + log.atError().setMessage(message).log(); + return new ConfigureResult(9999, message); } } diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/ConfigureResult.java b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/ConfigureResult.java index dac296884..294f4bb9a 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/ConfigureResult.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/ConfigureResult.java @@ -2,9 +2,18 @@ import lombok.AllArgsConstructor; import lombok.Getter; +import lombok.ToString; @AllArgsConstructor +@ToString public class ConfigureResult implements Result { @Getter private final int exitCode; + + @Getter + private final String errorMessage; + + public String asCliOutput() { + return this.toString(); + } } diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/EvaluateResult.java b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/EvaluateResult.java index 06f67e602..8f4273e06 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/EvaluateResult.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/EvaluateResult.java @@ -1,7 +1,5 @@ package org.opensearch.migrations.commands; -import org.apache.logging.log4j.util.Strings; - import org.opensearch.migrations.cli.Clusters; import org.opensearch.migrations.cli.Items; @@ -15,23 +13,4 @@ public class EvaluateResult implements MigrationItemResult { private final Items items; private final String errorMessage; private final int exitCode; - - public String toString() { - var sb = new StringBuilder(); - if (getClusters() != null) { - sb.append(getClusters() + System.lineSeparator()); - } - if (getItems() != null) { - sb.append(getItems() + System.lineSeparator()); - } - sb.append("Results:" + System.lineSeparator()); - if (Strings.isNotBlank(getErrorMessage())) { - sb.append(" Issue(s) detected" + System.lineSeparator()); - sb.append("Issues:" + System.lineSeparator()); - sb.append(" " + getErrorMessage() + System.lineSeparator()); - } else { - sb.append(" " + getExitCode() + " issue(s) detected" + System.lineSeparator()); - } - return sb.toString(); - } } diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrateResult.java 
b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrateResult.java index 385697dfb..51b2ba0b2 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrateResult.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrateResult.java @@ -1,7 +1,5 @@ package org.opensearch.migrations.commands; -import org.apache.logging.log4j.util.Strings; - import org.opensearch.migrations.cli.Clusters; import org.opensearch.migrations.cli.Items; @@ -15,23 +13,4 @@ public class MigrateResult implements MigrationItemResult { private final Items items; private final String errorMessage; private final int exitCode; - - public String toString() { - var sb = new StringBuilder(); - if (getClusters() != null) { - sb.append(getClusters() + System.lineSeparator()); - } - if (getItems() != null) { - sb.append(getItems() + System.lineSeparator()); - } - sb.append("Results:" + System.lineSeparator()); - if (Strings.isNotBlank(getErrorMessage())) { - sb.append(" Issue(s) detected" + System.lineSeparator()); - sb.append("Issues:" + System.lineSeparator()); - sb.append(" " + getErrorMessage() + System.lineSeparator()); - } else { - sb.append(" " + getExitCode() + " issue(s) detected" + System.lineSeparator()); - } - return sb.toString(); - } } diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrationItemResult.java b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrationItemResult.java index d8c3d342c..67396f11a 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrationItemResult.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/MigrationItemResult.java @@ -1,10 +1,32 @@ package org.opensearch.migrations.commands; +import org.apache.logging.log4j.util.Strings; + import org.opensearch.migrations.cli.Clusters; +import org.opensearch.migrations.cli.Format; import org.opensearch.migrations.cli.Items; /** All shared cli result information */ public interface MigrationItemResult extends Result { Clusters getClusters(); Items getItems(); + + default String asCliOutput() { + var sb = new StringBuilder(); + if (getClusters() != null) { + sb.append(getClusters().asCliOutput() + System.lineSeparator()); + } + if (getItems() != null) { + sb.append(getItems().asCliOutput() + System.lineSeparator()); + } + sb.append("Results:" + System.lineSeparator()); + if (Strings.isNotBlank(getErrorMessage())) { + sb.append(Format.indentToLevel(1) + "Issue(s) detected" + System.lineSeparator()); + sb.append("Issues:" + System.lineSeparator()); + sb.append(Format.indentToLevel(1) + getErrorMessage() + System.lineSeparator()); + } else { + sb.append(Format.indentToLevel(1) + getExitCode() + " issue(s) detected" + System.lineSeparator()); + } + return sb.toString(); + } } diff --git a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Result.java b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Result.java index 552d706fb..95cc78016 100644 --- a/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Result.java +++ b/MetadataMigration/src/main/java/org/opensearch/migrations/commands/Result.java @@ -3,4 +3,7 @@ /** All shared cli result information */ public interface Result { int getExitCode(); + String getErrorMessage(); + /** Render this result as a string for displaying on the command line */ + String asCliOutput(); } diff --git a/MetadataMigration/src/test/java/org/opensearch/migrations/MetadataMigrationTest.java 
b/MetadataMigration/src/test/java/org/opensearch/migrations/MetadataMigrationTest.java new file mode 100644 index 000000000..37b59b551 --- /dev/null +++ b/MetadataMigration/src/test/java/org/opensearch/migrations/MetadataMigrationTest.java @@ -0,0 +1,53 @@ +package org.opensearch.migrations; + +import java.util.List; + +import org.junit.jupiter.api.Test; + +import org.opensearch.migrations.testutils.CloseableLogSetup; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasSize; + +public class MetadataMigrationTest { + + @Test + void testMain_expectTopLevelHelp() throws Exception { + var testCases = List.of( + new String[]{}, + new String[]{"-h"}, + new String[]{"--help"} + ); + for (var testCase : testCases) { + try (var closeableLogSetup = new CloseableLogSetup(MetadataMigration.class.getName())) { + MetadataMigration.main(testCase); + + var logEvents = closeableLogSetup.getLogEvents(); + + assertThat(logEvents, hasSize(2)); + assertThat(logEvents.get(0), containsString("Command line arguments")); + assertThat(logEvents.get(1), containsString("Usage: [options] [command] [commandOptions]")); + } + } + } + + @Test + void testMain_expectCommandHelp() throws Exception { + var testCases = List.of( + new String[]{"evaluate", "-h"}, + new String[]{"migrate", "--help"} + ); + for (var testCase : testCases) { + try (var closeableLogSetup = new CloseableLogSetup(MetadataMigration.class.getName())) { + MetadataMigration.main(testCase); + + var logEvents = closeableLogSetup.getLogEvents(); + + assertThat(logEvents, hasSize(2)); + assertThat(logEvents.get(0), containsString("Command line arguments")); + assertThat(logEvents.get(1), containsString("Usage: " + testCase[0] + " [options]")); + } + } + } +} diff --git a/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ClusterReaderExtractorTest.java b/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ClusterReaderExtractorTest.java new file mode 100644 index 000000000..dd6325aa5 --- /dev/null +++ b/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ClusterReaderExtractorTest.java @@ -0,0 +1,105 @@ +package org.opensearch.migrations.cli; + +import org.junit.jupiter.api.Test; + +import org.opensearch.migrations.MigrateOrEvaluateArgs; +import org.opensearch.migrations.Version; +import org.opensearch.migrations.cluster.ClusterReader; + +import com.beust.jcommander.ParameterException; +import com.rfs.common.FileSystemRepo; +import com.rfs.common.S3Repo; +import com.rfs.common.http.ConnectionContext; +import org.mockito.ArgumentCaptor; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; + + +public class ClusterReaderExtractorTest { + @Test + void testExtractClusterReader_noSnapshotOrRemote() { + var args = new MigrateOrEvaluateArgs(); + var extractor = new ClusterReaderExtractor(args); + + var exception = assertThrows(ParameterException.class, () -> extractor.extractClusterReader()); + assertThat(args.toString(), exception.getMessage(), equalTo("No details on the source cluster found, please supply a connection details or a snapshot")); + } + + @Test + 
void testExtractClusterReader_invalidS3Snapshot_missingRegion() { + var args = new MigrateOrEvaluateArgs(); + args.s3RepoUri = "foo.bar"; + args.s3LocalDirPath = "fizz.buzz"; + var extractor = new ClusterReaderExtractor(args); + + var exception = assertThrows(ParameterException.class, () -> extractor.extractClusterReader()); + assertThat(exception.getMessage(), equalTo("If an s3 repo is being used, s3-region and s3-local-dir-path must be set")); + } + + @Test + void testExtractClusterReader_invalidS3Snapshot_missingLocalDirPath() { + var args = new MigrateOrEvaluateArgs(); + args.s3RepoUri = "foo.bar"; + args.s3Region = "us-west-1"; + var extractor = new ClusterReaderExtractor(args); + + var exception = assertThrows(ParameterException.class, () -> extractor.extractClusterReader()); + assertThat(exception.getMessage(), equalTo("If an s3 repo is being used, s3-region and s3-local-dir-path must be set")); + } + + @Test + void testExtractClusterReader_validLocalSnapshot() { + var args = new MigrateOrEvaluateArgs(); + args.fileSystemRepoPath = "foo.bar"; + args.sourceVersion = Version.fromString("OS 1.1.1"); + var extractor = spy(new ClusterReaderExtractor(args)); + var mockReader = mock(ClusterReader.class); + doReturn(mockReader).when(extractor).getSnapshotReader(eq(args.sourceVersion), any(FileSystemRepo.class)); + + var result = extractor.extractClusterReader(); + assertThat(result, equalTo(mockReader)); + + verify(extractor).getSnapshotReader(eq(args.sourceVersion), any(FileSystemRepo.class)); + } + + @Test + void testExtractClusterReader_validS3Snapshot() { + var args = new MigrateOrEvaluateArgs(); + args.s3RepoUri = "foo.bar"; + args.s3Region = "us-west-1"; + args.s3LocalDirPath = "fizz.buzz"; + args.sourceVersion = Version.fromString("OS 9.9.9"); + var extractor = spy(new ClusterReaderExtractor(args)); + var mockReader = mock(ClusterReader.class); + doReturn(mockReader).when(extractor).getSnapshotReader(eq(args.sourceVersion), any(S3Repo.class)); + + var result = extractor.extractClusterReader(); + assertThat(result, equalTo(mockReader)); + + verify(extractor).getSnapshotReader(eq(args.sourceVersion), any(S3Repo.class)); + } + + @Test + void testExtractClusterReader_validRemote() { + var args = new MigrateOrEvaluateArgs(); + args.sourceArgs.host = "http://foo.bar"; + var extractor = spy(new ClusterReaderExtractor(args)); + var mockReader = mock(ClusterReader.class); + doReturn(mockReader).when(extractor).getRemoteReader(any()); + + var result = extractor.extractClusterReader(); + assertThat(result, equalTo(mockReader)); + + var foundContext = ArgumentCaptor.forClass(ConnectionContext.class); + verify(extractor).getRemoteReader(foundContext.capture()); + assertThat(args.sourceArgs.toConnectionContext(), equalTo(foundContext.getValue())); + } +} diff --git a/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ClustersTest.java b/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ClustersTest.java new file mode 100644 index 000000000..6a037bf48 --- /dev/null +++ b/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ClustersTest.java @@ -0,0 +1,55 @@ +package org.opensearch.migrations.cli; + +import org.junit.jupiter.api.Test; + +import org.opensearch.migrations.cluster.ClusterReader; +import org.opensearch.migrations.cluster.ClusterWriter; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.not; +import static org.hamcrest.MatcherAssert.assertThat; +import static 
org.opensearch.migrations.matchers.HasLineCount.hasLineCount; +import static org.mockito.Mockito.mock; + +public class ClustersTest { + @Test + void testAsString_empty() { + var clusters = Clusters.builder().build(); + + var result = clusters.asCliOutput(); + + assertThat(result, containsString("Clusters:")); + assertThat(result, not(containsString("Source:"))); + assertThat(result, not(containsString("Target:"))); + assertThat(result, hasLineCount(1)); + } + + @Test + void testAsString_withSource() { + var clusters = Clusters.builder() + .source(mock(ClusterReader.class)) + .build(); + + var result = clusters.asCliOutput(); + + assertThat(result, containsString("Clusters:")); + assertThat(result, containsString("Source:")); + assertThat(result, not(containsString("Target:"))); + assertThat(result, hasLineCount(3)); + } + + @Test + void testAsString_withSourceAndTarget() { + var clusters = Clusters.builder() + .source(mock(ClusterReader.class)) + .target(mock(ClusterWriter.class)) + .build(); + + var result = clusters.asCliOutput(); + + assertThat(result, containsString("Clusters:")); + assertThat(result, containsString("Source:")); + assertThat(result, containsString("Target:")); + assertThat(result, hasLineCount(6)); + } +} diff --git a/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ItemsTest.java b/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ItemsTest.java new file mode 100644 index 000000000..6a6f33930 --- /dev/null +++ b/MetadataMigration/src/test/java/org/opensearch/migrations/cli/ItemsTest.java @@ -0,0 +1,77 @@ +package org.opensearch.migrations.cli; + +import java.util.List; + +import org.junit.jupiter.api.Test; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.opensearch.migrations.matchers.ContainsStringCount.containsStringCount; +import static org.opensearch.migrations.matchers.HasLineCount.hasLineCount; + +public class ItemsTest { + @Test + void testAsString_empty() { + var items = Items.builder() + .indexTemplates(List.of()) + .componentTemplates(List.of()) + .indexes(List.of()) + .aliases(List.of()) + .build(); + + var result = items.asCliOutput(); + + assertThat(result, containsString("Migrated Items:")); + assertThat(result, containsString("Index Templates:")); + assertThat(result, containsString("Component Templates:")); + assertThat(result, containsString("Indexes:")); + assertThat(result, containsString("Aliases:")); + assertThat(result, containsStringCount(Items.NONE_FOUND_MARKER, 4)); + assertThat(result, hasLineCount(12)); + } + + @Test + void testAsString_full() { + var items = Items.builder() + .indexTemplates(List.of("it1", "it2")) + .componentTemplates(List.of("ct1", "ct2")) + .indexes(List.of("i1", "i2")) + .aliases(List.of("a1", "a2")) + .build(); + + var result = items.asCliOutput(); + + assertThat(result, containsString("Migrated Items:")); + assertThat(result, containsString("Index Templates:")); + assertThat(result, containsString("it1, it2")); + assertThat(result, containsString("Component Templates:")); + assertThat(result, containsString("ct1, ct2")); + assertThat(result, containsString("Indexes:")); + assertThat(result, containsString("i1, i2")); + assertThat(result, containsString("Aliases:")); + assertThat(result, containsString("a1, a2")); + assertThat(result, containsStringCount(Items.NONE_FOUND_MARKER, 0)); + assertThat(result, hasLineCount(12)); + } + + @Test + void testAsString_itemOrdering() { + var items = Items.builder() + 
.indexTemplates(List.of()) + .componentTemplates(List.of()) + .indexes(List.of("i1", "i2", "i5", "i3", "i4")) + .aliases(List.of()) + .build(); + + var result = items.asCliOutput(); + + assertThat(result, containsString("Migrated Items:")); + assertThat(result, containsString("Index Templates:")); + assertThat(result, containsString("i1, i2, i3, i4, i5")); + assertThat(result, containsString("Component Templates:")); + assertThat(result, containsString("Indexes:")); + assertThat(result, containsString("Aliases:")); + assertThat(result, containsStringCount(Items.NONE_FOUND_MARKER, 3)); + assertThat(result, hasLineCount(12)); + } +} diff --git a/MetadataMigration/src/test/java/org/opensearch/migrations/commands/MigrationItemResultTest.java b/MetadataMigration/src/test/java/org/opensearch/migrations/commands/MigrationItemResultTest.java new file mode 100644 index 000000000..f5e745e42 --- /dev/null +++ b/MetadataMigration/src/test/java/org/opensearch/migrations/commands/MigrationItemResultTest.java @@ -0,0 +1,75 @@ +package org.opensearch.migrations.commands; + +import org.junit.jupiter.api.Test; + +import org.opensearch.migrations.cli.Clusters; +import org.opensearch.migrations.cli.Items; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; + +public class MigrationItemResultTest { + @Test + void testAsString_fullResults_withMessage() { + var clusters = mock(Clusters.class); + var items = mock(Items.class); + var testObject = EvaluateResult.builder() + .clusters(clusters) + .items(items) + .exitCode(10) + .errorMessage("Full results") + .build(); + + var result = testObject.asCliOutput(); + assertThat(result, containsString("Issue(s) detected")); + assertThat(result, containsString("Issues:")); + + verify(clusters).asCliOutput(); + verify(items).asCliOutput(); + verifyNoMoreInteractions(items, clusters); + } + + @Test + void testAsString_fullResults_withNoMessage() { + var clusters = mock(Clusters.class); + var items = mock(Items.class); + var testObject = EvaluateResult.builder() + .clusters(clusters) + .items(items) + .exitCode(10) + .build(); + + var result = testObject.asCliOutput(); + assertThat(result, containsString("10 issue(s) detected")); + verify(clusters).asCliOutput(); + verify(items).asCliOutput(); + verifyNoMoreInteractions(items, clusters); + } + + @Test + void testAsString_noItems() { + var clusters = mock(Clusters.class); + var testObject = EvaluateResult.builder() + .clusters(clusters) + .exitCode(0) + .build(); + + var result = testObject.asCliOutput(); + assertThat(result, containsString("0 issue(s) detected")); + verify(clusters).asCliOutput(); + verifyNoMoreInteractions(clusters); + } + + @Test + void testAsString_nothing() { + var testObject = EvaluateResult.builder() + .exitCode(0) + .build(); + + var result = testObject.asCliOutput(); + assertThat(result, containsString("0 issue(s) detected")); + } +} diff --git a/RFS/src/main/java/com/rfs/common/OpenSearchClient.java b/RFS/src/main/java/com/rfs/common/OpenSearchClient.java index a091d8fb5..f63d11da4 100644 --- a/RFS/src/main/java/com/rfs/common/OpenSearchClient.java +++ b/RFS/src/main/java/com/rfs/common/OpenSearchClient.java @@ -95,12 +95,12 @@ private Version versionFromResponse(HttpResponse resp) { .major(Integer.parseInt(parts[0])) .minor(Integer.parseInt(parts[1])) .patch(parts.length > 2 ? 
Integer.parseInt(parts[2]) : 0); - + var distroNode = versionNode.get("distribution"); if (distroNode != null && distroNode.asText().equalsIgnoreCase("opensearch")) { - versionBuilder.flavor(Flavor.OpenSearch); - } else { - versionBuilder.flavor(Flavor.Elasticsearch); + versionBuilder.flavor(Flavor.OPENSEARCH); + } else { + versionBuilder.flavor(Flavor.ELASTICSEARCH); } return versionBuilder.build(); } catch (Exception e) { diff --git a/RFS/src/main/java/com/rfs/common/http/ConnectionContext.java b/RFS/src/main/java/com/rfs/common/http/ConnectionContext.java index f28fb1cf2..05eafe29a 100644 --- a/RFS/src/main/java/com/rfs/common/http/ConnectionContext.java +++ b/RFS/src/main/java/com/rfs/common/http/ConnectionContext.java @@ -6,6 +6,7 @@ import com.beust.jcommander.Parameter; import com.beust.jcommander.ParametersDelegate; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; @@ -14,6 +15,7 @@ * Stores the connection context for an Elasticsearch/OpenSearch cluster */ @Getter +@EqualsAndHashCode(exclude={"requestTransformer"}) @ToString(exclude={"requestTransformer"}) public class ConnectionContext { public enum Protocol { @@ -140,7 +142,7 @@ public static class TargetAdvancedArgs { public boolean compressionEnabled = false; } - @Getter + @Getter public static class SourceArgs implements IParams { @Parameter(names = { "--source-host" }, description = "The source host and port (e.g. http://localhost:9200)", required = false) diff --git a/RFS/src/main/java/com/rfs/transformers/Transformer_ES_6_8_to_OS_2_11.java b/RFS/src/main/java/com/rfs/transformers/Transformer_ES_6_8_to_OS_2_11.java index 9c0af53bf..6810592e5 100644 --- a/RFS/src/main/java/com/rfs/transformers/Transformer_ES_6_8_to_OS_2_11.java +++ b/RFS/src/main/java/com/rfs/transformers/Transformer_ES_6_8_to_OS_2_11.java @@ -41,7 +41,7 @@ public GlobalMetadata transformGlobalMetadata(GlobalMetadata globalData) { var templateCopy = (ObjectNode) template.getValue().deepCopy(); var indexTemplate = (Index) () -> templateCopy; transformIndex(indexTemplate, IndexType.Template); - templates.set(template.getKey(), indexTemplate.rawJson()); + templates.set(template.getKey(), indexTemplate.getRawJson()); }); newRoot.set("templates", templates); } @@ -65,12 +65,12 @@ public GlobalMetadata transformGlobalMetadata(GlobalMetadata globalData) { public IndexMetadata transformIndexMetadata(IndexMetadata index) { var copy = index.deepCopy(); transformIndex(copy, IndexType.Concrete); - return new IndexMetadataData_OS_2_11(copy.rawJson(), copy.getId(), copy.getName()); + return new IndexMetadataData_OS_2_11(copy.getRawJson(), copy.getId(), copy.getName()); } private void transformIndex(Index index, IndexType type) { - logger.debug("Original Object: " + index.rawJson().toString()); - var newRoot = index.rawJson(); + logger.debug("Original Object: " + index.getRawJson().toString()); + var newRoot = index.getRawJson(); switch (type) { case Concrete: diff --git a/RFS/src/main/java/com/rfs/transformers/Transformer_ES_7_10_OS_2_11.java b/RFS/src/main/java/com/rfs/transformers/Transformer_ES_7_10_OS_2_11.java index 2809c363b..31e268c70 100644 --- a/RFS/src/main/java/com/rfs/transformers/Transformer_ES_7_10_OS_2_11.java +++ b/RFS/src/main/java/com/rfs/transformers/Transformer_ES_7_10_OS_2_11.java @@ -86,9 +86,9 @@ public GlobalMetadata transformGlobalMetadata(GlobalMetadata metaData) { @Override public IndexMetadata transformIndexMetadata(IndexMetadata indexData) { 
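// Editorial aside (not part of this diff): transformIndexMetadata logs the original document, then
// works on indexData.deepCopy() so the source metadata is never mutated; the copy's raw JSON is
// rewritten in place (for example, removeIntermediateMappingsLevels below) before being wrapped in
// the OS 2.11 metadata type, mirroring the ES 6.8 transformer shown above.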
- logger.debug("Original Object: " + indexData.rawJson().toString()); + logger.debug("Original Object: " + indexData.getRawJson().toString()); var copy = indexData.deepCopy(); - var newRoot = copy.rawJson(); + var newRoot = copy.getRawJson(); TransformFunctions.removeIntermediateMappingsLevels(newRoot); diff --git a/RFS/src/main/java/com/rfs/version_es_6_8/ElasticsearchConstants_ES_6_8.java b/RFS/src/main/java/com/rfs/version_es_6_8/ElasticsearchConstants_ES_6_8.java index a74e69f22..98856c6c5 100644 --- a/RFS/src/main/java/com/rfs/version_es_6_8/ElasticsearchConstants_ES_6_8.java +++ b/RFS/src/main/java/com/rfs/version_es_6_8/ElasticsearchConstants_ES_6_8.java @@ -1,11 +1,15 @@ package com.rfs.version_es_6_8; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.dataformat.smile.SmileFactory; import com.fasterxml.jackson.dataformat.smile.SmileGenerator; + public class ElasticsearchConstants_ES_6_8 { + private ElasticsearchConstants_ES_6_8() {} + public static final int BUFFER_SIZE_IN_BYTES; public static final SmileFactory SMILE_FACTORY; public static final String SOFT_DELETES_FIELD; @@ -18,12 +22,13 @@ public class ElasticsearchConstants_ES_6_8 { // Taken from: // https://github.com/elastic/elasticsearch/blob/6.8/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContent.java#L55 - SmileFactory smileFactory = new SmileFactory(); - smileFactory.configure(SmileGenerator.Feature.ENCODE_BINARY_AS_7BIT, false); - smileFactory.configure(SmileFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); - smileFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false); - smileFactory.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, false); - SMILE_FACTORY = smileFactory; + SMILE_FACTORY = SmileFactory.builder() + .configure(SmileGenerator.Feature.ENCODE_BINARY_AS_7BIT, false) + .configure(JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false) + .build(); + + SMILE_FACTORY.disable(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT); + SMILE_FACTORY.disable(JsonParser.Feature.STRICT_DUPLICATE_DETECTION); // Soft Deletes were added in 7.0 SOFT_DELETES_FIELD = ""; diff --git a/RFS/src/main/java/com/rfs/version_es_6_8/IndexMetadataData_ES_6_8.java b/RFS/src/main/java/com/rfs/version_es_6_8/IndexMetadataData_ES_6_8.java index 5e3264283..45ee64f82 100644 --- a/RFS/src/main/java/com/rfs/version_es_6_8/IndexMetadataData_ES_6_8.java +++ b/RFS/src/main/java/com/rfs/version_es_6_8/IndexMetadataData_ES_6_8.java @@ -5,30 +5,29 @@ import com.rfs.models.IndexMetadata; import com.rfs.transformers.TransformFunctions; +import lombok.Getter; public class IndexMetadataData_ES_6_8 implements IndexMetadata { - private ObjectNode root; + @Getter + private final ObjectNode rawJson; private ObjectNode mappings; private ObjectNode settings; - private String indexId; - private String indexName; + @Getter + private final String id; + @Getter + private final String name; public IndexMetadataData_ES_6_8(ObjectNode root, String indexId, String indexName) { - this.root = root; + this.rawJson = root; this.mappings = null; this.settings = null; - this.indexId = indexId; - this.indexName = indexName; + this.id = indexId; + this.name = indexName; } @Override public ObjectNode getAliases() { - return (ObjectNode) root.get("aliases"); - } - - @Override - public String getId() { - return indexId; + return (ObjectNode) rawJson.get("aliases"); } @Override @@ -37,17 +36,12 @@ public 
JsonNode getMappings() { return mappings; } - ObjectNode mappingsNode = (ObjectNode) root.get("mappings"); + ObjectNode mappingsNode = (ObjectNode) rawJson.get("mappings"); mappings = mappingsNode; return mappings; } - @Override - public String getName() { - return indexName; - } - @Override public int getNumberOfShards() { return this.getSettings().get("index").get("number_of_shards").asInt(); @@ -59,20 +53,15 @@ public ObjectNode getSettings() { return settings; } - ObjectNode treeSettings = TransformFunctions.convertFlatSettingsToTree((ObjectNode) root.get("settings")); + ObjectNode treeSettings = TransformFunctions.convertFlatSettingsToTree((ObjectNode) rawJson.get("settings")); settings = treeSettings; return settings; } - @Override - public ObjectNode rawJson() { - return root; - } - @Override public IndexMetadata deepCopy() { - return new IndexMetadataData_ES_6_8(root.deepCopy(), indexId, indexName); + return new IndexMetadataData_ES_6_8(rawJson.deepCopy(), id, name); } } diff --git a/RFS/src/main/java/com/rfs/version_es_6_8/ShardMetadataData_ES_6_8.java b/RFS/src/main/java/com/rfs/version_es_6_8/ShardMetadataData_ES_6_8.java index caa97408a..0a5fbbd9a 100644 --- a/RFS/src/main/java/com/rfs/version_es_6_8/ShardMetadataData_ES_6_8.java +++ b/RFS/src/main/java/com/rfs/version_es_6_8/ShardMetadataData_ES_6_8.java @@ -1,13 +1,12 @@ package com.rfs.version_es_6_8; import java.io.IOException; -import java.util.ArrayList; +import java.util.Collections; import java.util.List; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonNode; @@ -16,20 +15,22 @@ import com.rfs.models.ShardFileInfo; import com.rfs.models.ShardMetadata; +import lombok.Getter; +@Getter public class ShardMetadataData_ES_6_8 implements ShardMetadata { private static final ObjectMapper objectMapper = new ObjectMapper(); - private String snapshotName; - private String indexName; - private String indexId; - private int shardId; - private int indexVersion; - private long startTime; - private long time; - private int numberOfFiles; - private long totalSize; - private List files; + private final String snapshotName; + private final String indexName; + private final String indexId; + private final int shardId; + private final int indexVersion; + private final long startTime; + private final long time; + private final int numberOfFiles; + private final long totalSizeBytes; + private final List files; public ShardMetadataData_ES_6_8( String snapshotName, @@ -51,65 +52,14 @@ public ShardMetadataData_ES_6_8( this.startTime = startTime; this.time = time; this.numberOfFiles = numberOfFiles; - this.totalSize = totalSize; + this.totalSizeBytes = totalSize; // Convert the raw file metadata to the FileMetadata class List convertedFiles = new java.util.ArrayList<>(); for (FileInfoRaw fileMetadataRaw : files) { convertedFiles.add(FileInfo.fromFileMetadataRaw(fileMetadataRaw)); } - this.files = convertedFiles; - } - - @Override - public String getSnapshotName() { - return snapshotName; - } - - @Override - public String getIndexName() { - return indexName; - } - - @Override - public String getIndexId() { - return indexId; - } - - @Override - public int getShardId() { - return shardId; - } - - @Override - public int 
getIndexVersion() { - return indexVersion; - } - - @Override - public long getStartTime() { - return startTime; - } - - @Override - public long getTime() { - return time; - } - - @Override - public int getNumberOfFiles() { - return numberOfFiles; - } - - @Override - public long getTotalSizeBytes() { - return totalSize; - } - - @Override - public List getFiles() { - List convertedFiles = new ArrayList<>(files); - return convertedFiles; + this.files = Collections.unmodifiableList(convertedFiles); } @Override @@ -150,15 +100,16 @@ public DataRaw( } } + @Getter public static class FileInfo implements ShardFileInfo { - private String name; - private String physicalName; - private long length; - private String checksum; - private long partSize; - private long numberOfParts; - private String writtenBy; - private BytesRef metaHash; + private final String name; + private final String physicalName; + private final long length; + private final String checksum; + private final long partSize; + private final long numberOfParts; + private final String writtenBy; + private final BytesRef metaHash; public static FileInfo fromFileMetadataRaw(FileInfoRaw fileMetadataRaw) { return new FileInfo( @@ -189,7 +140,9 @@ public FileInfo( this.writtenBy = writtenBy; this.metaHash = metaHash; - // Calculate the number of parts the file is chopped into; taken from Elasticsearch code + // Calculate the number of parts the file is chopped into; taken from Elasticsearch code. When Elasticsearch makes + // a snapshot and finds Lucene files over a specified size, it will split those files into multiple parts based on the + // maximum part size. // See: // https://github.com/elastic/elasticsearch/blob/6.8/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java#L68 long partBytes = Long.MAX_VALUE; @@ -198,54 +151,14 @@ public FileInfo( } long totalLength = length; - long numberOfParts = totalLength / partBytes; + long numberOfPartsTemp = totalLength / partBytes; if (totalLength % partBytes > 0) { - numberOfParts++; + numberOfPartsTemp++; } - if (numberOfParts == 0) { - numberOfParts++; + if (numberOfPartsTemp == 0) { + numberOfPartsTemp++; } - this.numberOfParts = numberOfParts; - } - - @Override - public String getName() { - return name; - } - - @Override - public String getPhysicalName() { - return physicalName; - } - - @Override - public long getLength() { - return length; - } - - @Override - public String getChecksum() { - return checksum; - } - - @Override - public long getPartSize() { - return partSize; - } - - @Override - public String getWrittenBy() { - return writtenBy; - } - - @Override - public BytesRef getMetaHash() { - return metaHash; - } - - @Override - public long getNumberOfParts() { - return numberOfParts; + this.numberOfParts = numberOfPartsTemp; } // The Snapshot file may be split into multiple blobs; use this to find the correct file name @@ -298,8 +211,7 @@ public FileInfoRaw( public static class FileInfoRawDeserializer extends JsonDeserializer { @Override - public FileInfoRaw deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, - JsonProcessingException { + public FileInfoRaw deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { JsonNode rootNode = jp.getCodec().readTree(jp); diff --git a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataData_ES_6_8.java b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataData_ES_6_8.java index ed78bb576..14f4b7b5c 100644 --- 
a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataData_ES_6_8.java +++ b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataData_ES_6_8.java @@ -5,9 +5,10 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.rfs.models.SnapshotMetadata; +import lombok.Getter; +@Getter public class SnapshotMetadataData_ES_6_8 implements SnapshotMetadata { - private String name; private String uuid; @JsonProperty("version_id") @@ -26,64 +27,4 @@ public class SnapshotMetadataData_ES_6_8 implements SnapshotMetadata { @JsonProperty("successful_shards") private int successfulShards; private List failures; // Haven't looked at this yet - - @Override - public String getName() { - return name; - } - - @Override - public String getUuid() { - return uuid; - } - - @Override - public int getVersionId() { - return versionId; - } - - @Override - public List getIndices() { - return indices; - } - - @Override - public String getState() { - return state; - } - - @Override - public String getReason() { - return reason; - } - - @Override - public boolean isIncludeGlobalState() { - return includeGlobalState; - } - - @Override - public long getStartTime() { - return startTime; - } - - @Override - public long getEndTime() { - return endTime; - } - - @Override - public int getTotalShards() { - return totalShards; - } - - @Override - public int getSuccessfulShards() { - return successfulShards; - } - - @Override - public List getFailures() { - return failures; - } } diff --git a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataFactory_ES_6_8.java b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataFactory_ES_6_8.java index 34c8106d1..dc328faf7 100644 --- a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataFactory_ES_6_8.java +++ b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotMetadataFactory_ES_6_8.java @@ -18,11 +18,10 @@ public class SnapshotMetadataFactory_ES_6_8 implements SnapshotMetadata.Factory public SnapshotMetadata fromJsonNode(JsonNode root) throws Exception { ObjectMapper mapper = new ObjectMapper(); ObjectNode objectNodeRoot = (ObjectNode) root; - SnapshotMetadataData_ES_6_8 snapshotMetadata = mapper.treeToValue( + return mapper.treeToValue( objectNodeRoot.get("snapshot"), SnapshotMetadataData_ES_6_8.class ); - return snapshotMetadata; } @Override diff --git a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoData_ES_6_8.java b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoData_ES_6_8.java index 74c453c80..8a34e929b 100644 --- a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoData_ES_6_8.java +++ b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoData_ES_6_8.java @@ -11,6 +11,10 @@ import com.rfs.common.SnapshotRepo; import com.rfs.common.SnapshotRepo.CantParseRepoFile; import com.rfs.common.SourceRepo; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.RequiredArgsConstructor; public class SnapshotRepoData_ES_6_8 { @@ -36,19 +40,20 @@ public static SnapshotRepoData_ES_6_8 fromRepo(SourceRepo repo) { return fromRepoFile(file); } - public Path filePath; - public List snapshots; - public Map indices; + @Getter + private Path filePath; + @Getter + private List snapshots; + @Getter + private Map indices; + @Getter + @AllArgsConstructor + @NoArgsConstructor public static class Snapshot implements SnapshotRepo.Snapshot { - public String name; - public String uuid; - public int state; - - @Override - public String getName() { - return name; - } + private String name; + private String uuid; + 
private int state; @Override public String getId() { @@ -56,37 +61,23 @@ public String getId() { } } + @Getter + @AllArgsConstructor + @NoArgsConstructor public static class RawIndex { - public String id; - public List snapshots; + private String id; + private List snapshots; } + @Getter + @RequiredArgsConstructor public static class Index implements SnapshotRepo.Index { public static Index fromRawIndex(String name, RawIndex rawIndex) { - Index index = new Index(); - index.name = name; - index.id = rawIndex.id; - index.snapshots = rawIndex.snapshots; - return index; - } - - public String name; - public String id; - public List snapshots; - - @Override - public String getName() { - return name; + return new Index(name, rawIndex.id, rawIndex.snapshots); } - @Override - public String getId() { - return id; - } - - @Override - public List getSnapshots() { - return snapshots; - } + private final String name; + private final String id; + private final List snapshots; } } diff --git a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoProvider_ES_6_8.java b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoProvider_ES_6_8.java index 7576b9757..44cef6202 100644 --- a/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoProvider_ES_6_8.java +++ b/RFS/src/main/java/com/rfs/version_es_6_8/SnapshotRepoProvider_ES_6_8.java @@ -23,7 +23,7 @@ protected SnapshotRepoData_ES_6_8 getRepoData() { } public List getIndices() { - return getRepoData().indices.entrySet() + return getRepoData().getIndices().entrySet() .stream() .map(entry -> SnapshotRepoData_ES_6_8.Index.fromRawIndex(entry.getKey(), entry.getValue())) .collect(Collectors.toList()); @@ -32,14 +32,14 @@ public List getIndices() { @Override public List getIndicesInSnapshot(String snapshotName) { List matchedIndices = new ArrayList<>(); - SnapshotRepoData_ES_6_8.Snapshot targetSnapshot = getRepoData().snapshots.stream() - .filter(snapshot -> snapshotName.equals(snapshot.name)) + SnapshotRepoData_ES_6_8.Snapshot targetSnapshot = getRepoData().getSnapshots().stream() + .filter(snapshot -> snapshotName.equals(snapshot.getName())) .findFirst() .orElse(null); if (targetSnapshot != null) { - getRepoData().indices.forEach((indexName, rawIndex) -> { - if (rawIndex.snapshots.contains(targetSnapshot.uuid)) { + getRepoData().getIndices().forEach((indexName, rawIndex) -> { + if (rawIndex.getSnapshots().contains(targetSnapshot.getId())) { matchedIndices.add(SnapshotRepoData_ES_6_8.Index.fromRawIndex(indexName, rawIndex)); } }); @@ -49,15 +49,14 @@ public List getIndicesInSnapshot(String snapshotName) { @Override public List getSnapshots() { - List convertedList = new ArrayList<>(getRepoData().snapshots); - return convertedList; + return new ArrayList<>(getRepoData().getSnapshots()); } @Override public String getSnapshotId(String snapshotName) { - for (SnapshotRepoData_ES_6_8.Snapshot snapshot : getRepoData().snapshots) { - if (snapshot.name.equals(snapshotName)) { - return snapshot.uuid; + for (SnapshotRepoData_ES_6_8.Snapshot snapshot : getRepoData().getSnapshots()) { + if (snapshot.getName().equals(snapshotName)) { + return snapshot.getId(); } } return null; @@ -65,7 +64,7 @@ public String getSnapshotId(String snapshotName) { @Override public String getIndexId(String indexName) { - return getRepoData().indices.get(indexName).id; + return getRepoData().getIndices().get(indexName).getId(); } @Override diff --git a/RFS/src/main/java/com/rfs/version_es_7_10/ElasticsearchConstants_ES_7_10.java 
b/RFS/src/main/java/com/rfs/version_es_7_10/ElasticsearchConstants_ES_7_10.java index e26b90244..3d95fb245 100644 --- a/RFS/src/main/java/com/rfs/version_es_7_10/ElasticsearchConstants_ES_7_10.java +++ b/RFS/src/main/java/com/rfs/version_es_7_10/ElasticsearchConstants_ES_7_10.java @@ -1,11 +1,15 @@ package com.rfs.version_es_7_10; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.dataformat.smile.SmileFactory; import com.fasterxml.jackson.dataformat.smile.SmileGenerator; + public class ElasticsearchConstants_ES_7_10 { + private ElasticsearchConstants_ES_7_10() {} + public static final int BUFFER_SIZE_IN_BYTES; public static final SmileFactory SMILE_FACTORY; public static final String SOFT_DELETES_FIELD; @@ -17,12 +21,13 @@ public class ElasticsearchConstants_ES_7_10 { // Taken from: // https://github.com/elastic/elasticsearch/blob/7.10/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/smile/SmileXContent.java#L54 - SmileFactory smileFactory = new SmileFactory(); - smileFactory.configure(SmileGenerator.Feature.ENCODE_BINARY_AS_7BIT, false); - smileFactory.configure(SmileFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); - smileFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false); - smileFactory.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, false); - SMILE_FACTORY = smileFactory; + SMILE_FACTORY = SmileFactory.builder() + .configure(SmileGenerator.Feature.ENCODE_BINARY_AS_7BIT, false) + .configure(JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false) + .build(); + + SMILE_FACTORY.disable(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT); + SMILE_FACTORY.disable(JsonParser.Feature.STRICT_DUPLICATE_DETECTION); // Taken from: // https://github.com/elastic/elasticsearch/blob/v7.10.2/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java#L110 diff --git a/RFS/src/main/java/com/rfs/version_es_7_10/IndexMetadataData_ES_7_10.java b/RFS/src/main/java/com/rfs/version_es_7_10/IndexMetadataData_ES_7_10.java index 54ed58ffb..64d712628 100644 --- a/RFS/src/main/java/com/rfs/version_es_7_10/IndexMetadataData_ES_7_10.java +++ b/RFS/src/main/java/com/rfs/version_es_7_10/IndexMetadataData_ES_7_10.java @@ -4,30 +4,29 @@ import com.rfs.models.IndexMetadata; import com.rfs.transformers.TransformFunctions; +import lombok.Getter; public class IndexMetadataData_ES_7_10 implements IndexMetadata { - private ObjectNode root; + @Getter + private final ObjectNode rawJson; private ObjectNode mappings; private ObjectNode settings; - private String indexId; - private String indexName; + @Getter + private final String id; + @Getter + private final String name; public IndexMetadataData_ES_7_10(ObjectNode root, String indexId, String indexName) { - this.root = root; + this.rawJson = root; this.mappings = null; this.settings = null; - this.indexId = indexId; - this.indexName = indexName; + this.id = indexId; + this.name = indexName; } @Override public ObjectNode getAliases() { - return (ObjectNode) root.get("aliases"); - } - - @Override - public String getId() { - return indexId; + return (ObjectNode) rawJson.get("aliases"); } @Override @@ -36,17 +35,12 @@ public ObjectNode getMappings() { return mappings; } - ObjectNode mappingsNode = (ObjectNode) root.get("mappings"); + ObjectNode mappingsNode = (ObjectNode) rawJson.get("mappings"); mappings = mappingsNode; return mappings; } - @Override - public String getName() { - return indexName; - } - @Override 
public int getNumberOfShards() { return this.getSettings().get("index").get("number_of_shards").asInt(); @@ -58,20 +52,15 @@ public ObjectNode getSettings() { return settings; } - ObjectNode treeSettings = TransformFunctions.convertFlatSettingsToTree((ObjectNode) root.get("settings")); + ObjectNode treeSettings = TransformFunctions.convertFlatSettingsToTree((ObjectNode) rawJson.get("settings")); settings = treeSettings; return settings; } - @Override - public ObjectNode rawJson() { - return root; - } - @Override public IndexMetadata deepCopy() { - return new IndexMetadataData_ES_7_10(root.deepCopy(), indexId, indexName); + return new IndexMetadataData_ES_7_10(rawJson.deepCopy(), id, name); } } diff --git a/RFS/src/main/java/com/rfs/version_es_7_10/ShardMetadataData_ES_7_10.java b/RFS/src/main/java/com/rfs/version_es_7_10/ShardMetadataData_ES_7_10.java index 102a91c4c..31a238b28 100644 --- a/RFS/src/main/java/com/rfs/version_es_7_10/ShardMetadataData_ES_7_10.java +++ b/RFS/src/main/java/com/rfs/version_es_7_10/ShardMetadataData_ES_7_10.java @@ -1,13 +1,12 @@ package com.rfs.version_es_7_10; import java.io.IOException; -import java.util.ArrayList; +import java.util.Collections; import java.util.List; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.JsonDeserializer; @@ -17,7 +16,9 @@ import com.rfs.models.ShardFileInfo; import com.rfs.models.ShardMetadata; +import lombok.Getter; +@Getter public class ShardMetadataData_ES_7_10 implements ShardMetadata { private static final ObjectMapper objectMapper = new ObjectMapper(); @@ -25,16 +26,16 @@ public class ShardMetadataData_ES_7_10 implements ShardMetadata { objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); } - private String snapshotName; - private String indexName; - private String indexId; - private int shardId; - private int indexVersion; - private long startTime; - private long time; - private int numberOfFiles; - private long totalSize; - private List files; + private final String snapshotName; + private final String indexName; + private final String indexId; + private final int shardId; + private final int indexVersion; + private final long startTime; + private final long time; + private final int numberOfFiles; + private final long totalSizeBytes; + private final List files; public ShardMetadataData_ES_7_10( String snapshotName, @@ -56,65 +57,14 @@ public ShardMetadataData_ES_7_10( this.startTime = startTime; this.time = time; this.numberOfFiles = numberOfFiles; - this.totalSize = totalSize; + this.totalSizeBytes = totalSize; // Convert the raw file metadata to the FileMetadata class List convertedFiles = new java.util.ArrayList<>(); for (FileInfoRaw fileMetadataRaw : files) { convertedFiles.add(FileInfo.fromFileMetadataRaw(fileMetadataRaw)); } - this.files = convertedFiles; - } - - @Override - public String getSnapshotName() { - return snapshotName; - } - - @Override - public String getIndexName() { - return indexName; - } - - @Override - public String getIndexId() { - return indexId; - } - - @Override - public int getShardId() { - return shardId; - } - - @Override - public int getIndexVersion() { - return indexVersion; - } - - @Override - public long getStartTime() { - return 
startTime; - } - - @Override - public long getTime() { - return time; - } - - @Override - public int getNumberOfFiles() { - return numberOfFiles; - } - - @Override - public long getTotalSizeBytes() { - return totalSize; - } - - @Override - public List getFiles() { - List convertedFiles = new ArrayList<>(files); - return convertedFiles; + this.files = Collections.unmodifiableList(convertedFiles); } @Override @@ -155,15 +105,16 @@ public DataRaw( } } + @Getter public static class FileInfo implements ShardFileInfo { - private String name; - private String physicalName; - private long length; - private String checksum; - private long partSize; - private long numberOfParts; - private String writtenBy; - private BytesRef metaHash; + private final String name; + private final String physicalName; + private final long length; + private final String checksum; + private final long partSize; + private final long numberOfParts; + private final String writtenBy; + private final BytesRef metaHash; public static FileInfo fromFileMetadataRaw(FileInfoRaw fileMetadataRaw) { return new FileInfo( @@ -194,7 +145,9 @@ public FileInfo( this.writtenBy = writtenBy; this.metaHash = metaHash; - // Calculate the number of parts the file is chopped into; taken from Elasticsearch code + // Calculate the number of parts the file is chopped into; taken from Elasticsearch code. When Elasticsearch makes + // a snapshot and finds Lucene files over a specified size, it will split those files into multiple parts based on the + // maximum part size. // See: // https://github.com/elastic/elasticsearch/blob/6.8/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java#L68 long partBytes = Long.MAX_VALUE; @@ -203,54 +156,14 @@ public FileInfo( } long totalLength = length; - long numberOfParts = totalLength / partBytes; + long numberOfPartsTemp = totalLength / partBytes; if (totalLength % partBytes > 0) { - numberOfParts++; + numberOfPartsTemp++; } - if (numberOfParts == 0) { - numberOfParts++; + if (numberOfPartsTemp == 0) { + numberOfPartsTemp++; } - this.numberOfParts = numberOfParts; - } - - @Override - public String getName() { - return name; - } - - @Override - public String getPhysicalName() { - return physicalName; - } - - @Override - public long getLength() { - return length; - } - - @Override - public String getChecksum() { - return checksum; - } - - @Override - public long getPartSize() { - return partSize; - } - - @Override - public String getWrittenBy() { - return writtenBy; - } - - @Override - public BytesRef getMetaHash() { - return metaHash; - } - - @Override - public long getNumberOfParts() { - return numberOfParts; + this.numberOfParts = numberOfPartsTemp; } // The Snapshot file may be split into multiple blobs; use this to find the correct file name @@ -303,8 +216,7 @@ public FileInfoRaw( public static class FileInfoRawDeserializer extends JsonDeserializer { @Override - public FileInfoRaw deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, - JsonProcessingException { + public FileInfoRaw deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { JsonNode rootNode = jp.getCodec().readTree(jp); diff --git a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataData_ES_7_10.java b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataData_ES_7_10.java index 9a609ef9d..09912837d 100644 --- a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataData_ES_7_10.java +++ 
b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataData_ES_7_10.java @@ -5,9 +5,10 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.rfs.models.SnapshotMetadata; +import lombok.Getter; +@Getter public class SnapshotMetadataData_ES_7_10 implements SnapshotMetadata { - private String name; private String uuid; @JsonProperty("version_id") @@ -30,64 +31,4 @@ public class SnapshotMetadataData_ES_7_10 implements SnapshotMetadata { private List dataStreams; // Haven't looked into this yet @JsonProperty("metadata") private Object metaData; // Haven't looked into this yet - - @Override - public String getName() { - return name; - } - - @Override - public String getUuid() { - return uuid; - } - - @Override - public int getVersionId() { - return versionId; - } - - @Override - public List getIndices() { - return indices; - } - - @Override - public String getState() { - return state; - } - - @Override - public String getReason() { - return reason; - } - - @Override - public boolean isIncludeGlobalState() { - return includeGlobalState; - } - - @Override - public long getStartTime() { - return startTime; - } - - @Override - public long getEndTime() { - return endTime; - } - - @Override - public int getTotalShards() { - return totalShards; - } - - @Override - public int getSuccessfulShards() { - return successfulShards; - } - - @Override - public List getFailures() { - return failures; - } } diff --git a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataFactory_ES_7_10.java b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataFactory_ES_7_10.java index e79b774e8..565803e0f 100644 --- a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataFactory_ES_7_10.java +++ b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotMetadataFactory_ES_7_10.java @@ -20,11 +20,10 @@ public SnapshotMetadata fromJsonNode(JsonNode root) throws Exception { ObjectMapper mapper = new ObjectMapper(); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); ObjectNode objectNodeRoot = (ObjectNode) root; - SnapshotMetadataData_ES_7_10 snapshotMetadata = mapper.treeToValue( + return mapper.treeToValue( objectNodeRoot.get("snapshot"), SnapshotMetadataData_ES_7_10.class ); - return snapshotMetadata; } @Override diff --git a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10.java b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10.java index b7b95f77d..d98a62f1e 100644 --- a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10.java +++ b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10.java @@ -13,7 +13,12 @@ import com.rfs.common.SnapshotRepo; import com.rfs.common.SnapshotRepo.CantParseRepoFile; import com.rfs.common.SourceRepo; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.RequiredArgsConstructor; +@Getter public class SnapshotRepoData_ES_7_10 { public static SnapshotRepoData_ES_7_10 fromRepoFile(Path filePath) { ObjectMapper mapper = new ObjectMapper(); @@ -38,26 +43,24 @@ public static SnapshotRepoData_ES_7_10 fromRepo(SourceRepo repo) { return fromRepoFile(file); } - public Path filePath; - public List snapshots; - public Map indices; + private Path filePath; + private List snapshots; + private Map indices; @JsonProperty("min_version") - public String minVersion; + private String minVersion; @JsonProperty("index_metadata_identifiers") - public Map indexMetadataIdentifiers; + private Map indexMetadataIdentifiers; + @Getter + @AllArgsConstructor + 
@NoArgsConstructor public static class Snapshot implements SnapshotRepo.Snapshot { - public String name; - public String uuid; - public int state; + private String name; + private String uuid; + private int state; @JsonProperty("index_metadata_lookup") - public Map indexMetadataLookup; - public String version; - - @Override - public String getName() { - return name; - } + private Map indexMetadataLookup; + private String version; @Override public String getId() { @@ -65,41 +68,26 @@ public String getId() { } } + @Getter + @AllArgsConstructor + @NoArgsConstructor public static class RawIndex { - public String id; - public List snapshots; + private String id; + private List snapshots; @JsonProperty("shard_generations") - public List shardGenerations; + private List shardGenerations; } + @Getter + @RequiredArgsConstructor public static class Index implements SnapshotRepo.Index { public static Index fromRawIndex(String name, RawIndex rawIndex) { - Index index = new Index(); - index.name = name; - index.id = rawIndex.id; - index.snapshots = rawIndex.snapshots; - index.shardGenerations = rawIndex.shardGenerations; - return index; - } - - public String name; - public String id; - public List snapshots; - public List shardGenerations; - - @Override - public String getName() { - return name; + return new Index(name, rawIndex.id, rawIndex.snapshots, rawIndex.shardGenerations); } - @Override - public String getId() { - return id; - } - - @Override - public List getSnapshots() { - return snapshots; - } + private final String name; + private final String id; + private final List snapshots; + private final List shardGenerations; } } diff --git a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoProvider_ES_7_10.java b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoProvider_ES_7_10.java index 046c1a916..ab154f85e 100644 --- a/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoProvider_ES_7_10.java +++ b/RFS/src/main/java/com/rfs/version_es_7_10/SnapshotRepoProvider_ES_7_10.java @@ -23,7 +23,7 @@ protected SnapshotRepoData_ES_7_10 getRepoData() { } public List getIndices() { - return getRepoData().indices.entrySet() + return getRepoData().getIndices().entrySet() .stream() .map(entry -> SnapshotRepoData_ES_7_10.Index.fromRawIndex(entry.getKey(), entry.getValue())) .collect(Collectors.toList()); @@ -32,15 +32,15 @@ public List getIndices() { @Override public List getIndicesInSnapshot(String snapshotName) { List matchedIndices = new ArrayList<>(); - SnapshotRepoData_ES_7_10.Snapshot targetSnapshot = getRepoData().snapshots.stream() - .filter(snapshot -> snapshotName.equals(snapshot.name)) + SnapshotRepoData_ES_7_10.Snapshot targetSnapshot = getRepoData().getSnapshots().stream() + .filter(snapshot -> snapshotName.equals(snapshot.getName())) .findFirst() .orElse(null); if (targetSnapshot != null) { - targetSnapshot.indexMetadataLookup.keySet().forEach(indexId -> { - getRepoData().indices.forEach((indexName, rawIndex) -> { - if (indexId.equals(rawIndex.id)) { + targetSnapshot.getIndexMetadataLookup().keySet().forEach(indexId -> { + getRepoData().getIndices().forEach((indexName, rawIndex) -> { + if (indexId.equals(rawIndex.getId())) { matchedIndices.add(SnapshotRepoData_ES_7_10.Index.fromRawIndex(indexName, rawIndex)); } }); @@ -51,14 +51,13 @@ public List getIndicesInSnapshot(String snapshotName) { @Override public List getSnapshots() { - List convertedList = new ArrayList<>(getRepoData().snapshots); - return convertedList; + return new ArrayList<>(getRepoData().getSnapshots()); } public String 
getSnapshotId(String snapshotName) { - for (SnapshotRepoData_ES_7_10.Snapshot snapshot : getRepoData().snapshots) { - if (snapshot.name.equals(snapshotName)) { - return snapshot.uuid; + for (SnapshotRepoData_ES_7_10.Snapshot snapshot : getRepoData().getSnapshots()) { + if (snapshot.getName().equals(snapshotName)) { + return snapshot.getId(); } } return null; @@ -66,7 +65,7 @@ public String getSnapshotId(String snapshotName) { @Override public String getIndexId(String indexName) { - return getRepoData().indices.get(indexName).id; + return getRepoData().getIndices().get(indexName).getId(); } @Override @@ -80,15 +79,15 @@ public String getIndexMetadataId(String snapshotName, String indexName) { return null; } - String metadataLookupKey = getRepoData().snapshots.stream() - .filter(snapshot -> snapshot.name.equals(snapshotName)) - .map(snapshot -> snapshot.indexMetadataLookup.get(indexId)) + String metadataLookupKey = getRepoData().getSnapshots().stream() + .filter(snapshot -> snapshot.getName().equals(snapshotName)) + .map(snapshot -> snapshot.getIndexMetadataLookup().get(indexId)) .findFirst() .orElse(null); if (metadataLookupKey == null) { return null; } - return getRepoData().indexMetadataIdentifiers.get(metadataLookupKey); + return getRepoData().getIndexMetadataIdentifiers().get(metadataLookupKey); } } diff --git a/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataCreator_OS_2_11.java b/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataCreator_OS_2_11.java index 4ffb4e0f6..b40328b2f 100644 --- a/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataCreator_OS_2_11.java +++ b/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataCreator_OS_2_11.java @@ -44,7 +44,7 @@ public List createLegacyTemplates(GlobalMetadataData_OS_2_11 metadata, M return createTemplates( metadata.getTemplates(), legacyTemplateAllowlist, - TemplateTypes.LegacyIndexTemplate, + TemplateTypes.LEGACY_INDEX_TEMPLATE, mode, context ); @@ -54,7 +54,7 @@ public List createComponentTemplates(GlobalMetadataData_OS_2_11 metadata return createTemplates( metadata.getComponentTemplates(), componentTemplateAllowlist, - TemplateTypes.ComponentTemplates, + TemplateTypes.COMPONENT_TEMPLATE, mode, context ); @@ -64,7 +64,7 @@ public List createIndexTemplates(GlobalMetadataData_OS_2_11 metadata, Mi return createTemplates( metadata.getIndexTemplates(), indexTemplateAllowlist, - TemplateTypes.IndexTemplate, + TemplateTypes.INDEX_TEMPLATE, mode, context ); @@ -72,19 +72,19 @@ public List createIndexTemplates(GlobalMetadataData_OS_2_11 metadata, Mi @AllArgsConstructor private enum TemplateTypes { - IndexTemplate( - (client, name, body, context) -> client.createIndexTemplate(name, body, context.createMigrateTemplateContext()), - (client, name) -> client.hasIndexTemplate(name) + INDEX_TEMPLATE( + (targetClient, name, body, context) -> targetClient.createIndexTemplate(name, body, context.createMigrateTemplateContext()), + (targetClient, name) -> targetClient.hasIndexTemplate(name) ), - LegacyIndexTemplate( - (client, name, body, context) -> client.createLegacyTemplate(name, body, context.createMigrateLegacyTemplateContext()), - (client, name) -> client.hasLegacyTemplate(name) + LEGACY_INDEX_TEMPLATE( + (targetClient, name, body, context) -> targetClient.createLegacyTemplate(name, body, context.createMigrateLegacyTemplateContext()), + (targetClient, name) -> targetClient.hasLegacyTemplate(name) ), - ComponentTemplates( - (client, name, body, context) -> client.createComponentTemplate(name, body, 
context.createComponentTemplateContext()), - (client, name) -> client.hasComponentTemplate(name) + COMPONENT_TEMPLATE( + (targetClient, name, body, context) -> targetClient.createComponentTemplate(name, body, context.createComponentTemplateContext()), + (targetClient, name) -> targetClient.hasComponentTemplate(name) ); final TemplateCreator creator; final TemplateExistsCheck alreadyExistsCheck; @@ -118,7 +118,7 @@ private List createTemplates( return List.of(); } - if (templateAllowlist != null && templateAllowlist.size() == 0) { + if (templateAllowlist != null && templateAllowlist.isEmpty()) { log.info("No {} in specified allowlist", templateType); return List.of(); } else if (templateAllowlist != null) { @@ -144,24 +144,21 @@ private List createTemplates( templatesToCreate.forEach((templateName, templateBody) -> { log.info("Creating {}: {}", templateType, templateName); - switch (mode) { - case SIMULATE: - var alreadyExists = templateType.alreadyExistsCheck.templateAlreadyExists(client, templateName); - if (!alreadyExists) { - templateList.add(templateName); - } else { - log.warn("Template {} already exists on the target, it will not be created during a migration", templateName); - } - break; - - case PERFORM: - var createdTemplate = templateType.creator.createTemplate(client, templateName, templateBody, context); - if (createdTemplate.isPresent()) { - templateList.add(templateName); - } else { - log.warn("Template {} already exists on the target, unable to create", templateName); - } - break; + + if (mode == MigrationMode.SIMULATE) { + var alreadyExists = templateType.alreadyExistsCheck.templateAlreadyExists(client, templateName); + if (!alreadyExists) { + templateList.add(templateName); + } else { + log.warn("Template {} already exists on the target, it will not be created during a migration", templateName); + } + } else if (mode == MigrationMode.PERFORM) { + var createdTemplate = templateType.creator.createTemplate(client, templateName, templateBody, context); + if (createdTemplate.isPresent()) { + templateList.add(templateName); + } else { + log.warn("Template {} already exists on the target, unable to create", templateName); + } } }); diff --git a/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataData_OS_2_11.java b/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataData_OS_2_11.java index 4b5d885a3..12ffbfb18 100644 --- a/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataData_OS_2_11.java +++ b/RFS/src/main/java/com/rfs/version_os_2_11/GlobalMetadataData_OS_2_11.java @@ -21,16 +21,18 @@ public ObjectNode getTemplates() { } public ObjectNode getIndexTemplates() { - if (root.get("index_template") != null) { - return (ObjectNode) root.get("index_template").get("index_template"); + String indexTemplateKey = "index_template"; + if (root.get(indexTemplateKey) != null) { + return (ObjectNode) root.get(indexTemplateKey).get(indexTemplateKey); } else { return null; } } public ObjectNode getComponentTemplates() { - if (root.get("component_template") != null) { - return (ObjectNode) root.get("component_template").get("component_template"); + String componentTemplateKey = "component_template"; + if (root.get(componentTemplateKey) != null) { + return (ObjectNode) root.get(componentTemplateKey).get(componentTemplateKey); } else { return null; } diff --git a/RFS/src/main/java/com/rfs/version_os_2_11/IndexCreator_OS_2_11.java b/RFS/src/main/java/com/rfs/version_os_2_11/IndexCreator_OS_2_11.java index c163eadd8..d19092a40 100644 --- 
a/RFS/src/main/java/com/rfs/version_os_2_11/IndexCreator_OS_2_11.java +++ b/RFS/src/main/java/com/rfs/version_os_2_11/IndexCreator_OS_2_11.java @@ -24,7 +24,7 @@ public boolean create( MigrationMode mode, ICreateIndexContext context ) { - IndexMetadataData_OS_2_11 indexMetadata = new IndexMetadataData_OS_2_11(index.rawJson(), index.getId(), index.getName()); + IndexMetadataData_OS_2_11 indexMetadata = new IndexMetadataData_OS_2_11(index.getRawJson(), index.getId(), index.getName()); // Remove some settings which will cause errors if you try to pass them to the API ObjectNode settings = indexMetadata.getSettings(); @@ -42,11 +42,10 @@ public boolean create( // Create the index; it's fine if it already exists try { - switch (mode) { - case SIMULATE: - return !client.hasIndex(index.getName()); - case PERFORM: - return client.createIndex(index.getName(), body, context).isPresent(); + if (mode == MigrationMode.SIMULATE) { + return !client.hasIndex(index.getName()); + } else if (mode == MigrationMode.PERFORM) { + return client.createIndex(index.getName(), body, context).isPresent(); } } catch (InvalidResponse invalidResponse) { var illegalArguments = invalidResponse.getIllegalArguments(); diff --git a/RFS/src/main/java/com/rfs/version_os_2_11/IndexMetadataData_OS_2_11.java b/RFS/src/main/java/com/rfs/version_os_2_11/IndexMetadataData_OS_2_11.java index 660bf0afa..db023e619 100644 --- a/RFS/src/main/java/com/rfs/version_os_2_11/IndexMetadataData_OS_2_11.java +++ b/RFS/src/main/java/com/rfs/version_os_2_11/IndexMetadataData_OS_2_11.java @@ -46,7 +46,7 @@ public ObjectNode getSettings() { } @Override - public ObjectNode rawJson() { + public ObjectNode getRawJson() { return root; } diff --git a/RFS/src/main/java/com/rfs/version_universal/RemoteIndexMetadata.java b/RFS/src/main/java/com/rfs/version_universal/RemoteIndexMetadata.java index e9ee759e7..ee4ba4a15 100644 --- a/RFS/src/main/java/com/rfs/version_universal/RemoteIndexMetadata.java +++ b/RFS/src/main/java/com/rfs/version_universal/RemoteIndexMetadata.java @@ -13,7 +13,7 @@ public class RemoteIndexMetadata implements IndexMetadata { private ObjectNode sourceData; @Override - public ObjectNode rawJson() { + public ObjectNode getRawJson() { return sourceData; } @@ -24,7 +24,8 @@ public JsonNode getAliases() { @Override public String getId() { - return indexName; + // The ID is the name in this case + return getName(); } @Override diff --git a/RFS/src/main/java/com/rfs/version_universal/RemoteReaderClient.java b/RFS/src/main/java/com/rfs/version_universal/RemoteReaderClient.java index e781f7389..5e0c5474a 100644 --- a/RFS/src/main/java/com/rfs/version_universal/RemoteReaderClient.java +++ b/RFS/src/main/java/com/rfs/version_universal/RemoteReaderClient.java @@ -109,8 +109,7 @@ Mono getJsonForIndexApis(HttpResponse resp) { var tree = (ObjectNode) objectMapper.readTree(resp.body); return Mono.just(tree); } catch (Exception e) { - log.error("Unable to get json response: ", e); - return Mono.error(new OperationFailed("Unable to get json response: " + e.getMessage(), resp)); + return logAndReturnJsonError(e, resp); } } @@ -140,8 +139,13 @@ Mono getJsonForTemplateApis(HttpResponse resp) { } return Mono.just(tree); } catch (Exception e) { - log.error("Unable to get json response: ", e); - return Mono.error(new OperationFailed("Unable to get json response: " + e.getMessage(), resp)); + return logAndReturnJsonError(e, resp); } } + + Mono logAndReturnJsonError(Exception e, HttpResponse resp) { + String errorPrefix = "Unable to get json response: "; + 
log.atError().setCause(e).setMessage(errorPrefix).log(); + return Mono.error(new OperationFailed(errorPrefix + e.getMessage(), resp)); + } } diff --git a/RFS/src/test/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10Test.java b/RFS/src/test/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10Test.java index 4a527d510..6d3aacbbb 100644 --- a/RFS/src/test/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10Test.java +++ b/RFS/src/test/java/com/rfs/version_es_7_10/SnapshotRepoData_ES_7_10Test.java @@ -44,8 +44,8 @@ void testFromRepoFile_default() { final var result = SnapshotRepoData_ES_7_10.fromRepoFile(jsonInFile); // Verify - assertThat(result.minVersion, equalTo("7.9.0")); - assertThat(result.indices.size(), equalTo(1)); + assertThat(result.getMinVersion(), equalTo("7.9.0")); + assertThat(result.getIndices().size(), equalTo(1)); } @Test @@ -58,8 +58,8 @@ void testFromRepoFile_extraFields() { final var result = SnapshotRepoData_ES_7_10.fromRepoFile(jsonInFile); // Verify - assertThat(result.minVersion, equalTo("7.9.0")); - assertThat(result.indices.size(), equalTo(1)); + assertThat(result.getMinVersion(), equalTo("7.9.0")); + assertThat(result.getIndices().size(), equalTo(1)); } private String insertAtLine(final String source, final String toAdd, final int lineNumber) { diff --git a/TrafficCapture/captureOffloader/src/main/java/org/opensearch/migrations/trafficcapture/StreamChannelConnectionCaptureSerializer.java b/TrafficCapture/captureOffloader/src/main/java/org/opensearch/migrations/trafficcapture/StreamChannelConnectionCaptureSerializer.java index 35c83baf8..5e836c66e 100644 --- a/TrafficCapture/captureOffloader/src/main/java/org/opensearch/migrations/trafficcapture/StreamChannelConnectionCaptureSerializer.java +++ b/TrafficCapture/captureOffloader/src/main/java/org/opensearch/migrations/trafficcapture/StreamChannelConnectionCaptureSerializer.java @@ -124,10 +124,10 @@ private CodedOutputStreamHolder getOrCreateCodedOutputStreamHolder() throws IOEx } else { currentCodedOutputStreamHolderOrNull = streamManager.createStream(); var currentCodedOutputStream = currentCodedOutputStreamHolderOrNull.getOutputStream(); - // e.g. 1: "9a25a4fffe620014-00034cfa-00000001-d208faac76346d02-864e38e2" + // e.g.
 1: "9a25a4fffe620014-00034cfa-00000001-d208faac76346d02-864e38e2" 
currentCodedOutputStream.writeString(TrafficStream.CONNECTIONID_FIELD_NUMBER, connectionIdString); if (nodeIdString != null) { - // e.g. 5: "5ae27fca-0ac4-11ee-be56-0242ac120002" + // e.g.
 5: "5ae27fca-0ac4-11ee-be56-0242ac120002" 
currentCodedOutputStream.writeString(TrafficStream.NODEID_FIELD_NUMBER, nodeIdString); } if (eomsSoFar > 0) { @@ -213,11 +213,11 @@ private void beginSubstreamObservation( numFlushesSoFar + 1 ) ); - // e.g. 2 { + // e.g.
 2 { 
writeTrafficStreamTag(TrafficStream.SUBSTREAM_FIELD_NUMBER); // Write observation content length getOrCreateCodedOutputStream().writeUInt32NoTag(observationContentSize); - // e.g. 1 { 1: 1234 2: 1234 } + // e.g.
 1 { 1: 1234 2: 1234 } 
writeTimestampForNowToCurrentStream(timestamp); } @@ -371,7 +371,7 @@ private void addStringMessage(int captureFieldNumber, int dataFieldNumber, Insta lengthSize = CodedOutputStream.computeInt32SizeNoTag(dataSize); } beginSubstreamObservation(timestamp, captureFieldNumber, dataSize + lengthSize); - // e.g. 4 { + // e.g.
 4 { 
writeObservationTag(captureFieldNumber); if (dataSize > 0) { getOrCreateCodedOutputStream().writeInt32NoTag(dataSize); @@ -461,7 +461,7 @@ private void addSubstreamMessage( captureClosureLength = CodedOutputStream.computeInt32SizeNoTag(dataSize + segmentCountSize); } beginSubstreamObservation(timestamp, captureFieldNumber, captureClosureLength + dataSize + segmentCountSize); - // e.g. 4 { + // e.g.
 4 {  
writeObservationTag(captureFieldNumber); if (dataSize > 0) { // Write size of data after capture tag @@ -578,7 +578,7 @@ private void writeEndOfHttpMessage(Instant timestamp) throws IOException { ); int eomDataSize = eomPairSize + CodedOutputStream.computeInt32SizeNoTag(eomPairSize); beginSubstreamObservation(timestamp, TrafficObservation.ENDOFMESSAGEINDICATOR_FIELD_NUMBER, eomDataSize); - // e.g. 15 { + // e.g.
 15 { 
writeObservationTag(TrafficObservation.ENDOFMESSAGEINDICATOR_FIELD_NUMBER); getOrCreateCodedOutputStream().writeUInt32NoTag(eomPairSize); getOrCreateCodedOutputStream().writeInt32( @@ -650,6 +650,8 @@ public boolean isOpen() { } @Override - public void close() {} + public void close() { + // No resources to close + } } } diff --git a/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml b/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml index 04c432a7c..3a3212b83 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml +++ b/TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml @@ -78,7 +78,7 @@ services: condition: service_started opensearchtarget: condition: service_started - command: /bin/sh -c "/runJavaWithClasspath.sh org.opensearch.migrations.replay.TrafficReplayer --speedup-factor 2 https://opensearchtarget:9200 --auth-header-value Basic\\ YWRtaW46bXlTdHJvbmdQYXNzd29yZDEyMyE= --insecure --kafka-traffic-brokers kafka:9092 --kafka-traffic-topic logging-traffic-topic --kafka-traffic-group-id logging-group-default --otelCollectorEndpoint http://otel-collector:4317" + command: /bin/sh -c "/runJavaWithClasspath.sh org.opensearch.migrations.replay.TrafficReplayer --speedup-factor 2 https://opensearchtarget:9200 --auth-header-value Basic\\ YWRtaW46bXlTdHJvbmdQYXNzd29yZDEyMyE= --insecure --kafka-traffic-brokers kafka:9092 --kafka-traffic-topic logging-traffic-topic --kafka-traffic-group-id logging-group-default --otelCollectorEndpoint http://otel-collector:4317" #--transformer-config-base64 W3sgIkpzb25Kb2x0VHJhbnNmb3JtZXJQcm92aWRlciI6ClsKICB7CiAgICAic2NyaXB0IjogewogICAgICAib3BlcmF0aW9uIjogInNoaWZ0IiwKICAgICAgInNwZWMiOiB7CiAgICAgICAgIm1ldGhvZCI6ICJtZXRob2QiLAogICAgICAgICJVUkkiOiAiVVJJIiwKICAgICAgICAiaGVhZGVycyI6ICJoZWFkZXJzIiwKICAgICAgICAicGF5bG9hZCI6IHsKICAgICAgICAgICJpbmxpbmVkSnNvbkJvZHkiOiB7CiAgICAgICAgICAgICJ0b3AiOiB7CiAgICAgICAgICAgICAgInRhZ1RvRXhjaXNlIjogewogICAgICAgICAgICAgICAgIioiOiAicGF5bG9hZC5pbmxpbmVkSnNvbkJvZHkudG9wLiYiIAogICAgICAgICAgICAgIH0sCiAgICAgICAgICAgICAgIioiOiAicGF5bG9hZC5pbmxpbmVkSnNvbkJvZHkudG9wLiYiCiAgICAgICAgICAgIH0sCiAgICAgICAgICAiKiI6ICJwYXlsb2FkLmlubGluZWRKc29uQm9keS4mIgogICAgICAgICAgfQogICAgICAgIH0KICAgICAgfQogICAgfQogIH0sIAogewogICAic2NyaXB0IjogewogICAgICJvcGVyYXRpb24iOiAibW9kaWZ5LW92ZXJ3cml0ZS1iZXRhIiwKICAgICAic3BlYyI6IHsKICAgICAgICJVUkkiOiAiPXNwbGl0KCcvZXh0cmFUaGluZ1RvUmVtb3ZlJyxAKDEsJikpIgogICAgIH0KICB9CiB9LAogewogICAic2NyaXB0IjogewogICAgICJvcGVyYXRpb24iOiAibW9kaWZ5LW92ZXJ3cml0ZS1iZXRhIiwKICAgICAic3BlYyI6IHsKICAgICAgICJVUkkiOiAiPWpvaW4oJycsQCgxLCYpKSIKICAgICB9CiAgfQogfQpdCn1dCg==" opensearchtarget: image: 'opensearchproject/opensearch:2.15.0' diff --git a/TrafficCapture/dockerSolution/src/main/docker/elasticsearchWithSearchGuard/Dockerfile b/TrafficCapture/dockerSolution/src/main/docker/elasticsearchWithSearchGuard/Dockerfile index 2e77e4eef..9ab4c977c 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/elasticsearchWithSearchGuard/Dockerfile +++ b/TrafficCapture/dockerSolution/src/main/docker/elasticsearchWithSearchGuard/Dockerfile @@ -19,7 +19,8 @@ RUN sed 's/searchguard/plugins.security/g' $ELASTIC_SEARCH_CONFIG_FILE | \ # but maybe not for a demo to show individual steps RUN /root/enableTlsConfig.sh $ELASTIC_SEARCH_CONFIG_FILE # Alter this config line to either enable(searchguard.disabled: false) or disable(searchguard.disabled: true) HTTP auth -RUN echo "searchguard.disabled: false" >> $ELASTIC_SEARCH_CONFIG_FILE +RUN echo -n "searchguard.disabled: false" >> $ELASTIC_SEARCH_CONFIG_FILE && \ 
+ echo -n "plugins.security.ssl.http.enabled_protocols: ['TLSv1.2', 'TLSv1.3']" >> $PROXY_TLS_CONFIG_FILE RUN sed -i '/^-Xms/i # Increase default heap to 80% RAM, Requires JDK >= 10' $ELASTIC_SEARCH_JVM_CONFIG_FILE && \ sed -i 's/^-Xms/#&/' $ELASTIC_SEARCH_JVM_CONFIG_FILE && \ diff --git a/TrafficCapture/dockerSolution/src/main/docker/otelCollector/README.md b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/README.md new file mode 100644 index 000000000..6fe5cf26c --- /dev/null +++ b/TrafficCapture/dockerSolution/src/main/docker/otelCollector/README.md @@ -0,0 +1,24 @@ +## Monitoring Progress via Instrumentation + +The replayer and capture proxy (if started with the `--otelCollectorEndpoint` argument) emit metrics through an +otel-collector endpoint, which is deployed within Migrations Assistant tasks as a sidecar container. The +otel-collectors will publish metrics and traces to Amazon CloudWatch and AWS X-Ray. + +Some of these metrics will show simple progress, such as bytes or records transmitted. Other records can show higher +level information, such the number of responses with status codes that match vs those that don't. To observe those, +search for `statusCodesMatch` in the CloudWatch Console. That's emitted as an attribute along with the method and +the source/target status code (rounded down to the last hundred; i.e. a status code of 201 has a 200 attribute). + +Other metrics will show latencies, the number of requests, unique connections at a time and more. Low-level and +high-level metrics are being improved and added. For the latest information, see the +[README.md](../../../../../../coreUtilities/README.md). + +Along with metrics, traces are emitted by the replayer and the proxy (when proxy is run with metrics enabled, e.g. by +launching with --otelCollectorEndpoint set to the otel-collector sidecar). Traces will include very granular data for +each connection, including how long the TCP connections are open, how long the source and target clusters took to send +a response, as well as other internal details that can explain the progress of each request. + +Notice that traces for the replayer will show connections and Kafka records open, in some cases, much longer than their +representative HTTP transactions. This is because records are considered 'active' to the replayer until they are +committed and records are only committed once _all_ previous records have also been committed. Details such as that +are defensive for when further diagnosis is necessary. 
diff --git a/TrafficCapture/trafficCaptureProxyServer/build.gradle b/TrafficCapture/trafficCaptureProxyServer/build.gradle index 1fb3f7e1a..bb3430705 100644 --- a/TrafficCapture/trafficCaptureProxyServer/build.gradle +++ b/TrafficCapture/trafficCaptureProxyServer/build.gradle @@ -32,15 +32,17 @@ dependencies { implementation project(':TrafficCapture:captureKafkaOffloader') implementation project(':coreUtilities') + implementation group: "com.google.protobuf", name: "protobuf-java" + implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core' + implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind' + implementation group: 'com.lmax', name: 'disruptor' implementation group: 'io.netty', name: 'netty-all' implementation group: 'org.apache.logging.log4j', name: 'log4j-api' implementation group: 'org.apache.logging.log4j', name: 'log4j-core' implementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j2-impl' + implementation group: 'org.jcommander', name: 'jcommander' implementation group: 'org.slf4j', name: 'slf4j-api' - implementation group: 'com.lmax', name: 'disruptor' - implementation group: 'org.jcommander', name: 'jcommander' - implementation group: "com.google.protobuf", name: "protobuf-java" testImplementation project(':TrafficCapture:captureProtobufs') testImplementation testFixtures(project(path: ':testHelperFixtures')) diff --git a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxy.java b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxy.java index 0d891910e..4d4981246 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxy.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxy.java @@ -1,11 +1,11 @@ package org.opensearch.migrations.trafficcapture.proxyserver; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.OutputStream; import java.net.URI; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; import java.nio.file.Paths; import java.time.Duration; import java.util.ArrayList; @@ -18,11 +18,15 @@ import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.function.Supplier; +import java.util.stream.Collectors; import java.util.stream.Stream; import javax.net.ssl.SSLEngine; import javax.net.ssl.SSLException; import com.google.protobuf.CodedOutputStream; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.kafka.clients.CommonClientConfigs; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerConfig; @@ -69,6 +73,7 @@ public class CaptureProxy { private static final String HTTPS_CONFIG_PREFIX = "plugins.security.ssl.http."; public static final String DEFAULT_KAFKA_CLIENT_ID = "HttpCaptureProxyProducer"; + public static final String SUPPORTED_TLS_PROTOCOLS_LIST_KEY = "plugins.security.ssl.http.enabled_protocols"; public static class Parameters { @Parameter(required = false, @@ -191,19 +196,26 @@ static Parameters parseArgs(String[] args) { @SneakyThrows protected static Settings getSettings(@NonNull String configFile) { - var builder = Settings.builder(); - try (var lines = 
Files.lines(Paths.get(configFile))) { - lines.map( - line -> Optional.of(line.indexOf('#')).filter(i -> i >= 0).map(i -> line.substring(0, i)).orElse(line) - ).filter(line -> line.startsWith(HTTPS_CONFIG_PREFIX) && line.contains(":")).forEach(line -> { - var parts = line.split(": *", 2); - builder.put(parts[0], parts[1]); - }); - } - builder.put(SSLConfigConstants.SECURITY_SSL_TRANSPORT_ENABLED, false); + var objectMapper = new ObjectMapper(new YAMLFactory()); + var configMap = objectMapper.readValue(new File(configFile), Map.class); + var configParentDirStr = Paths.get(configFile).toAbsolutePath().getParent(); - builder.put("path.home", configParentDirStr); - return builder.build(); + var httpsSettings = + objectMapper.convertValue(configMap, new TypeReference>(){}) + .entrySet().stream() + .filter(kvp -> kvp.getKey().startsWith(HTTPS_CONFIG_PREFIX)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + if (!httpsSettings.containsKey(SUPPORTED_TLS_PROTOCOLS_LIST_KEY)) { + httpsSettings.put(SUPPORTED_TLS_PROTOCOLS_LIST_KEY, List.of("TLSv1.2", "TLSv1.3")); + } + + return Settings.builder().loadFromMap(httpsSettings) + // Don't bother with configurations the 'transport' (port 9300), which the plugin that we're using + // will also configure (& fail) otherwise. We only use the plugin to setup security for the 'http' + // port and then move the SSLEngine into our implementation. + .put(SSLConfigConstants.SECURITY_SSL_TRANSPORT_ENABLED, false) + .put("path.home", configParentDirStr) + .build(); } protected static IConnectionCaptureFactory getNullConnectionCaptureFactory() { @@ -371,12 +383,10 @@ public static void main(String[] args) throws InterruptedException, IOException ); var sksOp = Optional.ofNullable(params.sslConfigFilePath) - .map( - sslConfigFile -> new DefaultSecurityKeyStore( - getSettings(sslConfigFile), - Paths.get(sslConfigFile).toAbsolutePath().getParent() - ) - ); + .map(sslConfigFile -> new DefaultSecurityKeyStore( + getSettings(sslConfigFile), + Paths.get(sslConfigFile).toAbsolutePath().getParent())) + .filter(sks -> sks.sslHTTPProvider != null); sksOp.ifPresent(DefaultSecurityKeyStore::initHttpSSLConfig); var proxy = new NettyScanningHttpProxy(params.frontsidePort); diff --git a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxySetupTest.java b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxySetupTest.java index c8ccb0d23..4e5ff226e 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxySetupTest.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxySetupTest.java @@ -1,6 +1,9 @@ package org.opensearch.migrations.trafficcapture.proxyserver; import java.io.IOException; +import java.nio.file.Files; +import java.util.List; +import java.util.Map; import java.util.Properties; import org.apache.kafka.clients.CommonClientConfigs; @@ -12,6 +15,7 @@ public class CaptureProxySetupTest { public final static String kafkaBrokerString = "invalid:9092"; + public static final String TLS_PROTOCOLS_KEY = "plugins.security.ssl.http.enabled_protocols"; @Test public void testBuildKafkaPropertiesBaseCase() throws IOException { @@ -111,4 +115,41 @@ public void testBuildKafkaPropertiesWithPropertyFile() throws IOException { // Settings needed for other passed arguments (i.e. 
--enableMSKAuth) are ignored by property file Assertions.assertEquals("SASL_SSL", props.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG)); } + + @Test + public void testTlsParametersAreProperlyRead() throws Exception { + for (var kvp : Map.of( + "[ TLSv1.3, TLSv1.2 ]", List.of("TLSv1.3","TLSv1.2"), + "[ TLSv1.2, TLSv1.3 ]", List.of("TLSv1.2","TLSv1.3"), + "\n - TLSv1.2\n - TLSv1.3", List.of("TLSv1.2","TLSv1.3"), + "\n - TLSv1.2", List.of("TLSv1.2")) + .entrySet()) + { + testTlsParametersAreProperlyRead(TLS_PROTOCOLS_KEY + ": " + kvp.getKey(), kvp.getValue()); + } + } + + @Test + public void testNoProtocolConfigDefaultsToSecureOnesOnly() throws Exception { + testTlsParametersAreProperlyRead("", List.of("TLSv1.2","TLSv1.3")); + } + + public void testTlsParametersAreProperlyRead(String protocolsBlockString, List expectedList) + throws Exception + { + var tempFile = Files.createTempFile("captureProxy_tlsConfig", "yaml"); + try { + Files.writeString(tempFile, "plugins.security.ssl.http.enabled: true\n" + + "plugins.security.ssl.http.pemcert_filepath: esnode.pem\n" + + "plugins.security.ssl.http.pemkey_filepath: esnode-key.pem\n" + + "plugins.security.ssl.http.pemtrustedcas_filepath: root-ca.pem\n" + + protocolsBlockString); + + var settings = CaptureProxy.getSettings(tempFile.toAbsolutePath().toString()); + Assertions.assertEquals(String.join(", ", expectedList), + String.join(", ", settings.getAsList(TLS_PROTOCOLS_KEY))); + } finally { + Files.deleteIfExists(tempFile); + } + } } diff --git a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/HttpdContainerTestBase.java b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/HttpdContainerTestBase.java index 4e60dd53a..47e61b3c7 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/HttpdContainerTestBase.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/HttpdContainerTestBase.java @@ -1,11 +1,13 @@ package org.opensearch.migrations.trafficcapture.proxyserver.testcontainers; +import org.opensearch.migrations.testutils.SharedDockerImageNames; + import org.testcontainers.containers.GenericContainer; public class HttpdContainerTestBase extends TestContainerTestBase> { - private static final GenericContainer httpd = new GenericContainer("httpd:alpine").withExposedPorts(80); // Container - // Port + private static final GenericContainer httpd = new GenericContainer(SharedDockerImageNames.HTTPD) + .withExposedPorts(80); // Container Port public GenericContainer getContainer() { return httpd; diff --git a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/KafkaContainerTestBase.java b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/KafkaContainerTestBase.java index 033352b0b..862fc9f2c 100644 --- a/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/KafkaContainerTestBase.java +++ b/TrafficCapture/trafficCaptureProxyServer/src/test/java/org/opensearch/migrations/trafficcapture/proxyserver/testcontainers/KafkaContainerTestBase.java @@ -1,13 +1,12 @@ package org.opensearch.migrations.trafficcapture.proxyserver.testcontainers; +import 
org.opensearch.migrations.testutils.SharedDockerImageNames; + import org.testcontainers.containers.KafkaContainer; -import org.testcontainers.utility.DockerImageName; public class KafkaContainerTestBase extends TestContainerTestBase { - private static final KafkaContainer kafka = new KafkaContainer( - DockerImageName.parse("confluentinc/cp-kafka:latest") - ); + private static final KafkaContainer kafka = new KafkaContainer(SharedDockerImageNames.KAFKA); public KafkaContainer getContainer() { return kafka; diff --git a/TrafficCapture/trafficReplayer/README.md b/TrafficCapture/trafficReplayer/README.md index 0b74e2425..1b61b11c5 100644 --- a/TrafficCapture/trafficReplayer/README.md +++ b/TrafficCapture/trafficReplayer/README.md @@ -63,7 +63,7 @@ which has comments throughout it to indicate how data percolates and is converte ## Handlers -With the exception of the preparation around JSON model and its transformation, all the other handlers (compression, +Except for the conversions around JSON payloads, all the other handlers (compression, chunked, and JSON parsing/serialization), use streaming data models via mostly custom handlers. This should minimize the memory load (working set size, cache misses, etc). However, attempts have not yet been made to reduce the number of allocations. Those optimization may not have extremely high value, especially when JSON parsing will need to create @@ -80,12 +80,20 @@ Transformations are performed via a simple interface defined by [IJsonTransformer](../transformationPlugins/jsonMessageTransformers/jsonMessageTransformerInterface/src/main/java/org/opensearch/migrations/transform/IJsonTransformer.java) ('transformer'). They are loaded dynamically and are designed to allow for easy extension of the TrafficReplayer to support a diverse set of needs. -The input to the transformer will be an HTTP message represented as a json-like `Map` with +The input to the transformer is an HTTP message represented as a json-like `Map` with top-level key-value pairs defined in [JsonKeysForHttpMessage.java](../transformationPlugins/jsonMessageTransformers/jsonMessageTransformerInterface/src/main/java/org/opensearch/migrations/transform/JsonKeysForHttpMessage.java). -Only bodies that are json-formatted will be accessible, and they will be accessible as a fully-parsed Map (at -the keypath `'payload'->'inlinedJsonBody'`). Transformers have the option to rewrite none, or any of the keys and -values within the original message. The transformer can return either the original message or a completely new message. +Bodies that are json-formatted will be accessible via the path `payload.inlinedJsonBody` and they will be accessible +as a fully-parsed Map. Newline-delimited json (ndjson) sequences will be accessible via +`payload.inlinedJsonSequenceBodies` as a List of json Maps. These two payload entries are mutually exclusive. +Any additional bytes that follow a json object (or all of the bytes if there wasn't a json object at all) will +be available as a ByteBuf in `payload.inlinedBinaryBody`. + +Transformers have the option to rewrite none, or any of the keys and values within the original message. +The transformer can return either the original message or a completely new message. Notice that one json payload +could be broken into multiple ndjson entries or vice-versa by changing the payload key and supplying an appropriately +typed object as its value (e.g. a single Map or a List of Maps respectively for `inlinedJsonBody` and +`inlinedJsonSequenceBodies`). 
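As a concrete illustration of the message shape described above, here is a small, self-contained Java sketch. It deliberately does not implement the project's `IJsonTransformer` interface; only the `payload` and `inlinedJsonBody` key names are taken from the documentation, while the class, method, and field names are invented for the example.

```java
import java.util.HashMap;
import java.util.Map;

// Hypothetical rewrite of the json-like message Map described above. Only the
// "payload" and "inlinedJsonBody" keys come from the README; the rest is illustrative.
public class ExamplePayloadRewrite {
    @SuppressWarnings("unchecked")
    static Map<String, Object> tagJsonBody(Map<String, Object> message) {
        var payload = (Map<String, Object>) message.get("payload");
        if (payload == null) {
            return message; // nothing to do for bodiless messages
        }
        var jsonBody = (Map<String, Object>) payload.get("inlinedJsonBody");
        if (jsonBody != null) {
            // json-formatted bodies arrive fully parsed, so they can be edited in place
            jsonBody.put("migrated", true);
        }
        // a transformer may return the (mutated) original message or build a new one
        return message;
    }

    public static void main(String[] args) {
        var body = new HashMap<String, Object>(Map.of("field", "value"));
        var message = new HashMap<String, Object>(Map.of("payload",
            new HashMap<String, Object>(Map.of("inlinedJsonBody", body))));
        System.out.println(tagJsonBody(message));
    }
}
```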
Transformers may be used simultaneously from concurrent threads over the lifetime of the replayer. However, a message will only be processed by one transformer at a time. @@ -108,10 +116,10 @@ The name is defined by the `IJsonTransformerProvider::getName()`, which unless o (e.g. 'JsonJoltTransformerProvider'). The value corresponding to that key is then passed to instantiate an IJsonTransformer object. -The base [jsonJoltMessageTransformerProvider](../transformationPlugins/jsonMessageTransformers/jsonJoltMessageTransformerProvider) -package includes [JsonCompositeTransformer.java] +The jsonMessageTransformerInterface package includes [JsonCompositeTransformer.java] (../transformationPlugins/jsonMessageTransformers/jsonMessageTransformerInterface/src/main/java/org/opensearch/migrations/transform/JsonCompositeTransformer.java), -which run transformers in serial. That composite transformer is also utilized by the TrafficReplayer to combine the +which runs configured transformers in serial. +That composite transformer is also utilized by the TrafficReplayer to combine the list of loaded transformations with a transformer to rewrite the 'Host' header. That host transformation changes the host header of every HTTP message to use the target domain-name rather than the source's. That will be run after all loaded/specified transformations. @@ -140,8 +148,9 @@ To run only one transformer without any configuration, the `--transformer-config be set to the name of the transformer (e.g. 'JsonTransformerForOpenSearch23PlusTargetTransformerProvider', without quotes or any json surrounding it). -The user can also specify a file to read the transformations from using the `--transformer-config-file`, but can't use -both transformer options. +The user can also specify a file to read the transformations from using the `--transformer-config-file`. Users can +also pass the script as an argument via `--transformer-config-base64`. Each of the `transformer-config` options +is mutually exclusive. Some simple transformations are included to change headers to add compression or to force an HTTP message payload to be chunked. 
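Since `--transformer-config-base64` carries the same contents as `--transformer-config`, just Base64 encoded, a value for it can be produced with the standard JDK encoder. The sketch below is only an illustration; the config string shown is a placeholder, not a validated transformer configuration.

```java
import java.nio.charset.StandardCharsets;
import java.util.Base64;

// Hypothetical helper for producing a --transformer-config-base64 argument value.
// The config contents are placeholders; real keys depend on the transformers being loaded.
public class TransformerConfigEncoder {
    public static void main(String[] args) {
        String transformerConfig = "[{\"JsonJoltTransformerProvider\": {}}]"; // illustrative only
        String encoded = Base64.getEncoder()
            .encodeToString(transformerConfig.getBytes(StandardCharsets.UTF_8));
        System.out.println("--transformer-config-base64 " + encoded);
    }
}
```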
Another transformer, [JsonTypeMappingTransformer.java](../transformationPlugins/jsonMessageTransformers/openSearch23PlusTargetTransformerProvider/src/main/java/org/opensearch/migrations/transform/JsonTypeMappingTransformer.java), diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/AggregatedRawResponse.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/AggregatedRawResponse.java index e83d1e6b2..7cc561cc8 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/AggregatedRawResponse.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/AggregatedRawResponse.java @@ -3,38 +3,17 @@ import java.time.Duration; import java.time.Instant; import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Stream; -import org.opensearch.migrations.replay.datatypes.ByteBufList; - -import io.netty.buffer.ByteBuf; -import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.HttpResponse; import lombok.Getter; import lombok.extern.slf4j.Slf4j; +@Getter @Slf4j -public class AggregatedRawResponse { +public class AggregatedRawResponse extends AggregatedRawResult { - @Getter protected final HttpResponse rawResponse; - @Getter - protected final int responseSizeInBytes; - @Getter - protected final Duration responseDuration; - protected final ArrayList> responsePackets; - @Getter - protected final Throwable error; - - public static Builder builder(Instant i) { - return new Builder(i); - } public AggregatedRawResponse( HttpResponse rawResponse, @@ -43,82 +22,35 @@ public AggregatedRawResponse( List> responsePackets, Throwable error ) { + super(responseSizeInBytes, responseDuration, responsePackets, error); this.rawResponse = rawResponse; - this.responseSizeInBytes = responseSizeInBytes; - this.responseDuration = responseDuration; - this.responsePackets = responsePackets == null ? null : new ArrayList<>(responsePackets); - this.error = error; } - public byte[][] getCopyOfPackets() { - return responsePackets.stream() - .map(Map.Entry::getValue) - .map(x -> Arrays.copyOf(x, x.length)) - .toArray(byte[][]::new); - } - - public ByteBuf getResponseAsByteBuf() { - return responsePackets == null ? 
Unpooled.EMPTY_BUFFER : - ByteBufList.asCompositeByteBufRetained(responsePackets.stream() - .map(Map.Entry::getValue).map(Unpooled::wrappedBuffer)) - .asReadOnly(); - } - public static class Builder { - private final ArrayList> receiptTimeAndResponsePackets; - private final Instant requestSendTime; + public static class Builder extends AggregatedRawResult.Builder { protected HttpResponse rawResponse; - protected Throwable error; public Builder(Instant requestSendTime) { - receiptTimeAndResponsePackets = new ArrayList<>(); - this.requestSendTime = requestSendTime; - rawResponse = null; + super(requestSendTime); } public AggregatedRawResponse build() { - var totalBytes = receiptTimeAndResponsePackets.stream().mapToInt(kvp -> kvp.getValue().length).sum(); return new AggregatedRawResponse( rawResponse, - totalBytes, - Duration.between(requestSendTime, Instant.now()), + getTotalBytes(), + Duration.between(startTime, Instant.now()), receiptTimeAndResponsePackets, error ); } - public AggregatedRawResponse.Builder addResponsePacket(byte[] packet) { - return addResponsePacket(packet, Instant.now()); - } - - public AggregatedRawResponse.Builder addHttpParsedResponseObject(HttpResponse r) { + public Builder addHttpParsedResponseObject(HttpResponse r) { this.rawResponse = r; return this; } - - public AggregatedRawResponse.Builder addErrorCause(Throwable t) { - error = t; - return this; - } - - public AggregatedRawResponse.Builder addResponsePacket(byte[] packet, Instant timestamp) { - receiptTimeAndResponsePackets.add(new AbstractMap.SimpleEntry<>(timestamp, packet)); - return this; - } - } - - Stream> getReceiptTimeAndResponsePackets() { - return Optional.ofNullable(this.responsePackets).stream().flatMap(Collection::stream); } - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("IResponseSummary{"); - sb.append("responseSizeInBytes=").append(responseSizeInBytes); - sb.append(", responseDuration=").append(responseDuration); - sb.append(", # of responsePackets=") - .append((this.responsePackets == null ? 
"-1" : "" + this.responsePackets.size())); - sb.append('}'); - return sb.toString(); + public static Builder builder(Instant i) { + return new Builder(i); } } diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/AggregatedRawResult.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/AggregatedRawResult.java new file mode 100644 index 000000000..a62cd21d0 --- /dev/null +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/AggregatedRawResult.java @@ -0,0 +1,105 @@ +package org.opensearch.migrations.replay; + +import java.time.Duration; +import java.time.Instant; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.opensearch.migrations.replay.datatypes.ByteBufList; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import lombok.Getter; + +public class AggregatedRawResult { + @Getter + protected final int sizeInBytes; + @Getter + protected final Duration duration; + protected final ArrayList> packets; + @Getter + protected final Throwable error; + + public static class Builder> { + protected final ArrayList> receiptTimeAndResponsePackets; + protected final Instant startTime; + protected Throwable error; + + public Builder(Instant startTime) { + receiptTimeAndResponsePackets = new ArrayList<>(); + this.startTime = startTime; + } + + public AggregatedRawResult build() { + var totalBytes = getTotalBytes(); + return new AggregatedRawResult( + totalBytes, + Duration.between(startTime, Instant.now()), + receiptTimeAndResponsePackets, + error + ); + } + + protected int getTotalBytes() { + return receiptTimeAndResponsePackets.stream().mapToInt(kvp -> kvp.getValue().length).sum(); + } + + public B addErrorCause(Throwable t) { + error = t; + return (B) this; + } + + public B addResponsePacket(byte[] packet) { + return (B) addResponsePacket(packet, Instant.now()); + } + + public B addResponsePacket(byte[] packet, Instant timestamp) { + receiptTimeAndResponsePackets.add(new AbstractMap.SimpleEntry<>(timestamp, packet)); + return (B) this; + } + } + + public AggregatedRawResult(int sizeInBytes, + Duration duration, + List> packets, + Throwable error) + { + this.sizeInBytes = sizeInBytes; + this.duration = duration; + this.packets = packets == null ? null : new ArrayList<>(packets); + this.error = error; + } + + public static Builder builder(Instant i) { + return new Builder<>(i); + } + + public byte[][] getCopyOfPackets() { + return packets.stream() + .map(Map.Entry::getValue) + .map(x -> Arrays.copyOf(x, x.length)) + .toArray(byte[][]::new); + } + + public ByteBuf getResponseAsByteBuf() { + return packets == null ? Unpooled.EMPTY_BUFFER : + ByteBufList.asCompositeByteBufRetained(packets.stream() + .map(Map.Entry::getValue).map(Unpooled::wrappedBuffer)) + .asReadOnly(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("IResponseSummary{"); + sb.append("responseSizeInBytes=").append(sizeInBytes); + sb.append(", responseDuration=").append(duration); + sb.append(", # of responsePackets=") + .append((this.packets == null ? 
"-1" : "" + this.packets.size())); + sb.append('}'); + return sb.toString(); + } + +} diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/SourceTargetCaptureTuple.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/SourceTargetCaptureTuple.java index 0e0b0e378..5e3f0f9ff 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/SourceTargetCaptureTuple.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/SourceTargetCaptureTuple.java @@ -70,8 +70,8 @@ public SourceTargetCaptureTuple( transformedTargetRequestAndResponseList.getTransformationStatus(); this.responseList = transformedTargetRequestAndResponseList == null ? List.of() : transformedTargetRequestAndResponseList.responses().stream() - .map(arr -> new Response(arr.responsePackets.stream().map(AbstractMap.SimpleEntry::getValue) - .collect(Collectors.toList()), arr.error, arr.responseDuration)) + .map(arr -> new Response(arr.packets.stream().map(AbstractMap.SimpleEntry::getValue) + .collect(Collectors.toList()), arr.error, arr.duration)) .collect(Collectors.toList()); this.topLevelErrorCause = topLevelErrorCause; } diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayer.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayer.java index 828da65dc..084aa2801 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayer.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayer.java @@ -9,6 +9,7 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; +import java.util.Base64; import java.util.List; import java.util.Optional; import java.util.concurrent.Executors; @@ -90,93 +91,166 @@ public static boolean validateRequiredKafkaParams(String brokers, String topic, } public static class Parameters { - @Parameter(required = true, arity = 1, description = "URI of the target cluster/domain") + @Parameter( + required = true, + arity = 1, + description = "URI of the target cluster/domain") String targetUriString; - @Parameter(required = false, names = { - "--insecure" }, arity = 0, description = "Do not check the server's certificate") + @Parameter( + required = false, + names = {"--insecure" }, + arity = 0, description = "Do not check the server's certificate") boolean allowInsecureConnections; - @Parameter(required = false, names = { - REMOVE_AUTH_HEADER_VALUE_ARG }, arity = 0, description = "Remove the authorization header if present and do not replace it with anything. " + @Parameter( + required = false, + names = {REMOVE_AUTH_HEADER_VALUE_ARG }, + arity = 0, description = "Remove the authorization header if present and do not replace it with anything. 
" + "(cannot be used with other auth arguments)") boolean removeAuthHeader; - @Parameter(required = false, names = { - AUTH_HEADER_VALUE_ARG }, arity = 1, description = "Static value to use for the \"authorization\" header of each request " + @Parameter( + required = false, + names = { AUTH_HEADER_VALUE_ARG }, + arity = 1, description = "Static value to use for the \"authorization\" header of each request " + "(cannot be used with other auth arguments)") String authHeaderValue; - @Parameter(required = false, names = { - AWS_AUTH_HEADER_USER_AND_SECRET_ARG }, arity = 2, description = " pair to specify " + @Parameter( + required = false, names = { + AWS_AUTH_HEADER_USER_AND_SECRET_ARG }, + arity = 2, + description = " pair to specify " + "\"authorization\" header value for each request. " + "The USERNAME specifies the plaintext user and the SECRET_ARN specifies the ARN or " + "Secret name from AWS Secrets Manager to retrieve the password from for the password section" + "(cannot be used with other auth arguments)") List awsAuthHeaderUserAndSecret; - @Parameter(required = false, names = { - SIGV_4_AUTH_HEADER_SERVICE_REGION_ARG }, arity = 1, description = "Use AWS SigV4 to sign each request with the specified service name and region. " + @Parameter( + required = false, + names = { SIGV_4_AUTH_HEADER_SERVICE_REGION_ARG }, + arity = 1, + description = "Use AWS SigV4 to sign each request with the specified service name and region. " + "(e.g. es,us-east-1) " + "DefaultCredentialsProvider is used to resolve credentials. " + "(cannot be used with other auth arguments)") String useSigV4ServiceAndRegion; - @Parameter(required = false, names = "--transformer-config", arity = 1, description = "Configuration of message transformers. Either as a string that identifies the " + @Parameter( + required = false, + names = "--transformer-config-base64", + arity = 1, + description = "Configuration of message transformers. The same contents as --transformer-config but " + + "Base64 encoded so that the configuration is easier to pass as a command line parameter.") + String transformerConfigEncoded; + + @Parameter( + required = false, + names = "--transformer-config", + arity = 1, + description = "Configuration of message transformers. Either as a string that identifies the " + "transformer that should be run (with default settings) or as json to specify options " + "as well as multiple transformers to run in sequence. 
" + "For json, keys are the (simple) names of the loaded transformers and values are the " + "configuration passed to each of the transformers.") String transformerConfig; - @Parameter(required = false, names = "--transformer-config-file", arity = 1, description = "Path to the JSON configuration file of message transformers.") + + @Parameter( + required = false, + names = "--transformer-config-file", + arity = 1, + description = "Path to the JSON configuration file of message transformers.") String transformerConfigFile; - @Parameter(required = false, names = "--user-agent", arity = 1, description = "For HTTP requests to the target cluster, append this string (after \"; \") to" + @Parameter( + required = false, + names = "--user-agent", + arity = 1, + description = "For HTTP requests to the target cluster, append this string (after \"; \") to" + "the existing user-agent field or if the field wasn't present, simply use this value") String userAgent; - @Parameter(required = false, names = { - "-i", - "--input" }, arity = 1, description = "input file to read the request/response traces for the source cluster") + @Parameter( + required = false, + names = { "-i", "--input" }, + arity = 1, + description = "input file to read the request/response traces for the source cluster") String inputFilename; - @Parameter(required = false, names = { - "-t", - PACKET_TIMEOUT_SECONDS_PARAMETER_NAME }, arity = 1, description = "assume that connections were terminated after this many " + @Parameter( + required = false, + names = {"-t", PACKET_TIMEOUT_SECONDS_PARAMETER_NAME }, + arity = 1, + description = "assume that connections were terminated after this many " + "seconds of inactivity observed in the captured stream") int observedPacketConnectionTimeout = 70; - @Parameter(required = false, names = { - "--speedup-factor" }, arity = 1, description = "Accelerate the replayed communications by this factor. " + @Parameter( + required = false, + names = { "--speedup-factor" }, + arity = 1, description = "Accelerate the replayed communications by this factor. 
" + "This means that between each interaction will be replayed at this rate faster " + "than the original observations, provided that the replayer and target are able to keep up.") double speedupFactor = 1.0; - @Parameter(required = false, names = { - LOOKAHEAD_TIME_WINDOW_PARAMETER_NAME }, arity = 1, description = "Number of seconds of data that will be buffered.") + @Parameter( + required = false, + names = { LOOKAHEAD_TIME_WINDOW_PARAMETER_NAME }, + arity = 1, + description = "Number of seconds of data that will be buffered.") int lookaheadTimeSeconds = 300; - @Parameter(required = false, names = { - "--max-concurrent-requests" }, arity = 1, description = "Maximum number of requests at a time that can be outstanding") + @Parameter( + required = false, + names = { "--max-concurrent-requests" }, + arity = 1, + description = "Maximum number of requests at a time that can be outstanding") int maxConcurrentRequests = 1024; - @Parameter(required = false, names = { - "--num-client-threads" }, arity = 1, description = "Number of threads to use to send requests from.") + @Parameter( + required = false, + names = { "--num-client-threads" }, + arity = 1, + description = "Number of threads to use to send requests from.") int numClientThreads = 0; // https://github.com/opensearch-project/opensearch-java/blob/main/java-client/src/main/java/org/opensearch/client/transport/httpclient5/ApacheHttpClient5TransportBuilder.java#L49-L54 - @Parameter(required = false, names = { - "--target-response-timeout" }, arity = 1, description = "Seconds to wait before timing out a replayed request to the target.") + @Parameter( + required = false, + names = { "--target-response-timeout" }, + arity = 1, + description = "Seconds to wait before timing out a replayed request to the target.") int targetServerResponseTimeoutSeconds = 30; - @Parameter(required = false, names = { - "--kafka-traffic-brokers" }, arity = 1, description = "Comma-separated list of host and port pairs that are the addresses of the Kafka brokers to bootstrap with i.e. 'kafka-1:9092,kafka-2:9092'") + @Parameter( + required = false, + names = { "--kafka-traffic-brokers" }, + arity = 1, + description = "Comma-separated list of host and port pairs that are the addresses of the Kafka brokers " + + "to bootstrap with i.e. 
'kafka-1:9092,kafka-2:9092'") String kafkaTrafficBrokers; - @Parameter(required = false, names = { - "--kafka-traffic-topic" }, arity = 1, description = "Topic name used to pull messages from Kafka") + @Parameter( + required = false, + names = { "--kafka-traffic-topic" }, + arity = 1, + description = "Topic name used to pull messages from Kafka") String kafkaTrafficTopic; - @Parameter(required = false, names = { - "--kafka-traffic-group-id" }, arity = 1, description = "Consumer group id that is used when pulling messages from Kafka") + @Parameter( + required = false, + names = { "--kafka-traffic-group-id" }, + arity = 1, + description = "Consumer group id that is used when pulling messages from Kafka") String kafkaTrafficGroupId; - @Parameter(required = false, names = { - "--kafka-traffic-enable-msk-auth" }, arity = 0, description = "Enables SASL properties required for connecting to MSK with IAM auth") + @Parameter( + required = false, + names = { "--kafka-traffic-enable-msk-auth" }, + arity = 0, + description = "Enables SASL properties required for connecting to MSK with IAM auth") boolean kafkaTrafficEnableMSKAuth; - @Parameter(required = false, names = { - "--kafka-traffic-property-file" }, arity = 1, description = "File path for Kafka properties file to use for additional or overriden Kafka properties") + @Parameter( + required = false, + names = { "--kafka-traffic-property-file" }, + arity = 1, + description = "File path for Kafka properties file to use for additional or overridden Kafka properties") String kafkaTrafficPropertyFile; - @Parameter(required = false, names = { - "--otelCollectorEndpoint" }, arity = 1, description = "Endpoint (host:port) for the OpenTelemetry Collector to which metrics logs should be" + @Parameter( + required = false, + names = { "--otelCollectorEndpoint" }, + arity = 1, + description = "Endpoint (host:port) for the OpenTelemetry Collector to which metrics logs should be " + "forwarded. If no value is provided, metrics will not be forwarded.") String otelCollectorEndpoint; } @@ -196,12 +270,17 @@ private static Parameters parseArgs(String[] args) { } } + private static int isConfigured(String s) { + return (s == null || s.isBlank()) ? 
0 : 1; + } + private static String getTransformerConfig(Parameters params) { - if (params.transformerConfigFile != null - && !params.transformerConfigFile.isBlank() - && params.transformerConfig != null - && !params.transformerConfig.isBlank()) { - System.err.println("Specify either --transformer-config or --transformer-config-file, not both."); + var configuredCount = isConfigured(params.transformerConfigFile) + + isConfigured(params.transformerConfigEncoded) + + isConfigured(params.transformerConfig); + if (configuredCount > 1) { + System.err.println("Specify only one of --transformer-config-base64, --transformer-config or " + + "--transformer-config-file."); System.exit(4); } @@ -218,6 +297,10 @@ private static String getTransformerConfig(Parameters params) { return params.transformerConfig; } + if (params.transformerConfigEncoded != null && !params.transformerConfigEncoded.isBlank()) { + return new String(Base64.getDecoder().decode(params.transformerConfigEncoded)); + } + return null; } diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayerCore.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayerCore.java index f891aa703..42788b4a8 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayerCore.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/TrafficReplayerCore.java @@ -111,7 +111,9 @@ public Consumer onRequestReceived( ); finishedAccumulatingResponseFuture.future.whenComplete( (v, t) -> log.atDebug() - .setMessage(() -> "Done receiving captured stream for " + ctx + ":" + v.requestData) + .setMessage(() -> "Done receiving captured stream for {}:{}") + .addArgument(ctx) + .addArgument(v.requestData) .log() ); diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulator.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulator.java index 34672dfa5..f78c0b555 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulator.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulator.java @@ -12,6 +12,7 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.async.ByteBufferFeeder; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; /** @@ -30,28 +31,42 @@ public class JsonAccumulator { * Name in the stack. */ private final Deque jsonObjectStack; + private final ByteBufferFeeder feeder; + @Getter + private long totalBytesFullyConsumed; public JsonAccumulator() throws IOException { jsonObjectStack = new ArrayDeque<>(); JsonFactory factory = new JsonFactory(); parser = factory.createNonBlockingByteBufferParser(); + feeder = (ByteBufferFeeder) parser.getNonBlockingInputFeeder(); } protected Map createMap() { return new LinkedHashMap<>(); } + public boolean hasPartialValues() { + return !jsonObjectStack.isEmpty(); + } + /** * Returns the top-level object once it has been fully constructed or null if more input is still required. 
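* <p>An illustrative sketch of how a caller might drive the accumulator (hypothetical variables, not code from
* this change; it relies only on the consumeByteBuffer/getNextTopLevelObject/hasPartialValues methods added below):
* <pre>{@code
*   var accumulator = new JsonAccumulator();
*   for (ByteBuffer chunk : incomingChunks) {            // 'incomingChunks' is assumed to be supplied by the caller
*       accumulator.consumeByteBuffer(chunk);            // feed raw bytes into the non-blocking Jackson parser
*       Object doc;
*       while ((doc = accumulator.getNextTopLevelObject()) != null) {
*           handle(doc);                                  // each fully parsed top-level value (map, array, or scalar)
*       }
*   }
*   boolean truncated = accumulator.hasPartialValues();   // true if the stream ended mid-document
* }</pre>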
* @param byteBuffer * @return * @throws IOException */ - public Object consumeByteBuffer(ByteBuffer byteBuffer) throws IOException { - ByteBufferFeeder feeder = (ByteBufferFeeder) parser.getNonBlockingInputFeeder(); - log.trace("Consuming bytes: " + byteBuffer.toString()); - feeder.feedInput(byteBuffer); + public Object consumeByteBufferForSingleObject(ByteBuffer byteBuffer) throws IOException { + consumeByteBuffer(byteBuffer); + return getNextTopLevelObject(); + } + public void consumeByteBuffer(ByteBuffer byteBuffer) throws IOException { + log.atTrace().setMessage(() -> "Consuming bytes: {}").addArgument(() -> byteBuffer.toString()).log(); + feeder.feedInput(byteBuffer); + } + + public Object getNextTopLevelObject() throws IOException { while (!parser.isClosed()) { var token = parser.nextToken(); if (token == null) { @@ -59,7 +74,7 @@ public Object consumeByteBuffer(ByteBuffer byteBuffer) throws IOException { break; } - log.trace(this + " ... adding token=" + token); + log.atTrace().setMessage(() -> "{} ... adding token={}").addArgument(this).addArgument(token).log(); switch (token) { case FIELD_NAME: jsonObjectStack.push(parser.getText()); @@ -71,6 +86,7 @@ public Object consumeByteBuffer(ByteBuffer byteBuffer) throws IOException { var array = ((ArrayList) jsonObjectStack.pop()).toArray(); pushCompletedValue(array); if (jsonObjectStack.isEmpty()) { + totalBytesFullyConsumed = parser.currentLocation().getByteOffset(); return array; } break; @@ -81,6 +97,7 @@ public Object consumeByteBuffer(ByteBuffer byteBuffer) throws IOException { case END_OBJECT: { var popped = jsonObjectStack.pop(); if (jsonObjectStack.isEmpty()) { + totalBytesFullyConsumed = parser.currentLocation().getByteOffset(); return popped; } else { pushCompletedValue(popped); diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumer.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumer.java index 4c1398852..4527d1f5d 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumer.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumer.java @@ -336,28 +336,35 @@ private static void addLoggingHandlerLast(ChannelPipeline pipeline, String name) } private void deactivateChannel() { - var pipeline = channel.pipeline(); - log.atDebug() - .setMessage(() -> "Resetting the pipeline for channel " + channel + "currently at: " + pipeline) - .log(); - for (var handlerName : new String[] { WRITE_COUNT_WATCHER_HANDLER_NAME, READ_COUNT_WATCHER_HANDLER_NAME }) { - try { - pipeline.remove(handlerName); - } catch (NoSuchElementException e) { - log.atWarn() - .setMessage(() -> "Ignoring an exception that the " + handlerName + " wasn't present") - .log(); + try { + var pipeline = channel.pipeline(); + log.atDebug() + .setMessage(() -> "Resetting the pipeline for channel {} currently at: {}") + .addArgument(channel) + .addArgument(pipeline) + .log(); + for (var handlerName : new String[] { WRITE_COUNT_WATCHER_HANDLER_NAME, READ_COUNT_WATCHER_HANDLER_NAME }) { + try { + pipeline.remove(handlerName); + } catch (NoSuchElementException e) { + log.atWarn() + .setMessage(() -> "Ignoring an exception that the " + handlerName + " wasn't present") + .log(); + } } - } - while (true) { - var lastHandler = pipeline.last(); - if (lastHandler instanceof SslHandler || lastHandler 
instanceof ConnectionClosedListenerHandler) { - break; + while (true) { + var lastHandler = pipeline.last(); + if (lastHandler instanceof SslHandler || lastHandler instanceof ConnectionClosedListenerHandler) { + break; + } + pipeline.removeLast(); } - pipeline.removeLast(); + channel.config().setAutoRead(false); + log.atDebug().setMessage(() -> "Reset the pipeline for channel " + channel + " back to: " + pipeline).log(); + } finally { + getCurrentRequestSpan().close(); + getParentContext().close(); } - channel.config().setAutoRead(false); - log.atDebug().setMessage(() -> "Reset the pipeline for channel " + channel + " back to: " + pipeline).log(); } @Override diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/PayloadAccessFaultingMap.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/PayloadAccessFaultingMap.java index ee0dd0e3c..763cbd868 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/PayloadAccessFaultingMap.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/PayloadAccessFaultingMap.java @@ -3,14 +3,17 @@ import java.util.AbstractMap; import java.util.AbstractSet; import java.util.Iterator; -import java.util.NoSuchElementException; +import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.TreeMap; import org.opensearch.migrations.replay.datahandlers.http.StrictCaseInsensitiveHttpHeadersMap; -import org.opensearch.migrations.transform.JsonKeysForHttpMessage; import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NonNull; +import lombok.Setter; import lombok.extern.slf4j.Slf4j; /** @@ -25,85 +28,59 @@ public class PayloadAccessFaultingMap extends AbstractMap { private final boolean isJson; - private Object onlyValue; + TreeMap underlyingMap; + @Getter + @Setter + private boolean disableThrowingPayloadNotLoaded; public PayloadAccessFaultingMap(StrictCaseInsensitiveHttpHeadersMap headers) { + underlyingMap = new TreeMap<>(); isJson = Optional.ofNullable(headers.get("content-type")) .map(list -> list.stream().anyMatch(s -> s.startsWith("application/json"))) .orElse(false); } @Override - public Object get(Object key) { - if (!JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY.equals(key) || !isJson) { - return null; - } - if (onlyValue == null) { - throw PayloadNotLoadedException.getInstance(); - } else { - return onlyValue; - } - } - - @Override - public Set> entrySet() { - if (onlyValue != null) { - return Set.of(new SimpleEntry<>(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY, onlyValue)); - } else { - return new AbstractSet>() { + @NonNull + public Set> entrySet() { + if (underlyingMap.isEmpty() && !disableThrowingPayloadNotLoaded) { + return new AbstractSet<>() { @Override - public Iterator> iterator() { + @NonNull + public Iterator> iterator() { return new Iterator<>() { - private int count; - @Override public boolean hasNext() { - return count == 0 && isJson; + throw PayloadNotLoadedException.getInstance(); } @Override - public Entry next() { - if (isJson && count == 0) { - ++count; - if (onlyValue != null) { - return new SimpleEntry<>( - JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY, - onlyValue - ); - } else { - throw PayloadNotLoadedException.getInstance(); - } - } else { - throw new NoSuchElementException(); - } + public Map.Entry next() { + throw PayloadNotLoadedException.getInstance(); } }; } @Override public 
int size() { - return isJson ? 1 : 0; + throw PayloadNotLoadedException.getInstance(); } }; + } else { + return underlyingMap.entrySet(); } } - @Override public Object put(String key, Object value) { - if (!JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY.equals(key)) { - return null; - } - Object old = onlyValue; - onlyValue = value; - return old; + return underlyingMap.put(key, value); } @Override - public String toString() { - final StringBuilder sb = new StringBuilder("PayloadFaultMap{"); - sb.append("isJson=").append(isJson); - sb.append(", onlyValue=").append(onlyValue); - sb.append('}'); - return sb.toString(); + public Object get(Object key) { + var value = super.get(key); + if (value == null && !disableThrowingPayloadNotLoaded) { + throw PayloadNotLoadedException.getInstance(); + } + return value; } } diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumer.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumer.java index e346c3cd0..81424e89a 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumer.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumer.java @@ -20,7 +20,6 @@ import io.netty.channel.embedded.EmbeddedChannel; import io.netty.handler.codec.http.HttpRequestDecoder; import lombok.extern.slf4j.Slf4j; -import org.slf4j.event.Level; /** * This class implements a packet consuming interface by using an EmbeddedChannel to write individual @@ -48,6 +47,7 @@ public class HttpJsonTransformingConsumer implements IPacketFinalizingConsume private final RequestPipelineOrchestrator pipelineOrchestrator; private final EmbeddedChannel channel; private IReplayContexts.IRequestTransformationContext transformationContext; + private Exception lastConsumeException; /** * Roughly try to keep track of how big each data chunk was that came into the transformer. These values @@ -83,10 +83,8 @@ public HttpJsonTransformingConsumer( private NettySendByteBufsToPacketHandlerHandler getOffloadingHandler() { return Optional.ofNullable(channel) - .map( - c -> (NettySendByteBufsToPacketHandlerHandler) c.pipeline() - .get(RequestPipelineOrchestrator.OFFLOADING_HANDLER_NAME) - ) + .map(c -> (NettySendByteBufsToPacketHandlerHandler) + c.pipeline().get(RequestPipelineOrchestrator.OFFLOADING_HANDLER_NAME)) .orElse(null); } @@ -113,21 +111,26 @@ public TrackedFuture consumeBytes(ByteBuf nextRequestPacket) { .map( cf -> cf.thenAccept(x -> channel.writeInbound(nextRequestPacket)), () -> "HttpJsonTransformingConsumer sending bytes to its EmbeddedChannel" - ); + ) + .whenComplete((v,t) -> { + if (t instanceof Exception) { this.lastConsumeException = (Exception) t; } + }, () -> ""); } public TrackedFuture> finalizeRequest() { var offloadingHandler = getOffloadingHandler(); try { channel.checkException(); + if (lastConsumeException != null) { + throw lastConsumeException; + } if (getHttpRequestDecoderHandler() == null) { // LastHttpContent won't be sent channel.writeInbound(new EndOfInput()); // so send our own version of 'EOF' } } catch (Exception e) { this.transformationContext.addCaughtException(e); - log.atLevel( - e instanceof NettyJsonBodyAccumulateHandler.IncompleteJsonBodyException ? 
Level.DEBUG : Level.WARN - ).setMessage("Caught IncompleteJsonBodyException when sending the end of content").setCause(e).log(); + log.atWarn().setCause(e) + .setMessage("Caught IncompleteJsonBodyException when sending the end of content").log(); return redriveWithoutTransformation(pipelineOrchestrator.packetReceiver, e); } finally { channel.finishAndReleaseAll(); @@ -181,7 +184,11 @@ private TrackedFuture> redriveWithoutTrans r -> new TransformedOutputAndResult<>(r, makeStatusForRedrive(reason)), () -> "redrive final packaging" ).whenComplete((v, t) -> { - transformationContext.onTransformSkip(); + if (t != null || (v != null && v.transformationStatus.isError())) { + transformationContext.onTransformFailure(); + } else { + transformationContext.onTransformSkip(); + } transformationContext.close(); }, () -> "HttpJsonTransformingConsumer.redriveWithoutTransformation().map()"); } diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyAccumulateHandler.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyAccumulateHandler.java index 7998717a5..395ca84b0 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyAccumulateHandler.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyAccumulateHandler.java @@ -1,14 +1,23 @@ package org.opensearch.migrations.replay.datahandlers.http; +import java.util.ArrayList; +import java.util.List; + +import com.fasterxml.jackson.core.JacksonException; + import org.opensearch.migrations.replay.datahandlers.JsonAccumulator; import org.opensearch.migrations.replay.tracing.IReplayContexts; import org.opensearch.migrations.transform.JsonKeysForHttpMessage; +import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.Unpooled; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; import io.netty.handler.codec.http.HttpContent; import io.netty.handler.codec.http.LastHttpContent; +import io.netty.util.ReferenceCountUtil; import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; /** * This accumulates HttpContent messages through a JsonAccumulator and eventually fires off a @@ -18,19 +27,46 @@ * This handler currently has undefined behavior if multiple json objects are within the stream of * HttpContent messages. This will also NOT fire a */ +@Slf4j public class NettyJsonBodyAccumulateHandler extends ChannelInboundHandlerAdapter { private final IReplayContexts.IRequestTransformationContext context; - public static class IncompleteJsonBodyException extends NoContentException {} - JsonAccumulator jsonAccumulator; HttpJsonMessageWithFaultingPayload capturedHttpJsonMessage; + List parsedJsonObjects; + CompositeByteBuf accumulatedBody; + boolean jsonWasInvalid; @SneakyThrows public NettyJsonBodyAccumulateHandler(IReplayContexts.IRequestTransformationContext context) { this.context = context; this.jsonAccumulator = new JsonAccumulator(); + this.parsedJsonObjects = new ArrayList<>(); + } + + @Override + public void handlerAdded(ChannelHandlerContext ctx) throws Exception { + // use 1024 (as opposed to the default of 16) because we really don't ever want the hit of a consolidation. + // For this buffer to continue to be used, we are far-off the happy-path. 
+ // Consolidating will likely burn more cycles + // + // Use Unpooled rather than the context allocator (`ctx.alloc()`) because this is the buffer that will + // be passed into a transformation if there are bytes that aren't json/ndjson formatted. + // A transformation may attempt to do manipulations or replacements of this raw ByteBuf. It may also + // throw an exception. In the interest of keeping that contract as simple as possible, just use an + // Unpooled object so that the GC can take care of this when it needs to and we won't impact the rest of + // the system. Lastly, this handler is parsing JSON - one more alloc on the GC isn't going to be + // noticeable in many cases! + accumulatedBody = Unpooled.compositeBuffer(1024); + super.handlerAdded(ctx); + } + + @Override + public void handlerRemoved(ChannelHandlerContext ctx) throws Exception { + ReferenceCountUtil.release(accumulatedBody); + accumulatedBody = null; + super.handlerRemoved(ctx); } @Override @@ -38,14 +74,54 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception if (msg instanceof HttpJsonMessageWithFaultingPayload) { capturedHttpJsonMessage = (HttpJsonMessageWithFaultingPayload) msg; } else if (msg instanceof HttpContent) { - var jsonObject = jsonAccumulator.consumeByteBuffer(((HttpContent) msg).content().nioBuffer()); - if (jsonObject != null) { - capturedHttpJsonMessage.payload() - .put(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY, jsonObject); - context.onJsonPayloadParseSucceeded(); + var contentBuf = ((HttpContent) msg).content(); + accumulatedBody.addComponent(true, contentBuf.retainedDuplicate()); + try { + if (!jsonWasInvalid) { + var nioBuf = contentBuf.nioBuffer(); + jsonAccumulator.consumeByteBuffer(nioBuf); + Object nextObj; + while ((nextObj = jsonAccumulator.getNextTopLevelObject()) != null) { + parsedJsonObjects.add(nextObj); + } + } + } catch (JacksonException e) { + log.atInfo().setCause(e).setMessage(() -> "Error parsing json body. " + + "Will pass all payload bytes directly as a ByteBuf within the payload map").log(); + jsonWasInvalid = true; + parsedJsonObjects.clear(); + } + if (msg instanceof LastHttpContent) { + if (!parsedJsonObjects.isEmpty()) { + var payload = capturedHttpJsonMessage.payload(); + if (parsedJsonObjects.size() > 1) { + payload.put(JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY, parsedJsonObjects); + } else { + payload.put(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY, parsedJsonObjects.get(0)); + } + if (!jsonAccumulator.hasPartialValues()) { + context.onJsonPayloadParseSucceeded(); + } + } + if (jsonAccumulator.hasPartialValues() || parsedJsonObjects.isEmpty()) { + if (jsonAccumulator.getTotalBytesFullyConsumed() > Integer.MAX_VALUE) { + throw new IndexOutOfBoundsException("JSON contents were too large " + + jsonAccumulator.getTotalBytesFullyConsumed() + " for a single composite ByteBuf"); + } + // skip the contents that were already parsed and included in the payload as parsed json + // and pass the remaining stream + var jsonBodyByteLength = jsonWasInvalid ? 
0 : (int) jsonAccumulator.getTotalBytesFullyConsumed(); + assert accumulatedBody.readerIndex() == 0 : + "Didn't expect the reader index to advance since this is an internal object"; + capturedHttpJsonMessage.payload() + .put(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY, + accumulatedBody.retainedSlice(jsonBodyByteLength, + accumulatedBody.readableBytes() - jsonBodyByteLength)); + } else { + accumulatedBody.release(); + accumulatedBody = null; + } ctx.fireChannelRead(capturedHttpJsonMessage); - } else if (msg instanceof LastHttpContent) { - throw new IncompleteJsonBodyException(); } } else { super.channelRead(ctx, msg); diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyConvertHandler.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyConvertHandler.java index bad57f1da..6c41b9a56 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyConvertHandler.java +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodyConvertHandler.java @@ -1,10 +1,15 @@ package org.opensearch.migrations.replay.datahandlers.http; +import org.opensearch.migrations.replay.datahandlers.PayloadAccessFaultingMap; import org.opensearch.migrations.transform.IJsonTransformer; +import org.opensearch.migrations.transform.JsonKeysForHttpMessage; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; +import io.netty.util.ReferenceCountUtil; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class NettyJsonBodyConvertHandler extends ChannelInboundHandlerAdapter { private final IJsonTransformer transformer; @@ -15,8 +20,20 @@ public NettyJsonBodyConvertHandler(IJsonTransformer transformer) { @Override public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception { if (msg instanceof HttpJsonMessageWithFaultingPayload) { - var output = transformer.transformJson((HttpJsonMessageWithFaultingPayload) msg); - var newHttpJson = new HttpJsonMessageWithFaultingPayload(output); + var httpMsg = (HttpJsonMessageWithFaultingPayload) msg; + if (httpMsg.payload() instanceof PayloadAccessFaultingMap) { + // no reason for transforms to fault if there wasn't a body in the message + ((PayloadAccessFaultingMap) httpMsg.payload()).setDisableThrowingPayloadNotLoaded(true); + } + HttpJsonMessageWithFaultingPayload newHttpJson; + try { + var output = transformer.transformJson(httpMsg); + newHttpJson = new HttpJsonMessageWithFaultingPayload(output); + } catch (Exception e) { + var remainingBytes = httpMsg.payload().get(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY); + ReferenceCountUtil.release(remainingBytes); // release because we're not passing it along for cleanup + throw new TransformationException(e); + } ctx.fireChannelRead(newHttpJson); } else { super.channelRead(ctx, msg); diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodySerializeHandler.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodySerializeHandler.java index 6bf8df96c..9f9b5ed91 100644 --- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodySerializeHandler.java +++ 
b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/NettyJsonBodySerializeHandler.java @@ -1,15 +1,18 @@ package org.opensearch.migrations.replay.datahandlers.http; import java.io.IOException; -import java.util.Map; +import java.util.List; import org.opensearch.migrations.replay.datahandlers.JsonEmitter; import org.opensearch.migrations.transform.JsonKeysForHttpMessage; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; import io.netty.handler.codec.http.DefaultHttpContent; import io.netty.handler.codec.http.LastHttpContent; +import io.netty.util.ReferenceCountUtil; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -22,25 +25,58 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception var jsonMessage = (HttpJsonMessageWithFaultingPayload) msg; var payload = jsonMessage.payload(); jsonMessage.setPayloadFaultMap(null); - var payloadContents = (Map) payload.get( - JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY - ); ctx.fireChannelRead(msg); - if (payloadContents != null) { - serializePayload(ctx, payloadContents); + if (payload.containsKey(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY)) { + serializePayload(ctx, payload.get(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY)); + } else if (payload.containsKey(JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY)) { + serializePayloadList(ctx, + (List) payload.get(JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY), + !payload.containsKey(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY)); } + if (payload.containsKey(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY)) { + var rawBody = (ByteBuf) payload.get(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY); + if (rawBody.readableBytes() > 0) { + ctx.fireChannelRead(new DefaultHttpContent(rawBody)); + } else { + ReferenceCountUtil.release(rawBody); + } + } + ctx.fireChannelRead(LastHttpContent.EMPTY_LAST_CONTENT); } else { super.channelRead(ctx, msg); } } - private void serializePayload(ChannelHandlerContext ctx, Map payload) throws IOException { + private static final ByteBuf NEWLINE = Unpooled.unreleasableBuffer(Unpooled.wrappedBuffer(new byte[]{'\n'})); + + private void serializePayloadList(ChannelHandlerContext ctx, List payloadList, boolean addLastNewline) + throws IOException + { + var it = payloadList.iterator(); + while (it.hasNext()) { + var payload = it.next(); + try (var jsonEmitter = new JsonEmitter(ctx.alloc())) { + var pac = jsonEmitter.getChunkAndContinuations(payload, NUM_BYTES_TO_ACCUMULATE_BEFORE_FIRING); + while (true) { + ctx.fireChannelRead(new DefaultHttpContent(pac.partialSerializedContents)); + if (pac.nextSupplier == null) { + break; + } + pac = pac.nextSupplier.get(); + } + if (addLastNewline || it.hasNext()) { + ctx.fireChannelRead(new DefaultHttpContent(NEWLINE.retainedDuplicate())); + } + } + } + } + + private void serializePayload(ChannelHandlerContext ctx, Object payload) throws IOException{ try (var jsonEmitter = new JsonEmitter(ctx.alloc())) { var pac = jsonEmitter.getChunkAndContinuations(payload, NUM_BYTES_TO_ACCUMULATE_BEFORE_FIRING); while (true) { ctx.fireChannelRead(new DefaultHttpContent(pac.partialSerializedContents)); if (pac.nextSupplier == null) { - ctx.fireChannelRead(LastHttpContent.EMPTY_LAST_CONTENT); break; } pac = pac.nextSupplier.get(); diff --git 
a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/TransformationException.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/TransformationException.java new file mode 100644 index 000000000..4ce7c91ac --- /dev/null +++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/TransformationException.java @@ -0,0 +1,11 @@ +package org.opensearch.migrations.replay.datahandlers.http; + +public class TransformationException extends RuntimeException { + public TransformationException(Throwable cause) { + super(cause); + } + + public TransformationException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/PruferTreeGeneratorTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/PruferTreeGeneratorTest.java index aeedb6625..65ed5325e 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/PruferTreeGeneratorTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/PruferTreeGeneratorTest.java @@ -12,6 +12,7 @@ import org.opensearch.migrations.testutils.WrapWithNettyLeakDetection; + @WrapWithNettyLeakDetection(disableLeakChecks = true) public class PruferTreeGeneratorTest { diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/HeaderTransformerTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/HeaderTransformerTest.java index 074633cfb..fdd69645f 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/HeaderTransformerTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/HeaderTransformerTest.java @@ -23,7 +23,7 @@ import lombok.extern.slf4j.Slf4j; @Slf4j -@WrapWithNettyLeakDetection +@WrapWithNettyLeakDetection(repetitions = 2) public class HeaderTransformerTest extends InstrumentationTest { private static final String SILLY_TARGET_CLUSTER_NAME = "remoteguest"; @@ -128,54 +128,4 @@ public void testMalformedPayloadIsPassedThrough() throws Exception { + "authorization: Basic YWRtaW46YWRtaW4=\r\n" ); } - - /** - * Fixing this one will involve some thought. Where should we unwind to? I would say probably all - * the way back to the HttpTransformer. - * @throws Exception - */ - @Test - public void testMalformedPayload_andTypeMappingUri_IsPassedThrough() throws Exception { - var referenceStringBuilder = new StringBuilder(); - // mock object. 
values don't matter at all - not what we're testing - final var dummyAggregatedResponse = new AggregatedRawResponse(null, 12, Duration.ZERO, List.of(), null); - var testPacketCapture = new TestCapturePacketToHttpHandler(Duration.ofMillis(100), dummyAggregatedResponse); - - var transformingHandler = new HttpJsonTransformingConsumer<>( - new TransformationLoader().getTransformerFactoryLoader( - SILLY_TARGET_CLUSTER_NAME, - null, - "[{\"JsonTransformerForOpenSearch23PlusTargetTransformerProvider\":\"\"}]" - ), - null, - testPacketCapture, - rootContext.getTestConnectionRequestContext(0) - ); - - Random r = new Random(2); - var stringParts = IntStream.range(0, 1) - .mapToObj(i -> TestUtils.makeRandomString(r, 10)) - .map(o -> (String) o) - .collect(Collectors.toList()); - - TrackedFuture allConsumesFuture = TestUtils.chainedDualWriteHeaderAndPayloadParts( - transformingHandler, - stringParts, - referenceStringBuilder, - contentLength -> "PUT /foo HTTP/1.1\r\n" - + "HoSt: " - + SOURCE_CLUSTER_NAME - + "\r\n" - + "content-type: application/json\r\n" - + "content-length: " - + contentLength - + "\r\n" - ); - - var finalizationFuture = allConsumesFuture.thenCompose( - v -> transformingHandler.finalizeRequest(), - () -> "HeaderTransformTest.testMalformedPayload_andTypeMappingUri_IsPassedThrough" - ); - Assertions.assertThrows(Exception.class, () -> finalizationFuture.get()); - } } diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/RequestSenderOrchestratorTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/RequestSenderOrchestratorTest.java index 3819dec65..ad82de81e 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/RequestSenderOrchestratorTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/RequestSenderOrchestratorTest.java @@ -258,8 +258,8 @@ public void testThatSchedulingWorks() throws Exception { var cf = scheduledItems.get(i); var arr = cf.get(); Assertions.assertNull(arr.error); - Assertions.assertTrue(arr.responseSizeInBytes > 0); - var packetBytesArr = arr.responsePackets.stream() + Assertions.assertTrue(arr.sizeInBytes > 0); + var packetBytesArr = arr.packets.stream() .map(SimpleEntry::getValue) .collect(Collectors.toList()); try ( diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulatorTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulatorTest.java index 37d157f15..6d08b0364 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulatorTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/JsonAccumulatorTest.java @@ -32,7 +32,7 @@ static Object readJson(byte[] testFileBytes, int chunkBound) throws IOException var chunkSize = Math.min(r.nextInt(chunkBound), chunkByteBuffer.remaining()); chunkByteBuffer.limit(chunkSize + i); i += chunkSize; - var completedObject = jsonParser.consumeByteBuffer(chunkByteBuffer); + var completedObject = jsonParser.consumeByteBufferForSingleObject(chunkByteBuffer); if (completedObject != null) { Assertions.assertEquals(testFileBytes.length, i); return completedObject; diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumerTest.java 
b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumerTest.java index 6f1900268..c13950705 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumerTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumerTest.java @@ -260,7 +260,7 @@ private void testPeerResets( var br = new BufferedReader(isr) ) { Assertions.assertEquals("", Optional.ofNullable(br.readLine()).orElse("")); - Assertions.assertEquals(0, result.getResponseSizeInBytes()); + Assertions.assertEquals(0, result.getSizeInBytes()); } if (withServerReadTimeout) { log.trace( diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumerTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumerTest.java index 3774f64e7..ea1fcb28f 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumerTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumerTest.java @@ -8,6 +8,8 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; import org.junit.jupiter.api.Assertions; @@ -19,17 +21,32 @@ import org.opensearch.migrations.replay.AggregatedRawResponse; import org.opensearch.migrations.replay.TestCapturePacketToHttpHandler; +import org.opensearch.migrations.replay.TestUtils; import org.opensearch.migrations.replay.TransformationLoader; import org.opensearch.migrations.replay.datatypes.HttpRequestTransformationStatus; +import org.opensearch.migrations.replay.util.TrackedFuture; import org.opensearch.migrations.testutils.WrapWithNettyLeakDetection; import org.opensearch.migrations.tracing.InstrumentationTest; import org.opensearch.migrations.transform.IJsonTransformer; import org.opensearch.migrations.transform.JsonCompositeTransformer; +import org.opensearch.migrations.transform.JsonKeysForHttpMessage; import org.opensearch.migrations.transform.RemovingAuthTransformerFactory; +import io.netty.buffer.ByteBuf; + @WrapWithNettyLeakDetection class HttpJsonTransformingConsumerTest extends InstrumentationTest { + private final static String NDJSON_TEST_REQUEST = ( + "POST /test HTTP/1.1\r\n" + + "Host: foo.example\r\n" + + "Content-Type: application/json\r\n" + + "Content-Length: 97\r\n" + + "\r\n" + + "{\"index\":{\"_index\":\"test\",\"_id\":\"2\"}}\n" + + "{\"field1\":\"value1\"}\n" + + "{\"delete\":{\"_index\":\"test\",\"_id\":\"1\"}}\n"); + private static Stream provideTestParameters() { Integer[] attemptedChunks = { 1, 2, 4, 8, 100, 1000, Integer.MAX_VALUE }; Boolean[] transformationOptions = { true, false }; @@ -135,12 +152,17 @@ public void testRemoveAuthHeadersWorks() throws Exception { } @Test - public void testPartialBodyThrowsAndIsRedriven() throws Exception { + public void testPartialBodyIsPassedThrough() throws Exception { final var dummyAggregatedResponse = new AggregatedRawResponse(null, 17, Duration.ZERO, List.of(), null); var testPacketCapture = new TestCapturePacketToHttpHandler(Duration.ofMillis(100), dummyAggregatedResponse); var complexTransformer = new JsonCompositeTransformer(new 
IJsonTransformer() { @Override public Map transformJson(Map incomingJson) { + var payload = (Map) incomingJson.get("payload"); + Assertions.assertNull(payload.get(JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY)); + Assertions.assertNull(payload.get(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY)); + ((Map) incomingJson.get("headers")) + .put("extraKey", "extraValue"); // just walk everything - that's enough to touch the payload and throw walkMaps(incomingJson); return incomingJson; @@ -172,13 +194,128 @@ private void walkMaps(Object o) { } transformingHandler.consumeBytes(testBytes); var returnedResponse = transformingHandler.finalizeRequest().get(); - Assertions.assertEquals(new String(testBytes, StandardCharsets.UTF_8), testPacketCapture.getCapturedAsString()); - Assertions.assertArrayEquals(testBytes, testPacketCapture.getBytesCaptured()); - Assertions.assertTrue(returnedResponse.transformationStatus.isError()); - Assertions.assertInstanceOf( - NettyJsonBodyAccumulateHandler.IncompleteJsonBodyException.class, - returnedResponse.transformationStatus.getException() + var expectedString = new String(testBytes, StandardCharsets.UTF_8) + .replace("\r\n\r\n","\r\nextraKey: extraValue\r\n\r\n"); + Assertions.assertEquals(expectedString, testPacketCapture.getCapturedAsString()); + Assertions.assertArrayEquals(expectedString.getBytes(StandardCharsets.UTF_8), + testPacketCapture.getBytesCaptured()); + Assertions.assertEquals(HttpRequestTransformationStatus.completed(), returnedResponse.transformationStatus); + Assertions.assertNull(returnedResponse.transformationStatus.getException()); + } + + @Test + public void testNewlineDelimitedJsonBodyIsHandled() throws Exception { + final var dummyAggregatedResponse = new AggregatedRawResponse(null, 19, Duration.ZERO, List.of(), null); + var testPacketCapture = new TestCapturePacketToHttpHandler(Duration.ofMillis(100), dummyAggregatedResponse); + var sizeCalculatingTransformer = new JsonCompositeTransformer(incomingJson -> { + var payload = (Map) incomingJson.get("payload"); + Assertions.assertNull(payload.get(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY)); + Assertions.assertNull(payload.get(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY)); + var list = (List) payload.get(JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY); + ((Map) incomingJson.get("headers")) + .put("listSize", ""+list.size()); + return incomingJson; + }); + var transformingHandler = new HttpJsonTransformingConsumer( + sizeCalculatingTransformer, + null, + testPacketCapture, + rootContext.getTestConnectionRequestContext(0) + ); + + transformingHandler.consumeBytes(NDJSON_TEST_REQUEST.getBytes(StandardCharsets.UTF_8)); + var returnedResponse = transformingHandler.finalizeRequest().get(); + var expectedString = NDJSON_TEST_REQUEST.replace("\r\n\r\n","\r\nlistSize: 3\r\n\r\n"); + Assertions.assertEquals(expectedString, testPacketCapture.getCapturedAsString()); + Assertions.assertEquals(HttpRequestTransformationStatus.completed(), returnedResponse.transformationStatus); + Assertions.assertNull(returnedResponse.transformationStatus.getException()); + } + + @Test + public void testPartialNewlineDelimitedJsonBodyIsHandled() throws Exception { + final var dummyAggregatedResponse = new AggregatedRawResponse(null, 19, Duration.ZERO, List.of(), null); + var testPacketCapture = new TestCapturePacketToHttpHandler(Duration.ofMillis(100), dummyAggregatedResponse); + var sizeCalculatingTransformer = new JsonCompositeTransformer(incomingJson -> { + var payload 
= (Map) incomingJson.get("payload"); + Assertions.assertNull(payload.get(JsonKeysForHttpMessage.INLINED_JSON_BODY_DOCUMENT_KEY)); + Assertions.assertNotNull(payload.get(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY)); + var list = (List) payload.get(JsonKeysForHttpMessage.INLINED_NDJSON_BODIES_DOCUMENT_KEY); + var leftoverBytes = (ByteBuf) payload.get(JsonKeysForHttpMessage.INLINED_BINARY_BODY_DOCUMENT_KEY); + var headers = (Map) incomingJson.get("headers"); + headers.put("listSize", "" + list.size()); + headers.put("leftover", "" + leftoverBytes.readableBytes()); + return incomingJson; + }); + var transformingHandler = new HttpJsonTransformingConsumer( + sizeCalculatingTransformer, + null, + testPacketCapture, + rootContext.getTestConnectionRequestContext(0) + ); + + var testString = NDJSON_TEST_REQUEST + .replace("Content-Length: 97", "Content-Length: 87") + .substring(0, NDJSON_TEST_REQUEST.length()-10); + var testBytes = testString.getBytes(StandardCharsets.UTF_8); + transformingHandler.consumeBytes(testBytes); + var returnedResponse = transformingHandler.finalizeRequest().get(); + var expectedString = new String(testBytes, StandardCharsets.UTF_8) + .replace("\r\n\r\n","\r\nlistSize: 2\r\nleftover: 30\r\n\r\n"); + Assertions.assertEquals(expectedString, testPacketCapture.getCapturedAsString()); + Assertions.assertEquals(HttpRequestTransformationStatus.completed(), returnedResponse.transformationStatus); + Assertions.assertNull(returnedResponse.transformationStatus.getException()); + } + + @Test + public void testMalformedPayload_andThrowingTransformation_IsPassedThrough() throws Exception { + final String HOST_NAME = "foo.example"; + var referenceStringBuilder = new StringBuilder(); + // mock object. values don't matter at all - not what we're testing + final var dummyAggregatedResponse = new AggregatedRawResponse(null, 12, Duration.ZERO, List.of(), null); + var testPacketCapture = new TestCapturePacketToHttpHandler(Duration.ofMillis(100), dummyAggregatedResponse); + + var transformingHandler = new HttpJsonTransformingConsumer<>( + new TransformationLoader().getTransformerFactoryLoader( + HOST_NAME, + null, + "[{\"JsonTransformerForOpenSearch23PlusTargetTransformerProvider\":\"\"}]" + ), + null, + testPacketCapture, + rootContext.getTestConnectionRequestContext(0) + ); + + Random r = new Random(2); + var stringParts = IntStream.range(0, 1) + .mapToObj(i -> TestUtils.makeRandomString(r, 10)) + .map(o -> (String) o) + .collect(Collectors.toList()); + + TrackedFuture allConsumesFuture = TestUtils.chainedDualWriteHeaderAndPayloadParts( + transformingHandler, + stringParts, + referenceStringBuilder, + contentLength -> "PUT /foo HTTP/1.1\r\n" + + "HoSt: " + HOST_NAME + "\r\n" + + "content-type: application/json\r\n" + + "content-length: " + + contentLength + + "\r\n" + ); + + var finalizationFuture = allConsumesFuture.getDeferredFutureThroughHandle( + (v,t) -> transformingHandler.finalizeRequest(), + () -> "HeaderTransformTest.testMalformedPayload_andTypeMappingUri_IsPassedThrough" ); + var outputAndResult = finalizationFuture.get(); + Assertions.assertInstanceOf(TransformationException.class, + TrackedFuture.unwindPossibleCompletionException(outputAndResult.transformationStatus.getException()), + "It's acceptable for now that the OpenSearch upgrade transformation can't handle non-json " + + "content. 
If that Transform wants to handle this on its own, we'll need to use another transform " + + "configuration so that it throws and we can do this test."); + var combinedOutputBuf = outputAndResult.transformedOutput.getResponseAsByteBuf(); + Assertions.assertTrue(combinedOutputBuf.readableBytes() == 0); + combinedOutputBuf.release(); } public static List sliceRandomChunks(byte[] bytes, int numChunks) { diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/e2etests/KafkaRestartingTrafficReplayerTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/e2etests/KafkaRestartingTrafficReplayerTest.java index 77ad252b0..659f55feb 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/e2etests/KafkaRestartingTrafficReplayerTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/e2etests/KafkaRestartingTrafficReplayerTest.java @@ -30,6 +30,7 @@ import org.opensearch.migrations.replay.traffic.generator.ExhaustiveTrafficStreamGenerator; import org.opensearch.migrations.replay.traffic.source.ISimpleTrafficCaptureSource; import org.opensearch.migrations.replay.traffic.source.ITrafficStreamWithKey; +import org.opensearch.migrations.testutils.SharedDockerImageNames; import org.opensearch.migrations.testutils.SimpleNettyHttpServer; import org.opensearch.migrations.testutils.WrapWithNettyLeakDetection; import org.opensearch.migrations.tracing.InstrumentationTest; @@ -43,7 +44,6 @@ import org.testcontainers.containers.KafkaContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; -import org.testcontainers.utility.DockerImageName; @Slf4j @Testcontainers(disabledWithoutDocker = true) @@ -65,9 +65,7 @@ public class KafkaRestartingTrafficReplayerTest extends InstrumentationTest { @Container // see // https://docs.confluent.io/platform/current/installation/versions-interoperability.html#cp-and-apache-kafka-compatibility - private final KafkaContainer embeddedKafkaBroker = new KafkaContainer( - DockerImageName.parse("confluentinc/cp-kafka:7.5.0") - ); + private final KafkaContainer embeddedKafkaBroker = new KafkaContainer(SharedDockerImageNames.KAFKA); private static class CounterLimitedReceiverFactory implements Supplier> { AtomicInteger nextStopPointRef = new AtomicInteger(INITIAL_STOP_REPLAYER_REQUEST_COUNT); diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/http/retries/HttpRetryTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/http/retries/HttpRetryTest.java index feff8bc01..5a48e599b 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/http/retries/HttpRetryTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/http/retries/HttpRetryTest.java @@ -25,6 +25,7 @@ import org.opensearch.migrations.replay.datatypes.TransformedOutputAndResult; import org.opensearch.migrations.replay.util.TextTrackedFuture; import org.opensearch.migrations.replay.util.TrackedFuture; +import org.opensearch.migrations.testutils.SharedDockerImageNames; import org.opensearch.migrations.testutils.SimpleHttpResponse; import org.opensearch.migrations.testutils.SimpleHttpServer; import org.opensearch.migrations.testutils.ToxiProxyWrapper; @@ -45,9 +46,6 @@ @Slf4j @WrapWithNettyLeakDetection(repetitions = 1) public class HttpRetryTest { - - public static final String HTTPD_IMAGE = 
"httpd:alpine"; - private ByteBufList makeRequest() { return new ByteBufList(Unpooled.wrappedBuffer(TestHttpServerContext.getRequestStringForSimpleGet("/") .getBytes(StandardCharsets.UTF_8))); @@ -215,7 +213,7 @@ public void testMalformedResponseFailuresNeverGiveUp() throws Exception { var executor = Executors.newSingleThreadScheduledExecutor(new DefaultThreadFactory("HttpRetryTest")); try (var rootContext = TestContext.withAllTracking(); var network = Network.newNetwork(); - var server = new GenericContainer<>(HTTPD_IMAGE) + var server = new GenericContainer<>(SharedDockerImageNames.HTTPD) .withNetwork(network) .withNetworkAliases(SERVERNAME_ALIAS) .waitingFor(Wait.forHttp("/").forStatusCode(200)).withStartupTimeout(Duration.ofMinutes(5)); diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaCommitsWorkBetweenLongPollsTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaCommitsWorkBetweenLongPollsTest.java index 6c3f01de3..d6f03c8b8 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaCommitsWorkBetweenLongPollsTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaCommitsWorkBetweenLongPollsTest.java @@ -13,6 +13,7 @@ import org.opensearch.migrations.replay.traffic.source.BlockingTrafficSource; import org.opensearch.migrations.replay.traffic.source.ITrafficStreamWithKey; +import org.opensearch.migrations.testutils.SharedDockerImageNames; import org.opensearch.migrations.tracing.InstrumentationTest; import lombok.Lombok; @@ -21,7 +22,6 @@ import org.testcontainers.containers.KafkaContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; -import org.testcontainers.utility.DockerImageName; @Slf4j @Testcontainers(disabledWithoutDocker = true) @@ -33,9 +33,7 @@ public class KafkaCommitsWorkBetweenLongPollsTest extends InstrumentationTest { @Container // see // https://docs.confluent.io/platform/current/installation/versions-interoperability.html#cp-and-apache-kafka-compatibility - private final KafkaContainer embeddedKafkaBroker = new KafkaContainer( - DockerImageName.parse("confluentinc/cp-kafka:7.5.0") - ); + private final KafkaContainer embeddedKafkaBroker = new KafkaContainer(SharedDockerImageNames.KAFKA); @SneakyThrows private KafkaConsumer buildKafkaConsumer() { diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaKeepAliveTests.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaKeepAliveTests.java index 17ad53141..304b02d46 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaKeepAliveTests.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaKeepAliveTests.java @@ -18,6 +18,7 @@ import org.opensearch.migrations.replay.datatypes.ITrafficStreamKey; import org.opensearch.migrations.replay.traffic.source.BlockingTrafficSource; +import org.opensearch.migrations.testutils.SharedDockerImageNames; import org.opensearch.migrations.tracing.InstrumentationTest; import org.opensearch.migrations.tracing.TestContext; @@ -27,7 +28,6 @@ import org.testcontainers.containers.KafkaContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; -import org.testcontainers.utility.DockerImageName; @Slf4j 
@Testcontainers(disabledWithoutDocker = true) @@ -48,9 +48,7 @@ public class KafkaKeepAliveTests extends InstrumentationTest { @Container // see // https://docs.confluent.io/platform/current/installation/versions-interoperability.html#cp-and-apache-kafka-compatibility - private final KafkaContainer embeddedKafkaBroker = new KafkaContainer( - DockerImageName.parse("confluentinc/cp-kafka:7.5.0") - ); + private final KafkaContainer embeddedKafkaBroker = new KafkaContainer(SharedDockerImageNames.KAFKA); private KafkaTrafficCaptureSource kafkaSource; diff --git a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaTrafficCaptureSourceLongTermTest.java b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaTrafficCaptureSourceLongTermTest.java index 8aef4ed69..6c5647ab9 100644 --- a/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaTrafficCaptureSourceLongTermTest.java +++ b/TrafficCapture/trafficReplayer/src/test/java/org/opensearch/migrations/replay/kafka/KafkaTrafficCaptureSourceLongTermTest.java @@ -11,13 +11,13 @@ import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; +import org.opensearch.migrations.testutils.SharedDockerImageNames; import org.opensearch.migrations.tracing.InstrumentationTest; import lombok.extern.slf4j.Slf4j; import org.testcontainers.containers.KafkaContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; -import org.testcontainers.utility.DockerImageName; @Slf4j @Testcontainers(disabledWithoutDocker = true) @@ -31,9 +31,7 @@ public class KafkaTrafficCaptureSourceLongTermTest extends InstrumentationTest { @Container // see // https://docs.confluent.io/platform/current/installation/versions-interoperability.html#cp-and-apache-kafka-compatibility - private final KafkaContainer embeddedKafkaBroker = new KafkaContainer( - DockerImageName.parse("confluentinc/cp-kafka:7.5.0") - ); + private final KafkaContainer embeddedKafkaBroker = new KafkaContainer(SharedDockerImageNames.KAFKA); @Test @Tag("isolatedTest") diff --git a/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonJoltMessageTransformerProvider/src/test/java/org/opensearch/migrations/replay/MultipleJoltScriptsTest.java b/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonJoltMessageTransformerProvider/src/test/java/org/opensearch/migrations/replay/MultipleJoltScriptsTest.java index 6cf42927d..5477903c5 100644 --- a/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonJoltMessageTransformerProvider/src/test/java/org/opensearch/migrations/replay/MultipleJoltScriptsTest.java +++ b/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonJoltMessageTransformerProvider/src/test/java/org/opensearch/migrations/replay/MultipleJoltScriptsTest.java @@ -56,4 +56,90 @@ public void testAddGzipAndCustom() throws Exception { Assertions.assertEquals("newValue", headers.get("newHeader")); } + @Test + public void testExciseWhenPresent() throws Exception { + var script = + "[{ \"JsonJoltTransformerProvider\":\n" + + "[\n" + + " {\n" + + " \"script\": {\n" + + " \"operation\": \"shift\",\n" + + " \"spec\": {\n" + + " \"payload\": {\n" + + " \"inlinedJsonBody\": {\n" + + " \"top\": {\n" + + " \"tagToExcise\": {\n" + + " \"*\": \"payload.inlinedJsonBody.top.&\" \n" + + " },\n" + + " \"*\": \"payload.inlinedJsonBody.top.&\"\n" + + " },\n" + + " \"*\": \"payload.inlinedJsonBody.&\"\n" + + " },\n" + + " \"*\": 
\"payload.&\"\n" + + " },\n" + + " \"*\": \"&\"\n" + + " }\n" + + " }\n" + + " }, \n" + + " {\n" + + " \"script\": {\n" + + " \"operation\": \"modify-overwrite-beta\",\n" + + " \"spec\": {\n" + + " \"URI\": \"=split('/extraThingToRemove',@(1,&))\"\n" + + " }\n" + + " }\n" + + " },\n" + + " {\n" + + " \"script\": {\n" + + " \"operation\": \"modify-overwrite-beta\",\n" + + " \"spec\": {\n" + + " \"URI\": \"=join('',@(1,&))\"\n" + + " }\n" + + " }\n" + + " }\n" + + "]\n" + + "}]"; + + + var excisingTransformer = new TransformationLoader().getTransformerFactoryLoader( + "testhostname", + null, + script + ); + var origDocStr = "{\n" + + " \"method\": \"PUT\",\n" + + " \"protocol\": \"HTTP/1.0\",\n" + + " \"URI\": \"/oldStyleIndex/extraThingToRemove/moreStuff\",\n" + + " \"headers\": {\n" + + " \"host\": \"127.0.0.1\"\n" + + " },\n" + + " \"payload\": {\n" + + " \"inlinedJsonBody\": {\n" + + " \"top\": {\n" + + " \"tagToExcise\": {\n" + + " \"properties\": {\n" + + " \"field1\": {\n" + + " \"type\": \"text\"\n" + + " },\n" + + " \"field2\": {\n" + + " \"type\": \"keyword\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + var expectedDocStr = "{\"method\":\"PUT\",\"protocol\":\"HTTP/1.0\",\"URI\":\"/oldStyleIndex/moreStuff\",\"headers\":{\"host\":\"testhostname\"},\"payload\":{\"inlinedJsonBody\":{\"top\":{\"properties\":{\"field1\":{\"type\":\"text\"},\"field2\":{\"type\":\"keyword\"}}}}}}"; + var origDoc = parseAsMap(origDocStr); + var newDoc = excisingTransformer.transformJson(origDoc); + var newAsStr = mapper.writeValueAsString(newDoc); + Assertions.assertEquals(expectedDocStr, newAsStr); + + var secondPassDoc = excisingTransformer.transformJson(newDoc); + var secondPassDocAsStr = mapper.writeValueAsString(secondPassDoc); + Assertions.assertEquals(expectedDocStr, secondPassDocAsStr); + + Assertions.assertEquals("testhostname", ((Map) newDoc.get(JsonKeysForHttpMessage.HEADERS_KEY)).get("host")); + } } diff --git a/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonMessageTransformerInterface/src/main/java/org/opensearch/migrations/transform/JsonKeysForHttpMessage.java b/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonMessageTransformerInterface/src/main/java/org/opensearch/migrations/transform/JsonKeysForHttpMessage.java index 8a88f7f92..f5ff837c2 100644 --- a/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonMessageTransformerInterface/src/main/java/org/opensearch/migrations/transform/JsonKeysForHttpMessage.java +++ b/TrafficCapture/transformationPlugins/jsonMessageTransformers/jsonMessageTransformerInterface/src/main/java/org/opensearch/migrations/transform/JsonKeysForHttpMessage.java @@ -16,4 +16,13 @@ private JsonKeysForHttpMessage() {} * the payload object will be an empty map. */ public static final String INLINED_JSON_BODY_DOCUMENT_KEY = "inlinedJsonBody"; + /** + * for the type application + */ + public static final String INLINED_NDJSON_BODIES_DOCUMENT_KEY = "inlinedJsonSequenceBodies"; + /** + * This maps to a ByteBuf that is owned by the caller. + * Any consumers should retain if they need to access it later. This may be UTF8, UTF16 encoded, or something else. 
+ */ + public static final String INLINED_BINARY_BODY_DOCUMENT_KEY = "inlinedBinaryBody"; } diff --git a/TrafficCapture/transformationPlugins/jsonMessageTransformers/openSearch23PlusTargetTransformerProvider/src/test/java/org/opensearch/migrations/transform/TypeMappingsExcisionTest.java b/TrafficCapture/transformationPlugins/jsonMessageTransformers/openSearch23PlusTargetTransformerProvider/src/test/java/org/opensearch/migrations/transform/TypeMappingsExcisionTest.java index 5eb218337..4f9fd36e2 100644 --- a/TrafficCapture/transformationPlugins/jsonMessageTransformers/openSearch23PlusTargetTransformerProvider/src/test/java/org/opensearch/migrations/transform/TypeMappingsExcisionTest.java +++ b/TrafficCapture/transformationPlugins/jsonMessageTransformers/openSearch23PlusTargetTransformerProvider/src/test/java/org/opensearch/migrations/transform/TypeMappingsExcisionTest.java @@ -51,7 +51,7 @@ private static Map parseJsonFromResourceName(String resourceName var isr = new InputStreamReader(resourceStream, StandardCharsets.UTF_8) ) { var expectedBytes = CharStreams.toString(isr).getBytes(StandardCharsets.UTF_8); - return (Map) jsonAccumulator.consumeByteBuffer(ByteBuffer.wrap(expectedBytes)); + return (Map) jsonAccumulator.consumeByteBufferForSingleObject(ByteBuffer.wrap(expectedBytes)); } } diff --git a/deployment/cdk/opensearch-service-migration/README.md b/deployment/cdk/opensearch-service-migration/README.md index d65bfc437..1a1892bed 100644 --- a/deployment/cdk/opensearch-service-migration/README.md +++ b/deployment/cdk/opensearch-service-migration/README.md @@ -28,15 +28,9 @@ Java is used by the opensearch-migrations repo and Gradle, its associated build ``` More details can be found [here](../../../TrafficCapture/dockerSolution/README.md) -3- Fetch Migration Setup, in order to make use of Fetch Migration for historical data capture, a user should make any modifications necessary to the `dp_pipeline_template.yaml` file located in the same directory as this README before deploying. More information around the parameters used in the pipeline file can be found [here](https://opensearch.org/docs/latest/data-prepper/pipelines/pipelines/). +3- Configure the desired **[AWS credentials](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_prerequisites)**, as these will dictate the region and account used for deployment. -The existing pipeline template works for the `demo-deploy` stack without any further modifications. - -Further steps on starting Fetch Migration after deployment can be found [here](#kicking-off-fetch-migration) - -4- Configure the desired **[AWS credentials](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html#getting_started_prerequisites)**, as these will dictate the region and account used for deployment. - -5- There is a known issue where service linked roles fail to get applied when deploying certain AWS services for the first time in an account. This can be resolved by simply deploying again (for each failing role) or avoided entirely by creating the service linked role initially like seen below: +4- There is a known issue where service linked roles fail to get applied when deploying certain AWS services for the first time in an account. 
This can be resolved by simply deploying again (for each failing role) or avoided entirely by creating the service linked role initially, as seen below: ```shell aws iam create-service-linked-role --aws-service-name opensearchservice.amazonaws.com; aws iam create-service-linked-role --aws-service-name ecs.amazonaws.com ``` @@ -60,10 +54,21 @@ cdk bootstrap --c contextId=demo-deploy Further CDK documentation [here](https://docs.aws.amazon.com/cdk/v2/guide/cli.html) ## Deploying the CDK + +### Which configuration options should I use? +Update the file named `cdk.context.json` in this directory to select migration options for metadata, historical backfill or traffic capture and replay; see details [here](https://github.com/opensearch-project/opensearch-migrations/wiki/Configuration-Options). + +### How is the CDK context used in this solution? This project uses CDK context parameters to configure deployments. These context values will dictate the composition of your stacks as well as which stacks get deployed. The full list of available configuration options for this project is listed [here](./options.md). Each option can be provided as an empty string `""` or simply not included, and in each of these 'empty' cases the option will use the project default value (if it exists) or CloudFormation's default value. +Depending on your use-case, you may choose to provide options from both the `cdk.context.json` and the CDK CLI, in which case it is important to know the precedence level for context values. The list below shows these levels in order, with values passed via the CDK CLI taking the highest precedence: +1. CDK CLI passed context values, e.g. --c stage=dev2 (highest precedence) +2. Created `cdk.context.json` in the same directory as this README +3. Existing `default-values.json` in the same directory as this README + +### Deploying the demo solution A set of demo context values (using the `demo-deploy` label) has been set in the `cdk.context.json` located in this directory, which can be customized or used as is for a quickstart demo solution. This demo solution can be deployed with the following command: ```shell @@ -75,7 +80,7 @@ Additionally, another context block in the `cdk.context.json` could be created w ```shell cdk deploy "*" --c contextId=uat-deploy --require-approval never --concurrency 3 ``` -**Note**: Separate deployments within the same account and region should use unique `stage` context values to avoid resource naming conflicts when deploying (**Except** in the multiple replay scenario stated [here](#how-to-run-multiple-traffic-replayer-scenarios)) +**Note**: Separate deployments within the same account and region should use unique `stage` context values to avoid resource naming conflicts when deploying (**Except** in the multiple replay scenario stated [here](#how-to-run-multiple-traffic-replayer-scenarios)) Stacks can also be redeployed individually, with any required stacks also being deployed initially, e.g. the following command would deploy the migration-console stack ```shell @@ -87,15 +92,14 @@ To get a list of all the available stack ids that can be deployed/redeployed for cdk ls --c contextId=demo-deploy ``` +## How to use the deployed Migration tools? +See the [wiki](https://github.com/opensearch-project/opensearch-migrations/wiki) for steps on how to use this tooling to perform different migrations.
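To make the context precedence described above concrete, a value passed on the CDK CLI overrides the same key from `cdk.context.json`. A minimal sketch (the `stage` override here is purely illustrative and assumes the `demo-deploy` context block from this README):

```shell
# Deploys with the demo-deploy context block, but the CLI-passed stage value
# takes precedence over any stage defined in cdk.context.json
cdk deploy "*" --c contextId=demo-deploy --c stage=dev2 --require-approval never --concurrency 3
```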
-Depending on your use-case, you may choose to provide options from both the `cdk.context.json` and the CDK CLI, in which case it is important to know the precedence level for context values. The below order shows these levels with values being passed by the CDK CLI having the most importance -1. CDK CLI passed context values (highest precedence) -2. Created `cdk.context.json` in the same directory as this README -3. Existing `default-values.json` in the same directory as this README +## Accessing the Migration Console -## Executing Commands on a Deployed Service +The Migration Console is a deployed ECS service container in this solution that should be accessed for managing/executing different phases of a migration -Once a service has been deployed, a command shell can be opened for that service's container. If the SSM Session Manager plugin is not installed, it should be installed when prompted from the below exec command. +To open a shell on the Migration Console container execute the below command. If the SSM Session Manager plugin is not installed, it should be installed when prompted from the below exec command. ```shell # ./accessContainer.sh ./accessContainer.sh migration-console dev us-east-1 @@ -117,177 +121,6 @@ To be able to execute this command the user will need to have their AWS credenti } ``` -## Starting the Traffic Replayer -When the Migration solution is deployed, the Traffic Replayer does not immediately begin replaying. This is designed to allow users time to do any historical backfill (e.g. Fetch Migration service) that is needed as well as setup the Capture Proxy on their source coordinating nodes. When the user is ready they can then run the following command from the Migration Console service and begin replaying the traffic that has been captured by the Capture Proxy - -```shell -aws ecs update-service --cluster migration--ecs-cluster --service migration--traffic-replayer-default --desired-count 1 -``` - -With this same command, a user could stop replaying capture traffic by removing the Traffic Replayer instance if they set `--desired-count 0` - -## Testing the deployed solution - -Once the solution is deployed, the easiest way to test the solution is to access the `migration-console` service container and run an opensearch-benchmark workload through to simulate incoming traffic, as the following steps illustrate - -```shell -# Exec into container -./accessContainer.sh migration-console dev us-east-1 - -# Run opensearch-benchmark workload (i.e. geonames, nyc_taxis, http_logs) -./runTestBenchmarks.sh -``` - -After the benchmark has been run, the indices and documents of the source and target clusters can be checked from the same migration-console container to confirm -```shell -# Check doc counts and indices for both source and target cluster -./catIndices.sh -``` - -## Importing Target Clusters -By default, if a `targetClusterEndpoint` option isn't provided, this CDK will create an OpenSearch Service Domain (using provided options) to be the target cluster of this solution. While setting up this Domain, the CDK will also configure a relevant security group and allows options to configure an access policy on the Domain (`accessPolicies` and `openAccessPolicyEnabled` options) such that the Domain is fully setup for use at deployment. 
- -In the case of an imported target cluster, there are normally some modifications that need to be made on the existing target cluster to allow proper functioning of this solution after deployment which the below subsections elaborate on. - -#### OpenSearch Service -For a Domain, there are typically two items that need to be configured to allow proper functioning of this solution -1. The Domain should have a security group that allows communication from the applicable Migration services (Traffic Replayer, Migration Console, Fetch Migration). This CDK will automatically create an `osClusterAccessSG` security group, which has already been applied to the Migration services, that a user should then add to their existing Domain to allow this access. -2. The access policy on the Domain should be an open access policy that allows all access or an access policy that at least allows the IAM task roles for the applicable Migration services (Traffic Replayer, Migration Console, Fetch Migration) - -#### OpenSearch Serverless -A Collection, will need to configure a Network and Data Access policy to allow proper functioning of this solution -1. The Collection should have a network policy that has a `VPC` access type by creating a VPC endpoint on the VPC used for this solution. This VPC endpoint should be configured for the private subnets of the VPC and attach the `osClusterAccessSG` security group. -2. The data access policy needed should grant permission to perform all [index operations](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-data-access.html#serverless-data-supported-permissions) (`aoss:*`) for all indexes in the given collection, and use the task roles of the applicable Migration services (Traffic Replayer, Migration Console, Fetch Migration) as the principals for this data access policy. - -See [Configuring SigV4 Replayer Requests](#configuring-sigv4-replayer-requests) for details on enabling SigV4 requests from the Traffic Replayer to the target cluster - -## Configuring SigV4 Replayer Requests -With the [required setup](#importing-target-clusters) on the target cluster having been completed, a user can then use the `trafficReplayerExtraArgs` option to specify the Traffic Replayer service argument for enabling SigV4 authentication, which the below sections show. **Note**: As only one authorization header can be specified, the `trafficReplayerEnableClusterFGACAuth` option should not be used if enabling SigV4 authentication for the Traffic Replayer. See [here](#how-is-an-authorization-header-set-for-requests-from-the-replayer-to-the-target-cluster) for more details on how the Traffic Replayer sets its authorization header. -#### OpenSearch Service -```shell -# e.g. --sigv4-auth-header-service-region es,us-east-1 -"trafficReplayerExtraArgs": "--sigv4-auth-header-service-region es," -``` - -#### OpenSearch Serverless -```shell -# e.g. --sigv4-auth-header-service-region aoss,us-east-1 -"trafficReplayerExtraArgs": "--sigv4-auth-header-service-region aoss," -``` - -## Kicking off Fetch Migration - -* First, access the Migration Console container - -```shell -# ./accessContainer.sh migration-console STAGE REGION -./accessContainer.sh migration-console dev us-east-1 -``` - -* Execute the ECS run task command generated by `showFetchMigrationCommand.sh` script. - * The status of the ECS Task can be monitored from the AWS Console. Once the task is in the `Running` state, logs and progress can be viewed via CloudWatch. 
-```shell -# This will execute the script and print the required ECS run task command -./showFetchMigrationCommand.sh - -# Paste command output by the script into the terminal to kick off Fetch Migration -``` - -The pipeline configuration file can be viewed (and updated) via AWS Secrets Manager. -Please note that it will be base64 encoded. - -## Kicking off OpenSearch Ingestion Service - -**Note**: Using OpenSearch Ingestion Service is currently an experimental feature that must be enabled with the `migrationConsoleEnableOSI` option. Currently only Managed OpenSearch service as a source to Managed OpenSearch service as a target migrations are supported - -After enabling and deploying the CDK, log into the Migration Console -```shell -# ./accessContainer.sh migration-console STAGE REGION -./accessContainer.sh migration-console dev us-east-1 -``` -Make any modifications to the `osiPipelineTemplate.yaml` on the Migration Console, if needed. Note: Placeholder values exist in the file to automatically populate source/target endpoints and corresponding auth options by the python tool that uses this yaml file. - -The OpenSearch Ingestion pipeline can then be created by giving an existing source cluster endpoint and running the below command -```shell -./osiMigration.py create-pipeline-from-solution --source-endpoint= -``` - -When OpenSearch Ingestion pipelines are created they begin running immediately and can be stopped with the following command -```shell -./osiMigration.py stop-pipeline -``` -Or restarted with the following command -```shell -./osiMigration.py start-pipeline -``` - -## Kicking off Reindex from Snapshot (RFS) - -When the RFS service gets deployed, it does not start running immediately. Instead, the user controls when they want to kick off a historical data migration. - -The following command can be run from the Migration Console to initiate the RFS historical data migration -```shell -aws ecs update-service --cluster migration--ecs-cluster --service migration--reindex-from-snapshot --desired-count 1 -``` - -Currently, the RFS application will enter an idle state with the ECS container still running upon completion. This can be cleaned up by using the same command with `--desired-count 0` - - -## Monitoring Progress via Instrumentation - -The replayer and capture proxy (if started with the `--otelCollectorEndpoint` argument) emit metrics through an -otel-collector endpoint, which is deployed within Migrations Assistant tasks as a sidecar container. The -otel-collectors will publish metrics and traces to Amazon CloudWatch and AWS X-Ray. - -Some of these metrics will show simple progress, such as bytes or records transmitted. Other records can show higher -level information, such the number of responses with status codes that match vs those that don't. To observe those, -search for `statusCodesMatch` in the CloudWatch Console. That's emitted as an attribute along with the method and -the source/target status code (rounded down to the last hundred; i.e. a status code of 201 has a 200 attribute). - -Other metrics will show latencies, the number of requests, unique connections at a time and more. Low-level and -high-level metrics are being improved and added. For the latest information, see the -[README.md](../../../coreUtilities/README.md). - -Along with metrics, traces are emitted by the replayer and the proxy (when proxy is run with metrics enabled, e.g. by -launching with --otelCollectorEndpoint set to the otel-collector sidecar). 
Traces will include very granular data for -each connection, including how long the TCP connections are open, how long the source and target clusters took to send -a response, as well as other internal details that can explain the progress of each request. - -Notice that traces for the replayer will show connections and Kafka records open, in some cases, much longer than their -representative HTTP transactions. This is because records are considered 'active' to the replayer until they are -committed and records are only committed once _all_ previous records have also been committed. Details such as that -are defensive for when further diagnosis is necessary. - -## Configuring Capture Proxy IAM and Security Groups -Although this CDK does not set up the Capture Proxy on source cluster nodes (except in the case of the demo solution), the Capture Proxy instances do need to communicate with resources deployed by this CDK (e.g. Kafka) which this section covers - -#### Capture Proxy on OpenSearch/Elasticsearch nodes -Before [setting up Capture Proxy instances](../../../TrafficCapture/trafficCaptureProxyServer/README.md#how-to-attach-a-capture-proxy-on-a-coordinator-node) on the source cluster, the IAM policies and Security Groups for the nodes should allow access to the Migration tooling: -1. The coordinator nodes should add the `trafficStreamSourceSG` security group to allow access to Kafka -2. The IAM role used by the coordinator nodes should have permissions to publish captured traffic to Kafka. A template policy to use, can be seen below - * This can be added through the AWS Console (IAM Role -> Add permissions -> Create inline policy -> JSON view) -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Action": "kafka-cluster:Connect", - "Resource": "arn:aws:kafka:::cluster/migration-msk-cluster-/*", - "Effect": "Allow" - }, - { - "Action": [ - "kafka-cluster:CreateTopic", - "kafka-cluster:DescribeTopic", - "kafka-cluster:WriteData" - ], - "Resource": "arn:aws:kafka:::topic/migration-msk-cluster-/*", - "Effect": "Allow" - } - ] -} -``` - ## Tearing down CDK To remove all the CDK stack(s) which get created during a deployment we can execute a command similar to below ```shell @@ -301,7 +134,9 @@ cdk destroy migration-console --c contextId=demo-deploy ``` **Note**: The `demo-deploy`contextId has the retention policy for the OpenSearch Domain set to `DESTROY`, which will remove this resource and all its data when the stack is deleted. In order to retain the Domain on stack deletion the `domainRemovalPolicy` would need to be set to `RETAIN`. -## How to run multiple Traffic Replayer scenarios +## Appendix + +### How to run multiple Traffic Replayer scenarios The project supports running distinct Replayers in parallel, with each Replayer sending traffic to a different target cluster. This functionality allows users to test replaying captured traffic to multiple different target clusters in parallel. Users are able to provide the desired configuration options to spin up a new OpenSearch Domain and Traffic Replayer while using the existing Migration infrastructure that has already been deployed. To give an example of this process, a user could decide to configure an additional Replayer and Domain for the demo setup in the `cdk.context.json` by configuring a new context block like below. **Note**: `addOnMigrationDeployId` is a required field to allow proper naming of these additional resources. 
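The add-on context block itself is elided from this diff; once such a block (labeled, say, `demo-addon1` to match the removal command in the next hunk) has been added to `cdk.context.json`, the additional Replayer and Domain stacks could presumably be deployed with the usual workflow:

```shell
# Deploy the add-on Traffic Replayer and target Domain alongside the existing deployment
cdk deploy "*" --c contextId=demo-addon1 --require-approval never --concurrency 3
```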
@@ -329,19 +164,16 @@ Finally, the additional infrastructure can be removed with: cdk destroy "*" --c contextId=demo-addon1 ``` -## Appendix ### How is an Authorization header set for requests from the Replayer to the target cluster? The Replayer documentation [here](../../../TrafficCapture/trafficReplayer/README.md#authorization-header-for-replayed-requests) explains the reasoning the Replayer uses to determine what auth header it should use when replaying requests to the target cluster. -As it relates to this CDK, the two main avenues for setting an explicit auth header for the Replayer are through the `trafficReplayerEnableClusterFGACAuth` and `trafficReplayerExtraArgs` options -1. The `trafficReplayerEnableClusterFGACAuth` option will utilize the `--auth-header-user-and-secret` parameter of the Replayer service to create a basic auth header with a username and AWS Secrets Manager secret value. This option requires that a Fine Grained Access Control (FGAC) user be configured (see `fineGrainedManagerUserName` and `fineGrainedManagerUserSecretManagerKeyARN` CDK context options [here](./options.md)) or is running in demo mode (see `enableDemoAdmin` CDK context option). -2. The `trafficReplayerExtraArgs` option allows a user to directly specify the Replayer parameter they want to use for setting the auth header. For example to enable SigV4 as the auth header for an OpenSearch service in us-east-1, a user could set this option to `--sigv4-auth-header-service-region es,us-east-1` +As it relates to this CDK, the `targetCluster` configuration option (specifically the `auth` element) that a user provides will dictate which auth the Migration tools will use for communicating with the target cluster ### Common Deployment Errors -**Problem**: +**Problem**: ``` ERROR: failed to solve: public.ecr.aws/sam/build-nodejs18.x: pulling from host public.ecr.aws failed with status code [manifests latest]: 403 Forbidden ``` diff --git a/deployment/cdk/opensearch-service-migration/default-values.json b/deployment/cdk/opensearch-service-migration/default-values.json index 3d15ff04a..9991f0354 100644 --- a/deployment/cdk/opensearch-service-migration/default-values.json +++ b/deployment/cdk/opensearch-service-migration/default-values.json @@ -1,7 +1,6 @@ { "engineVersion": "OS_2.9", "targetClusterVersion": "OS_2.9", - "domainName": "os-service-domain", "tlsSecurityPolicy": "TLS_1_2", "enforceHTTPS": true, "nodeToNodeEncryptionEnabled": true, diff --git a/deployment/cdk/opensearch-service-migration/lib/stack-composer.ts b/deployment/cdk/opensearch-service-migration/lib/stack-composer.ts index bb827a59e..a96b3b739 100644 --- a/deployment/cdk/opensearch-service-migration/lib/stack-composer.ts +++ b/deployment/cdk/opensearch-service-migration/lib/stack-composer.ts @@ -214,7 +214,7 @@ export class StackComposer { const sourceClusterDisabledField = this.getContextForType('sourceClusterDisabled', 'boolean', defaultValues, contextJSON) const sourceClusterEndpointField = this.getContextForType('sourceClusterEndpoint', 'string', defaultValues, contextJSON) let sourceClusterDefinition = this.getContextForType('sourceCluster', 'object', defaultValues, contextJSON) - + if (!sourceClusterDefinition && (sourceClusterEndpointField || sourceClusterDisabledField)) { console.warn("`sourceClusterDisabled` and `sourceClusterEndpoint` are being deprecated in favor of a `sourceCluster` object.") console.warn("Please update your CDK context block to use the `sourceCluster` object.") @@ -261,11 +261,11 @@ export class StackComposer { 
"and in this case, `targetCluster` was provided to define an existing target cluster." ) } - + const targetClusterAuth = targetCluster?.auth const targetVersion = this.getEngineVersion(targetCluster?.version || engineVersion) - const requiredFields: { [key: string]: any; } = {"stage":stage, "domainName":domainName} + const requiredFields: { [key: string]: any; } = {"stage":stage} for (let key in requiredFields) { if (!requiredFields[key]) { throw new Error(`Required CDK context field ${key} is not present`) @@ -274,6 +274,10 @@ export class StackComposer { if (addOnMigrationDeployId && vpcId) { console.warn("Addon deployments will use the original deployment 'vpcId' regardless of passed 'vpcId' values") } + if (stage.length > 15) { + throw new Error(`Maximum allowed stage name length is 15 characters but received ${stage}`) + } + const clusterDomainName = domainName ? domainName : `os-cluster-${stage}` let preexistingOrContainerTargetEndpoint if (targetCluster && osContainerServiceEnabled) { throw new Error("The following options are mutually exclusive as only one target cluster can be specified for a given deployment: [targetCluster, osContainerServiceEnabled]") @@ -348,7 +352,7 @@ export class StackComposer { if (!preexistingOrContainerTargetEndpoint) { openSearchStack = new OpenSearchDomainStack(scope, `openSearchDomainStack-${deployId}`, { version: targetVersion, - domainName: domainName, + domainName: clusterDomainName, dataNodeInstanceType: dataNodeType, dataNodes: dataNodeCount, dedicatedManagerNodeType: dedicatedManagerNodeType, diff --git a/docs/TrafficCaptureAndReplayDesign.md b/docs/TrafficCaptureAndReplayDesign.md new file mode 100644 index 000000000..02eeafa03 --- /dev/null +++ b/docs/TrafficCaptureAndReplayDesign.md @@ -0,0 +1,271 @@ +# Traffic Capture and Replay + +## Overview + +Two main components support cluster mirroring. The first component is the Capture Proxy, which relays network traffic +from HTTP requests to a source cluster into a durable, scalable stream. The second component, the Traffic Replayer, +replicates the traffic observed by the Proxy onto a target cluster. In this case mirroring does three things. + +1. Illustrates differences in behavior between the source and target clusters. +2. Stresses the target cluster very similarly to the source cluster. +3. It keeps a target’s documents and metadata in sync with a source cluster. + +Data is buffered through Kafka from the proxy to the replayer. The components also send metrics and traces to an +otel-collector, which is deployed as a sidecar, which in turn publishes instrumentation. + +Here are the main steps to synchronize a target cluster from a source cluster: + +1. Traffic is directed to the existing cluster, reaching each coordinator node. +2. A Capture Proxy is added in front of the coordinator nodes in the cluster, allowing for traffic capture and storage. + (see [here](./ClientTrafficSwinging.md) for details about how to use an ALB to do this). +3. A historical backfill is triggered to synchronize the documents in the target from the source as it was at some + point in time. That point in time will/must be after all traffic has been captured. +4. Following the backfill, the Traffic Replayer begins replaying the captured traffic to the target cluster. +5. The user evaluates the differences between source and target responses. +6. After confirming that the new cluster's functionality meets expectations, the target server is ready to become the + new cluster. 
Note that the target continues to be synchronized from the replayer. If customers are especially + concerned about greater durations where results may be inconsistent due to lag between the source cluster and the + target cluster (ideally this is around 1s), the capture proxy could reject modifications, forcing clients to + resend shortly, allowing the target cluster to pick up those modifications as the target fleet replaces the source + fleet. + +## Capture Proxy + +The Capture Proxy terminates TLS and replicates the decrypted read/write streams to Kafka as they arrive. Since the +Capture Proxy is handling data on the critical path for the source cluster, the proxy is designed to offload data as +efficiently as possible to minimize the proxy’s impact on overall performance (latency, load, etc.). The Capture Proxy +parses its TLS configuration the same way as OpenSearch, from a yaml config, with the same keys. + +The proxy is expected to supplant the original source cluster endpoint so that clients can continue to operate without +any changes. One way to accomplish that is to install a proxy alongside the source cluster’s coordinating nodes and +shift the coordinating nodes’ configuration to use a port bound only to the loopback address and likely without TLS, +as encrypting local traffic with TLS is expensive and unnecessary. Another approach is +described [here](./ClientTrafficSwinging.md). + +The proxy can also be deployed on standalone hardware. However, two caveats remain. + +* The proxy is only designed to proxy traffic for a single destination. If that destination is a large number of nodes + with a load balancer, any number of proxies that are necessary to support the traffic load can be set up and all will + send traffic through the nodes that the load balancer is using. +* The second caveat to installing the proxy on separate hardware is that the infrastructure will need to change and + traffic will need to be routed exclusively through the proxy, which itself is more infrastructure to change. This will + also increase latency for all traffic over a colocated solution. + +### TLS + +In order for the proxy to write data that can be replayed and used for comparison, the request and response data must be +stored so that HTTP messages can be reconstructed at a later point in time. If an existing client and server (cluster) +are using TLS to transfer data, that data will first be decrypted before being offloaded. When using Amazon Managed +Streaming for Apache Kafka, AWS Authentication is used, data is sent via TLS, and it is stored in an encrypted format. + +### Additional Impact + +In addition to the impact incurred from TLS decrypting and encrypting, there may be a significant impact to network +load. This solution assumes that the network has enough capacity to double the network traffic, albeit to different +destinations. The proxy doesn’t compress traffic because many requests and responses may already be compressed. + +If a PUT or any other mutating call is dropped from the replay, it could have a long-lasting and irreversible impact on +all future results. Because of that, the Capture Proxy parses the incoming stream as HTTP messages to determine the +importance of an HTTP request. All GET traffic is immediately forwarded to the source cluster while data is +asynchronously offloaded to Kafka. Mutating requests such as PUT, POST, DELETE, PATCH, etc. are handled more carefully.
+The Capture Proxy makes certain that all mutating requests have been committed to Kafka before the request is fully +‘released’ and sent to the source cluster. This behavior means that GET traffic should flow through the system without +being impacted by the latency of calls to Kafka. However, mutating requests will be impacted. Clients that have made +those requests will not receive a response or will not be able to make another request until all prior offloaded traffic +for the connection has been committed (which could include offloading previous GET requests that had been sent on the +same connection). + +That guarantees that no mutating request was sent to the source without first being committed to Kafka. However, it also +means that a request could be committed to Kafka without ever being sent and handled by the downstream service. Requests +that are suspected of not being processed (or fully processed) by the source cluster are detectable by the Capture +Proxy. Those requests will be missing a response. Notice that since a response may not be fully returned in +every case where its request was handled, there may be other cases where a mutating request DID succeed on the source +cluster but no response is present. The Capture Proxy doesn’t yet reconcile which of these requests have likely +succeeded and which have failed. However, in practice, many real-world examples would have retried the failed request, +resulting in a received response. + +All network activity is asynchronously data-driven, using the same framework (netty) that Elasticsearch, OpenSearch, and +many other projects use. Using that same framework also mitigates some risk that HTTP could be parsed differently by the +source and the proxy. + +### Protocol + +Captured data is organized into TrafficObservations (Read, Write, Close, etc.) that have timestamps and are organized +into larger “TrafficStream” objects which are written as records to Kafka. These observations are serialized +as [Protobuf](../TrafficCapture/captureProtobufs/src/main/proto/TrafficCaptureStream.proto) wrappers to the raw bytes +that were received or sent by the Proxy sans TLS. TrafficStream objects are organized by connection, with each socket +connection represented by a sequence of TrafficStreams, which will have TrafficObservations for that connection only. +Those TrafficStreams are flushed to Kafka after buffering or after a mutating request has been received. Concurrent +connections will have TrafficStream objects that are interleaved with each other. Each TrafficStream will have its own +respective connectionId, which is a globally unique id for that connection. A unique nodeId is also included for +diagnostics and future Kafka partitioning. + +## Traffic Replayer + +The Traffic Replayer parses the Protobuf encoded objects recorded by the proxy; reconstructs them into HTTP requests; +sends them to the target cluster; and records the responses alongside the request and the traffic from the original +source interaction. + +The Traffic Replayer must group TrafficStreams by their connectionIds into reconstituted TCP streams. Individual +requests are parsed and sent through a processing pipeline that rewrites the request as necessary, then schedules and +sends the requests to match the source time intervals. Responses are aggregated along with the request and source +messages.
Following that, the TrafficStream objects are committed from the Kafka topic so that they are not processed +again by a future or concurrently running Replayer. + +### Message Transformation + +The reassembly process of the captured traffic is careful to preserve timestamps of the originally observed traffic. +Once all the bytes for a request have been accumulated, the bytes are sent through a netty pipeline for transformation. +As per configuration, this processing may include rewriting headers, such as the Host value, changing +User-Authentication, and transforming the contents of the payload. The TrafficReplayer parses the original source +request traffic stream into a Map object with the headers and payload of the original message in a json-friendly +key-value structure. That Map object is passed through an IJsonTransformer object that may rewrite the request by +altering headers or the body. To minimize unnecessary and expensive operations, the netty pipeline parses the HTTP +headers first and runs the transformation before the pipeline has been fully configured. If, based upon the headers only, +the transformation did not attempt to access the payload, the pipeline won’t be configured to parse the json from the +body of the message. The pipeline will attempt to set up as few handlers as possible to eliminate unnecessary (de)compression and repackaging. + +When the initial transformation has attempted to access the payload, the entire message needs to be transformed. In that +case, netty handlers are attached to do all of the work required to parse an HTTP payload into a json-like Map (HTTP +chunking, decompression, json parsing, followed by compression, etc.). Generally, the handlers stream data as much as +possible so that efficiency can be maximized. + +At the end of the transformation pipeline, a new sequence of network buffers has been formatted and is ready to be sent +to the target server. The shape and pacing of the sequence of buffers should closely match that of the original +sequence. In other words, if the source got 120 bytes with one byte per second, the target request will also get 120 +parts over 120 seconds. + +In some cases, the pipeline may be unable to parse a message, or the message might not require any rewrite. In those +cases, the parsing of the current request is unwound and the request is sent to the target cluster exactly as it was +sent to the source. The response will be handled like any response for a fully transformed message, though the final metadata will +show whether transformation of the request was skipped or whether that was due to an error. + +This message transformation also includes rewriting authorization headers. In the Basic-Auth case, that rewrite will +only involve the headers. If there were no other transformations, the body of the content will not need to be parsed. +However, if the authorization scheme being used is AWS Auth (SigV4), a handler to parse the body will be added to the +pipeline alongside mechanisms to fully consume the contents so that the signature can be accurately computed. + +#### Auth Caveat + +The Replayer doesn’t have a way to determine the validity of the incoming messages. It doesn’t have the HTTP basic-auth +passwords, nor does it have access to the public keys used by SigV4. This creates a significant security issue that +currently diminishes the value of auth for OpenSearch clusters.
Currently, all requests, regardless of whether they were +validly or maliciously signed, will be rewritten with the same auth headers as per the configuration. We can’t leverage +the responses to determine validity since there will be some changes that the replayer must run even though there was no +response present. + +#### User Transformations + +Users may specify what Transformation to run by providing a jar file that can load an implementation of the +IJsonTransformer class via Java’s ServiceLoader. As described in the Message Transformation section, the complexities of +parsing HTTP messages are abstracted away. A transformer can switch off of URI paths, headers, and run sophisticated +json remapping by pulling in libraries such as Jackson, GSON, or json manipulation packages like Jolt or JMESPath. As +progress continues, additional transformations will be developed to accomplish the required transformations between +versions, plugins, etc. more easily. + +### Timing + +The Replayer may be started long after the Capture Proxy has begun recording traffic. Recall that the Replayer tries to +match the client request traffic exactly as it was received by the source cluster so that, when comparing results for +any request, both clusters would have been undergoing the same stresses. To do that, the Traffic Replayer +manages its own sense of time to send requests to the target. It shifts the original requests’ timestamps uniformly so +that delays between each observation and request can also be preserved. + +A replay will generally start at the beginning of the captured stream and it will fix the current time to the time of +the first interaction. For users that would like to catch up or stress test the system, the Replayer’s time mapping +function can include a speedup factor (F) so that something that happened N seconds after the initial observation will +be scheduled N/F seconds after the Replayer has started. This functionality is managed by a TimeShifter class that is +effectively just a function that maps scalar values after some initialization. Timing values can be controlled via +command line parameters. + +That timing drives much of the rest of the TrafficReplayer. When a request is fully reconstructed, the message +transformation work is *scheduled* to be done just before it would be scheduled to be sent. That’s to guarantee that +temporally sensitive values like SigV4 signatures won’t go stale. It also keeps more data within the same thread, making +for less contention (cache invalidations) and allows for simpler code. + +### Sending Requests + +Like the Capture Proxy and the transformation pipelines described above, requests are sent via netty. Netty’s +architecture allows for a large number of requests to be handled concurrently through cooperative multitasking. While +that requires code to be data-driven and to never block, it also affords the use of simple data structures that need not +be thread-safe across multiple threads. Several classes are designed to only run from a single thread. Those will be +initialized for each worker thread that netty spins up (which can be specified on the command line). + +Netty manages connections to the target cluster within its own EventLoops. An EventLoop within the TrafficReplayer will +have a connection to the target service, which may break and need to be reestablished. However, that same EventLoop (and +its Thread) will be affiliated with its connection for the lifetime of the connection.
That lifetime will be terminated +either when the connection’s TrafficStream has encountered a “Close” observation OR if no observations have been +encountered in a period of time and they are expired by the accumulation phase. + +Each connection’s EventLoop and Channel are grouped within a ConnectionReplaySession. This session object also includes +data to schedule the interactions for transforming requests, sending them, and closing connections. The schedules are +maintained as a key-value map from the time that an operation should happen, post time shifting (so in real-time, not +source time). As work items are completed for a connection’s session, the next item is pulled from the schedule if it is +ready to run or a timer is set on the EventLoop to rerun it when appropriate. Because there are multiple interactions +scheduled within each connection’s session and each is run sequentially and exclusively, the actual times that +interactions occur could drift. For example, assume the source service took 5 seconds to service each of 6 requests +sequentially on one socket connection that was kept alive. If the target service takes 10 seconds to service each +request, the target will take 60 seconds to run those 6 interactions instead of 30 from the source cluster. + +However, if a source cluster had the same interactions and latencies for 6 requests BUT sent them on separate +connections, the total time to send all the requests to the target service could be 35 seconds, since requests would +overlap by 5 seconds. Currently, schedules are isolated to a given connection. Since a connection is defined by a +sequence of requests, we must wait for the previous requests to finish before proceeding. Without the connection +isolation though, requests will be sent without those constraints. +***This does create a caveat that if the target cluster cannot keep up with the pacing of the source cluster, the +Replayer may not be able to match the timing patterns that the source cluster experienced.*** + +### Throttling + +The Traffic Replayer has a TrafficCaptureSource that it uses as an abstraction over a Kafka topic. If the Kafka topic +has days of data, it’s critical that the Replayer consume and hold only the data that is necessary to effectively send +the requests as per the above constraints. The Replayer needs to throttle its Kafka consumption so that it can keep its +memory footprint manageable. To figure out what’s necessary, the Replayer needs to consume enough data so that it can +reconstruct the TrafficStreams into requests. + +Within a user-specified window, we expect to either accumulate more observations for a TrafficStream or expire it and +give up. That prevents leaking memory within the Replayer when Proxies had faults that prevented them from sending close +events for streams. That limit is independent of how far ahead in the recorded stream the Replayer should advance +while consuming input. Setting that backpressure ‘lookahead’ limit to a multiple of the expiration window provides a +backpressure to limit memory. However, those two limits still don’t put a total bound on the peak amount of memory +required for the Replayer. Such a bound would be problematic as there could be an unlimited number of simultaneous +connections with ongoing traffic. If a bound were enforced, progress may not be made on accumulating all of the +requests, trading an Out-of-Memory situation for a deadlocked replayer.
The right solution in those cases is to scale +the Replayer to use more memory, either vertically or horizontally. Right-sizing in these cases will be an exercise in +understanding peak load to the source cluster and/or trial and error for provisioning the Traffic Replayer. + +The TrafficReplayer may try to send too many requests simultaneously. This will happen if there aren’t enough ports +available on the fleet of Replayers to support the number of simultaneous connections in use. This can occur when the number +of Replayers is much smaller than the proxy fleet or if the time speedup factor was high on a modestly sized cluster. In +these cases, an additional throttling mechanism is required to restrict how many simultaneous connections can be made. A +command line option is available to limit how many requests can be in progress at any point in time. When a request has +been reconstructed, if the Replayer has already saturated the total number of requests that it can handle, the new +request will be blocked from sending until other requests have finished. Notice that this is currently bound to the +number of requests, not connections, though a connection will handle only one request at a time, so this is a rough +substitute for the number of connections. + +Customers that need to achieve higher throughputs can scale the solution horizontally. + +### Horizontal Scaling + +The Capture Proxy writes observations to a Kafka topic. Given that Kafka can accept writes from many clients at high +throughputs, the Proxy can easily scale to any number of machines. Like the Proxy, the Replayer simply consumes from +Kafka and Kafka can support many consumers. Since connections are the lowest level atomic groupings and a single client +and server at a prior point in time were already handling that connection, by scaling the Kafka topic for the +appropriate number of Proxies and Replayers, we can handle arbitrarily large loads. + +However, the scaling factor for Kafka is the number of partitions. Those partitions will need to be set up to key off of +the nodeId or the connectionId. The partition count itself will need to be set at the time that a topic is created so +that traffic for a given connection never gets remapped while a connection’s observations are being written to the +topic. + +### Outputting Results + +The Traffic Replayer uses log4j2 for its application logs. It also uses Log4j2 for some of its other output, +including logs destined for metrics and the results of the source and target traffic interactions. Care should be taken +when adjusting the log4j2.properties files. + +The results are logged not just through log4j2 but also to a file, whose location is provided by a command line parameter. +This result output will be a stream of json formatted objects with the source/target requests/responses. Those will +include headers, timestamps, and the full bodies base64 encoded. diff --git a/jenkins/migrationIntegPipelines/rfsBackfillE2EPipeline.groovy b/jenkins/migrationIntegPipelines/rfsBackfillE2EPipeline.groovy deleted file mode 100644 index a4e9038ca..000000000 --- a/jenkins/migrationIntegPipelines/rfsBackfillE2EPipeline.groovy +++ /dev/null @@ -1,78 +0,0 @@ -// Note: -// 1. There is a still a manual step needed on the EC2 source load balancer to replace its security group rule which allows all traffic (0.0.0.0/0) to -// allow traffic for the relevant service security group. This needs a better story around accepting user security groups in our Migration CDK.
- -def sourceContextId = 'source-single-node-ec2' -def migrationContextId = 'migration-rfs' -// These default values should only be used on the initial Jenkins run in order to load parameter options into the UI, -// all future runs should use the specified parameters -def gitBranch = params.GIT_BRANCH ?: 'main' -def gitUrl = params.GIT_REPO_URL ?: 'https://github.com/opensearch-project/opensearch-migrations.git' -def source_cdk_context = """ - { - "source-single-node-ec2": { - "suffix": "ec2-source-", - "networkStackSuffix": "ec2-source-", - "distVersion": "7.10.2", - "distributionUrl": "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.10.2-linux-x86_64.tar.gz", - "captureProxyEnabled": false, - "securityDisabled": true, - "minDistribution": false, - "cpuArch": "x64", - "isInternal": true, - "singleNodeCluster": true, - "networkAvailabilityZones": 2, - "dataNodeCount": 1, - "managerNodeCount": 0, - "serverAccessType": "ipv4", - "restrictServerAccessTo": "0.0.0.0/0" - } - } -""" -def migration_cdk_context = """ - { - "migration-rfs": { - "stage": "", - "vpcId": "", - "engineVersion": "OS_2.11", - "domainName": "os-cluster-", - "dataNodeCount": 2, - "openAccessPolicyEnabled": true, - "domainRemovalPolicy": "DESTROY", - "artifactBucketRemovalPolicy": "DESTROY", - "trafficReplayerServiceEnabled": false, - "reindexFromSnapshotServiceEnabled": true, - "sourceClusterEndpoint": "" - } - } -""" - -library identifier: "migrations-lib@${gitBranch}", retriever: modernSCM( - [$class: 'GitSCMSource', - remote: "${gitUrl}"]) - -defaultIntegPipeline( - sourceContext: source_cdk_context, - migrationContext: migration_cdk_context, - sourceContextId: sourceContextId, - migrationContextId: migrationContextId, - defaultStageId: 'rfs-integ', - skipCaptureProxyOnNodeSetup: true, - integTestStep: { - def time = new Date().getTime() - def uniqueId = "integ_min_${time}_${currentBuild.number}" - def test_dir = "/root/lib/integ_test/integ_test" - def test_result_file = "${test_dir}/reports/${uniqueId}/report.xml" - def command = "pipenv run pytest --log-file=${test_dir}/reports/${uniqueId}/pytest.log " + - "--junitxml=${test_result_file} ${test_dir}/backfill_tests.py " + - "--unique_id ${uniqueId} " + - "-s" - withCredentials([string(credentialsId: 'migrations-test-account-id', variable: 'MIGRATIONS_TEST_ACCOUNT_ID')]) { - withAWS(role: 'JenkinsDeploymentRole', roleAccount: "${MIGRATIONS_TEST_ACCOUNT_ID}", duration: 3600, roleSessionName: 'jenkins-session') { - sh "sudo --preserve-env ./awsRunIntegTests.sh --command '${command}' " + - "--test-result-file ${test_result_file} " + - "--stage ${params.STAGE}" - } - } - } -) diff --git a/jenkins/migrationIntegPipelines/rfsDefaultE2ETestCover.groovy b/jenkins/migrationIntegPipelines/rfsDefaultE2ETestCover.groovy new file mode 100644 index 000000000..58ed79a0e --- /dev/null +++ b/jenkins/migrationIntegPipelines/rfsDefaultE2ETestCover.groovy @@ -0,0 +1,9 @@ +def gitBranch = params.GIT_BRANCH ?: 'main' +def gitUrl = params.GIT_REPO_URL ?: 'https://github.com/opensearch-project/opensearch-migrations.git' + +library identifier: "migrations-lib@${gitBranch}", retriever: modernSCM( + [$class: 'GitSCMSource', + remote: "${gitUrl}"]) + +// Shared library function (location from root: vars/rfsDefaultE2ETest.groovy) +rfsDefaultE2ETest() diff --git a/jenkins/migrationIntegPipelines/trafficReplayDefaultE2ETestCover.groovy b/jenkins/migrationIntegPipelines/trafficReplayDefaultE2ETestCover.groovy new file mode 100644 index 000000000..4a8ddc7e0 --- /dev/null +++ 
b/jenkins/migrationIntegPipelines/trafficReplayDefaultE2ETestCover.groovy @@ -0,0 +1,9 @@ +def gitBranch = params.GIT_BRANCH ?: 'main' +def gitUrl = params.GIT_REPO_URL ?: 'https://github.com/opensearch-project/opensearch-migrations.git' + +library identifier: "migrations-lib@${gitBranch}", retriever: modernSCM( + [$class: 'GitSCMSource', + remote: "${gitUrl}"]) + +// Shared library function (location from root: vars/trafficReplayDefaultE2ETest.groovy) +trafficReplayDefaultE2ETest() diff --git a/testHelperFixtures/build.gradle b/testHelperFixtures/build.gradle index 70b0fd822..e5b3ab7ed 100644 --- a/testHelperFixtures/build.gradle +++ b/testHelperFixtures/build.gradle @@ -35,6 +35,7 @@ dependencies { testFixturesImplementation group: 'org.bouncycastle', name: 'bcprov-jdk18on' testFixturesImplementation group: 'org.bouncycastle', name: 'bcpkix-jdk18on' testFixturesImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api' + testFixturesImplementation group: 'org.hamcrest', name: 'hamcrest' testFixturesImplementation group: 'org.slf4j', name: 'slf4j-api' testFixturesApi group: 'org.testcontainers', name: 'testcontainers' testFixturesApi group: 'org.testcontainers', name: 'toxiproxy' diff --git a/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/matchers/ContainsStringCount.java b/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/matchers/ContainsStringCount.java new file mode 100644 index 000000000..0cec5c714 --- /dev/null +++ b/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/matchers/ContainsStringCount.java @@ -0,0 +1,35 @@ +package org.opensearch.migrations.matchers; + +import org.hamcrest.Description; +import org.hamcrest.TypeSafeMatcher; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public class ContainsStringCount extends TypeSafeMatcher<String> { + private final String expectedString; + private final int expectedCount; + + @Override + public void describeTo(Description description) { + description.appendText("a string containing '" + expectedString + "' " + expectedCount + " times"); + } + + @Override + protected void describeMismatchSafely(String item, Description mismatchDescription) { + mismatchDescription.appendText("was found " + containsStringCount(item) + " times"); + } + + @Override + protected boolean matchesSafely(String item) { + return containsStringCount(item) == expectedCount; + } + + private int containsStringCount(String item) { + return item == null ?
0 : item.split(expectedString, -1).length - 1; + } + + public static ContainsStringCount containsStringCount(String s, int n) { + return new ContainsStringCount(s, n); + } +} diff --git a/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/matchers/HasLineCount.java b/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/matchers/HasLineCount.java new file mode 100644 index 000000000..09203d8b1 --- /dev/null +++ b/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/matchers/HasLineCount.java @@ -0,0 +1,34 @@ +package org.opensearch.migrations.matchers; + +import org.hamcrest.Description; +import org.hamcrest.TypeSafeMatcher; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public class HasLineCount extends TypeSafeMatcher<String> { + private int expectedLineCount; + + @Override + public void describeTo(Description description) { + description.appendText("a string with " + expectedLineCount + " lines"); + } + + @Override + protected void describeMismatchSafely(String item, Description mismatchDescription) { + mismatchDescription.appendText("was a string with " + item.split(System.lineSeparator()).length + " lines"); + } + + @Override + protected boolean matchesSafely(String item) { + return newlineCount(item) == expectedLineCount; + } + + private int newlineCount(String item) { + return item == null ? 0 : item.split("\n").length; + } + + public static HasLineCount hasLineCount(int n) { + return new HasLineCount(n); + } +} diff --git a/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/testutils/SharedDockerImageNames.java b/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/testutils/SharedDockerImageNames.java new file mode 100644 index 000000000..cd5fc5880 --- /dev/null +++ b/testHelperFixtures/src/testFixtures/java/org/opensearch/migrations/testutils/SharedDockerImageNames.java @@ -0,0 +1,9 @@ +package org.opensearch.migrations.testutils; + +import org.testcontainers.utility.DockerImageName; + +public interface SharedDockerImageNames { + DockerImageName KAFKA = DockerImageName.parse("confluentinc/cp-kafka:7.5.0"); + DockerImageName HTTPD = DockerImageName.parse("httpd:alpine"); + +} diff --git a/transformation/src/main/java/org/opensearch/migrations/Flavor.java b/transformation/src/main/java/org/opensearch/migrations/Flavor.java index 151d1b7fa..f835c92dc 100644 --- a/transformation/src/main/java/org/opensearch/migrations/Flavor.java +++ b/transformation/src/main/java/org/opensearch/migrations/Flavor.java @@ -6,8 +6,8 @@ @RequiredArgsConstructor @Getter public enum Flavor { - Elasticsearch("ES"), - OpenSearch("OS"); + ELASTICSEARCH("ES"), + OPENSEARCH("OS"); final String shorthand; } diff --git a/transformation/src/main/java/org/opensearch/migrations/Version.java b/transformation/src/main/java/org/opensearch/migrations/Version.java index ec11678b5..329a42e78 100644 --- a/transformation/src/main/java/org/opensearch/migrations/Version.java +++ b/transformation/src/main/java/org/opensearch/migrations/Version.java @@ -1,5 +1,7 @@ package org.opensearch.migrations; +import java.util.Arrays; + import lombok.Builder; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -23,20 +25,20 @@ public static Version fromString(final String raw) throws RuntimeException { var builder = Version.builder(); var remainingString = raw.toLowerCase(); - for (var flavor : Flavor.values()) { - if (remainingString.startsWith(flavor.name().toLowerCase())) { - remainingString = remainingString.substring(flavor.name().length()); -
builder.flavor(flavor); - break; - } else if (remainingString.startsWith(flavor.shorthand.toLowerCase())) { - remainingString = remainingString.substring(flavor.shorthand.length()); - builder.flavor(flavor); - break; - } - } + var finalRemainingString = remainingString; + var matchedFlavor = Arrays.stream(Flavor.values()) + .filter(flavor -> finalRemainingString.startsWith(flavor.name().toLowerCase()) || + finalRemainingString.startsWith(flavor.shorthand.toLowerCase())) + .findFirst(); - if (remainingString.equals(raw.toLowerCase())) { - throw new RuntimeException("Unable to determine build flavor from '" + raw +"'"); + if (matchedFlavor.isPresent()) { + Flavor flavor = matchedFlavor.get(); + remainingString = remainingString.startsWith(flavor.name().toLowerCase()) ? + remainingString.substring(flavor.name().length()) : + remainingString.substring(flavor.shorthand.length()); + builder.flavor(flavor); + } else { + throw new IllegalArgumentException("Unable to determine build flavor from '" + raw +"'"); } try { @@ -59,7 +61,7 @@ public static Version fromString(final String raw) throws RuntimeException { } return builder.build(); } catch (Exception e) { - throw new RuntimeException("Unable to parse version numbers from the string '" + raw + "'\r\n", e); + throw new IllegalArgumentException("Unable to parse version numbers from the string '" + raw + "'\r\n", e); } } } diff --git a/transformation/src/main/java/org/opensearch/migrations/VersionMatchers.java b/transformation/src/main/java/org/opensearch/migrations/VersionMatchers.java index 4f9efc7c8..6ebc77fc1 100644 --- a/transformation/src/main/java/org/opensearch/migrations/VersionMatchers.java +++ b/transformation/src/main/java/org/opensearch/migrations/VersionMatchers.java @@ -26,18 +26,14 @@ private static Predicate<Version> matchesMajorVersion(final Version version) { } private static Predicate<Version> matchesMinorVersion(final Version version) { - return other -> { - return matchesMajorVersion(version) - .and(other2 -> version.getMinor() == other2.getMinor()) - .test(other); - }; + return other -> matchesMajorVersion(version) + .and(other2 -> version.getMinor() == other2.getMinor()) + .test(other); } private static Predicate<Version> equalOrGreaterThanMinorVersion(final Version version) { - return other -> { - return matchesMajorVersion(version) - .and(other2 -> version.getMinor() <= other2.getMinor()) - .test(other); - }; + return other -> matchesMajorVersion(version) + .and(other2 -> version.getMinor() <= other2.getMinor()) + .test(other); } } diff --git a/transformation/src/main/java/org/opensearch/migrations/transformation/CanApplyResult.java b/transformation/src/main/java/org/opensearch/migrations/transformation/CanApplyResult.java index 8889046f1..a1cdac073 100644 --- a/transformation/src/main/java/org/opensearch/migrations/transformation/CanApplyResult.java +++ b/transformation/src/main/java/org/opensearch/migrations/transformation/CanApplyResult.java @@ -7,8 +7,9 @@ * The result after checking if a transformer can be applied to an entity */ public abstract class CanApplyResult { - public final static CanApplyResult YES = new Yes(); - public final static CanApplyResult NO = new No(); + private CanApplyResult() {} + public static final CanApplyResult YES = new Yes(); + public static final CanApplyResult NO = new No(); /** Yes, the transformation can be applied */ public static final class Yes extends CanApplyResult {} diff --git a/transformation/src/main/java/org/opensearch/migrations/transformation/entity/Entity.java
b/transformation/src/main/java/org/opensearch/migrations/transformation/entity/Entity.java index 542d3e965..47161001d 100644 --- a/transformation/src/main/java/org/opensearch/migrations/transformation/entity/Entity.java +++ b/transformation/src/main/java/org/opensearch/migrations/transformation/entity/Entity.java @@ -15,5 +15,5 @@ public interface Entity { /** * Gets the underlying entity as an ObjectNode, supports read and write operations */ - ObjectNode rawJson(); + ObjectNode getRawJson(); } diff --git a/transformation/src/main/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemoval.java b/transformation/src/main/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemoval.java index 77aab8c67..5ada91268 100644 --- a/transformation/src/main/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemoval.java +++ b/transformation/src/main/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemoval.java @@ -11,7 +11,7 @@ /** * Supports transformation of the Index Mapping types that were changed from mutliple types to a single type between ES 6 to ES 7 - * + * * Example: * Starting state (ES 6): * { @@ -26,7 +26,7 @@ * } * ] * } - * + * * Ending state (ES 7): * { * "mappings": { * "properties": { @@ -39,30 +39,32 @@ */ public class IndexMappingTypeRemoval implements TransformationRule<Index> { + public static final String MAPPINGS_KEY = "mappings"; + @Override public CanApplyResult canApply(final Index index) { - final var mappingNode = index.rawJson().get("mappings"); + final var mappingNode = index.getRawJson().get(MAPPINGS_KEY); if (mappingNode == null) { return CanApplyResult.NO; } - // Detect unsupported multiple type mappings, eg: - // { "mappings": [{ "foo": {...}}, { "bar": {...} }] } - // { "mappings": [{ "foo": {...}, "bar": {...} }] } - if (mappingNode.isArray()) { - if (mappingNode.size() > 1 || mappingNode.get(0).size() > 1) { - return new Unsupported("Multiple mapping types are not supported"); - } + + // Detect unsupported multiple type mappings: + // 1.
{"mappings": [{ "foo": {...} }, { "bar": {...} }]}
+ // 2.
{"mappings": [{ "foo": {...}, "bar": {...}  }]}
+ if (mappingNode.isArray() && (mappingNode.size() > 1 || mappingNode.get(0).size() > 1)) { + return new Unsupported("Multiple mapping types are not supported"); } - // Detect if there is no intermediate type node - // { "mappings": { "_doc": { "properties": { } } } } + // Check for absence of intermediate type node + // 1.
{"mappings": {"properties": {...} }}
if (mappingNode.isObject() && mappingNode.get("properties") != null) { return CanApplyResult.NO; } - // There is a type under mappings, e.g. { "mappings": [{ "foo": {...} }] } + // There is a type under mappings + // 1.
{ "mappings": [{ "foo": {...} }] }
return CanApplyResult.YES; } @@ -72,7 +74,7 @@ public boolean applyTransformation(final Index index) { return false; } - final var mappingsNode = index.rawJson().get("mappings"); + final var mappingsNode = index.getRawJson().get(MAPPINGS_KEY); // Handle array case if (mappingsNode.isArray()) { final var mappingsInnerNode = (ObjectNode) mappingsNode.get(0); @@ -82,7 +84,7 @@ public boolean applyTransformation(final Index index) { mappingsInnerNode.remove(typeName); typeNode.fields().forEachRemaining(node -> mappingsInnerNode.set(node.getKey(), node.getValue())); - index.rawJson().set("mappings", mappingsInnerNode); + index.getRawJson().set(MAPPINGS_KEY, mappingsInnerNode); } if (mappingsNode.isObject()) { diff --git a/transformation/src/test/java/org/opensearch/migrations/VersionTest.java b/transformation/src/test/java/org/opensearch/migrations/VersionTest.java index 17b0150d7..62f936224 100644 --- a/transformation/src/test/java/org/opensearch/migrations/VersionTest.java +++ b/transformation/src/test/java/org/opensearch/migrations/VersionTest.java @@ -10,7 +10,7 @@ public class VersionTest { @Test void fromString() throws ParseException { - var expected = Version.builder().flavor(Flavor.OpenSearch).major(1).minor(3).patch(18).build(); + var expected = Version.builder().flavor(Flavor.OPENSEARCH).major(1).minor(3).patch(18).build(); assertThat(Version.fromString("OpenSearch 1.3.18"), equalTo(expected)); assertThat(Version.fromString("Opensearch 1.3.18"), equalTo(expected)); assertThat(Version.fromString("Opensearch 1.3.18"), equalTo(expected)); @@ -23,7 +23,7 @@ void fromString() throws ParseException { @Test void fromString_defaultPatch() throws ParseException { - var expected = Version.builder().flavor(Flavor.OpenSearch).major(1).minor(3).patch(0).build(); + var expected = Version.builder().flavor(Flavor.OPENSEARCH).major(1).minor(3).patch(0).build(); assertThat(Version.fromString("OpenSearch 1.3.0"), equalTo(expected)); assertThat(Version.fromString("OpenSearch 1.3.x"), equalTo(expected)); assertThat(Version.fromString("OpenSearch 1.3"), equalTo(expected)); @@ -31,7 +31,7 @@ void fromString_defaultPatch() throws ParseException { @Test void fromString_defaultMinor() throws ParseException { - var expected = Version.builder().flavor(Flavor.OpenSearch).major(1).minor(0).patch(0).build(); + var expected = Version.builder().flavor(Flavor.OPENSEARCH).major(1).minor(0).patch(0).build(); assertThat(Version.fromString("OpenSearch 1.0.0"), equalTo(expected)); assertThat(Version.fromString("OpenSearch 1.0"), equalTo(expected)); assertThat(Version.fromString("OpenSearch 1.x.x"), equalTo(expected)); diff --git a/transformation/src/test/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemovalTest.java b/transformation/src/test/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemovalTest.java index 1cda71c03..3d9e77068 100644 --- a/transformation/src/test/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemovalTest.java +++ b/transformation/src/test/java/org/opensearch/migrations/transformation/rules/IndexMappingTypeRemovalTest.java @@ -102,14 +102,14 @@ public ObjectNode indexSettingJson(final String mappingSection) { private CanApplyResult canApply(final ObjectNode indexJson) { var transformer = new IndexMappingTypeRemoval(); var index = mock(Index.class); - Mockito.when(index.rawJson()).thenReturn(indexJson); + Mockito.when(index.getRawJson()).thenReturn(indexJson); return transformer.canApply(index); } private boolean applyTransformation(final 
ObjectNode indexJson) { var transformer = new IndexMappingTypeRemoval(); var index = mock(Index.class); - Mockito.when(index.rawJson()).thenReturn(indexJson); + Mockito.when(index.getRawJson()).thenReturn(indexJson); log.atInfo().setMessage("Original\n{}").addArgument(indexJson.toPrettyString()).log(); var wasChanged = transformer.applyTransformation(index); diff --git a/vars/defaultIntegPipeline.groovy b/vars/defaultIntegPipeline.groovy index cc67ac6f0..548370d58 100644 --- a/vars/defaultIntegPipeline.groovy +++ b/vars/defaultIntegPipeline.groovy @@ -2,6 +2,7 @@ def call(Map config = [:]) { def sourceContext = config.sourceContext def migrationContext = config.migrationContext def defaultStageId = config.defaultStageId + def jobName = config.jobName if(sourceContext == null || sourceContext.isEmpty()){ throw new RuntimeException("The sourceContext argument must be provided"); } @@ -11,11 +12,16 @@ def call(Map config = [:]) { if(defaultStageId == null || defaultStageId.isEmpty()){ throw new RuntimeException("The defaultStageId argument must be provided"); } + if(jobName == null || jobName.isEmpty()){ + throw new RuntimeException("The jobName argument must be provided"); + } def source_context_id = config.sourceContextId ?: 'source-single-node-ec2' def migration_context_id = config.migrationContextId ?: 'migration-default' def source_context_file_name = 'sourceJenkinsContext.json' def migration_context_file_name = 'migrationJenkinsContext.json' def skipCaptureProxyOnNodeSetup = config.skipCaptureProxyOnNodeSetup ?: false + def testDir = "/root/lib/integ_test/integ_test" + def integTestCommand = config.integTestCommand ?: "${testDir}/replayer_tests.py" pipeline { agent { label config.workerAgent ?: 'Jenkins-Default-Agent-X64-C5xlarge-Single-Host' } @@ -25,6 +31,27 @@ def call(Map config = [:]) { string(name: 'STAGE', defaultValue: "${defaultStageId}", description: 'Stage name for deployment environment') } + options { + // Acquire lock on a given deployment stage + lock(label: params.STAGE, quantity: 1, variable: 'stage') + timeout(time: 3, unit: 'HOURS') + buildDiscarder(logRotator(daysToKeepStr: '30')) + } + + triggers { + GenericTrigger( + genericVariables: [ + [key: 'GIT_REPO_URL', value: '$.GIT_REPO_URL'], + [key: 'GIT_BRANCH', value: '$.GIT_BRANCH'], + [key: 'job_name', value: '$.job_name'] + ], + tokenCredentialId: 'jenkins-migrations-generic-webhook-token', + causeString: 'Triggered by PR on opensearch-migrations repository', + regexpFilterExpression: "^$jobName\$", + regexpFilterText: "\$job_name", + ) + } + stages { stage('Checkout') { steps { @@ -92,13 +119,14 @@ def call(Map config = [:]) { if (config.deployStep) { config.deployStep() } else { + echo "Acquired deployment stage: ${stage}" sh 'sudo usermod -aG docker $USER' sh 'sudo newgrp docker' def baseCommand = "sudo --preserve-env ./awsE2ESolutionSetup.sh --source-context-file './$source_context_file_name' " + "--migration-context-file './$migration_context_file_name' " + "--source-context-id $source_context_id " + "--migration-context-id $migration_context_id " + - "--stage ${params.STAGE} " + + "--stage ${stage} " + "--migrations-git-url ${params.GIT_REPO_URL} " + "--migrations-git-branch ${params.GIT_BRANCH}" if (skipCaptureProxyOnNodeSetup) { @@ -127,17 +155,16 @@ def call(Map config = [:]) { } else { def time = new Date().getTime() def uniqueId = "integ_min_${time}_${currentBuild.number}" - def test_dir = "/root/lib/integ_test/integ_test" - def test_result_file = "${test_dir}/reports/${uniqueId}/report.xml" - def command 
= "pipenv run pytest --log-file=${test_dir}/reports/${uniqueId}/pytest.log " + - "--junitxml=${test_result_file} ${test_dir}/replayer_tests.py " + + def test_result_file = "${testDir}/reports/${uniqueId}/report.xml" + def command = "pipenv run pytest --log-file=${testDir}/reports/${uniqueId}/pytest.log " + + "--junitxml=${test_result_file} ${integTestCommand} " + "--unique_id ${uniqueId} " + "-s" withCredentials([string(credentialsId: 'migrations-test-account-id', variable: 'MIGRATIONS_TEST_ACCOUNT_ID')]) { withAWS(role: 'JenkinsDeploymentRole', roleAccount: "${MIGRATIONS_TEST_ACCOUNT_ID}", duration: 3600, roleSessionName: 'jenkins-session') { sh "sudo --preserve-env ./awsRunIntegTests.sh --command '${command}' " + "--test-result-file ${test_result_file} " + - "--stage ${params.STAGE}" + "--stage ${stage}" } } } diff --git a/vars/rfsDefaultE2ETest.groovy b/vars/rfsDefaultE2ETest.groovy new file mode 100644 index 000000000..aac9db03a --- /dev/null +++ b/vars/rfsDefaultE2ETest.groovy @@ -0,0 +1,57 @@ +// Note: +// 1. There is a still a manual step needed on the EC2 source load balancer to replace its security group rule which allows all traffic (0.0.0.0/0) to +// allow traffic for the relevant service security group. This needs a better story around accepting user security groups in our Migration CDK. + +def call(Map config = [:]) { + def sourceContextId = 'source-single-node-ec2' + def migrationContextId = 'migration-rfs' + def source_cdk_context = """ + { + "source-single-node-ec2": { + "suffix": "ec2-source-", + "networkStackSuffix": "ec2-source-", + "distVersion": "7.10.2", + "distributionUrl": "https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.10.2-linux-x86_64.tar.gz", + "captureProxyEnabled": false, + "securityDisabled": true, + "minDistribution": false, + "cpuArch": "x64", + "isInternal": true, + "singleNodeCluster": true, + "networkAvailabilityZones": 2, + "dataNodeCount": 1, + "managerNodeCount": 0, + "serverAccessType": "ipv4", + "restrictServerAccessTo": "0.0.0.0/0" + } + } + """ + def migration_cdk_context = """ + { + "migration-rfs": { + "stage": "", + "vpcId": "", + "engineVersion": "OS_2.11", + "domainName": "os-cluster-", + "dataNodeCount": 2, + "openAccessPolicyEnabled": true, + "domainRemovalPolicy": "DESTROY", + "artifactBucketRemovalPolicy": "DESTROY", + "trafficReplayerServiceEnabled": false, + "reindexFromSnapshotServiceEnabled": true, + "sourceClusterEndpoint": "" + } + } + """ + + defaultIntegPipeline( + sourceContext: source_cdk_context, + migrationContext: migration_cdk_context, + sourceContextId: sourceContextId, + migrationContextId: migrationContextId, + defaultStageId: 'rfs-integ', + skipCaptureProxyOnNodeSetup: true, + jobName: 'rfs-default-e2e-test', + integTestCommand: '/root/lib/integ_test/integ_test/backfill_tests.py' + ) +} diff --git a/jenkins/migrationIntegPipelines/ec2SourceE2EPipeline.groovy b/vars/trafficReplayDefaultE2ETest.groovy similarity index 65% rename from jenkins/migrationIntegPipelines/ec2SourceE2EPipeline.groovy rename to vars/trafficReplayDefaultE2ETest.groovy index 5cf981ba0..26e44b2c7 100644 --- a/jenkins/migrationIntegPipelines/ec2SourceE2EPipeline.groovy +++ b/vars/trafficReplayDefaultE2ETest.groovy @@ -2,13 +2,10 @@ // 1. There is a still a manual step needed on the EC2 source load balancer to replace its security group rule which allows all traffic (0.0.0.0/0) to // allow traffic for the relevant service security group. This needs a better story around accepting user security groups in our Migration CDK. 
-def sourceContextId = 'source-single-node-ec2' -def migrationContextId = 'migration-default' -// These default values should only be used on the initial Jenkins run in order to load parameter options into the UI, -// all future runs should use the specified parameters -def gitBranch = params.GIT_BRANCH ?: 'main' -def gitUrl = params.GIT_REPO_URL ?: 'https://github.com/opensearch-project/opensearch-migrations.git' -def source_cdk_context = """ +def call(Map config = [:]) { + def sourceContextId = 'source-single-node-ec2' + def migrationContextId = 'migration-default' + def source_cdk_context = """ { "source-single-node-ec2": { "suffix": "ec2-source-", @@ -29,8 +26,8 @@ def source_cdk_context = """ "restrictServerAccessTo": "0.0.0.0/0" } } -""" -def migration_cdk_context = """ + """ + def migration_cdk_context = """ { "migration-default": { "stage": "", @@ -50,19 +47,17 @@ def migration_cdk_context = """ "migrationAPIEnabled": true } } -""" + """ -library identifier: "migrations-lib@${gitBranch}", retriever: modernSCM( - [$class: 'GitSCMSource', - remote: "${gitUrl}"]) - -defaultIntegPipeline( - sourceContext: source_cdk_context, - migrationContext: migration_cdk_context, - sourceContextId: sourceContextId, - migrationContextId: migrationContextId, - defaultStageId: 'aws-integ', - //deployStep: { - // echo 'Custom Test Step' - //} -) + defaultIntegPipeline( + sourceContext: source_cdk_context, + migrationContext: migration_cdk_context, + sourceContextId: sourceContextId, + migrationContextId: migrationContextId, + defaultStageId: 'aws-integ', + jobName: 'traffic-replay-default-e2e-test', + //deployStep: { + // echo 'Custom Test Step' + //} + ) +}
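As a closing usage note, the new Hamcrest matchers added under testHelperFixtures in this change expose static factory methods, so tests can combine them directly with assertThat. The snippet below is a minimal sketch assuming a standard JUnit 5 plus Hamcrest test setup; the test class and the string under test are hypothetical and not part of this change.

```java
import static org.hamcrest.MatcherAssert.assertThat;
import static org.opensearch.migrations.matchers.ContainsStringCount.containsStringCount;
import static org.opensearch.migrations.matchers.HasLineCount.hasLineCount;

import org.junit.jupiter.api.Test;

class CliOutputShapeTest {
    @Test
    void helpOutputHasExpectedShape() {
        // Hypothetical CLI output under test; real tests would assert against MetadataMigration's actual output.
        String output = "Usage: [options] [command]\nCommands:\n  evaluate\n  migrate\n";

        // Verify the section header appears exactly once and the output spans four lines.
        assertThat(output, containsStringCount("Commands:", 1));
        assertThat(output, hasLineCount(4));
    }
}
```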