
Commit

Merge pull request #409 from opensearch-project/remove_jupyter_and_comparator

Remove Comparator and Jupyter since it will be replaced by OTEL and Analytics Engine
sumobrian committed Nov 12, 2023
2 parents 8710f72 + 0de0a3e commit b011fd2
Showing 15 changed files with 15 additions and 356 deletions.
10 changes: 0 additions & 10 deletions TrafficCapture/dockerSolution/README.md
@@ -8,16 +8,6 @@ down again.
Notice that most of the Dockerfiles are dynamically constructed in the build hierarchy. Some efforts have been made
to ensure that changes will make it into containers to be launched.

If a user wants to use their own checkout of the traffic-comparator repo, just set the environment variable "
TRAFFIC_COMPARATOR_DIRECTORY" to the directory that contains `setup.py`. Otherwise, if that isn't set, the traffic
comparator repo will be checked out to the build directory and that will be used. Notice that the checkout happens when
the directory wasn't present and there wasn't an environment variable specifying a directory. Once a directory exists,
it will be mounted to the traffic-comparator and jupyter services.

Netcat is still used to connect several of the components and we're still working on improving the resiliency story
between these containers. The long term approach is to replace fixed streams with message bus approaches directly (i.e.
Kafka). In the short term, we can and are beginning, to leverage things like conditions on dependent services.

### Running the Docker Solution

While in the TrafficCapture directory, run the following command:
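The removed README paragraph above notes that the netcat links between containers were intended to give way to a message-bus approach (Kafka), which is the direction this commit takes: the replayer's docker-compose command further down reads directly from Kafka instead of piping output to the comparator. As a rough, hypothetical illustration of that consumption path, the Java sketch below tails the traffic topic; only the broker address, topic name, and group id are taken from the compose file in this commit, everything else is illustrative.

```java
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.List;
import java.util.Properties;

public class TrafficTopicTail {
    public static void main(String[] args) {
        Properties props = new Properties();
        // Broker, topic, and group id mirror the docker-compose replayer command in this commit.
        props.put("bootstrap.servers", "kafka:9092");
        props.put("group.id", "default-logging-group");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
        try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(List.of("logging-traffic-topic"));
            while (true) {
                ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofSeconds(1));
                // Each record is a serialized traffic stream; here we only report offset and size.
                records.forEach(r -> System.out.printf("offset=%d, %d bytes%n", r.offset(), r.value().length));
            }
        }
    }
}
```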
64 changes: 1 addition & 63 deletions TrafficCapture/dockerSolution/build.gradle
@@ -9,19 +9,6 @@ import java.security.MessageDigest
import com.bmuschko.gradle.docker.tasks.image.DockerBuildImage
import org.apache.tools.ant.taskdefs.condition.Os

def getTrafficComparatorDirectory() {
String overrideTrafficComparatorDirectory = System.getenv(TRAFFIC_COMPARATOR_DIRECTORY_ENV)
String rval = overrideTrafficComparatorDirectory != null ?
overrideTrafficComparatorDirectory : TRAFFIC_COMPARATOR_REPO_DIRECTORY;
return rval
}

ext {
TRAFFIC_COMPARATOR_REPO_DIRECTORY = "build/traffic-comparator"
TRAFFIC_COMPARATOR_DIRECTORY_ENV = "TRAFFIC_COMPARATOR_DIRECTORY"
REALIZED_TRAFFIC_COMPARATOR_DIRECTORY = project.file(getTrafficComparatorDirectory())
}

def calculateDockerHash = { projectName ->
CommonUtils.calculateDockerHash(projectName, project)
}
@@ -31,17 +18,6 @@ dependencies {
implementation project(':trafficReplayer')
}

task cloneComparatorRepoIfNeeded(type: Exec) {
String comparatorDirectory = project.file(REALIZED_TRAFFIC_COMPARATOR_DIRECTORY);
String repo = 'https://github.com/opensearch-project/traffic-comparator.git'
onlyIf {
!(new File(comparatorDirectory).exists())
}
commandLine = Os.isFamily(Os.FAMILY_WINDOWS) ?
['git', 'clone', repo, TRAFFIC_COMPARATOR_REPO_DIRECTORY ] :
['/bin/sh', '-c', "git clone ${repo} ${TRAFFIC_COMPARATOR_REPO_DIRECTORY}"]
}

def dockerFilesForExternalServices = [
"elasticsearchWithSearchGuard": "elasticsearch_searchguard",
"migrationConsole": "migration_console"
@@ -56,36 +32,6 @@ dockerFilesForExternalServices.each { projectName, dockerImageName ->
}
}

def trafficComparatorServices = [
"trafficComparator": "traffic_comparator",
"jupyterNotebook": "jupyter_notebook"
]
trafficComparatorServices.forEach {projectName, dockerImageName ->
def dockerBuildDir = "build/docker/${projectName}"
task("copyArtifact_${projectName}", type: Copy) {
dependsOn(tasks.getByName('cloneComparatorRepoIfNeeded'))
from REALIZED_TRAFFIC_COMPARATOR_DIRECTORY
into dockerBuildDir
include '*.py'
include '/traffic_comparator/*'
if (projectName == 'jupyterNotebook') {
include '*.ipynb'
}
}

task "createDockerfile_${projectName}"(type: com.bmuschko.gradle.docker.tasks.image.Dockerfile) {
dependsOn "copyArtifact_${projectName}"
destFile = project.file("${dockerBuildDir}/Dockerfile")
from 'python:3.10.10'
runCommand("apt-get update && apt-get install -y netcat lsof")
copyFile("setup.py", "/setup.py")
copyFile(".", "/containerTC/")
runCommand("pip3 install --editable \".[data]\"")
// container stay-alive
defaultCommand('tail', '-f', '/dev/null')
}
}

def javaContainerServices = [
"trafficCaptureProxyServer": "capture_proxy",
"trafficReplayer": "traffic_replayer"
@@ -101,7 +47,7 @@ javaContainerServices.each { projectName, dockerImageName ->
CommonUtils.createDockerfile(project, projectName, baseImageProjectOverrides, dockerFilesForExternalServices)
}

(javaContainerServices + trafficComparatorServices).forEach { projectName, dockerImageName ->
javaContainerServices.forEach { projectName, dockerImageName ->
def dockerBuildDir = "build/docker/${projectName}"
task "buildDockerImage_${projectName}"(type: DockerBuildImage) {
dependsOn "createDockerfile_${projectName}"
@@ -112,11 +58,6 @@ javaContainerServices.each { projectName, dockerImageName ->
}

dockerCompose {
String overrideTrafficComparatorDirectory = System.getenv(TRAFFIC_COMPARATOR_DIRECTORY_ENV)
if (overrideTrafficComparatorDirectory == null) {
environment.put(TRAFFIC_COMPARATOR_DIRECTORY_ENV, REALIZED_TRAFFIC_COMPARATOR_DIRECTORY)
exposeAsEnvironment(this)
}
useComposeFiles = project.hasProperty('multiProxy') ?
['src/main/docker/docker-compose.yml', 'src/main/docker/docker-compose-multi.yml'] :
['src/main/docker/docker-compose.yml', 'src/main/docker/docker-compose-single.yml']
@@ -128,10 +69,7 @@ task buildDockerImages {

dependsOn buildDockerImage_trafficCaptureProxyServer
dependsOn buildDockerImage_trafficReplayer
dependsOn buildDockerImage_trafficComparator
dependsOn buildDockerImage_jupyterNotebook
}

tasks.getByName('composeUp')
.dependsOn(tasks.getByName('buildDockerImages'))
.dependsOn(tasks.getByName('cloneComparatorRepoIfNeeded'))
31 changes: 1 addition & 30 deletions TrafficCapture/dockerSolution/src/main/docker/docker-compose.yml
@@ -43,9 +43,7 @@ services:
condition: service_started
opensearchtarget:
condition: service_started
trafficcomparator:
condition: service_healthy
command: /bin/sh -c "/runJavaWithClasspath.sh org.opensearch.migrations.replay.TrafficReplayer https://opensearchtarget:9200 --auth-header-value Basic\\ YWRtaW46YWRtaW4= --insecure --kafka-traffic-brokers kafka:9092 --kafka-traffic-topic logging-traffic-topic --kafka-traffic-group-id default-logging-group | nc trafficcomparator 9220"
command: /bin/sh -c "/runJavaWithClasspath.sh org.opensearch.migrations.replay.TrafficReplayer https://opensearchtarget:9200 --auth-header-value Basic\\ YWRtaW46YWRtaW4= --insecure --kafka-traffic-brokers kafka:9092 --kafka-traffic-topic logging-traffic-topic --kafka-traffic-group-id default-logging-group"

opensearchtarget:
image: 'opensearchproject/opensearch:latest'
@@ -56,33 +54,6 @@
ports:
- "29200:9200"

trafficcomparator:
image: 'migrations/traffic_comparator:latest'
networks:
- migrations
ports:
- "9220:9220"
healthcheck:
test: "lsof -i -P -n"
volumes:
- ${TRAFFIC_COMPARATOR_DIRECTORY}:/trafficComparator
- sharedComparatorSqlResults:/shared
command: /bin/sh -c "cd trafficComparator && pip3 install --editable . && nc -v -l -p 9220 | tee /dev/stderr | trafficcomparator -vv stream | trafficcomparator dump-to-sqlite --db /shared/comparisons.db"

jupyter-notebook:
image: 'migrations/jupyter_notebook:latest'
networks:
- migrations
ports:
- "8888:8888"
volumes:
- ${TRAFFIC_COMPARATOR_DIRECTORY}:/trafficComparator
- sharedComparatorSqlResults:/shared
environment:
# this needs to match the output db that traffic_comparator writes to
- COMPARISONS_DB_LOCATION=/shared/comparisons.db
command: /bin/sh -c 'cd trafficComparator && pip3 install --editable ".[data]" && jupyter notebook --ip=0.0.0.0 --port=8888 --no-browser --allow-root'

migration-console:
image: 'migrations/migration_console:latest'
networks:
2 changes: 1 addition & 1 deletion TrafficCapture/trafficReplayer/README.md
@@ -1,7 +1,7 @@
# HTTP Traffic Replayer

This package consumes streams of IP packets that were previously recorded and replays the requests to another HTTP
server, recording the packet traffic of the new interactions for future analysis (see the Comparator tools).
server, recording the packet traffic of the new interactions for future analysis.

## Overview

@@ -12,7 +12,6 @@
import org.opensearch.migrations.replay.datatypes.UniqueSourceRequestKey;

import java.io.IOException;
import java.io.OutputStream;
import java.io.SequenceInputStream;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
@@ -55,11 +54,9 @@ public void close() {
}

public static class TupleToFileWriter implements Consumer<SourceTargetCaptureTuple> {
OutputStream outputStream;
Logger tupleLogger = LogManager.getLogger("OutputTupleJsonLogger");

public TupleToFileWriter(OutputStream outputStream){
this.outputStream = outputStream;
public TupleToFileWriter(){
}

private JSONObject jsonFromHttpDataUnsafe(List<byte[]> data) throws IOException {
@@ -169,8 +166,6 @@ public void accept(SourceTargetCaptureTuple triple) {
JSONObject jsonObject = toJSONObject(triple);

tupleLogger.info(jsonObject.toString());
outputStream.write((jsonObject.toString()+"\n").getBytes(StandardCharsets.UTF_8));
outputStream.flush();
}
}

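With the OutputStream constructor argument removed above, result tuples are now emitted only through the `OutputTupleJsonLogger` logger. The sketch below is a hypothetical stand-in for that pattern, not the project's class: only the logger name comes from the diff, while the class name and the String payload are illustrative.

```java
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.function.Consumer;

// Illustrative stand-in for the new TupleToFileWriter behavior: no output stream,
// just a dedicated logger whose destination is chosen by logging configuration.
class TupleJsonEmitter implements Consumer<String> {
    private static final Logger tupleLogger = LogManager.getLogger("OutputTupleJsonLogger");

    @Override
    public void accept(String tupleAsJson) {
        // One JSON document per log event; a file appender bound to this logger
        // can take the place of the --output file option removed later in this commit.
        tupleLogger.info(tupleAsJson);
    }
}
```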
@@ -36,11 +36,8 @@
import software.amazon.awssdk.regions.Region;

import javax.net.ssl.SSLException;
import java.io.BufferedOutputStream;
import java.io.EOFException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.ref.WeakReference;
import java.net.URI;
import java.time.Duration;
@@ -224,12 +221,6 @@ static class Parameters {
arity=1,
description = "input file to read the request/response traces for the source cluster")
String inputFilename;
@Parameter(required = false,
names = {"-o", "--output"},
arity=1,
description = "output file to hold the request/response traces for the source and target cluster")
String outputFilename;

@Parameter(required = false,
names = {"-t", PACKET_TIMEOUT_SECONDS_PARAMETER_NAME},
arity = 1,
@@ -325,17 +316,15 @@ public static void main(String[] args)
return;
}

try (OutputStream outputStream = params.outputFilename == null ? System.out :
new FileOutputStream(params.outputFilename, true);
var bufferedOutputStream = new BufferedOutputStream(outputStream);
try (
var blockingTrafficStream = TrafficCaptureSourceFactory.createTrafficCaptureSource(params,
Duration.ofSeconds(params.lookaheadTimeSeconds));
var authTransformer = buildAuthTransformerFactory(params))
{
var tr = new TrafficReplayer(uri, params.transformerConfig, authTransformer,
params.allowInsecureConnections, params.numClientThreads, params.maxConcurrentRequests);
setupShutdownHookForReplayer(tr);
var tupleWriter = new SourceTargetCaptureTuple.TupleToFileWriter(bufferedOutputStream);
var tupleWriter = new SourceTargetCaptureTuple.TupleToFileWriter();
var timeShifter = new TimeShifter(params.speedupFactor);
tr.setupRunAndWaitForReplayWithShutdownChecks(Duration.ofSeconds(params.observedPacketConnectionTimeout),
blockingTrafficStream, timeShifter, tupleWriter);
@@ -29,7 +29,7 @@
* transformation and the headers present (such as for gzipped or chunked encodings).
*
* There will be a number of reasons that we need to reuse the source captured packets - both today
* (for the output to the comparator) and in the future (for retrying transient network errors or
* (for analysing and comparing) and in the future (for retrying transient network errors or
* transformation errors). With that in mind, the HttpJsonTransformer now keeps track of all of
* the ByteBufs passed into it and can redrive them through the underlying network packet handler.
* Cases where that would happen with this edit are where the payload wasn't being modified, nor
@@ -58,8 +58,8 @@ public class HttpJsonTransformingConsumer<R> implements IPacketFinalizingConsume
private final List<List<Integer>> chunkSizes;
// This is here for recovery, in case anything goes wrong with a transformation & we want to
// just dump it directly. Notice that we're already storing all of the bytes until the response
// comes back so that we can format the output that goes to the comparator. These should be
// backed by the exact same byte[] arrays, so the memory consumption should already be absorbed.
// comes back so that we can format the output. These should be backed by the exact same
// byte[] arrays, so the memory consumption should already be absorbed.
private final List<ByteBuf> chunks;

public HttpJsonTransformingConsumer(IJsonTransformer transformer,
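The comments above describe keeping the already-buffered request ByteBufs so the untouched bytes can be redriven through the underlying packet handler if a transformation fails. A rough, self-contained sketch of that idea follows; it is not the project's implementation, and Netty's ByteBuf reference-counting methods are the only external API assumed.

```java
import io.netty.buffer.ByteBuf;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

// Illustrative only: accumulate retained views of each incoming chunk so the
// original bytes can be replayed later without re-reading the capture source.
class RedrivableChunks {
    private final List<ByteBuf> chunks = new ArrayList<>();

    void accumulate(ByteBuf chunk) {
        // retainedDuplicate() shares the underlying storage, so holding these views
        // costs little beyond what is already buffered while awaiting the response.
        chunks.add(chunk.retainedDuplicate());
    }

    void redrive(Consumer<ByteBuf> downstreamPacketHandler) {
        // Push the original, untransformed bytes straight through.
        chunks.forEach(c -> downstreamPacketHandler.accept(c.duplicate()));
    }

    void release() {
        chunks.forEach(ByteBuf::release);
        chunks.clear();
    }
}
```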
2 changes: 1 addition & 1 deletion deployment/cdk/opensearch-service-migration/README.md
@@ -9,7 +9,7 @@ This directory contains the infrastructure-as-code CDK solution for deploying an
###### Docker
Docker is used by CDK to build container images. If not installed, follow the steps [here](https://docs.docker.com/engine/install/) to set up. Later versions are recommended.
###### Git
Git is used by the opensearch-migrations repo to fetch associated repositories (such as the traffic-comparator repo) for constructing their respective Dockerfiles. Steps to set up can be found [here](https://github.com/git-guides/install-git).
Git is used by the opensearch-migrations repo to fetch associated repositories. Steps to set up can be found [here](https://github.com/git-guides/install-git).
###### Java 11
Java is used by the opensearch-migrations repo and Gradle, its associated build tool. The current required version is Java 11.

@@ -11,7 +11,6 @@ import {StringParameter} from "aws-cdk-lib/aws-ssm";

export interface MigrationStackProps extends StackPropsExt {
readonly vpc: IVpc,
readonly trafficComparatorEnabled: boolean,
// Future support needed to allow importing an existing MSK cluster
readonly mskImportARN?: string,
readonly mskEnablePublicEndpoints?: boolean,
@@ -127,30 +126,6 @@ export class MigrationAssistanceStack extends Stack {
stringValue: mskCluster.clusterName
});

if (props.trafficComparatorEnabled) {
const comparatorSQLiteSG = new SecurityGroup(this, 'comparatorSQLiteSG', {
vpc: props.vpc,
allowAllOutbound: false,
});
comparatorSQLiteSG.addIngressRule(comparatorSQLiteSG, Port.allTraffic());
new StringParameter(this, 'SSMParameterComparatorSQLAccessGroupId', {
description: 'OpenSearch migration parameter for Comparator SQL volume access security group id',
parameterName: `/migration/${props.stage}/${props.defaultDeployId}/comparatorSQLAccessSecurityGroupId`,
stringValue: comparatorSQLiteSG.securityGroupId
});

// Create an EFS file system for the traffic-comparator
const comparatorSQLiteEFS = new FileSystem(this, 'comparatorSQLiteEFS', {
vpc: props.vpc,
securityGroup: comparatorSQLiteSG
});
new StringParameter(this, 'SSMParameterComparatorSQLVolumeEFSId', {
description: 'OpenSearch migration parameter for Comparator SQL EFS filesystem id',
parameterName: `/migration/${props.stage}/${props.defaultDeployId}/comparatorSQLVolumeEFSId`,
stringValue: comparatorSQLiteEFS.fileSystemId
});
}

const replayerOutputSG = new SecurityGroup(this, 'replayerOutputSG', {
vpc: props.vpc,
allowAllOutbound: false,

This file was deleted.
