From 3df1e006bbac58df1f745799b5c96723d6aeffa6 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Thu, 5 Oct 2023 13:50:57 -0500 Subject: [PATCH 01/10] Add link checker workflow Signed-off-by: Omar Khasawneh --- .github/workflows/linkCheck.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/linkCheck.yml diff --git a/.github/workflows/linkCheck.yml b/.github/workflows/linkCheck.yml new file mode 100644 index 000000000..c27978f6f --- /dev/null +++ b/.github/workflows/linkCheck.yml @@ -0,0 +1,24 @@ +name: Link Checker +on: + push: + branches: + - "*" + pull_request: + branches: + - "*" + +jobs: + linkchecker: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: lychee Link Checker + id: lychee + uses: lycheeverse/lychee-action@v1.5.0 + with: + args: --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" --exclude "file:///github/workspace/*" --exclude-mail + fail: true + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} \ No newline at end of file From 4dcd55b13e9e0ac3f5f25f6c11f754cd214fce09 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 11:24:49 -0500 Subject: [PATCH 02/10] exclude localhost links Signed-off-by: Omar Khasawneh --- .github/workflows/linkCheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linkCheck.yml b/.github/workflows/linkCheck.yml index c27978f6f..e66c2bb97 100644 --- a/.github/workflows/linkCheck.yml +++ b/.github/workflows/linkCheck.yml @@ -18,7 +18,7 @@ jobs: id: lychee uses: lycheeverse/lychee-action@v1.5.0 with: - args: --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" --exclude "file:///github/workspace/*" --exclude-mail + args: --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" --exclude "file:///github/workspace/*","http://localhost*","https://localhost*" --exclude-mail fail: true env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} \ No newline at end of file From 91a11656af7f3edab51a1b06d3d5590b823ef91a Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 18:47:37 -0500 Subject: [PATCH 03/10] Fix incorrect links Signed-off-by: Omar Khasawneh --- .github/workflows/linkCheck.yml | 2 +- TrafficCapture/trafficReplayer/README.md | 15 +++++++-------- deployment/README.md | 2 -- deployment/copilot/README.md | 2 +- experimental/knowledge_base/README.md | 2 +- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/.github/workflows/linkCheck.yml b/.github/workflows/linkCheck.yml index e66c2bb97..7b5dd915f 100644 --- a/.github/workflows/linkCheck.yml +++ b/.github/workflows/linkCheck.yml @@ -18,7 +18,7 @@ jobs: id: lychee uses: lycheeverse/lychee-action@v1.5.0 with: - args: --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" --exclude "file:///github/workspace/*","http://localhost*","https://localhost*" --exclude-mail + args: --verbose --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" --exclude "file:///github/workspace/*","http://localhost*","https://localhost*" --exclude-mail fail: true env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} \ No newline at end of file diff --git a/TrafficCapture/trafficReplayer/README.md b/TrafficCapture/trafficReplayer/README.md index 3b7b2ab17..c017deea8 100644 --- a/TrafficCapture/trafficReplayer/README.md +++ b/TrafficCapture/trafficReplayer/README.md @@ -11,10 +11,9 @@ encoded [TrafficStream](../captureProtobufs/src/main/proto/TrafficCaptureStream. 
Currently, these TrafficStream objects are ingested via stdin and are reconstructed into entire traffic channels. This involves some buffering for those connections whose contents are divided into a number of TrafficStream objects. Read and write observations are extracted from TrafficStream objects into source requests and source responses. -The [TrafficCaptureToHttpTransactionAccumulator](src/main/java/org/opensearch/migrations/replay/TrafficCaptureToHttpTransactionAccumulator.java) +The [CapturedTrafficToHttpTransactionAccumulator](src/main/java/org/opensearch/migrations/replay/CapturedTrafficToHttpTransactionAccumulator.java) takes full requests (as defined by the data, not necessarily by the HTTP format) and sends them to -an [IPacketToHttpHandler]( -src/main/java/org/opensearch/migrations/replay/datahandlers/IPacketToHttpHandler.java). The packet handler is +an [IPacketConsumer](src/main/java/org/opensearch/migrations/replay/datahandlers/IPacketConsumer.java). The packet handler is responsible for doing any transformation of the request and sending it to the target server. It is also responsible for aggregating the HTTP response from the server and returning that as a CompletableFuture via finalizeRequest(). @@ -25,9 +24,9 @@ other pertinent information is sent to stdout. ## The Netty Request Transformation Pipeline There are two implementations of -IPacketToHttpHandler, [NettyPacketToHttpHandler](../trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpHandler.java), +IPacketToHttpHandler, [NettyPacketToHttpConsumer](../trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/NettyPacketToHttpConsumer.java), which will send packets to the target server -and [HttpJsonTransformer](../trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformer.java) +and [HttpJsonTransformingConsumer](./src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonTransformingConsumer.java) that is capable of transforming the message as per directives passed to the JsonTransformer. Examples of transformations that the HttpJsonTransfomer needs to run include mapping the host header to match the new @@ -40,9 +39,9 @@ over the constructed JSON document. Since payloads can be arbitrarily complex (compression, chunking), and may not be subject to any changes via the transformation rules, the HttpJsonTransformer creates the channel pipeline *only* to parse the HTTP headers. The transformation is run on this partial, in-construction -[HttpJsonMessageWithFaultablePayload](../trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonMessageWithFaultablePayload.java) +[HttpJsonMessageWithFaultablePayload](./src/main/java/org/opensearch/migrations/replay/datahandlers/http/HttpJsonMessageWithFaultingPayload.java) message. When the transformation (or any other code), attempts to access the payload contents, it will throw a -[PayloadNotLoadedException](TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/datahandlers/PayloadNotLoadedException.java) +[PayloadNotLoadedException](./src/main/java/org/opensearch/migrations/replay/datahandlers/PayloadNotLoadedException.java) exception. 
That exception triggers the HttpJsonTransformer to add channel handlers to the pipeline to parse the HTTP content stream into JSON, transform it, and to repackage it as the HTTP headers indicate, observing the content-encoding (gzip, etc) and transfer-encoding (chunked)/content-length values. Fixed length streams will be used by default, with @@ -78,7 +77,7 @@ target URI. ## Transformations Transformations are performed via a simple class defined by -[JsonTransformer](../trafficReplayer/src/main/java/org/opensearch/migrations/transform/JsonTransformer.java). Currently, +[IJsonTransformer](../trafficReplayer/src/main/java/org/opensearch/migrations/transform/IJsonTransformer.java). Currently, this class uses [JOLT](https://github.com/bazaarvoice/jolt) to perform transforms that are composed of modular operations that are defined in the [resources](../trafficReplayer/src/main/resources/jolt/operations) associated with the package. Future work will include adding more JSON transformations and other potential JSON transformation tools diff --git a/deployment/README.md b/deployment/README.md index 2117104db..d72612620 100644 --- a/deployment/README.md +++ b/deployment/README.md @@ -1,8 +1,6 @@ ### Deployment This directory is aimed at housing deployment/distribution methods for various migration related images and infrastructure. It is not specific to any given platform and should be expanded to more platforms as needed. -It is worth noting that there is not a hard divide between these subdirectories and deployments such as [opensearch-service-migration](./cdk/opensearch-service-migration) will use Dockerfiles in the [docker](./docker) directory for some of its container deployments. - ### Deploying Migration solution to AWS diff --git a/deployment/copilot/README.md b/deployment/copilot/README.md index 7f792eebe..1dff52bae 100644 --- a/deployment/copilot/README.md +++ b/deployment/copilot/README.md @@ -193,7 +193,7 @@ copilot svc exec -a migration-copilot -e dev -n migration-console -c "bash" ### Addons -Addons are a Copilot concept for adding additional AWS resources outside the core ECS resources that it sets up. An example of this can be seen in the [traffic-replayer](traffic-replayer/addons/taskRole.yml) service which has an `addons` directory and yaml file which adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. +Addons are a Copilot concept for adding additional AWS resources outside the core ECS resources that it sets up. An example of this can be seen in the `traffic-replayer/addons/taskRole.yml` service which has an `addons` directory and yaml file which adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. Official documentation on Addons can be found [here](https://aws.github.io/copilot-cli/docs/developing/addons/workload/). diff --git a/experimental/knowledge_base/README.md b/experimental/knowledge_base/README.md index db84c4341..41e3078f5 100644 --- a/experimental/knowledge_base/README.md +++ b/experimental/knowledge_base/README.md @@ -2,6 +2,6 @@ The Knowledge Base is the collection of expectations we have about the behavior of clusters of various versions and across upgrades. -It can be used in various ways, but was designed to be a component of the Upgrade Testing Framework. There is more extensive documentation of how it works in [experimental/upgrades/README.md](../experimental/upgrades/README.md). 
+It can be used in various ways, but was designed to be a component of the Upgrade Testing Framework. There is more extensive documentation of how it works in [experimental/upgrades/README.md](../upgrades/README.md). Further discussion on adding additional expectations to this knowledge base, can be found in the PR [here](https://github.com/opensearch-project/opensearch-migrations/pull/68) From 0390e7a5ef21b2455c0be2837c7863bdf01ad5ac Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 18:54:19 -0500 Subject: [PATCH 04/10] fix exclusions Signed-off-by: Omar Khasawneh --- .github/workflows/linkCheck.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/linkCheck.yml b/.github/workflows/linkCheck.yml index 7b5dd915f..887da9d83 100644 --- a/.github/workflows/linkCheck.yml +++ b/.github/workflows/linkCheck.yml @@ -18,7 +18,11 @@ jobs: id: lychee uses: lycheeverse/lychee-action@v1.5.0 with: - args: --verbose --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" --exclude "file:///github/workspace/*","http://localhost*","https://localhost*" --exclude-mail + args: --verbose --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json" + --exclude "file:///github/workspace/*" + --exclude "http://localhost*" + --exclude "https://localhost*" + --exclude-mail fail: true env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} \ No newline at end of file From 09c2b3e637223955ff9c4fae27289d1caf611ad6 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 19:07:15 -0500 Subject: [PATCH 05/10] missed a link Signed-off-by: Omar Khasawneh --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index b6b995583..6ac75a70b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,7 +14,7 @@ When configuring the Traffic Replayer, you can provide a set of credentials that In this case, an attacker could send a series of large and potentially malformed requests to the source cluster via the capture proxy. These messages would be relayed to the Kafka cluster, and if they were PUT/POST/UPDATE requests, block sending the request to the source cluster until the message was finished. If the attacker is able to use this strategy to tie up the proxy and/or Kafka cluster, all other incoming mutating requests to the source cluster would be blocked. -We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java#L182)), however the flow of messages to Kafka could still be disrupted. +We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java#L182) on, however the flow of messages to Kafka could still be disrupted. If you are concerned about this scenario, we recommend fully mitigating it by putting a load-shedder in front of the proxy. 
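Taken together, patches 01–04 leave `.github/workflows/linkCheck.yml` roughly in the state sketched below. Every option value comes from the diffs above; only the indentation is reconstructed to the conventional GitHub Actions layout, so treat this as an approximation of the committed file rather than a verbatim copy.

```yaml
# Approximate state of .github/workflows/linkCheck.yml after patches 01-04.
# Values are taken from the diffs above; indentation is reconstructed.
name: Link Checker
on:
  push:
    branches:
      - "*"
  pull_request:
    branches:
      - "*"

jobs:
  linkchecker:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: lychee Link Checker
        id: lychee
        uses: lycheeverse/lychee-action@v1.5.0
        with:
          # The multi-line plain scalar folds into a single argument string for lychee.
          args: --verbose --accept=200,403,429 "**/*.html" "**/*.md" "**/*.txt" "**/*.json"
            --exclude "file:///github/workspace/*"
            --exclude "http://localhost*"
            --exclude "https://localhost*"
            --exclude-mail
          fail: true
        env:
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
```

The `--accept=200,403,429` flag tells lychee to treat 403 and 429 responses as reachable, which keeps rate-limiting or bot-blocking hosts from failing the build, while the `--exclude` patterns skip workspace-local file links and localhost URLs that can never resolve in CI.
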
From bce2d9bb03d247c6f0ef599d55a9bc9d7fb151e5 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 19:09:25 -0500 Subject: [PATCH 06/10] fixed link Signed-off-by: Omar Khasawneh --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index 6ac75a70b..3f6396fdc 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,7 +14,7 @@ When configuring the Traffic Replayer, you can provide a set of credentials that In this case, an attacker could send a series of large and potentially malformed requests to the source cluster via the capture proxy. These messages would be relayed to the Kafka cluster, and if they were PUT/POST/UPDATE requests, block sending the request to the source cluster until the message was finished. If the attacker is able to use this strategy to tie up the proxy and/or Kafka cluster, all other incoming mutating requests to the source cluster would be blocked. -We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java#L182) on, however the flow of messages to Kafka could still be disrupted. +We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java) line #182, however the flow of messages to Kafka could still be disrupted. If you are concerned about this scenario, we recommend fully mitigating it by putting a load-shedder in front of the proxy. From a733785132d912f6c5b472edde0e6abb87a88fb6 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 19:14:08 -0500 Subject: [PATCH 07/10] fixed link after pulling latest Signed-off-by: Omar Khasawneh --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index 3f6396fdc..241406af7 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,7 +14,7 @@ When configuring the Traffic Replayer, you can provide a set of credentials that In this case, an attacker could send a series of large and potentially malformed requests to the source cluster via the capture proxy. These messages would be relayed to the Kafka cluster, and if they were PUT/POST/UPDATE requests, block sending the request to the source cluster until the message was finished. If the attacker is able to use this strategy to tie up the proxy and/or Kafka cluster, all other incoming mutating requests to the source cluster would be blocked. -We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/Main.java) line #182, however the flow of messages to Kafka could still be disrupted. +We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxy.java) on line #182, however the flow of messages to Kafka could still be disrupted. 
If you are concerned about this scenario, we recommend fully mitigating it by putting a load-shedder in front of the proxy. From 6d74c02339903cbb969eb30b778aded86dd20d70 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 21:23:02 -0500 Subject: [PATCH 08/10] minor changes according to feedback Signed-off-by: Omar Khasawneh --- SECURITY.md | 2 +- deployment/copilot/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 241406af7..0ba1e6c9e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,7 +14,7 @@ When configuring the Traffic Replayer, you can provide a set of credentials that In this case, an attacker could send a series of large and potentially malformed requests to the source cluster via the capture proxy. These messages would be relayed to the Kafka cluster, and if they were PUT/POST/UPDATE requests, block sending the request to the source cluster until the message was finished. If the attacker is able to use this strategy to tie up the proxy and/or Kafka cluster, all other incoming mutating requests to the source cluster would be blocked. -We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxy.java) on line #182, however the flow of messages to Kafka could still be disrupted. +We have partially mitigated this by preventing the proxy from blocking for more than a fixed period of time (10 seconds by default, configurable in this file [here](./TrafficCapture/trafficCaptureProxyServer/src/main/java/org/opensearch/migrations/trafficcapture/proxyserver/CaptureProxy.java), however the flow of messages to Kafka could still be disrupted. If you are concerned about this scenario, we recommend fully mitigating it by putting a load-shedder in front of the proxy. diff --git a/deployment/copilot/README.md b/deployment/copilot/README.md index 1dff52bae..083ec3440 100644 --- a/deployment/copilot/README.md +++ b/deployment/copilot/README.md @@ -193,7 +193,7 @@ copilot svc exec -a migration-copilot -e dev -n migration-console -c "bash" ### Addons -Addons are a Copilot concept for adding additional AWS resources outside the core ECS resources that it sets up. An example of this can be seen in the `traffic-replayer/addons/taskRole.yml` service which has an `addons` directory and yaml file which adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. +Addons are a Copilot concept for adding additional AWS resources outside the core ECS resources that it sets up. An example of this can be seen in the `traffic-replayer/addons/taskRole.yml` (this file would only exist after deploying) service which has an `addons` directory and yaml file which adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. Official documentation on Addons can be found [here](https://aws.github.io/copilot-cli/docs/developing/addons/workload/). 
From 8cb883fbabe9a8c2d459a94d91cd5168022c1d21 Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 21:44:49 -0500 Subject: [PATCH 09/10] minor changes according to feedback Signed-off-by: Omar Khasawneh --- deployment/copilot/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deployment/copilot/README.md b/deployment/copilot/README.md index 083ec3440..8712ed652 100644 --- a/deployment/copilot/README.md +++ b/deployment/copilot/README.md @@ -193,7 +193,11 @@ copilot svc exec -a migration-copilot -e dev -n migration-console -c "bash" ### Addons -Addons are a Copilot concept for adding additional AWS resources outside the core ECS resources that it sets up. An example of this can be seen in the `traffic-replayer/addons/taskRole.yml` (this file would only exist after deploying) service which has an `addons` directory and yaml file which adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. +Addons are a Copilot concept for adding additional AWS resources outside the core ECS resources that it sets up. + +An example of this can be seen in the `traffic-replayer/addons/taskRole.yml` service which has an `addons` directory and yaml file. + +That yaml file adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. (Note that `taskRole.yml` would only exist after building.) Official documentation on Addons can be found [here](https://aws.github.io/copilot-cli/docs/developing/addons/workload/). From 68c5f4660523aadb53ffe7a0bb2308fb928748bf Mon Sep 17 00:00:00 2001 From: Omar Khasawneh Date: Fri, 6 Oct 2023 21:54:33 -0500 Subject: [PATCH 10/10] would to will Signed-off-by: Omar Khasawneh --- deployment/copilot/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/copilot/README.md b/deployment/copilot/README.md index 8712ed652..df5c87ec7 100644 --- a/deployment/copilot/README.md +++ b/deployment/copilot/README.md @@ -197,7 +197,7 @@ Addons are a Copilot concept for adding additional AWS resources outside the cor An example of this can be seen in the `traffic-replayer/addons/taskRole.yml` service which has an `addons` directory and yaml file. -That yaml file adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. (Note that `taskRole.yml` would only exist after building.) +That yaml file adds an IAM ManagedPolicy to the task role that Copilot creates for the service. This added policy is to allow communication with MSK. (Note that `taskRole.yml` will only exist after building.) Official documentation on Addons can be found [here](https://aws.github.io/copilot-cli/docs/developing/addons/workload/).
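
The `traffic-replayer/addons/taskRole.yml` file referenced in the Copilot README changes above is generated as part of the build, and its contents are not shown in this patch series. As a rough illustration only, the sketch below is a hypothetical example, not the repository's actual file: the `kafka-cluster:*` actions, the wildcard resource, and the output name are assumptions, and a real policy would be scoped to the specific MSK cluster. The only facts carried over from the README are that the addon is a YAML template in an `addons/` directory and that it adds an IAM ManagedPolicy to the task role so the service can communicate with MSK.

```yaml
# Hypothetical sketch of a Copilot workload addon (not the repo's actual taskRole.yml).
# Copilot supplies the App/Env/Name parameters to workload addon templates and
# attaches ManagedPolicy outputs to the service's task role.
Parameters:
  App:
    Type: String
    Description: Your application's name.
  Env:
    Type: String
    Description: The environment in which your service is deployed.
  Name:
    Type: String
    Description: The name of the service.

Resources:
  MSKAccessPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:          # illustrative MSK IAM-auth actions; adjust to actual needs
              - kafka-cluster:Connect
              - kafka-cluster:DescribeTopic
              - kafka-cluster:ReadData
              - kafka-cluster:DescribeGroup
              - kafka-cluster:AlterGroup
            Resource: "*"    # a real addon would scope this to the MSK cluster/topic ARNs

Outputs:
  MSKAccessPolicyArn:
    Description: ManagedPolicy that Copilot attaches to the task role.
    Value: !Ref MSKAccessPolicy
```

Surfacing the policy through `Outputs` rather than attaching it explicitly follows the documented Copilot convention: a ManagedPolicy output of a workload addon is attached to the task role that Copilot creates for that service, which is the behavior the README describes.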