diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 616b62a30..07fd960c4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,71 +20,131 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.8 + - name: Set up JDK 11 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 11 - name: Prepare Mock server for SCE ApiRequestEnrichmentIntegrationTest (launch in background) run: python integration-tests/sce-api-lookup-test.py 8001 & - - name: Prepare Postgres for SCE SqlLookupEnrichmentIntegrationTest (create entities) - run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/sce-sql-enrichment-test.sql - env: - PGPASSWORD: supersecret1 - name: Prepare Mock server for BE ApiRequestEnrichmentIntegrationTest (launch in background) run: python integration-tests/beam-api-lookup-test.py & - - name: Prepare Postgres for BE SqlLookupEnrichmentIntegrationTest (create entities) - run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/beam-sql-enrichment-test.sql + - name: Prepare Postgres for SCE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/sce-sql-enrichment-test.sql env: PGPASSWORD: supersecret1 - name: Run tests - run: sbt coverage +test coverageReport + run: sbt "project common" +test env: OER_KEY: ${{ secrets.OER_KEY }} - - name: Aggregate coverage data + - name: Check Scala formatting if: ${{ always() }} - run: sbt coverageAggregate - - name: Submit coveralls data + run: sbt scalafmtCheck + - name: Check assets can be published if: ${{ always() }} - run: sbt coveralls - env: - COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + run: sbt publishLocal - deploy: + deploy_stream: needs: test if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.8 + - name: Set up JDK 11 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 11 - name: Compare SBT version with git tag run: .github/check_tag.sh ${GITHUB_REF##*/} + - name: Test stream enrich + run: sbt "project stream" test - name: Docker login run: docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD env: DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} - - name: Build and publish Beam Docker images - run: sbt "project beam" docker:publish - name: Build and publish Stream Kinesis Docker image + if: ${{ always() }} run: sbt "project kinesis" docker:publish - name: Build and publish Stream Kafka Docker image + if: ${{ always() }} run: sbt "project kafka" docker:publish - - name: Build and publish Stream NSQ Docker images + - name: Build and publish Stream NSQ Docker image + if: ${{ always() }} run: sbt "project nsq" docker:publish + deploy_fs2: + needs: test + if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Compare SBT version with git tag + run: .github/check_tag.sh ${GITHUB_REF##*/} + - name: Test FS2 enrich + run: sbt "project fs2" test + - name: Docker login + run: docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD + env: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + - name: Build and publish FS2 Docker image + run: sbt "project fs2" docker:publish + + deploy_beam: + needs: test 
+ if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + + services: + postgres: + image: postgres + ports: + - 5432:5432 + env: + POSTGRES_USER: enricher + POSTGRES_PASSWORD: supersecret1 + POSTGRES_DB: sql_enrichment_test + POSTGRES_PORT: 5432 + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Compare SBT version with git tag + run: .github/check_tag.sh ${GITHUB_REF##*/} + - name: Docker login + run: docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD + env: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + - name: Prepare Mock server for BE ApiRequestEnrichmentIntegrationTest (launch in background) + run: python integration-tests/beam-api-lookup-test.py & + - name: Prepare Postgres for BE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/beam-sql-enrichment-test.sql + env: + PGPASSWORD: supersecret1 + - name: Test Beam enrich + run: sbt "project beam" test + - name: Build and publish Beam Docker images + run: sbt "project beam" docker:publish + deploy_sce: needs: test if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.8 + - name: Set up JDK 11 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 11 - name: Compare SBT version with git tag run: .github/check_tag.sh ${GITHUB_REF##*/} - name: Deploy SCE on Bintray Maven and Maven Central @@ -94,3 +154,52 @@ jobs: SONA_PASS: ${{ secrets.SONA_PASS }} BINTRAY_SNOWPLOW_MAVEN_USER: ${{ secrets.BINTRAY_SNOWPLOW_MAVEN_USER }} BINTRAY_SNOWPLOW_MAVEN_API_KEY: ${{ secrets.BINTRAY_SNOWPLOW_MAVEN_API_KEY }} + + coverage: + needs: test + runs-on: ubuntu-latest + + services: + postgres: + image: postgres + ports: + - 5432:5432 + env: + POSTGRES_USER: enricher + POSTGRES_PASSWORD: supersecret1 + POSTGRES_DB: sql_enrichment_test + POSTGRES_PORT: 5432 + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + + - name: Prepare Mock server for BE ApiRequestEnrichmentIntegrationTest (launch in background) + run: python integration-tests/beam-api-lookup-test.py & + - name: Prepare Postgres for BE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/beam-sql-enrichment-test.sql + env: + PGPASSWORD: supersecret1 + - name: Prepare Mock server for SCE ApiRequestEnrichmentIntegrationTest (launch in background) + run: python integration-tests/sce-api-lookup-test.py 8001 & + - name: Prepare Postgres for SCE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/sce-sql-enrichment-test.sql + env: + PGPASSWORD: supersecret1 + + - name: Run tests + run: sbt coverage +test coverageReport + env: + OER_KEY: ${{ secrets.OER_KEY }} + - name: Aggregate coverage data + if: ${{ always() }} + run: sbt coverageAggregate + - name: Submit coveralls data + if: ${{ always() }} + run: sbt coveralls + env: + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} diff --git a/.jvmopts b/.jvmopts index ef0f90285..00c1707db 100644 --- 
a/.jvmopts +++ b/.jvmopts @@ -8,6 +8,5 @@ -XX:+TieredCompilation -XX:-UseGCOverheadLimit # effectively adds GC to Perm space --XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled # must be enabled for CMSClassUnloadingEnabled to work diff --git a/CHANGELOG b/CHANGELOG index 7bd550f43..d9388dbae 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,43 @@ +Version 1.4.0 (2020-10-21) +-------------------------- +Stream FS2: add (#346) +Stream: bump log4j-core to 2.13.3 (#368) +Stream: bump base-debian to 0.2.1 (#359) +Stream: remove unused Maxmind database (#352) +Beam: bump Scio to 0.9.3 (#308) +Beam: use test Maxmind databases (#269) +Common: add benchmarking module (#370) +Common: bump scala-forex to 1.0.0 (#349) +Common: bump scala-referer-parser to 1.1.0 (#348) +Common: bump scala-weather to 1.0.0 (#347) +Common: bump iglu-scala-client to 1.0.2 (#52) +Common: bump scala-maxmind-iplookups to 0.7.1 (#323) +Common: bump snowplow-badrows to 2.1.0 (#325) +Common: bump YAUAA to 5.19 (#314) +Common: bump postgresql to 42.2.16 (#369) +Common: bump jackson-databind to 2.10.5 (#367) +Common: bump to JDK 11 (#362) +Common: switch to HostName.asInetAddress to validate IP addresses (#355) +Common: fix NullPointerException on serializing invalid state (#371) +Common: fix API Request Enrichment output deserialization (#374) +Common: fix PiiPseudonymizerEnrichment for arrays and improve unit tests coverage (#334) +Common: fix PII enrichment adding empty objects instead of missing properties (#351) +Common: fix PathNotFoundException in PII enrichment (#339) +Common: fix pattern matching against null in ScrambleMapFunction (#338) +Common: fix flaky ThriftLoader test (#306) +Common: handle empty query string parameters in adapters (#341) +Common: make assets publishing independent of each other (#373) +Common: disable formatting on compile (#358) +Common: add sbt publishLocal operation to test action (#357) +Common: add toThrift and toRaw methods to CollectorPayload (#345) +Common: replace deprecated constructors in EnrichedEventSpec (#354) +Common: improve unit tests coverage (#335) +Common: use test Maxmind databases (#350) +Common: get rid of placeholder schema in enrichment configurations (#302) +Common: move EnrichmentConf into its own module (#303) +Common: get rid of Eval instances (#300) +Common: add tests for Input for SQL enrichment (#316) + Version 1.3.2 (2020-09-06) -------------------------- Common: convert null fields in EnrichedEvent to None in PartiallyEnrichedEvent (#331) diff --git a/build.sbt b/build.sbt index 457bdaeb5..0dd1566b0 100644 --- a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ lazy val root = project.in(file(".")) .settings(name := "enrich") .settings(BuildSettings.basicSettings) - .aggregate(common, beam, stream, kinesis, kafka, nsq, stdin, integrationTests) + .aggregate(common, beam, stream, kinesis, kafka, nsq, stdin, fs2) lazy val common = project .in(file("modules/common")) @@ -54,8 +54,8 @@ lazy val common = project Dependencies.Libraries.scalaForex, Dependencies.Libraries.scalaWeather, Dependencies.Libraries.gatlingJsonpath, - Dependencies.Libraries.scalaLruMap, Dependencies.Libraries.badRows, + Dependencies.Libraries.igluClient, Dependencies.Libraries.snowplowRawEvent, Dependencies.Libraries.collectorPayload, Dependencies.Libraries.schemaSniffer, @@ -130,7 +130,11 @@ lazy val nsq = project .settings( packageName in Docker := "snowplow/stream-enrich-nsq", ) - .settings(libraryDependencies ++= Seq(Dependencies.Libraries.nsqClient)) + .settings(libraryDependencies ++= Seq( + 
Dependencies.Libraries.log4j, + Dependencies.Libraries.log4jApi, + Dependencies.Libraries.nsqClient + )) .enablePlugins(JavaAppPackaging, DockerPlugin) .dependsOn(stream) @@ -178,13 +182,59 @@ lazy val beam = ) .enablePlugins(JavaAppPackaging, DockerPlugin, BuildInfoPlugin) -lazy val integrationTests = project - .in(file("modules/integration-tests")) - .settings(moduleName := "integration-tests") - .settings(allStreamSettings) +Global / onChangedBuildSource := ReloadOnSourceChanges + +lazy val fs2 = project + .in(file("modules/fs2")) + .dependsOn(common) + .settings(BuildSettings.basicSettings) + .settings(BuildSettings.formatting) + .settings(BuildSettings.scoverageSettings) .settings(BuildSettings.addExampleConfToTestCp) - .settings(libraryDependencies ++= Seq( - Dependencies.Libraries.kafka, - Dependencies.Libraries.jinJava - )) - .dependsOn(stream % "test->test", kafka % "test->compile") + .settings(BuildSettings.sbtAssemblySettings) + .settings( + name := "fs2-enrich", + description := "High-performance streaming Snowplow Enrich job built on top of functional streams", + buildInfoKeys := Seq[BuildInfoKey](organization, name, version, description), + buildInfoPackage := "com.snowplowanalytics.snowplow.enrich.fs2.generated", + packageName in Docker := "snowplow/fs2-enrich", + ) + .settings(parallelExecution in Test := false) + .settings( + libraryDependencies ++= Seq( + Dependencies.Libraries.decline, + Dependencies.Libraries.fs2PubSub, + Dependencies.Libraries.circeExtras, + Dependencies.Libraries.circeLiteral, + Dependencies.Libraries.circeConfig, + Dependencies.Libraries.catsEffect, + Dependencies.Libraries.fs2, + Dependencies.Libraries.fs2Io, + Dependencies.Libraries.slf4j, + Dependencies.Libraries.sentry, + Dependencies.Libraries.log4cats, + Dependencies.Libraries.catsRetry, + Dependencies.Libraries.http4sClient, + Dependencies.Libraries.fs2BlobS3, + Dependencies.Libraries.fs2BlobGcs, + Dependencies.Libraries.metrics, + Dependencies.Libraries.pureconfig.withRevision(Dependencies.V.pureconfig013), + Dependencies.Libraries.pureconfigCats.withRevision(Dependencies.V.pureconfig013), + Dependencies.Libraries.pureconfigCirce.withRevision(Dependencies.V.pureconfig013), + Dependencies.Libraries.specs2, + Dependencies.Libraries.specs2CE, + Dependencies.Libraries.scalacheck, + Dependencies.Libraries.specs2Scalacheck, + Dependencies.Libraries.http4sDsl, + Dependencies.Libraries.http4sServer + ), + addCompilerPlugin("com.olegpy" %% "better-monadic-for" % "0.3.1") + ) + .enablePlugins(BuildInfoPlugin) + .settings(BuildSettings.dockerSettings) + .enablePlugins(BuildInfoPlugin, JavaAppPackaging, DockerPlugin) + +lazy val bench = project + .in(file("modules/bench")) + .dependsOn(fs2 % "test->test") + .enablePlugins(JmhPlugin) diff --git a/config/config.fs2.hocon.sample b/config/config.fs2.hocon.sample new file mode 100644 index 000000000..c2b2f0e26 --- /dev/null +++ b/config/config.fs2.hocon.sample @@ -0,0 +1,47 @@ +// "Gcp" is the only valid option now +auth = { + type = "Gcp" +} + +// Collector input +input = { + type = "PubSub" + subscription = "projects/test-project/subscriptions/inputSub" + + // Local FS supported for testing purposes + // type = "FileSystem" + // dir = "/var/collector" +} + +// Enriched events output +good = { + type = "PubSub" + topic = "projects/test-project/topics/good-topic" + + // Local FS supported for testing purposes + // type = "FileSystem" + // dir = "/var/enriched" +} + +// Bad rows output +bad = { + type = "PubSub" + topic = 
"projects/test-project/topics/bad-topic" + + // Local FS supported for testing purposes + // type = "FileSystem" + // dir = "/var/bad" +} + +// Optional, for tracking runtime exceptions +sentry = { + dsn = "http://sentry.acme.com" +} + +// Optional, period after which enrich assets should be checked for updates +// no assets will be updated if the key is absent +assetsUpdatePeriod = "7 days" + +// Optional, period after Dropwizard will print out its metrics +// no metrics will be printed if the key is absent +metricsReportPeriod = "1 second" \ No newline at end of file diff --git a/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala b/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala index 6b07de9e9..1c4ff1d02 100644 --- a/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala +++ b/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala @@ -25,14 +25,18 @@ import scala.util.Try import cats.Id import cats.effect.Clock + import io.circe.Json import io.circe.syntax._ -import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.joda.time.{DateTime, DateTimeZone} import org.joda.time.format.DateTimeFormat + +import com.snowplowanalytics.snowplow.badrows._ + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.PiiPseudonymizerConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.PiiPseudonymizerConf object utils { @@ -88,10 +92,8 @@ object utils { /** Determine if we have to emit pii transformation events. 
*/ def emitPii(confs: List[EnrichmentConf]): Boolean = confs - .collect { case c: PiiPseudonymizerConf => c } - .headOption - .map(_.emitIdentificationEvent) - .getOrElse(false) + .collectFirst { case c: PiiPseudonymizerConf => c } + .exists(_.emitIdentificationEvent) // We want to take one-tenth of the payload characters (not taking into account multi-bytes char) private val ReductionFactor = 10 diff --git a/modules/beam/src/test/resources/beam-enrich-geolite2-city.mmdb b/modules/beam/src/test/resources/beam-enrich-geolite2-city.mmdb new file mode 100644 index 000000000..9dac0f6a2 Binary files /dev/null and b/modules/beam/src/test/resources/beam-enrich-geolite2-city.mmdb differ diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala index 4f72161ce..618369dad 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala @@ -14,15 +14,22 @@ */ package com.snowplowanalytics.snowplow.enrich.beam -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ import io.circe.literal._ -import org.scalatest._ -import matchers.should.Matchers._ -import singleton._ +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.AnonIpConf + +import org.scalatest.matchers.should.Matchers._ import org.scalatest.freespec.AnyFreeSpec +import com.snowplowanalytics.snowplow.enrich.beam.singleton._ + class SingletonSpec extends AnyFreeSpec { + + val placeholder = SchemaKey("com.acme", "placeholder", "jsonschema", SchemaVer.Full(1, 0, 0)) + "the singleton object should" - { "make a ClientSingleton.get function available" - { "which throws if the resolver can't be parsed" in { @@ -42,7 +49,7 @@ class SingletonSpec extends AnyFreeSpec { "which builds and stores the registry" in { val reg = EnrichmentRegistrySingleton.get( - List(AnonIpConf(AnonIPv4Octets.Two, AnonIPv6Segments.Two)) + List(AnonIpConf(placeholder, AnonIPv4Octets.Two, AnonIPv6Segments.Two)) ) reg.anonIp shouldBe defined } diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala index 5e8f249cc..d7914cebc 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala @@ -114,7 +114,7 @@ object SpecHelpers { contentType: Option[String] = None, headers: List[String] = Nil, ipAddress: String = "", - networkUserId: String = java.util.UUID.randomUUID().toString(), + networkUserId: String = java.util.UUID.randomUUID().toString, path: String = "", querystring: Option[String] = None, refererUri: Option[String] = None, @@ -171,7 +171,7 @@ object SpecHelpers { def copyResource(resource: String, localFile: String): Unit = { Files.copy( - Paths.get(getClass.getResource(resource).toURI()), + Paths.get(getClass.getResource(resource).toURI), Paths.get(localFile) ) () @@ -180,7 +180,7 @@ object SpecHelpers { object CI extends Tag( - if (sys.env.get("CI").map(_ == "true").getOrElse(false)) "" else classOf[Ignore].getName + if 
(sys.env.get("CI").contains("true")) "" else classOf[Ignore].getName ) -object OER extends Tag(if (sys.env.get("OER_KEY").isDefined) "" else classOf[Ignore].getName) +object OER extends Tag(if (sys.env.contains("OER_KEY")) "" else classOf[Ignore].getName) diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala index ec81ba929..76ce5ce88 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala @@ -19,10 +19,13 @@ import java.time.Instant import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import cats.implicits._ + import io.circe.parser + import com.snowplowanalytics.iglu.core.SelfDescribingData -import com.snowplowanalytics.iglu.core.circe.instances._ +import com.snowplowanalytics.iglu.core.circe.implicits._ import utils._ import org.scalatest.freespec.AnyFreeSpec @@ -111,7 +114,7 @@ class UtilsSpec extends AnyFreeSpec with Matchers { badRowSizeViolation.failure.maximumAllowedSizeBytes shouldEqual 150 badRowSizeViolation.failure.actualSizeBytes shouldEqual 267 badRowSizeViolation.failure.expectation shouldEqual "bad row exceeded the maximum size" - badRowSizeViolation.payload.line shouldEqual "{\"schema\":\"iglu" + badRowSizeViolation.payload.event shouldEqual "{\"schema\":\"iglu" badRowSizeViolation.processor shouldEqual processor } } @@ -124,7 +127,7 @@ class UtilsSpec extends AnyFreeSpec with Matchers { badRowSizeViolation.failure.maximumAllowedSizeBytes shouldEqual 400 badRowSizeViolation.failure.actualSizeBytes shouldEqual 100 badRowSizeViolation.failure.expectation shouldEqual "event passed enrichment but exceeded the maximum allowed size as a result" - badRowSizeViolation.payload.line shouldEqual ("a" * 40) + badRowSizeViolation.payload.event shouldEqual ("a" * 40) badRowSizeViolation.processor shouldEqual processor } } diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala index 9b1efb54f..39cd3eb4e 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala @@ -12,15 +12,18 @@ * See the Apache License Version 2.0 for the specific language governing permissions and * limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.beam -package enrichments +package com.snowplowanalytics.snowplow.enrich.beam.enrichments import java.nio.file.Paths import cats.syntax.option._ + +import io.circe.literal._ + +import com.snowplowanalytics.snowplow.enrich.beam.{CI, Enrich, SpecHelpers} + import com.spotify.scio.io.PubsubIO import com.spotify.scio.testing._ -import io.circe.literal._ object ApiRequestEnrichmentSpec { val contexts = @@ -51,8 +54,8 @@ class ApiRequestEnrichmentSpec extends PipelineSpec { "--raw=in", "--enriched=out", "--bad=bad", - "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI()), - "--enrichments=" + Paths.get(getClass.getResource("/api_request").toURI()) + "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI), + "--enrichments=" + Paths.get(getClass.getResource("/api_request").toURI) ) .input(PubsubIO.readCoder[Array[Byte]]("in"), raw) .distCache(DistCacheIO(""), List.empty[Either[String, String]]) diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala index f0144a69a..61dd11113 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala @@ -57,7 +57,7 @@ class IpLookupsEnrichmentSpec extends PipelineSpec { val url = "http://snowplow-hosted-assets.s3.amazonaws.com/third-party/maxmind/GeoLite2-City.mmdb" val localFile = "./ip_geo" - SpecHelpers.downloadLocalEnrichmentFile(url, localFile) + SpecHelpers.copyLocalEnrichmentFile("/beam-enrich-geolite2-city.mmdb", localFile) JobTest[Enrich.type] .args( diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala index 6481ea00a..41d8d8adc 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala @@ -12,15 +12,18 @@ * See the Apache License Version 2.0 for the specific language governing permissions and * limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.beam -package enrichments +package com.snowplowanalytics.snowplow.enrich.beam.enrichments import java.nio.file.Paths +import io.circe.literal._ + import cats.syntax.option._ + import com.spotify.scio.io.PubsubIO import com.spotify.scio.testing._ -import io.circe.literal._ + +import com.snowplowanalytics.snowplow.enrich.beam.{CI, Enrich, SpecHelpers} object SqlQueryEnrichmentSpec { val contexts = @@ -48,8 +51,8 @@ class SqlQueryEnrichmentSpec extends PipelineSpec { "--raw=in", "--enriched=out", "--bad=bad", - "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI()), - "--enrichments=" + Paths.get(getClass.getResource("/sql_query").toURI()) + "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI), + "--enrichments=" + Paths.get(getClass.getResource("/sql_query").toURI) ) .input(PubsubIO.readCoder[Array[Byte]]("in"), raw) .distCache(DistCacheIO(""), List.empty[Either[String, String]]) diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala index 1f938b4e5..ac0288786 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala @@ -36,7 +36,7 @@ object YauaaEnrichmentSpec { "event_format" -> "jsonschema", "event_version" -> "1-0-0", "event" -> "page_ping", - "derived_contexts" -> json"""{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-0","data":{"deviceBrand":"Unknown","deviceName":"Desktop","layoutEngineNameVersion":"Gecko 12.0","operatingSystemNameVersion":"Windows 7","layoutEngineBuild":"20100101","layoutEngineNameVersionMajor":"Gecko 12","operatingSystemName":"Windows NT","agentVersionMajor":"12","layoutEngineVersionMajor":"12","deviceClass":"Desktop","agentNameVersionMajor":"Firefox 12","deviceCpuBits":"64","operatingSystemClass":"Desktop","layoutEngineName":"Gecko","agentName":"Firefox","agentVersion":"12.0","layoutEngineClass":"Browser","agentNameVersion":"Firefox 12.0","operatingSystemVersion":"7","deviceCpu":"Intel x86_64","agentClass":"Browser","layoutEngineVersion":"12.0"}}]}""".noSpaces + "derived_contexts" -> json"""{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-1","data":{"deviceBrand":"Unknown","deviceName":"Desktop","operatingSystemVersionMajor":"7","layoutEngineNameVersion":"Gecko 12.0","operatingSystemNameVersion":"Windows 7","layoutEngineBuild":"20100101","layoutEngineNameVersionMajor":"Gecko 12","operatingSystemName":"Windows NT","agentVersionMajor":"12","layoutEngineVersionMajor":"12","deviceClass":"Desktop","agentNameVersionMajor":"Firefox 12","operatingSystemNameVersionMajor":"Windows 7","deviceCpuBits":"64","operatingSystemClass":"Desktop","layoutEngineName":"Gecko","agentName":"Firefox","agentVersion":"12.0","layoutEngineClass":"Browser","agentNameVersion":"Firefox 12.0","operatingSystemVersion":"7","deviceCpu":"Intel x86_64","agentClass":"Browser","layoutEngineVersion":"12.0"}}]}""".noSpaces ) } diff --git a/modules/bench/build.sbt b/modules/bench/build.sbt new file mode 100644 index 000000000..6e115c649 --- /dev/null +++ b/modules/bench/build.sbt @@ -0,0 
+1,6 @@ +sourceDirectory in Jmh := (sourceDirectory in Test).value +classDirectory in Jmh := (classDirectory in Test).value +dependencyClasspath in Jmh := (dependencyClasspath in Test).value +// rewire tasks, so that 'jmh:run' automatically invokes 'jmh:compile' (otherwise a clean 'jmh:run' would fail) +compile in Jmh := (compile in Jmh).dependsOn(compile in Test).value +run in Jmh := (run in Jmh).dependsOn(Keys.compile in Jmh).evaluated \ No newline at end of file diff --git a/modules/bench/src/test/resources/simplelogger.properties b/modules/bench/src/test/resources/simplelogger.properties new file mode 100644 index 000000000..7c0551b2b --- /dev/null +++ b/modules/bench/src/test/resources/simplelogger.properties @@ -0,0 +1,2 @@ +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.Assets=off +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.test.TestEnvironment=off diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala new file mode 100644 index 000000000..ca12ab37d --- /dev/null +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.bench + +import org.openjdk.jmh.annotations._ + +import java.util.concurrent.TimeUnit + +import cats.effect.{ContextShift, IO, Clock, Blocker} + +import fs2.Stream + +import com.snowplowanalytics.iglu.client.Client + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry +import com.snowplowanalytics.snowplow.enrich.common.loaders.ThriftLoader +import com.snowplowanalytics.snowplow.enrich.fs2.test.TestEnvironment +import com.snowplowanalytics.snowplow.enrich.fs2.{Enrich, Environment, EnrichSpec, Payload} + +import org.apache.http.message.BasicNameValuePair + + +/** + * @example + * {{{ + * jmh:run -i 15 -wi 10 -f1 -t1 EnrichBench + * }}} + */ +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +class EnrichBench { + + implicit val ioClock: Clock[IO] = Clock.create[IO] + + @Benchmark + def measureEnrichWithMinimalPayload(state: EnrichBench.BenchState) = { + implicit val CS: ContextShift[IO] = state.contextShift + Enrich.enrichWith[IO](IO.pure(EnrichmentRegistry()), state.blocker, Client.IgluCentral, None, (_: Option[Long]) => IO.unit)(state.raw).unsafeRunSync() + } + + @Benchmark + def measureToCollectorPayload(state: EnrichBench.BenchState) = { + ThriftLoader.toCollectorPayload(state.raw.data, Enrich.processor) + } + + @Benchmark + @OperationsPerInvocation(50) // 5 events repeated 10 times + def measureRunWithNoEnrichments(state: EnrichBench.BenchState) = { + // We used this benchmark to check if running the whole `enrichWith` on a blocking + // thread-pool would give us an increase in performance. Results haven't confirmed it: + // EnrichBench.measureRunWithNoEnrichments avgt 15 341.144 ± 18.884 us/op <- smaller blocker + // EnrichBench.measureRunWithNoEnrichments avgt 15 326.608 ± 16.714 us/op <- wrapping blocker + // EnrichBench.measureRunWithNoEnrichments avgt 15 292.907 ± 15.894 us/op <- no blocker at all + // However, I'm still leaving the "smaller blocker" in the hope that with actual IO enrichments + // it will give the expected increase in performance + implicit val CS: ContextShift[IO] = state.contextShift + state.useEnvironment(e => Enrich.run[IO](e).compile.drain).unsafeRunSync() + } +} + +object EnrichBench { + @State(Scope.Benchmark) + class BenchState { + var raw: Payload[IO, Array[Byte]] = _ + var useEnvironment: (Environment[IO] => IO[Unit]) => IO[Unit] = _ + var contextShift: ContextShift[IO] = _ + var blocker: Blocker = _ + + @Setup(Level.Trial) + def setup(): Unit = { + + raw = EnrichSpec.payload[IO] + + val input = Stream.emits(List( + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.40") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.41") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.42") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.43") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.44") :: EnrichSpec.querystring + ), + )).repeatN(10).map(cp => Payload(cp.toRaw, IO.unit)).covary[IO] + + useEnvironment = TestEnvironment.make(input).map(_.env).use(_: Environment[IO] => IO[Unit]) + + contextShift = IO.contextShift(scala.concurrent.ExecutionContext.global) + + blocker = 
Blocker[IO].use(IO.pure).unsafeRunSync() + } + } +} \ No newline at end of file diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala new file mode 100644 index 000000000..5b65e66de --- /dev/null +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.bench + +import org.openjdk.jmh.annotations._ + +import java.util.concurrent.TimeUnit + +import cats.Id +import cats.data.Validated + +import cats.effect.{IO, Clock} + +import io.circe.Json + +import com.snowplowanalytics.iglu.client.{Resolver, Client, CirceValidator} + +import com.snowplowanalytics.snowplow.enrich.common.EtlPipeline +import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry + +import com.snowplowanalytics.snowplow.enrich.fs2.{Enrich, EnrichSpec} + +import org.joda.time.DateTime + +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.AverageTime, Mode.Throughput)) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +class EtlPipelineBench { + + private implicit val ioClock: Clock[IO] = Clock.create[IO] + + private implicit val idClock: Clock[Id] = new Clock[Id] { + final def realTime(unit: TimeUnit): Id[Long] = + unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS) + final def monotonic(unit: TimeUnit): Id[Long] = + unit.convert(System.nanoTime(), TimeUnit.NANOSECONDS) + } + + @Benchmark + def measureProcessEventsIO(state: EtlPipelineBench.BenchState) = { + val payload = EnrichSpec.colllectorPayload + EtlPipeline.processEvents[IO](state.adapterRegistry, state.enrichmentRegistryIo, Client.IgluCentral, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))).unsafeRunSync() + } + + @Benchmark + def measureProcessEventsId(state: EtlPipelineBench.BenchState) = { + val payload = EnrichSpec.colllectorPayload + EtlPipeline.processEvents[Id](state.adapterRegistry, state.enrichmentRegistryId, state.clientId, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))) + } +} + +object EtlPipelineBench { + + + @State(Scope.Benchmark) + class BenchState { + var dateTime: DateTime = _ + var adapterRegistry: AdapterRegistry = _ + var enrichmentRegistryId: EnrichmentRegistry[Id] = _ + var enrichmentRegistryIo: EnrichmentRegistry[IO] = _ + var clientId: Client[Id, Json] = _ + var clientIO: Client[IO, Json] = _ + + @Setup(Level.Trial) + def setup(): Unit = { + dateTime = DateTime.parse("2010-06-30T01:20+02:00") + adapterRegistry = new AdapterRegistry() + enrichmentRegistryId = EnrichmentRegistry[Id]() + enrichmentRegistryIo = EnrichmentRegistry[IO]() + clientId = Client[Id, 
Json](Resolver(List(), None), CirceValidator) + clientIO = Client[IO, Json](Resolver(List(), None), CirceValidator) + } + } +} diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala new file mode 100644 index 000000000..fb6d84979 --- /dev/null +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.bench + +import org.openjdk.jmh.annotations._ +import java.util.concurrent.TimeUnit + +import com.snowplowanalytics.snowplow.enrich.common.loaders.ThriftLoader +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.fs2.{Enrich, EnrichSpec} + +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +class ThriftLoaderBench { + + @Benchmark + def measureToCollectorPayload(state: ThriftLoaderBench.BenchState) = + ThriftLoader.toCollectorPayload(state.data, Enrich.processor) + + @Benchmark + def measureNormalize(state: ThriftLoaderBench.BenchState) = { + Enrich.encodeEvent(state.event) + } +} + +object ThriftLoaderBench { + @State(Scope.Benchmark) + class BenchState { + var data: Array[Byte] = _ + var event: EnrichedEvent = _ + + @Setup(Level.Trial) + def setup(): Unit = { + data = EnrichSpec.colllectorPayload.toRaw + + event = new EnrichedEvent() + event.setApp_id("foo") + event.setEvent_id("deadbeef-dead-dead-dead-deaddeafbeef") + event.setUser_ipaddress("128.0.1.2") + event.setUnstruct_event("""{"some": "json"}""") + } + } +} + diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala index 18243ba69..b8b06dfe5 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala @@ -10,25 +10,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters +package com.snowplowanalytics.snowplow.enrich.common.adapters import java.time.Instant import cats.Monad import cats.data.{NonEmptyList, Validated} + import cats.effect.Clock -import cats.syntax.functor._ -import cats.syntax.validated._ +import cats.implicits._ + +import io.circe.Json + import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.client.Client + import com.snowplowanalytics.snowplow.badrows._ -import io.circe.Json -import loaders.CollectorPayload -import registry._ -import registry.snowplow._ -import utils.HttpClient +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry._ +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow.{RedirectAdapter, Tp1Adapter, Tp2Adapter} +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient /** * The AdapterRegistry lets us convert a CollectorPayload into one or more RawEvents, using a given @@ -96,13 +98,13 @@ class AdapterRegistry(remoteAdapters: Map[(String, String), RemoteAdapter] = Map ): F[Validated[BadRow, NonEmptyList[RawEvent]]] = (adapters.get((payload.api.vendor, payload.api.version)) match { case Some(adapter) => adapter.toRawEvents(payload, client) - case _ => + case None => val f = FailureDetails.AdapterFailure.InputData( "vendor/version", Some(s"${payload.api.vendor}/${payload.api.version}"), "vendor/version combination is not supported" ) - Monad[F].pure(f.invalidNel) + Monad[F].pure(f.invalidNel[NonEmptyList[RawEvent]]) }).map(_.leftMap(enrichFailure(_, payload, payload.api.vendor, payload.api.version, processor))) private def enrichFailure( diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala index e16bd5517..8a8c7e1b0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala @@ -38,7 +38,7 @@ object RawEvent { RE( re.api.vendor, re.api.version, - re.parameters.toList.map { case (k, v) => NVP(k, Option(v)) }, + re.parameters.toList.map { case (k, v) => NVP(k, v) }, re.contentType, re.source.name, re.source.encoding, diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala index dd4639258..cd9a85738 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala @@ -10,17 +10,12 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.Monad -import cats.data.{NonEmptyList, ValidatedNel} +import cats.data.{NonEmptyList, Validated, ValidatedNel} import cats.data.Validated._ -import cats.syntax.either._ -import cats.syntax.eq._ -import cats.syntax.option._ -import cats.syntax.validated._ +import cats.implicits._ import cats.effect.Clock @@ -29,23 +24,23 @@ import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails - import io.circe._ import io.circe.syntax._ import org.apache.http.NameValuePair import org.joda.time.{DateTime, DateTimeZone} -import org.joda.time.format.DateTimeFormat - -import loaders.CollectorPayload -import utils.{HttpClient, JsonUtils => JU} +import org.joda.time.format.{DateTimeFormat, DateTimeFormatter} +import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.snowplow.enrich.common.RawEventParameters +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils => JU} trait Adapter { // Signature for a Formatter function - type FormatterFunc = (RawEventParameters) => Json + type FormatterFunc = RawEventParameters => Json // The encoding type to be used val EventEncType = "UTF-8" @@ -53,7 +48,7 @@ trait Adapter { private val AcceptedQueryParameters = Set("nuid", "aid", "cv", "eid", "ttm", "url") // Datetime format we need to convert timestamps to - val JsonSchemaDateTimeFormat = + val JsonSchemaDateTimeFormat: DateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZone(DateTimeZone.UTC) private def toStringField(seconds: Long): String = { @@ -112,7 +107,7 @@ trait Adapter { /** * Converts a CollectorPayload instance into raw events. - * @param payload The CollectorPaylod containing one or more raw events as collected by a + * @param payload The `CollectorPayload` containing one or more raw events as collected by a * Snowplow collector * @param client The Iglu client used for schema lookup and validation * @return a Validation boxing either a NEL of RawEvents on Success, or a NEL of Failure Strings @@ -126,8 +121,8 @@ trait Adapter { * @param parameters A NonEmptyList of name:value pairs * @return the name:value pairs in Map form */ - protected[registry] def toMap(parameters: List[NameValuePair]): Map[String, String] = - parameters.map(p => p.getName -> p.getValue).toMap + protected[registry] def toMap(parameters: List[NameValuePair]): Map[String, Option[String]] = + parameters.map(p => p.getName -> Option(p.getValue)).toMap /** * Convenience function to build a simple formatter of RawEventParameters. 
@@ -168,14 +163,22 @@ trait Adapter { ): RawEventParameters = { val params = formatter(parameters - ("nuid", "aid", "cv", "p")) val json = toUnstructEvent(SelfDescribingData(schema, params)).noSpaces + buildUnstructEventParams(tracker, platform, parameters, json) + } + + def buildUnstructEventParams( + tracker: String, + platform: String, + parameters: RawEventParameters, + json: String + ): Map[String, Option[String]] = Map( - "tv" -> tracker, - "e" -> "ue", - "p" -> parameters.getOrElse("p", platform), // Required field - "ue_pr" -> json + "tv" -> Option(tracker), + "e" -> Some("ue"), + "p" -> parameters.getOrElse("p", Option(platform)), // Required field + "ue_pr" -> Option(json) ) ++ parameters.filterKeys(AcceptedQueryParameters) - } /** * Creates a Snowplow unstructured event by nesting the provided JValue in a self-describing @@ -223,13 +226,7 @@ trait Adapter { platform: String ): RawEventParameters = { val json = toUnstructEvent(SelfDescribingData(schema, eventJson.asJson)).noSpaces - Map( - "tv" -> tracker, - "e" -> "ue", - "p" -> qsParams.getOrElse("p", platform), // Required field - "ue_pr" -> json - ) ++ - qsParams.filterKeys(AcceptedQueryParameters) + buildUnstructEventParams(tracker, platform, qsParams, json) } /** @@ -251,14 +248,7 @@ trait Adapter { platform: String ): RawEventParameters = { val json = toUnstructEvent(SelfDescribingData(schema, eventJson)).noSpaces - - Map( - "tv" -> tracker, - "e" -> "ue", - "p" -> qsParams.getOrElse("p", platform), // Required field - "ue_pr" -> json - ) ++ - qsParams.filterKeys(AcceptedQueryParameters) + buildUnstructEventParams(tracker, platform, qsParams, json) } /** @@ -272,17 +262,11 @@ trait Adapter { * or Failures */ protected[registry] def rawEventsListProcessor( - rawEventsList: List[ValidatedNel[FailureDetails.AdapterFailure, RawEvent]] + rawEventsList: List[Validated[NonEmptyList[FailureDetails.AdapterFailure], RawEvent]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = { - val successes: List[RawEvent] = - for { - Valid(s) <- rawEventsList - } yield s - - val failures: List[FailureDetails.AdapterFailure] = - (for { - Invalid(NonEmptyList(h, t)) <- rawEventsList - } yield h :: t).flatten + val (failures, successes) = rawEventsList.separate match { + case (nel, list) => (nel.flatMap(_.toList), list) + } (successes, failures) match { // No Failures collected. 
@@ -411,7 +395,7 @@ trait Adapter { object Adapter { /** The Iglu schema URI for a Snowplow unstructured event */ - val UnstructEvent = SchemaKey( + val UnstructEvent: SchemaKey = SchemaKey( "com.snowplowanalytics.snowplow", "unstruct_event", "jsonschema", @@ -419,7 +403,7 @@ object Adapter { ) /** The Iglu schema URI for a Snowplow custom contexts */ - val Contexts = SchemaKey( + val Contexts: SchemaKey = SchemaKey( "com.snowplowanalytics.snowplow", "contexts", "jsonschema", diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala index 6b99367dd..a5fe05b77 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala @@ -135,9 +135,9 @@ object CloudfrontAccessLogAdapter extends Adapter { case "" => None case nonempty => nonempty.some } - val qsParams: Map[String, String] = schemaCompatibleFields(8) match { + val qsParams: Map[String, Option[String]] = schemaCompatibleFields(8) match { case "" => Map() - case url => Map("url" -> url) + case url => Map("url" -> Option(url)) } val userAgent = schemaCompatibleFields(9) match { case "" => None diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala index 9c87b8e5c..b5578251b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala @@ -503,10 +503,14 @@ object GoogleAnalyticsAdapter extends Adapter { NonEmptyList .one(FailureDetails.AdapterFailure.InputData("body", bodyPart.some, e)) ) - hitType <- params.get("t").toRight { - val msg = "no t parameter provided: cannot determine hit type" - NonEmptyList - .one(FailureDetails.AdapterFailure.InputData("body", bodyPart.some, msg)) + hitType <- params.get("t") match { + case Some(Some(t)) => Right(t) + case _ => + val msg = "no t parameter provided: cannot determine hit type" + Left( + NonEmptyList + .one(FailureDetails.AdapterFailure.InputData("body", bodyPart.some, msg)) + ) } // direct mappings mappings = translatePayload(params, directMappings(hitType)) @@ -542,9 +546,9 @@ object GoogleAnalyticsAdapter extends Adapter { case (s, d) if hitType != PageViewHitType || s != unstructEventData(PageViewHitType).schemaKey => SelfDescribingData(s, d.asJson) } - val contextParam: Map[String, String] = + val contextParam: Map[String, Option[String]] = if (contextJsons.isEmpty) Map.empty - else Map("co" -> toContexts(contextJsons).noSpaces) + else Map("co" -> Some(toContexts(contextJsons).noSpaces)) (trTable, schema, contextParam) }.toEither payload <- translatePayload(params, result._1) @@ -553,7 +557,7 @@ object GoogleAnalyticsAdapter extends Adapter { RawEvent( api = payload.api, parameters = result._3 ++ mappings ++ - Map("e" -> "ue", "ue_pr" -> unstructEvent, "tv" -> Protocol, "p" -> "srv"), + Map("e" -> Some("ue"), "ue_pr" -> Some(unstructEvent), "tv" -> Some(Protocol), "p" -> 
Some("srv")), contentType = payload.contentType, source = payload.source, context = payload.context @@ -569,10 +573,11 @@ object GoogleAnalyticsAdapter extends Adapter { * @return a translated params */ private def translatePayload( - originalParams: Map[String, String], + originalParams: Map[String, Option[String]], translationTable: Map[String, KVTranslation] ): Either[FailureDetails.AdapterFailure, Map[String, FieldType]] = { val m = originalParams + .collect { case (k, Some(v)) => (k, v) } .foldLeft(Map.empty[String, Either[FailureDetails.AdapterFailure, FieldType]]) { case (m, (fieldName, value)) => translationTable @@ -592,8 +597,11 @@ object GoogleAnalyticsAdapter extends Adapter { * @param translationTable mapping between original params and the wanted format * @return a translated params */ - private def translatePayload(originalParams: Map[String, String], translationTable: Map[String, String]): Map[String, String] = - originalParams.foldLeft(Map.empty[String, String]) { + private def translatePayload( + originalParams: Map[String, Option[String]], + translationTable: Map[String, String] + ): Map[String, Option[String]] = + originalParams.foldLeft(Map.empty[String, Option[String]]) { case (m, (fieldName, value)) => translationTable .get(fieldName) @@ -610,11 +618,12 @@ object GoogleAnalyticsAdapter extends Adapter { * @return a map containing the discovered contexts keyed by schema */ private def buildContexts( - originalParams: Map[String, String], + originalParams: Map[String, Option[String]], referenceTable: Map[SchemaKey, Map[String, KVTranslation]], fieldToSchemaMap: Map[String, SchemaKey] ): ValidatedNel[FailureDetails.AdapterFailure, Map[SchemaKey, Map[String, FieldType]]] = { val m = originalParams + .collect { case (k, Some(v)) => (k, v) } .foldLeft( Map.empty[SchemaKey, Map[String, ValidatedNel[FailureDetails.AdapterFailure, FieldType]]] ) { @@ -648,7 +657,7 @@ object GoogleAnalyticsAdapter extends Adapter { * @return a map containing the composite contexts keyed by schema */ private def buildCompositeContexts( - originalParams: Map[String, String], + originalParams: Map[String, Option[String]], referenceTable: List[MPData], schemasWithCU: List[SchemaKey], nrCompFieldsPerSchema: Map[SchemaKey, Int], @@ -657,6 +666,7 @@ object GoogleAnalyticsAdapter extends Adapter { for { // composite params have digits in their key composite <- originalParams + .collect { case (k, Some(v)) => (k, v) } .filterKeys(k => k.exists(_.isDigit)) .asRight brokenDown <- composite.toList.sorted.map { @@ -693,7 +703,7 @@ object GoogleAnalyticsAdapter extends Adapter { case (k, m) => val values = transpose(m.values.map(_.toList).toList) k -> (originalParams.get("cu") match { - case Some(currency) if schemasWithCU.contains(k) => + case Some(Some(currency)) if schemasWithCU.contains(k) => values .map(m.keys zip _) .map(l => ("currencyCode" -> StringType(currency) :: l.toList).toMap) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala index cb6de3554..5ba21985b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} @@ -20,16 +18,21 @@ import cats.effect.Clock import cats.syntax.either._ import cats.syntax.option._ import cats.syntax.validated._ + +import com.snowplowanalytics.iglu.core.{SchemaKey, SelfDescribingData} +import com.snowplowanalytics.iglu.core.circe.implicits._ + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.iglu.core.{SchemaKey, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.instances._ + import com.snowplowanalytics.snowplow.badrows._ + import io.circe._ import io.circe.syntax._ -import loaders.CollectorPayload -import utils.{ConversionUtils, HttpClient, JsonUtils} +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils, HttpClient, JsonUtils} /** * Transforms a collector payload which either: @@ -66,7 +69,7 @@ object IgluAdapter extends Adapter { ] = { val _ = client val params = toMap(payload.querystring) - (params.get("schema"), payload.body, payload.contentType) match { + (params.get("schema").flatten, payload.body, payload.contentType) match { case (_, Some(_), None) => val msg = s"expected one of $contentTypesStr" Monad[F].pure( @@ -74,9 +77,7 @@ object IgluAdapter extends Adapter { ) case (None, Some(body), Some(contentType)) => Monad[F].pure(payloadSdJsonToEvent(payload, body, contentType, params)) - case (Some(schemaUri), Some(_), Some(_)) => - Monad[F].pure(payloadToEventWithSchema(payload, schemaUri, params)) - case (Some(schemaUri), None, _) => + case (Some(schemaUri), _, _) => // Ignore body Monad[F].pure(payloadToEventWithSchema(payload, schemaUri, params)) case (None, None, _) => val nel = NonEmptyList.of( @@ -101,7 +102,7 @@ object IgluAdapter extends Adapter { payload: CollectorPayload, body: String, contentType: String, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = contentType match { case contentTypes._1 => sdJsonBodyToEvent(payload, body, params) @@ -122,7 +123,7 @@ object IgluAdapter extends Adapter { private[registry] def sdJsonBodyToEvent( payload: CollectorPayload, body: String, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = JsonUtils.extractJson(body) match { case Right(parsed) => @@ -163,7 +164,7 @@ object IgluAdapter extends Adapter { private[registry] def payloadToEventWithSchema( payload: CollectorPayload, schemaUri: String, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = SchemaKey.fromUri(schemaUri) match { case Left(parseError) => @@ -221,7 +222,7 @@ object IgluAdapter extends Adapter { payload: CollectorPayload, body: String, schemaUri: SchemaKey, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = { def buildRawEvent(e: Json): RawEvent = RawEvent( @@ -268,7 
+269,7 @@ object IgluAdapter extends Adapter { payload: CollectorPayload, body: String, schemaUri: SchemaKey, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = (for { bodyMap <- ConversionUtils diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala index f406a2d3f..187302896 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala @@ -101,9 +101,11 @@ object MailchimpAdapter extends Adapter { params <- ConversionUtils .parseUrlEncodedForm(body) .leftMap(e => FailureDetails.AdapterFailure.InputData("body", body.some, e)) - eventType <- params.get("type").toRight { - val msg = "no `type` parameter provided: cannot determine event type" - FailureDetails.AdapterFailure.InputData("body", body.some, msg) + eventType <- params.get("type") match { + case Some(Some(typE)) => Right(typE) + case _ => + val msg = "no `type` parameter provided: cannot determine event type" + Left(FailureDetails.AdapterFailure.InputData("body", body.some, msg)) } schema <- lookupSchema(eventType.some, EventSchemaMap) allParams = toMap(payload.querystring) ++ reformatParameters(params) @@ -130,7 +132,7 @@ object MailchimpAdapter extends Adapter { */ private[registry] def toJsons(parameters: RawEventParameters): List[(String, Json)] = for { - (k, v) <- parameters.toList + (k, v) <- parameters.toList.collect { case (k, Some(v)) => (k, v) } } yield toNestedJson(toKeys(k), v) /** @@ -180,8 +182,8 @@ object MailchimpAdapter extends Adapter { */ private[registry] def reformatParameters(parameters: RawEventParameters): RawEventParameters = parameters.get("fired_at") match { - case Some(firedAt) => - parameters.updated("fired_at", JU.toJsonSchemaDateTime(firedAt, MailchimpDateTimeFormat)) - case None => parameters + case Some(Some(firedAt)) => + parameters.updated("fired_at", Some(JU.toJsonSchemaDateTime(firedAt, MailchimpDateTimeFormat))) + case _ => parameters } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala index 1d04a2bd7..aa50b50b0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala @@ -111,6 +111,7 @@ object MailgunAdapter extends Adapter { toMap( URLEncodedUtils.parse(URI.create("http://localhost/?"
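MailchimpAdapter's lookup above shows the matching idiom used throughout this change: with Option[String] values, a successful lookup is the nested Some(Some(v)), while both None (key absent) and Some(None) (key present without a value) fall to the failure branch. A sketch with a hypothetical body map:

    def eventType(params: Map[String, Option[String]]): Either[String, String] =
      params.get("type") match {
        case Some(Some(t)) => Right(t)
        case _             => Left("no `type` parameter provided")
      }

    eventType(Map("type" -> Some("subscribe"))) // Right("subscribe")
    eventType(Map("type" -> None))              // Left(...): key without value
    eventType(Map.empty)                        // Left(...): key missing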
+ body), UTF_8).asScala.toList ) + .collect { case (k, Some(v)) => (k, v) } ) } match { case TF(e) => diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala index 23cd49cd1..89ee8c443 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala @@ -140,6 +140,7 @@ object MandrillAdapter extends Adapter { for { bodyMap <- ConversionUtils .parseUrlEncodedForm(rawEventString) + .map(_.collect { case (k, Some(v)) => (k, v) }) .leftMap(e => FailureDetails.AdapterFailure.InputData("body", rawEventString.some, e)) res <- bodyMap match { case map if map.size != 1 => diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala index 7b8bca26d..62123b71f 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala @@ -106,6 +106,7 @@ object OlarkAdapter extends Adapter { toMap( URLEncodedUtils.parse(URI.create("http://localhost/?" + body), UTF_8).asScala.toList ) + .collect { case (k, Some(v)) => (k, v) } } match { case TF(e) => val msg = s"could not parse body: ${JU.stripInstanceEtc(e.getMessage).orNull}" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala index 4d689ea2e..2e72871e6 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala @@ -76,16 +76,7 @@ object PingdomAdapter extends Adapter { case Left(f) => Monad[F].pure(f.invalid) case Right(s) => s.get("message") match { - case None => - val msg = "no `message` parameter provided" - val formattedQS = s.map { case (k, v) => s"$k=$v" }.mkString("&") - val failure = FailureDetails.AdapterFailure.InputData( - "querystring", - formattedQS.some, - msg - ) - Monad[F].pure(failure.invalidNel) - case Some(event) => + case Some(Some(event)) => Monad[F].pure((for { parsedEvent <- JsonUtils .extractJson(event) @@ -117,6 +108,15 @@ object PingdomAdapter extends Adapter { ) ) }).toValidatedNel) + case _ => + val msg = "no `message` parameter provided" + val formattedQS = s.map { case (k, v) => s"$k=${v.getOrElse("null")}" }.mkString("&") + val failure = FailureDetails.AdapterFailure.InputData( + "querystring", + formattedQS.some, + msg + ) + Monad[F].pure(failure.invalidNel) } } } @@ -133,10 +133,10 @@ object PingdomAdapter extends Adapter { */ private[registry] def reformatMapParams( params: List[NameValuePair] - ): Either[NonEmptyList[FailureDetails.AdapterFailure], Map[String, String]] = { - val formatted = params.map { value => - (value.getName, value.getValue) match { - case (k, PingdomValueRegex(v)) => + ): 
Either[NonEmptyList[FailureDetails.AdapterFailure], Map[String, Option[String]]] = { + val formatted = params.map { nvp => + (nvp.getName, Option(nvp.getValue)) match { + case (k, Some(PingdomValueRegex(v))) => FailureDetails.AdapterFailure .InputData(k, v.some, s"should not pass regex $PingdomValueRegex") .asLeft @@ -144,7 +144,7 @@ object PingdomAdapter extends Adapter { } } - val successes: List[(String, String)] = formatted.collect { case Right(s) => s } + val successes: List[(String, Option[String])] = formatted.collect { case Right(s) => s } val failures: List[FailureDetails.AdapterFailure] = formatted.collect { case Left(f) => f } (successes, failures) match { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala index 53a07e539..b34bcd9e4 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala @@ -115,7 +115,7 @@ final case class RemoteAdapter( rawEvents = nonEmptyEvents.map { e => RawEvent( api = payload.api, - parameters = e, + parameters = e.map { case (k, v) => (k, Option(v)) }, contentType = payload.contentType, source = payload.source, context = payload.context diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala index bdf7b39b1..90474da98 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala @@ -97,6 +97,7 @@ object UnbounceAdapter extends Adapter { toMap( URLEncodedUtils.parse(URI.create("http://localhost/?" 
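PingdomAdapter's reformatMapParams now wraps nvp.getValue in Option(...) because NameValuePair#getValue returns null for a valueless query parameter; Option.apply converts that null into None instead of letting it leak into the map. A sketch, assuming the same org.apache.http classes:

    import org.apache.http.NameValuePair
    import org.apache.http.message.BasicNameValuePair

    val withValue: NameValuePair = new BasicNameValuePair("message", "{}")
    val noValue: NameValuePair = new BasicNameValuePair("p", null)

    // Option.apply is null-safe: null becomes None rather than Some(null).
    Option(withValue.getValue) // Some("{}")
    Option(noValue.getValue)   // None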
+ body), UTF_8).asScala.toList ) + .collect { case (k, Some(v)) => (k, v) } } match { case TF(e) => val msg = s"could not parse body: ${JU.stripInstanceEtc(e.getMessage).orNull}" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala index c6cb50cc7..7d8aabe52 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala @@ -136,7 +136,7 @@ object UrbanAirshipAdapter extends Adapter { api = payload.api, parameters = toUnstructEventParams( TrackerVersion, - toMap(payload.querystring) ++ Map("ttm" -> toTtmFormat(tts), "eid" -> id), + toMap(payload.querystring) ++ Map("ttm" -> Option(toTtmFormat(tts)), "eid" -> Option(id)), schema, json, "srv" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala index 83b0c3728..902302d17 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala @@ -10,10 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
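UrbanAirshipAdapter above shows the other direction of the conversion: values that are always present (here ttm and eid) must be lifted into Option before they can be merged into the Option-valued query-string map. A sketch with hypothetical values:

    val qsParams: Map[String, Option[String]] = Map("aid" -> Some("app-1"))

    // toTtmFormat(tts) and id always produce a value, so Option(...) simply
    // lifts them to Some(...) to match the map's value type.
    val merged: Map[String, Option[String]] =
      qsParams ++ Map("ttm" -> Option("1510344635000"), "eid" -> Option("e1"))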
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry -package snowplow +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} @@ -26,15 +23,18 @@ import cats.effect.Clock import io.circe._ import io.circe.syntax._ -import com.snowplowanalytics.iglu.client.Client -import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ +import com.snowplowanalytics.iglu.client.Client +import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup + import com.snowplowanalytics.snowplow.badrows.FailureDetails -import loaders.CollectorPayload -import utils.{HttpClient, ConversionUtils => CU, JsonUtils => JU} +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils => JU, ConversionUtils => CU} /** * The Redirect Adapter is essentially a pre-processor for @@ -86,35 +86,26 @@ object RedirectAdapter extends Adapter { Monad[F].pure(failure.invalidNel) } else originalParams.get("u") match { - case None => - val msg = "missing `u` parameter: not a valid URI redirect" - val qs = originalParams.map(t => s"${t._1}=${t._2}").mkString("&") - val failure = - FailureDetails.TrackerProtocolViolation.InputData( - "querystring", - qs.some, - msg - ) - Monad[F].pure(failure.invalidNel) - case Some(u) => + case Some(Some(u)) => val json = buildUriRedirect(u) - val newParams: Either[FailureDetails.TrackerProtocolViolation, Map[String, String]] = - if (originalParams.contains("e")) { - // Already have an event so add the URI redirect as a context (more fiddly) - def newCo = Map("co" -> toContext(json).noSpaces) - (originalParams.get("cx"), originalParams.get("co")) match { - case (None, None) => newCo.asRight - case (None, Some(co)) if co == "" => newCo.asRight - case (None, Some(co)) => addToExistingCo(json, co).map(str => Map("co" -> str)) - case (Some(cx), _) => addToExistingCx(json, cx).map(str => Map("cx" -> str)) - } - } else - // Add URI redirect as an unstructured event - Map("e" -> "ue", "ue_pr" -> toUnstructEvent(json).noSpaces).asRight + val newParams: Either[FailureDetails.TrackerProtocolViolation, Map[String, Option[String]]] = + (if (originalParams.contains("e")) { + // Already have an event so add the URI redirect as a context (more fiddly) + def newCo = Map("co" -> toContext(json).noSpaces) + (originalParams.get("cx"), originalParams.get("co")) match { + case (None, None) => newCo.asRight + case (None, Some(Some(co))) if co == "" => newCo.asRight + case (None, Some(Some(co))) => addToExistingCo(json, co).map(str => Map("co" -> str)) + case (Some(Some(cx)), _) => addToExistingCx(json, cx).map(str => Map("cx" -> str)) + } + } else + // Add URI redirect as an unstructured event + Map("e" -> "ue", "ue_pr" -> toUnstructEvent(json).noSpaces).asRight) + .map(_.map { case (k, v) => (k, Option(v)) }) val fixedParams = Map( - "tv" -> TrackerVersion, - "p" -> originalParams.getOrElse("p", TrackerPlatform) // Required field + "tv" -> Some(TrackerVersion), + "p" -> originalParams.getOrElse("p", Some(TrackerPlatform)) // Required field ) Monad[F].pure((for { @@ -129,6 +120,16 
@@ object RedirectAdapter extends Adapter { ) ) } yield ev).leftMap(e => NonEmptyList.one(e)).toValidated) + case _ => + val msg = "missing `u` parameter: not a valid URI redirect" + val qs = originalParams.map(t => s"${t._1}=${t._2.getOrElse("null")}").mkString("&") + val failure = + FailureDetails.TrackerProtocolViolation.InputData( + "querystring", + qs.some, + msg + ) + Monad[F].pure(failure.invalidNel) } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala index 6b1bdcd6c..0ee83458c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala @@ -10,22 +10,25 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry -package snowplow +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} + import cats.effect.Clock import cats.syntax.validated._ + +import io.circe.Json + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.snowplow.badrows._ -import io.circe.Json -import loaders.CollectorPayload -import utils.HttpClient +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient /** Version 1 of the Tracker Protocol is GET only. All data comes in on the querystring. */ object Tp1Adapter extends Adapter { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala index 14fe4dad4..f15b64a7f 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala @@ -10,28 +10,29 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
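RedirectAdapter builds newParams with plain String values and lifts the whole map into Option in a single final .map, rather than wrapping every branch; fixedParams likewise defaults the required p field via getOrElse against the Option-valued map. A condensed sketch of that lifting step (values hypothetical):

    val plain: Either[String, Map[String, String]] =
      Right(Map("e" -> "ue", "ue_pr" -> """{"schema":"iglu:...","data":{}}"""))

    // One map over the Either, one map over the Map: every value becomes Some.
    val lifted: Either[String, Map[String, Option[String]]] =
      plain.map(_.map { case (k, v) => (k, Option(v)) })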
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry -package snowplow +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow import cats.Monad import cats.data.{EitherT, NonEmptyList, Validated, ValidatedNel} import cats.data.Validated._ import cats.implicits._ + import cats.effect.Clock +import io.circe.Json + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaCriterion, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.instances._ +import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows.FailureDetails -import io.circe.Json - -import loaders.CollectorPayload -import utils.{HttpClient, JsonUtils => JU} +import com.snowplowanalytics.snowplow.enrich.common.RawEventParameters +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils => JU} /** * Version 2 of the Tracker Protocol supports GET and POST. Note that with POST, data can still be @@ -40,9 +41,9 @@ import utils.{HttpClient, JsonUtils => JU} object Tp2Adapter extends Adapter { // Expected content types for a request body private object ContentTypes { - val list = + val list: List[String] = List("application/json", "application/json; charset=utf-8", "application/json; charset=UTF-8") - val str = list.mkString(", ") + val str: String = list.mkString(", ") } // Request body expected to validate against this JSON Schema @@ -104,7 +105,7 @@ object Tp2Adapter extends Adapter { case (None, None) => Monad[F].pure(NonEmptyList.one(qsParams).valid) case (Some(bdy), Some(_)) => // Build our NEL of parameters (for { - json <- extractAndValidateJson(PayloadDataSchema, bdy, "body", client) + json <- extractAndValidateJson(PayloadDataSchema, bdy, client) nel <- EitherT.fromEither[F](toParametersNel(json, qsParams)) } yield nel).toValidated } @@ -132,11 +133,11 @@ object Tp2Adapter extends Adapter { FailureDetails.TrackerProtocolViolation ], NonEmptyList[RawEventParameters]] = { val events: Option[ - Vector[Vector[Validated[FailureDetails.TrackerProtocolViolation, (String, String)]]] + Vector[Vector[Validated[FailureDetails.TrackerProtocolViolation, (String, Option[String])]]] ] = for { topLevel <- instance.asArray fields <- topLevel.map(_.asObject).sequence - res = fields.map(_.toVector.map(toParameter)) + res = fields.map(_.toVector.map(toParameter).map(_.map { case (k, v) => (k, Some(v)) })) } yield res events match { @@ -200,7 +201,6 @@ object Tp2Adapter extends Adapter { /** * Extract the JSON from a String, and validate it against the supplied JSON Schema. 
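In Tp2Adapter each event object of the POST body is decoded into (String, String) pairs by toParameter and then lifted with Some, since a JSON field that is present always has a value; only query-string parameters can be valueless. A sketch of the lifting applied to one decoded event (hypothetical data):

    val decoded: Vector[(String, String)] =
      Vector("e" -> "pv", "url" -> "https://example.com")

    // Same shape as the fields.map(...) change in toParametersNel.
    val params: Vector[(String, Option[String])] =
      decoded.map { case (k, v) => (k, Some(v)) }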
- * @param field The name of the field containing the JSON instance * @param schemaCriterion The schema that we expected this self-describing JSON to conform to * @param instance A JSON instance as String * @param client Our Iglu client, for schema lookups @@ -210,7 +210,6 @@ object Tp2Adapter extends Adapter { private def extractAndValidateJson[F[_]: Monad: RegistryLookup: Clock]( schemaCriterion: SchemaCriterion, instance: String, - field: String, client: Client[F, Json] ): EitherT[F, NonEmptyList[FailureDetails.TrackerProtocolViolation], Json] = (for { @@ -219,7 +218,7 @@ object Tp2Adapter extends Adapter { .leftMap(e => NonEmptyList.one( FailureDetails.TrackerProtocolViolation - .NotJson(field, instance.some, e) + .NotJson("body", instance.some, e) ) ) ) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala index 35c3a773b..04509e3c0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala @@ -633,11 +633,11 @@ object EnrichmentManager { def setEventFingerprint( event: EnrichedEvent, - sourceMap: Map[String, String], + parameters: RawEventParameters, eventFingerprint: Option[EventFingerprintEnrichment] ): Unit = eventFingerprint match { - case Some(efe) => event.event_fingerprint = efe.getEventFingerprint(sourceMap) + case Some(efe) => event.event_fingerprint = efe.getEventFingerprint(parameters) case _ => () } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala index 699d21691..fa6e36ef1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala @@ -10,32 +10,35 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments +package com.snowplowanalytics.snowplow.enrich.common.enrichments import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} + import cats.effect.Clock import cats.implicits._ import io.circe._ import io.circe.syntax._ +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} +import com.snowplowanalytics.iglu.core.circe.implicits._ + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.instances._ import com.snowplowanalytics.forex.CreateForex import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups import com.snowplowanalytics.refererparser.CreateParser import com.snowplowanalytics.weather.providers.openweather.CreateOWM -import registry._ -import registry.apirequest.ApiRequestEnrichment -import registry.pii.PiiPseudonymizerEnrichment -import registry.sqlquery.SqlQueryEnrichment -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf._ + +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.ApiRequestEnrichment +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.PiiPseudonymizerEnrichment +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.SqlQueryEnrichment /** Companion which holds a constructor for the EnrichmentRegistry. */ object EnrichmentRegistry { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala index d49e21356..9a4a6d7e1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala @@ -35,7 +35,7 @@ object Transform { * @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\ */ private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[FailureDetails.EnrichmentFailure, Unit] = { - val sourceMap: SourceMap = raw.parameters + val sourceMap: SourceMap = raw.parameters.collect { case (k, Some(v)) => (k, v) } val firstPassTransform = enriched.transform(sourceMap, firstPassTransformMap) val secondPassTransform = enriched.transform(sourceMap, secondPassTransformMap) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala index 49c1087ff..0c22fd0b1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala @@ -10,20 +10,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
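Transform keeps its SourceMap (a plain Map[String, String]) unchanged: the mutable EnrichedEvent fields are set from concrete values, so valueless parameters are filtered out once, up front. A sketch of that narrowing step, assuming SourceMap = Map[String, String] as elsewhere in the module:

    type SourceMap = Map[String, String]

    val rawParameters: Map[String, Option[String]] =
      Map("e" -> Some("pv"), "page" -> Some("Home"), "cx" -> None)

    // Same one-liner as in Transform.transform: drop keys without values.
    val sourceMap: SourceMap =
      rawParameters.collect { case (k, Some(v)) => (k, v) }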
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -import cats.data.ValidatedNel -import cats.data.Validated +import java.net.{Inet4Address, Inet6Address} + +import scala.util.Try + +import cats.data.{Validated, ValidatedNel} import cats.syntax.either._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import io.circe._ -import utils.CirceUtils +import io.circe.Json -import java.net.{Inet4Address, Inet6Address} import com.google.common.net.{InetAddresses => GuavaInetAddress} -import scala.util.Try +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.AnonIpConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create a AnonIpConf from a Json. */ object AnonIpEnrichment extends ParseableEnrichment { @@ -32,7 +34,7 @@ object AnonIpEnrichment extends ParseableEnrichment { /** * Creates an AnonIpEnrichment instance from a Json. - * @param c The anon_ip enrichment JSON + * @param config The anon_ip enrichment JSON * @param schemaKey provided for the enrichment, must be supported by this enrichment * @return an AnonIpEnrichment configuration */ @@ -52,7 +54,7 @@ object AnonIpEnrichment extends ParseableEnrichment { .toEither ipv4Octets <- AnonIPv4Octets.fromInt(paramIPv4Octet) ipv6Segment <- AnonIPv6Segments.fromInt(paramIPv6Segment) - } yield AnonIpConf(ipv4Octets, ipv6Segment)).toValidatedNel + } yield AnonIpConf(schemaKey, ipv4Octets, ipv6Segment)).toValidatedNel } /** How many octets (ipv4) to anonymize */ diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala index 0c1653633..e6d53d7d6 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala @@ -10,16 +10,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + import io.circe._ -import utils.MapTransformer.SourceMap -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.QueryStringParameters +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CampaignAttributionConf +import com.snowplowanalytics.snowplow.enrich.common.utils.MapTransformer.SourceMap +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. 
Lets us create a CampaignAttributionEnrichment from a Json */ object CampaignAttributionEnrichment extends ParseableEnrichment { @@ -58,6 +61,7 @@ object CampaignAttributionEnrichment extends ParseableEnrichment { .extract[Map[String, String]](c, "parameters", "fields", "mktClickId") .fold(_ => Map(), s => s) CampaignAttributionConf( + schemaKey, medium, source, term, @@ -97,7 +101,7 @@ final case class MarketingCampaign( * @param termParameters List of marketing term parameters * @param contentParameters List of marketing content parameters * @param campaignParameters List of marketing campaign parameters - * @param mktClick Map of click ID parameters to networks + * @param clickIdParameters Map of click ID parameters to networks */ final case class CampaignAttributionEnrichment( mediumParameters: List[String], @@ -116,7 +120,7 @@ final case class CampaignAttributionEnrichment( * @return Option boxing the value of the campaign parameter */ private def getFirstParameter(parameterList: List[String], sourceMap: SourceMap): Option[String] = - parameterList.find(sourceMap.contains(_)).map(sourceMap(_)) + parameterList.find(sourceMap.contains).map(sourceMap(_)) /** * Extract the marketing fields from a URL. diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala index 37697ff5f..d6b9bb244 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala @@ -10,20 +10,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.ValidatedNel import cats.syntax.either._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - import io.circe._ import io.circe.syntax._ import org.apache.http.message.BasicHeaderValueParser +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CookieExtractorConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils object CookieExtractorEnrichment extends ParseableEnrichment { override val supportedSchema = @@ -44,7 +43,7 @@ object CookieExtractorEnrichment extends ParseableEnrichment { (for { _ <- isParseable(config, schemaKey) cookieNames <- CirceUtils.extract[List[String]](config, "parameters", "cookies").toEither - } yield CookieExtractorConf(cookieNames)).toValidatedNel + } yield CookieExtractorConf(schemaKey, cookieNames)).toValidatedNel } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala index ddd65818d..ad0d81492 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala @@ -10,23 +10,28 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.time.ZonedDateTime import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ + +import io.circe._ + import com.snowplowanalytics.forex.{CreateForex, Forex} import com.snowplowanalytics.forex.model._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import com.snowplowanalytics.snowplow.badrows._ -import io.circe._ + import org.joda.money.CurrencyUnit import org.joda.time.DateTime -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CurrencyConversionConf /** Companion object. Lets us create an CurrencyConversionEnrichment instance from a Json. 
*/ object CurrencyConversionEnrichment extends ParseableEnrichment { @@ -101,10 +106,10 @@ final case class CurrencyConversionEnrichment[F[_]: Monad]( /** * Attempt to convert if the initial currency and value are both defined - * @param inputCurrency Option boxing the initial currency if it is present + * @param initialCurrency Option boxing the initial currency if it is present * @param value Option boxing the amount to convert * @return None.success if the inputs were not both defined, - * otherwise Validation[Option[_]] boxing the result of the conversion + * otherwise `Validation[Option[_]]` boxing the result of the conversion */ private def performConversion( initialCurrency: Option[Either[FailureDetails.EnrichmentFailure, CurrencyUnit]], diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala new file mode 100644 index 000000000..4502ea59d --- /dev/null +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2012-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry + +import java.net.URI + +import cats.{Functor, Monad} +import cats.data.EitherT + +import org.joda.money.CurrencyUnit + +import com.snowplowanalytics.iglu.core.SchemaKey + +import com.snowplowanalytics.forex.CreateForex +import com.snowplowanalytics.forex.model.AccountType +import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups +import com.snowplowanalytics.refererparser.CreateParser +import com.snowplowanalytics.weather.providers.openweather.CreateOWM + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.{ + ApiRequestEnrichment, + CreateApiRequestEnrichment, + HttpApi +} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.{CreateSqlQueryEnrichment, Rdbms, SqlQueryEnrichment} + +sealed trait EnrichmentConf { + + /** Iglu schema key to identify the enrichment in bad row, some enrichments don't use it */ + def schemaKey: SchemaKey + + /** + * List of files, such as local DBs that need to be downloaded and distributed across workers + * First element of pair is URI to download file from, second is a local path to store it in + */ + def filesToCache: List[(URI, String)] = Nil +} + +object EnrichmentConf { + + final case class ApiRequestConf( + schemaKey: SchemaKey, + inputs: List[apirequest.Input], + api: HttpApi, + outputs: List[apirequest.Output], + cache: apirequest.Cache + ) extends EnrichmentConf { + def enrichment[F[_]: CreateApiRequestEnrichment]: F[ApiRequestEnrichment[F]] = + ApiRequestEnrichment[F](this) + } + + final case class PiiPseudonymizerConf( + schemaKey: SchemaKey, + fieldList: List[pii.PiiField], + emitIdentificationEvent: Boolean, + strategy: pii.PiiStrategy + ) extends EnrichmentConf { + def enrichment: pii.PiiPseudonymizerEnrichment = + pii.PiiPseudonymizerEnrichment(fieldList, emitIdentificationEvent, strategy) + } + + final case class SqlQueryConf( + schemaKey: SchemaKey, + inputs: List[sqlquery.Input], + db: Rdbms, + query: SqlQueryEnrichment.Query, + output: sqlquery.Output, + cache: SqlQueryEnrichment.Cache + ) extends EnrichmentConf { + def enrichment[F[_]: Monad: CreateSqlQueryEnrichment]: F[SqlQueryEnrichment[F]] = + SqlQueryEnrichment[F](this) + } + + final case class AnonIpConf( + schemaKey: SchemaKey, + octets: AnonIPv4Octets.AnonIPv4Octets, + segments: AnonIPv6Segments.AnonIPv6Segments + ) extends EnrichmentConf { + def enrichment: AnonIpEnrichment = AnonIpEnrichment(octets, segments) + } + + final case class CampaignAttributionConf( + schemaKey: SchemaKey, + mediumParameters: List[String], + sourceParameters: List[String], + termParameters: List[String], + contentParameters: List[String], + campaignParameters: List[String], + clickIdParameters: List[(String, String)] + ) extends EnrichmentConf { + def enrichment: CampaignAttributionEnrichment = + CampaignAttributionEnrichment( + mediumParameters, + sourceParameters, + termParameters, + contentParameters, + campaignParameters, + clickIdParameters + ) + } + + final case class CookieExtractorConf( + schemaKey: SchemaKey, + cookieNames: List[String] + ) extends EnrichmentConf { + def enrichment: CookieExtractorEnrichment = CookieExtractorEnrichment(cookieNames) + } + + final case class CurrencyConversionConf( + schemaKey: SchemaKey, + accountType: AccountType, + apiKey: String, + baseCurrency: CurrencyUnit + ) extends EnrichmentConf { + def enrichment[F[_]: Monad: CreateForex]: F[CurrencyConversionEnrichment[F]] = + CurrencyConversionEnrichment[F](this) + } + + 
final case class EventFingerprintConf( + schemaKey: SchemaKey, + algorithm: String => String, + excludedParameters: List[String] + ) extends EnrichmentConf { + def enrichment: EventFingerprintEnrichment = + EventFingerprintEnrichment(algorithm, excludedParameters) + } + + final case class HttpHeaderExtractorConf( + schemaKey: SchemaKey, + headersPattern: String + ) extends EnrichmentConf { + def enrichment: HttpHeaderExtractorEnrichment = HttpHeaderExtractorEnrichment(headersPattern) + } + + final case class IabConf( + schemaKey: SchemaKey, + ipFile: (URI, String), + excludeUaFile: (URI, String), + includeUaFile: (URI, String) + ) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = List(ipFile, excludeUaFile, includeUaFile) + def enrichment[F[_]: Monad: CreateIabClient]: F[IabEnrichment] = + IabEnrichment[F](this) + } + + final case class IpLookupsConf( + schemaKey: SchemaKey, + geoFile: Option[(URI, String)], + ispFile: Option[(URI, String)], + domainFile: Option[(URI, String)], + connectionTypeFile: Option[(URI, String)] + ) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = + List(geoFile, ispFile, domainFile, connectionTypeFile).flatten + def enrichment[F[_]: Functor: CreateIpLookups]: F[IpLookupsEnrichment[F]] = + IpLookupsEnrichment[F](this) + } + + final case class JavascriptScriptConf(schemaKey: SchemaKey, rawFunction: String) extends EnrichmentConf { + def enrichment: JavascriptScriptEnrichment = JavascriptScriptEnrichment(schemaKey, rawFunction) + } + + final case class RefererParserConf( + schemaKey: SchemaKey, + refererDatabase: (URI, String), + internalDomains: List[String] + ) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = List(refererDatabase) + def enrichment[F[_]: Monad: CreateParser]: EitherT[F, String, RefererParserEnrichment] = + RefererParserEnrichment[F](this) + } + + final case class UaParserConf(schemaKey: SchemaKey, uaDatabase: Option[(URI, String)]) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = List(uaDatabase).flatten + def enrichment[F[_]: Monad: CreateUaParser]: EitherT[F, String, UaParserEnrichment] = + UaParserEnrichment[F](this) + } + + final case class UserAgentUtilsConf(schemaKey: SchemaKey) extends EnrichmentConf { + def enrichment: UserAgentUtilsEnrichment = UserAgentUtilsEnrichment(schemaKey) + } + + final case class WeatherConf( + schemaKey: SchemaKey, + apiHost: String, + apiKey: String, + timeout: Int, + cacheSize: Int, + geoPrecision: Int + ) extends EnrichmentConf { + def enrichment[F[_]: Monad: CreateOWM]: EitherT[F, String, WeatherEnrichment[F]] = + WeatherEnrichment[F](this) + } + + final case class YauaaConf( + schemaKey: SchemaKey, + cacheSize: Option[Int] + ) extends EnrichmentConf { + def enrichment: YauaaEnrichment = YauaaEnrichment(cacheSize) + } +} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala index 4e9c9bd7e..df34a5621 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala @@ -10,16 +10,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
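The new EnrichmentConf ADT gives every enrichment configuration a schemaKey (for bad-row reporting) and a filesToCache hook, so a job can gather all required remote assets without pattern matching on each concrete conf type. A sketch of how a consumer might use it (the confs value is hypothetical):

    import java.net.URI
    import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf

    // One pass over the parsed configurations collects every
    // (remote URI, local path) pair to download before enriching starts.
    def assetsToDownload(confs: List[EnrichmentConf]): List[(URI, String)] =
      confs.flatMap(_.filesToCache)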
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + import io.circe._ + import org.apache.commons.codec.digest.DigestUtils -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.EventFingerprintConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.`package`.RawEventParameters /** Lets us create an EventFingerprintEnrichment from a Json. */ object EventFingerprintEnrichment extends ParseableEnrichment { @@ -54,7 +58,7 @@ object EventFingerprintEnrichment extends ParseableEnrichment { ).mapN((_, _)).toEither algorithm <- getAlgorithm(paramsAndAlgo._2) .leftMap(e => NonEmptyList.one(e)) - } yield EventFingerprintConf(algorithm, paramsAndAlgo._1)).toValidated + } yield EventFingerprintConf(schemaKey, algorithm, paramsAndAlgo._1)).toValidated /** * Look up the fingerprinting algorithm by name @@ -83,12 +87,11 @@ final case class EventFingerprintEnrichment(algorithm: String => String, exclude /** * Calculate an event fingerprint using all querystring fields except the excludedParameters - * @param parameterMap * @return Event fingerprint */ - def getEventFingerprint(parameterMap: Map[String, String]): String = { + def getEventFingerprint(parameters: RawEventParameters): String = { val builder = new StringBuilder - parameterMap.toList.sortWith(_._1 < _._1).foreach { + parameters.toList.collect { case (k, Some(v)) => (k, v) }.sortWith(_._1 < _._1).foreach { case (key, value) => if (!excludedParameters.contains(key)) { builder.append(key) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala index cd7f949c0..c7e6060e0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala @@ -10,18 +10,18 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
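getEventFingerprint now collects present values before sorting, so a valueless parameter can never contribute to (or perturb) the fingerprint. A reduced sketch of the filtering-plus-ordering step (digesting omitted):

    val parameters: Map[String, Option[String]] =
      Map("e" -> Some("pv"), "eid" -> None, "aid" -> Some("shop"))

    val fingerprintInput: List[(String, String)] =
      parameters.toList
        .collect { case (k, Some(v)) => (k, v) }
        .sortWith(_._1 < _._1)
    // List(("aid","shop"), ("e","pv")): "eid" dropped, keys in stable order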
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.ValidatedNel import cats.syntax.either._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - import io.circe._ import io.circe.syntax._ -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.HttpHeaderExtractorConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils object HttpHeaderExtractorEnrichment extends ParseableEnrichment { override val supportedSchema = @@ -48,7 +48,7 @@ object HttpHeaderExtractorEnrichment extends ParseableEnrichment { (for { _ <- isParseable(config, schemaKey) headersPattern <- CirceUtils.extract[String](config, "parameters", "headersPattern").toEither - } yield HttpHeaderExtractorConf(headersPattern)).toValidatedNel + } yield HttpHeaderExtractorConf(schemaKey, headersPattern)).toValidatedNel } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala index f695777c4..e12d1c5ac 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala @@ -10,29 +10,30 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.io.File import java.net.{InetAddress, URI} -import cats.{Eval, Id, Monad} +import cats.{Id, Monad} import cats.data.{NonEmptyList, ValidatedNel} import cats.effect.Sync import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - -import com.snowplowanalytics.iab.spidersandrobotsclient.IabClient -import com.snowplowanalytics.snowplow.badrows.FailureDetails +import org.joda.time.DateTime import io.circe._ import io.circe.generic.auto._ import io.circe.syntax._ -import org.joda.time.DateTime -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.iab.spidersandrobotsclient.IabClient +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.IabConf /** Companion object. Lets us create an IabEnrichment instance from a Json. */ object IabEnrichment extends ParseableEnrichment { @@ -109,10 +110,8 @@ object IabEnrichment extends ParseableEnrichment { /** * Contains enrichments based on IAB Spiders&Robots lookup. 
- * @param ipFile (Full URI to the IAB excluded IP list, database name) - * @param excludeUaFile (Full URI to the IAB excluded user agent list, database name) - * @param includeUaFile (Full URI to the IAB included user agent list, database name) - * @param localMode Whether to use the local database file. Enabled for tests. + * @param schemaKey enrichment's static Iglu Schema Key + * @param iabClient worker object */ final case class IabEnrichment(schemaKey: SchemaKey, iabClient: IabClient) extends Enrichment { val outputSchema = @@ -188,18 +187,6 @@ object CreateIabClient { } } - implicit def evalCreateIabClient: CreateIabClient[Eval] = - new CreateIabClient[Eval] { - def create( - ipFile: String, - excludeUaFile: String, - includeUaFile: String - ): Eval[IabClient] = - Eval.later { - new IabClient(new File(ipFile), new File(excludeUaFile), new File(includeUaFile)) - } - } - implicit def idCreateIabClient: CreateIabClient[Id] = new CreateIabClient[Id] { def create( diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala index 3fb81b183..bfab38a0c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala @@ -10,26 +10,25 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import inet.ipaddr.HostName - import cats.Functor import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.maxmind.iplookups._ -import com.snowplowanalytics.maxmind.iplookups.model._ +import io.circe._ + +import inet.ipaddr.HostName import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import io.circe._ +import com.snowplowanalytics.maxmind.iplookups._ +import com.snowplowanalytics.maxmind.iplookups.model._ -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.IpLookupsConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create an IpLookupsEnrichment instance from a Json. 
*/ object IpLookupsEnrichment extends ParseableEnrichment { @@ -58,6 +57,7 @@ object IpLookupsEnrichment extends ParseableEnrichment { getArgumentFromName(c, "connectionType").sequence ).mapN { (geo, isp, domain, connection) => IpLookupsConf( + schemaKey, file(geo, localMode), file(isp, localMode), file(domain, localMode), @@ -108,6 +108,7 @@ object IpLookupsEnrichment extends ParseableEnrichment { lruCacheSize = 20000 ) .map(i => IpLookupsEnrichment(i)) + } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala index 7ee71405c..630a6e901 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala @@ -10,24 +10,24 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ +import io.circe._ +import io.circe.parser._ + +import javax.script._ + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows.FailureDetails -import javax.script._ - -import io.circe._ -import io.circe.parser._ - -import outputs.EnrichedEvent -import utils.{CirceUtils, ConversionUtils} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.JavascriptScriptConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.utils.{CirceUtils, ConversionUtils} object JavascriptScriptEnrichment extends ParseableEnrichment { override val supportedSchema = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala index c47603ad6..4be2235b2 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala @@ -10,20 +10,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ + +import io.circe.Json + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + import com.snowplowanalytics.refererparser._ -import io.circe.Json -import utils.{ConversionUtils => CU} -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.RefererParserConf +import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils => CU, CirceUtils} /** Companion object. Lets us create a RefererParserEnrichment from a Json */ object RefererParserEnrichment extends ParseableEnrichment { @@ -54,7 +56,7 @@ object RefererParserEnrichment extends ParseableEnrichment { (uri, db, domains) }.toEither source <- getDatabaseUri(conf._1, conf._2).leftMap(NonEmptyList.one) - } yield RefererParserConf(file(source, conf._2, localFile, localMode), conf._3)).toValidated + } yield RefererParserConf(schemaKey, file(source, conf._2, localFile, localMode), conf._3)).toValidated private def file( uri: URI, diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala index a61d0b063..03dd5dabc 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala @@ -9,28 +9,29 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.io.{FileInputStream, InputStream} import java.net.URI -import cats.{Eval, Id, Monad} +import cats.{Id, Monad} import cats.data.{EitherT, NonEmptyList, ValidatedNel} + import cats.effect.Sync import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - -import com.snowplowanalytics.snowplow.badrows.FailureDetails - -import io.circe._ +import io.circe.Json import io.circe.syntax._ import ua_parser.Parser import ua_parser.Client -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.UaParserConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create a UaParserEnrichment from a Json. */ object UaParserEnrichment extends ParseableEnrichment { @@ -105,7 +106,7 @@ final case class UaParserEnrichment(schemaKey: SchemaKey, parser: Parser) extend /** * Extracts the client attributes from a useragent string, using UserAgentEnrichment. * @param useragent to extract from. Should be encoded, i.e. not previously decoded. 
- * @return the json or the message of the exception, boxed in a Scalaz Validation + * @return the json or the message of the bad row details */ def extractUserAgent(useragent: String): Either[FailureDetails.EnrichmentFailure, SelfDescribingData[Json]] = Either @@ -168,12 +169,6 @@ object CreateUaParser { Sync[F].delay(parser(uaFile)) } - implicit def evalCreateUaParser: CreateUaParser[Eval] = - new CreateUaParser[Eval] { - def create(uaFile: Option[String]): Eval[Either[String, Parser]] = - Eval.later(parser(uaFile)) - } - implicit def idCreateUaParser: CreateUaParser[Id] = new CreateUaParser[Id] { def create(uaFile: Option[String]): Id[Either[String, Parser]] = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala index ef11dbf33..78725ea8e 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala @@ -9,18 +9,24 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import scala.util.control.NonFatal import cats.data.ValidatedNel import cats.syntax.either._ import cats.syntax.option._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import com.snowplowanalytics.snowplow.badrows._ -import eu.bitwalker.useragentutils._ + import io.circe._ + +import eu.bitwalker.useragentutils._ + +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.UserAgentUtilsConf + import org.slf4j.LoggerFactory object UserAgentUtilsEnrichmentConfig extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala index 804156d75..2fa748057 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala @@ -10,8 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
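The deleted `evalCreateUaParser` above is part of a broader cleanup: every capability trait in this patch (CreateUaParser here; HttpClient, CreateSqlQueryEnrichment and DbExecutor later) loses its cats `Eval` interpreter and keeps only the `Sync` and `Id` instances. A minimal sketch of the surviving two-interpreter pattern, with hypothetical names:

import cats.Id
import cats.effect.Sync

trait CreateResource[F[_]] {
  def create(path: Option[String]): F[Either[String, String]]
}

object CreateResource {
  // Effectful interpreter: construction is suspended in F
  implicit def syncCreateResource[F[_]: Sync]: CreateResource[F] =
    new CreateResource[F] {
      def create(path: Option[String]): F[Either[String, String]] =
        Sync[F].delay(Right(path.getOrElse("embedded")))
    }

  // Strict interpreter for synchronous call sites
  implicit val idCreateResource: CreateResource[Id] =
    new CreateResource[Id] {
      def create(path: Option[String]): Id[Either[String, String]] =
        Right(path.getOrElse("embedded"))
    }
}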
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.lang.{Float => JFloat} import java.time.{Instant, ZoneOffset, ZonedDateTime} @@ -23,19 +22,21 @@ import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} -import com.snowplowanalytics.snowplow.badrows.FailureDetails - -import com.snowplowanalytics.weather.providers.openweather._ -import com.snowplowanalytics.weather.providers.openweather.responses._ +import org.joda.time.{DateTime, DateTimeZone} import io.circe._ import io.circe.generic.auto._ import io.circe.syntax._ -import org.joda.time.{DateTime, DateTimeZone} +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.weather.providers.openweather._ +import com.snowplowanalytics.weather.providers.openweather.responses._ -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.WeatherConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create an WeatherEnrichment instance from a Json */ object WeatherEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala index 0cceb153a..a33406fb1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala @@ -10,8 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import scala.collection.JavaConverters._ @@ -21,15 +20,16 @@ import cats.syntax.either._ import io.circe.Json import io.circe.syntax._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - import nl.basjes.parse.useragent.{UserAgent, UserAgentAnalyzer} -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.YauaaConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object to create an instance of YauaaEnrichment from the configuration. 
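The import reshuffles repeated across these files converge on one ordering; roughly (an illustrative sketch, not a prescriptive lint rule):

// 1. Java and Scala standard library
import java.net.URI
import scala.collection.JavaConverters._
// 2. Generic FP and JSON libraries (cats, circe)
import cats.implicits._
import io.circe.Json
// 3. Domain-specific third parties
import org.joda.time.DateTime
// 4. Snowplow ecosystem, now always fully qualified instead of relative to the enclosing package
import com.snowplowanalytics.iglu.core.SchemaKey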
*/ object YauaaEnrichment extends ParseableEnrichment { - val supportedSchema = + val supportedSchema: SchemaCriterion = SchemaCriterion( "com.snowplowanalytics.snowplow.enrichments", "yauaa_enrichment_config", @@ -38,6 +38,11 @@ object YauaaEnrichment extends ParseableEnrichment { 0 ) + val DefaultDeviceClass = "Unknown" + val DefaultResult = Map(decapitalize(UserAgent.DEVICE_CLASS) -> DefaultDeviceClass) + + val outputSchema: SchemaKey = SchemaKey("nl.basjes", "yauaa_context", "jsonschema", SchemaVer.Full(1, 0, 1)) + /** * Creates a YauaaConf instance from a JValue containing the configuration of the enrichment. * @@ -54,7 +59,7 @@ object YauaaEnrichment extends ParseableEnrichment { (for { _ <- isParseable(c, schemaKey) cacheSize <- CirceUtils.extract[Option[Int]](c, "parameters", "cacheSize").toEither - } yield YauaaConf(cacheSize)).toValidatedNel + } yield YauaaConf(schemaKey, cacheSize)).toValidatedNel /** Helper to decapitalize a string. Used for the names of the fields returned in the context. */ def decapitalize(s: String): String = @@ -81,18 +86,13 @@ final case class YauaaEnrichment(cacheSize: Option[Int]) extends Enrichment { a } - val outputSchema = SchemaKey("nl.basjes", "yauaa_context", "jsonschema", SchemaVer.Full(1, 0, 0)) - - val defaultDeviceClass = "Unknown" - val defaultResult = Map(decapitalize(UserAgent.DEVICE_CLASS) -> defaultDeviceClass) - /** * Gets the result of YAUAA user agent analysis as self-describing JSON, for a specific event. * @param userAgent User agent of the event. * @return Attributes retrieved thanks to the user agent (if any), as self-describing JSON. */ def getYauaaContext(userAgent: String): SelfDescribingData[Json] = - SelfDescribingData(outputSchema, parseUserAgent(userAgent).asJson) + SelfDescribingData(YauaaEnrichment.outputSchema, parseUserAgent(userAgent).asJson) /** * Gets the map of attributes retrieved by YAUAA from the user agent. @@ -102,10 +102,10 @@ final case class YauaaEnrichment(cacheSize: Option[Int]) extends Enrichment { def parseUserAgent(userAgent: String): Map[String, String] = userAgent match { case null | "" => - defaultResult + YauaaEnrichment.DefaultResult case _ => val parsedUA = uaa.parse(userAgent) - parsedUA.getAvailableFieldNames.asScala + parsedUA.getAvailableFieldNamesSorted.asScala .map(field => decapitalize(field) -> parsedUA.getValue(field)) .toMap } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala index 65f24b2cd..9ae20a644 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala @@ -10,10 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
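To make the YAUAA change above concrete (sorted field names, defaults shared on the companion object), here is a hedged sketch of the parsing behaviour, assuming the YAUAA builder API; names are illustrative:

object YauaaSketch {
  import scala.collection.JavaConverters._
  import nl.basjes.parse.useragent.UserAgentAnalyzer

  val uaa = UserAgentAnalyzer.newBuilder().withCache(10000).build()

  def decapitalize(s: String): String =
    if (s == null || s.isEmpty) s else s"${s.head.toLower}${s.tail}"

  // Mirrors parseUserAgent: empty input falls back to DefaultResult ("deviceClass" -> "Unknown")
  def parse(userAgent: String): Map[String, String] =
    Option(userAgent).filter(_.nonEmpty) match {
      case None => Map("deviceClass" -> "Unknown")
      case Some(ua) =>
        val parsed = uaa.parse(ua)
        parsed.getAvailableFieldNamesSorted.asScala
          .map(field => decapitalize(field) -> parsed.getValue(field))
          .toMap
    }
}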
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import java.util.UUID @@ -21,17 +18,20 @@ import cats.{Id, Monad} import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.implicits._ -import com.snowplowanalytics.lrumap._ - -import com.snowplowanalytics.snowplow.badrows.FailureDetails +import cats.effect.Sync import io.circe._ import io.circe.generic.auto._ -import outputs.EnrichedEvent -import utils.{CirceUtils, HttpClient} +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} +import com.snowplowanalytics.iglu.core.circe.implicits._ + +import com.snowplowanalytics.lrumap._ +import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.utils.{CirceUtils, HttpClient} object ApiRequestEnrichment extends ParseableEnrichment { override val supportedSchema = @@ -134,7 +134,7 @@ final case class ApiRequestEnrichment[F[_]: Monad: HttpClient]( contexts = jsons.parTraverse { json => SelfDescribingData .parse(json) - .leftMap(e => NonEmptyList.one(s"${json.noSpaces} is not self-describing, ${e.code}")) + .leftMap(e => NonEmptyList.one(s"${json.noSpaces} is not self-describing JSON, ${e.code}")) } outputs <- EitherT.fromEither[F](contexts) } yield outputs @@ -148,7 +148,6 @@ final case class ApiRequestEnrichment[F[_]: Monad: HttpClient]( * @return validated list of lookups, whole lookup will be failed if any of outputs were failed */ private[apirequest] def getOutputs(validInputs: Option[Map[String, String]]): EitherT[F, NonEmptyList[String], List[Json]] = { - import cats.instances.parallel._ val result: List[F[Either[Throwable, Json]]] = for { templateContext <- validInputs.toList @@ -233,4 +232,24 @@ object CreateApiRequestEnrichment { ) ) } + + implicit def syncCreateApiRequestEnrichment[F[_]: Sync]( + implicit CLM: CreateLruMap[F, String, (Either[Throwable, Json], Long)], + HTTP: HttpClient[F] + ): CreateApiRequestEnrichment[F] = + new CreateApiRequestEnrichment[F] { + def create(conf: ApiRequestConf): F[ApiRequestEnrichment[F]] = + CLM + .create(conf.cache.size) + .map(c => + ApiRequestEnrichment( + conf.schemaKey, + conf.inputs, + conf.api, + conf.outputs, + conf.cache.ttl, + c + ) + ) + } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala index 3ecca8e88..1492f0bd9 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala @@ -13,13 +13,16 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest final case class ValueNotFoundException(message: String) extends Throwable { + override def 
getMessage: String = "API Request enrichment: " ++ toString override def toString = s"Value not found $message" } final case class JsonPathException(message: String) extends Throwable { + override def getMessage: String = "API Request enrichment: " ++ toString override def toString = s"JSONPath error $message" } final case class InvalidStateException(message: String) extends Throwable { + override def getMessage: String = "API Request enrichment: " ++ toString override def toString = message } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala index ae0408edb..253db43d4 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala @@ -35,8 +35,8 @@ final case class Output(schema: String, json: Option[JsonOutput]) { def parseResponse(apiResponse: String): Either[Throwable, Json] = json match { case Some(jsonOutput) => jsonOutput.parseResponse(apiResponse) - case output => - new InvalidStateException(s"Error: Unknown output [$output]").asLeft // Cannot happen now + case None => + new InvalidStateException("Error: output key is missing").asLeft // Cannot happen now } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala index 6ed023b1f..70e996223 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala @@ -10,162 +10,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
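The effect of the new getMessage overrides at the top of this hunk, illustrated with a hypothetical snippet (the `Sketch` name is not part of the patch):

final case class ValueNotFoundSketch(message: String) extends Throwable {
  override def getMessage: String = "API Request enrichment: " ++ toString
  override def toString = s"Value not found $message"
}

// ValueNotFoundSketch("in input").getMessage
// => "API Request enrichment: Value not found in input"  (logs now name the enrichment)
// ValueNotFoundSketch("in input").toString
// => "Value not found in input"                          (toString itself stays unprefixed)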
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.{Functor, Monad} -import cats.data.{EitherT, ValidatedNel} +import cats.data.ValidatedNel import cats.syntax.either._ -import com.snowplowanalytics.forex.CreateForex -import com.snowplowanalytics.forex.model.AccountType -import com.snowplowanalytics.iglu.core._ -import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups -import com.snowplowanalytics.refererparser.CreateParser -import com.snowplowanalytics.weather.providers.openweather.CreateOWM + import io.circe._ -import org.joda.money.CurrencyUnit -import apirequest._ -import sqlquery._ -import utils.ConversionUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.utils.ConversionUtils /** Trait inherited by every enrichment config case class */ trait Enrichment -sealed trait EnrichmentConf { - def schemaKey: SchemaKey = - SchemaKey( - "com.acme", - "placeholder", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ) - def filesToCache: List[(URI, String)] = Nil -} -final case class ApiRequestConf( - override val schemaKey: SchemaKey, - inputs: List[apirequest.Input], - api: HttpApi, - outputs: List[apirequest.Output], - cache: apirequest.Cache -) extends EnrichmentConf { - def enrichment[F[_]: CreateApiRequestEnrichment]: F[ApiRequestEnrichment[F]] = - ApiRequestEnrichment[F](this) -} -final case class PiiPseudonymizerConf( - fieldList: List[pii.PiiField], - emitIdentificationEvent: Boolean, - strategy: pii.PiiStrategy -) extends EnrichmentConf { - def enrichment: pii.PiiPseudonymizerEnrichment = - pii.PiiPseudonymizerEnrichment(fieldList, emitIdentificationEvent, strategy) -} -final case class SqlQueryConf( - override val schemaKey: SchemaKey, - inputs: List[sqlquery.Input], - db: Rdbms, - query: SqlQueryEnrichment.Query, - output: sqlquery.Output, - cache: SqlQueryEnrichment.Cache -) extends EnrichmentConf { - def enrichment[F[_]: Monad: CreateSqlQueryEnrichment]: F[SqlQueryEnrichment[F]] = - SqlQueryEnrichment[F](this) -} -final case class AnonIpConf(octets: AnonIPv4Octets.AnonIPv4Octets, segments: AnonIPv6Segments.AnonIPv6Segments) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = Nil - def enrichment: AnonIpEnrichment = AnonIpEnrichment(octets, segments) -} -final case class CampaignAttributionConf( - mediumParameters: List[String], - sourceParameters: List[String], - termParameters: List[String], - contentParameters: List[String], - campaignParameters: List[String], - clickIdParameters: List[(String, String)] -) extends EnrichmentConf { - def enrichment: CampaignAttributionEnrichment = - CampaignAttributionEnrichment( - mediumParameters, - sourceParameters, - termParameters, - contentParameters, - campaignParameters, - clickIdParameters - ) -} -final case class CookieExtractorConf(cookieNames: List[String]) extends EnrichmentConf { - def enrichment: CookieExtractorEnrichment = CookieExtractorEnrichment(cookieNames) -} -final case class CurrencyConversionConf( - override val schemaKey: SchemaKey, - accountType: AccountType, - apiKey: String, - baseCurrency: CurrencyUnit -) extends EnrichmentConf { - def enrichment[F[_]: Monad: CreateForex]: F[CurrencyConversionEnrichment[F]] = - CurrencyConversionEnrichment[F](this) -} -final case class EventFingerprintConf(algorithm: String => String, excludedParameters: List[String]) extends EnrichmentConf { - 
def enrichment: EventFingerprintEnrichment = - EventFingerprintEnrichment(algorithm, excludedParameters) -} -final case class HttpHeaderExtractorConf(headersPattern: String) extends EnrichmentConf { - def enrichment: HttpHeaderExtractorEnrichment = HttpHeaderExtractorEnrichment(headersPattern) -} -final case class IabConf( - override val schemaKey: SchemaKey, - ipFile: (URI, String), - excludeUaFile: (URI, String), - includeUaFile: (URI, String) -) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = List(ipFile, excludeUaFile, includeUaFile) - def enrichment[F[_]: Monad: CreateIabClient]: F[IabEnrichment] = - IabEnrichment[F](this) -} -final case class IpLookupsConf( - geoFile: Option[(URI, String)], - ispFile: Option[(URI, String)], - domainFile: Option[(URI, String)], - connectionTypeFile: Option[(URI, String)] -) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = - List(geoFile, ispFile, domainFile, connectionTypeFile).flatten - def enrichment[F[_]: Functor: CreateIpLookups]: F[IpLookupsEnrichment[F]] = - IpLookupsEnrichment[F](this) -} -final case class JavascriptScriptConf(override val schemaKey: SchemaKey, rawFunction: String) extends EnrichmentConf { - def enrichment: JavascriptScriptEnrichment = JavascriptScriptEnrichment(schemaKey, rawFunction) -} -final case class RefererParserConf(refererDatabase: (URI, String), internalDomains: List[String]) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = List(refererDatabase) - def enrichment[F[_]: Monad: CreateParser]: EitherT[F, String, RefererParserEnrichment] = - RefererParserEnrichment[F](this) -} -final case class UaParserConf(override val schemaKey: SchemaKey, uaDatabase: Option[(URI, String)]) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = List(uaDatabase).flatten - def enrichment[F[_]: Monad: CreateUaParser]: EitherT[F, String, UaParserEnrichment] = - UaParserEnrichment[F](this) -} -final case class UserAgentUtilsConf(override val schemaKey: SchemaKey) extends EnrichmentConf { - def enrichment: UserAgentUtilsEnrichment = UserAgentUtilsEnrichment(schemaKey) -} -final case class WeatherConf( - override val schemaKey: SchemaKey, - apiHost: String, - apiKey: String, - timeout: Int, - cacheSize: Int, - geoPrecision: Int -) extends EnrichmentConf { - def enrichment[F[_]: Monad: CreateOWM]: EitherT[F, String, WeatherEnrichment[F]] = - WeatherEnrichment[F](this) -} -final case class YauaaConf(cacheSize: Option[Int]) extends EnrichmentConf { - def enrichment: YauaaEnrichment = YauaaEnrichment(cacheSize) -} - /** Trait to hold helpers relating to enrichment config */ trait ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index d13733105..1282281e8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
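The conf case classes removed above are not deleted outright: the new imports throughout this diff (e.g. EnrichmentConf.JavascriptScriptConf, EnrichmentConf.YauaaConf) show that they move under an EnrichmentConf ADT. A hedged sketch of the destination shape, inferred only from those import paths and constructor calls (the actual file is outside this hunk):

import java.net.URI

import com.snowplowanalytics.iglu.core.SchemaKey

sealed trait EnrichmentConfSketch {
  def schemaKey: SchemaKey
  def filesToCache: List[(URI, String)]
}

object EnrichmentConfSketch {
  // Matches the `YauaaConf(schemaKey, cacheSize)` call seen earlier in the diff
  final case class YauaaConf(schemaKey: SchemaKey, cacheSize: Option[Int]) extends EnrichmentConfSketch {
    def filesToCache: List[(URI, String)] = Nil
  }
}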
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package pii +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii import scala.collection.JavaConverters._ import scala.collection.mutable.MutableList @@ -20,24 +18,27 @@ import scala.collection.mutable.MutableList import cats.data.ValidatedNel import cats.implicits._ +import io.circe._ +import io.circe.jackson._ +import io.circe.syntax._ + import com.fasterxml.jackson.databind.JsonNode -import com.fasterxml.jackson.databind.node.{ArrayNode, ObjectNode, TextNode} +import com.fasterxml.jackson.databind.node.{ArrayNode, NullNode, ObjectNode, TextNode} +import com.fasterxml.jackson.databind.ObjectMapper import com.jayway.jsonpath.{Configuration, JsonPath => JJsonPath} import com.jayway.jsonpath.MapFunction -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} - -import io.circe._ -import io.circe.jackson._ -import io.circe.syntax._ - import org.apache.commons.codec.digest.DigestUtils -import adapters.registry.Adapter -import outputs.EnrichedEvent -import serializers._ -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} + +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.PiiPseudonymizerConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.serializers._ +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create a PiiPseudonymizerEnrichment from a Json. */ object PiiPseudonymizerEnrichment extends ParseableEnrichment { @@ -58,7 +59,7 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { localMode: Boolean = false ): ValidatedNel[String, PiiPseudonymizerConf] = { for { - conf <- matchesSchema(config, schemaKey) + conf <- isParseable(config, schemaKey) emitIdentificationEvent = CirceUtils .extract[Boolean](conf, "emitEvent") .toOption @@ -70,7 +71,7 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { .extract[PiiStrategyPseudonymize](config, "parameters", "strategy") .toEither piiFieldList <- extractFields(piiFields) - } yield PiiPseudonymizerConf(piiFieldList, emitIdentificationEvent, piiStrategy) + } yield PiiPseudonymizerConf(schemaKey, piiFieldList, emitIdentificationEvent, piiStrategy) }.toValidatedNel private[pii] def getHashFunction(strategyFunction: String): Either[String, DigestFunction] = @@ -133,11 +134,24 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { .map(_.asRight) .getOrElse(s"The specified json field $fieldName is not supported".asLeft) - private def matchesSchema(config: Json, schemaKey: SchemaKey): Either[String, Json] = - if (supportedSchema.matches(schemaKey)) - config.asRight - else - s"Schema key $schemaKey is not supported. A '${supportedSchema.name}' enrichment must have schema '$supportedSchema'.".asLeft + /** Helper to remove fields that were wrongly added and are not in the original JSON. See #351. 
*/ + private[pii] def removeAddedFields(hashed: Json, original: Json): Json = { + val fixedObject = for { + hashedFields <- hashed.asObject + originalFields <- original.asObject + newFields = hashedFields.toList.flatMap { + case (k, v) => originalFields(k).map(origV => (k, removeAddedFields(v, origV))) + } + } yield Json.fromFields(newFields) + + lazy val fixedArray = for { + hashedArr <- hashed.asArray + originalArr <- original.asArray + newArr = hashedArr.zip(originalArr).map { case (hashed, orig) => removeAddedFields(hashed, orig) } + } yield Json.fromValues(newArr) + + fixedObject.orElse(fixedArray).getOrElse(hashed) + } } /** @@ -209,7 +223,8 @@ final case class PiiJson( ) } .getOrElse((parsed, List.empty[JsonModifiedField])) - } yield (substituted.noSpaces, modifiedFields.toList)).getOrElse((null, List.empty)) + } yield (PiiPseudonymizerEnrichment.removeAddedFields(substituted, parsed).noSpaces, modifiedFields.toList)) + .getOrElse((null, List.empty)) /** Map context top fields with strategy if they match. */ private def mapContextTopFields(tuple: (String, Json), strategy: PiiStrategy): (String, (Json, List[JsonModifiedField])) = @@ -270,15 +285,15 @@ final case class PiiJson( val objectNode = io.circe.jackson.mapper.valueToTree[ObjectNode](json) val documentContext = JJsonPath.using(JsonPathConf).parse(objectNode) val modifiedFields = MutableList[JsonModifiedField]() - val documentContext2 = documentContext.map( - jsonPath, - new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema) - ) - // make sure it is a structure preserving method, see #3636 - //val transformedJValue = JsonMethods.fromJsonNode(documentContext.json[JsonNode]()) - //val Diff(_, erroneouslyAdded, _) = jValue diff transformedJValue - //val Diff(_, withoutCruft, _) = erroneouslyAdded diff transformedJValue - (jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList) + Option(documentContext.read[AnyRef](jsonPath)) match { // check that json object not null + case None => (jacksonToCirce(documentContext.json[JsonNode]()), modifiedFields.toList) + case _ => + val documentContext2 = documentContext.map( + jsonPath, + new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema) + ) + (jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList) + } } } @@ -296,7 +311,9 @@ private final case class ScrambleMapFunction( val _ = modifiedFields += JsonModifiedField(fieldName, s, newValue, jsonPath, schema) newValue case a: ArrayNode => - a.elements.asScala.map { + val mapper = new ObjectMapper() + val arr = mapper.createArrayNode() + a.elements.asScala.foreach { case t: TextNode => val originalValue = t.asText() val newValue = strategy.scramble(originalValue) @@ -307,9 +324,11 @@ private final case class ScrambleMapFunction( jsonPath, schema ) - newValue - case default: AnyRef => default + arr.add(newValue) + case default: AnyRef => arr.add(default) + case null => arr.add(NullNode.getInstance()) } - case default: AnyRef => default + arr + case _ => currentValue } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala index a35684aab..92112d7e8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala +++ 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala @@ -12,12 +12,13 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery -import cats.{Eval, Id} +import cats.Id + import cats.effect.Sync import cats.syntax.functor._ import cats.syntax.flatMap._ -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.SqlQueryConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf /** Initialize resources, necessary for SQL Query enrichment: cache and connection */ sealed trait CreateSqlQueryEnrichment[F[_]] { @@ -49,28 +50,6 @@ object CreateSqlQueryEnrichment { ) } - implicit def evalCreateSqlQueryEnrichment( - implicit CLM: SqlCacheInit[Eval], - CN: ConnectionRefInit[Eval], - DB: DbExecutor[Eval] - ): CreateSqlQueryEnrichment[Eval] = - new CreateSqlQueryEnrichment[Eval] { - def create(conf: SqlQueryConf): Eval[SqlQueryEnrichment[Eval]] = - for { - cache <- CLM.create(conf.cache.size) - connection <- CN.create(1) - } yield SqlQueryEnrichment( - conf.schemaKey, - conf.inputs, - conf.db, - conf.query, - conf.output, - conf.cache.ttl, - cache, - connection - ) - } - implicit def idCreateSqlQueryEnrichment( implicit CLM: SqlCacheInit[Id], CN: ConnectionRefInit[Id], diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala index 8a909229b..cfd798489 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala @@ -19,7 +19,7 @@ import scala.util.control.NonFatal import io.circe.Json -import cats.{Eval, Id, Monad} +import cats.{Id, Monad} import cats.data.EitherT import cats.effect.{Bracket, Sync} import cats.implicits._ @@ -142,77 +142,6 @@ object DbExecutor { } - implicit def evalDbExecutor: DbExecutor[Eval] = - new DbExecutor[Eval] { - self => - def getConnection(rdbms: Rdbms, connectionRef: ConnectionRef[Eval])(implicit M: Monad[Eval]): Eval[Either[Throwable, Connection]] = - for { - cachedConnection <- connectionRef.get(()).map(flattenCached) - connection <- cachedConnection match { - case Right(conn) => - for { - closed <- Eval.now(conn.isClosed) - result <- if (!closed) conn.asRight[Throwable].pure[Eval] - else - for { - newConn <- Eval.now { - Either.catchNonFatal(DriverManager.getConnection(rdbms.connectionString)) - } - _ <- connectionRef.put((), newConn) - } yield newConn - } yield result - case Left(error) => - Eval.now(error.asLeft[Connection]) - - } - } yield connection - - def execute(query: PreparedStatement): EitherT[Eval, Throwable, ResultSet] = - EitherT(Eval.now(Either.catchNonFatal(query.executeQuery()))) - - def convert(resultSet: ResultSet, names: JsonOutput.PropertyNameMode): EitherT[Eval, Throwable, List[Json]] = - EitherT { - Eval.always { - try { - val buffer = ListBuffer.empty[EitherT[Id, Throwable, Json]] - while (resultSet.next()) - buffer += transform[Id](resultSet, names)(idDbExecutor, Monad[Id]) - val parsedJsons = buffer.result().sequence - resultSet.close() - parsedJsons.value: Either[Throwable, List[Json]] - } catch { - case NonFatal(error) => error.asLeft - } - } - } - - def getMetaData(rs: 
ResultSet): EitherT[Eval, Throwable, ResultSetMetaData] = - Either.catchNonFatal(rs.getMetaData).toEitherT[Eval] - - def getColumnCount(rsMeta: ResultSetMetaData): EitherT[Eval, Throwable, Int] = - Either.catchNonFatal(rsMeta.getColumnCount).toEitherT[Eval] - - def getColumnLabel(column: Int, rsMeta: ResultSetMetaData): EitherT[Eval, Throwable, String] = - Either.catchNonFatal(rsMeta.getColumnLabel(column)).toEitherT[Eval] - - def getColumnType(column: Int, rsMeta: ResultSetMetaData): EitherT[Eval, Throwable, String] = - Either.catchNonFatal(rsMeta.getColumnClassName(column)).toEitherT[Eval] - - def getColumnValue( - datatype: String, - columnIdx: Int, - rs: ResultSet - ): EitherT[Eval, Throwable, Json] = - Either - .catchNonFatal(rs.getObject(columnIdx)) - .map(Option.apply) - .map { - case Some(any) => JsonOutput.getValue(any, datatype) - case None => Json.Null - } - .toEitherT - } - implicit def idDbExecutor: DbExecutor[Id] = new DbExecutor[Id] { def getConnection(rdbms: Rdbms, connectionRef: ConnectionRef[Id])(implicit M: Monad[Id]): Id[Either[Throwable, Connection]] = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala index a320ebf26..a9aa5760a 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala @@ -15,7 +15,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlque sealed trait SqlQueryEnrichmentError extends Throwable { val message: String override def toString = message - override def getMessage = message + override def getMessage = "SQL Query enrichment: " ++ message } final case class ValueNotFoundException(message: String) extends SqlQueryEnrichmentError final case class JsonPathException(message: String) extends SqlQueryEnrichmentError diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala index 73fc5559b..a625d8a82 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala @@ -10,10 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
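Looking back at the PII ScrambleMapFunction change earlier in this diff: a JsonPath MapFunction must hand back a Jackson node, and the previous array branch appears to have produced a lazily mapped Scala iterator that JsonPath could not use, so scrambled array elements were lost. The fix materializes a fresh ArrayNode; a hedged sketch of its core:

object PiiArraySketch {
  import scala.collection.JavaConverters._

  import com.fasterxml.jackson.databind.ObjectMapper
  import com.fasterxml.jackson.databind.node.{ArrayNode, NullNode, TextNode}

  def scrambleArray(a: ArrayNode, scramble: String => String): ArrayNode = {
    val arr = new ObjectMapper().createArrayNode()
    a.elements.asScala.foreach {
      case t: TextNode => arr.add(scramble(t.asText())) // hash only the string elements
      case null        => arr.add(NullNode.getInstance())
      case other       => arr.add(other)                // keep non-string elements as-is
    }
    arr
  }
}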
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry -package sqlquery +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery import scala.collection.immutable.IntMap @@ -21,15 +18,17 @@ import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ +import io.circe._ +import io.circe.generic.semiauto._ + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} import com.snowplowanalytics.snowplow.badrows.FailureDetails -import io.circe._ -import io.circe.generic.semiauto._ - -import outputs.EnrichedEvent -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} /** Lets us create an SqlQueryConf from a Json */ object SqlQueryEnrichment extends ParseableEnrichment { @@ -100,7 +99,7 @@ object SqlQueryEnrichment extends ParseableEnrichment { * @param db source DB configuration * @param query string representation of prepared SQL statement * @param output configuration of output context - * @param ttl cache TTL + * @param ttl cache TTL in milliseconds * @param cache actual mutable LRU cache * @param connection initialized DB connection (a mutable single-value cache) */ diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala index f6408ebc4..9c5e28363 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala @@ -10,37 +10,55 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders +package com.snowplowanalytics.snowplow.enrich.common.loaders import java.util.UUID +import scala.collection.JavaConverters._ + import cats.syntax.either._ import cats.syntax.option._ -import com.snowplowanalytics.snowplow.badrows -import com.snowplowanalytics.snowplow.badrows.{FailureDetails, NVP} - import org.apache.http.NameValuePair +import org.apache.http.client.utils.URIBuilder +import org.apache.thrift.TSerializer + import org.joda.time.DateTime +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.CollectorPayload.thrift.model1.{CollectorPayload => CollectorPayload1} + +import com.snowplowanalytics.snowplow.badrows.{FailureDetails, NVP, Payload} + /** * The canonical input format for the ETL process: it should be possible to convert any collector * input format to this format, ready for the main, collector-agnostic stage of the ETL. 
* * Unlike `RawEvent`, where `parameters` contain a single event, - * [[CollectorPayload]]'s `body` can contain a POST payload with multiple events + * [[CollectorPayload]]'s `body` can contain a POST payload with multiple events, + * hence a [[CollectorPayload]] with a `body` can amount to a `List[RawEvent]`, + * while one with only a `querystring` amounts to a single `RawEvent` + * + * @param api collector's endpoint + * @param querystring GET parameters; empty for buffered events and most webhooks, + * the actual payload if `body` is empty + * @param contentType derived from the HTTP header (should be in `Context`) + * @param body POST body for buffered events and most webhooks; + * the actual payload if `querystring` is empty + * @param source information to identify the collector + * @param context event's meta-information; some properties can be used to augment the payload */ final case class CollectorPayload( api: CollectorPayload.Api, - querystring: List[NameValuePair], // Could be empty in future trackers - contentType: Option[String], // Not always set - body: Option[String], // Not set for GETs + querystring: List[NameValuePair], + contentType: Option[String], + body: Option[String], source: CollectorPayload.Source, context: CollectorPayload.Context ) { - def toBadRowPayload: badrows.Payload.CollectorPayload = - badrows.Payload.CollectorPayload( + def toBadRowPayload: Payload.CollectorPayload = + Payload.CollectorPayload( api.vendor, api.version, querystring.map(nvp => NVP(nvp.getName, Option(nvp.getValue))), @@ -56,12 +74,46 @@ final case class CollectorPayload( context.headers, context.userId ) + + /** + * Cast back to the Thrift-generated `CollectorPayload` class coming from the collector + * Reverse of [[ThriftLoader.toCollectorPayload]] + * Used for tests and debugging + */ + def toThrift: CollectorPayload1 = { + // Timestamp must always be set, otherwise the long falls back to 1970-01-01 + val timestamp: Long = context.timestamp.map(_.getMillis.asInstanceOf[java.lang.Long]).orNull + + new CollectorPayload1(CollectorPayload.IgluUri.toSchemaUri, context.ipAddress.orNull, timestamp, source.encoding, source.name) + .setQuerystring((new URIBuilder).setParameters(querystring.asJava).build().getQuery) + .setHostname(source.hostname.orNull) + .setRefererUri(context.refererUri.orNull) + .setContentType(contentType.orNull) + .setUserAgent(context.useragent.orNull) + .setBody(body.orNull) + .setNetworkUserId(context.userId.map(_.toString).orNull) + .setHeaders(context.headers.asJava) + .setPath(api.toRaw) + } + + /** + * Transform back to the array of bytes coming from the collector topic + * Used for tests and debugging + */ + def toRaw: Array[Byte] = + CollectorPayload.serializer.serialize(toThrift) } object CollectorPayload { + /** Latest payload SchemaKey */ + val IgluUri: SchemaKey = SchemaKey("com.snowplowanalytics.snowplow", "CollectorPayload", "thrift", SchemaVer.Full(1, 0, 0)) + /** * Unambiguously identifies the collector source of this input line. + * @param name kind and version of the collector (e.g. ssc-1.0.1-kafka) + * @param encoding usually "UTF-8" + * @param hostname the actual host the collector was running on */ final case class Source( name: String, @@ -69,26 +121,41 @@ object CollectorPayload { hostname: Option[String] ) - /** Context derived by the collector.
*/ + /** + * Information *derived* by the collector to be used as meta-data (meta-payload) + * Everything else in [[CollectorPayload]] is direct payload (body and query parameters) + * @param timestamp collector_tstamp (not optional in fact) + * @param ipAddress client's IP address, can be later overwritten by `ip` param in + * `enrichments.Transform` + * @param useragent UA header, can be later overwritten by `ua` param in `enrichments.Transform` + * @param refererUri extracted from corresponding HTTP header + * @param headers all headers, including UA and referer URI + * @param userId generated by collector-set third-party cookie + */ final case class Context( - timestamp: Option[DateTime], // Must have a timestamp + timestamp: Option[DateTime], ipAddress: Option[String], useragent: Option[String], refererUri: Option[String], - headers: List[String], // Could be empty - userId: Option[UUID] // User ID generated by collector-set third-party cookie + headers: List[String], + userId: Option[UUID] ) - /** Define the vendor and version of the payload. */ - final case class Api(vendor: String, version: String) + /** + * Define the vendor and version of the payload, as determined by the collector endpoint + * Coming from [[com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry]] + */ + final case class Api(vendor: String, version: String) { - // Defaults for the tracker vendor and version before we implemented this into Snowplow. - // TODO: make private once the ThriftLoader is updated - val SnowplowTp1 = Api("com.snowplowanalytics.snowplow", "tp1") + /** Reverse back to collector's endpoint */ + def toRaw: String = if (this == SnowplowTp1) "/i" else s"$vendor/$version" + } + + /** Defaults for the tracker vendor and version before we implemented this into Snowplow */ + val SnowplowTp1: Api = Api("com.snowplowanalytics.snowplow", "tp1") // To extract the API vendor and version from the path to the requested object. - // TODO: move this to somewhere not specific to this collector - private val ApiPathRegex = """^[\/]?([^\/]+)\/([^\/]+)[\/]?$""".r + private val ApiPathRegex = """^[/]?([^/]+)/([^/]+)[/]?$""".r /** * Parses the requested URI path to determine the specific API version this payload follows.
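To make the new ApiPathRegex concrete, a small sketch of how the vendor/version pair is pulled out of a request path (illustrative values, hypothetical object name):

object ApiPathSketch {
  val ApiPathRegex = """^[/]?([^/]+)/([^/]+)[/]?$""".r

  def parsePath(path: String): Option[(String, String)] =
    path match {
      case ApiPathRegex(vendor, version) => Some((vendor, version))
      case _                             => None
    }

  // parsePath("/com.snowplowanalytics.snowplow/tp2") == Some(("com.snowplowanalytics.snowplow", "tp2"))
  // parsePath("/i") == None  (the legacy tp1 endpoint is handled separately)
}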
@@ -115,4 +182,7 @@ object CollectorPayload { path.startsWith("/ice.png") || // Legacy name for /i path.equals("/i") || // Legacy name for /com.snowplowanalytics.snowplow/tp1 path.startsWith("/i?") + + /** Thrift serializer, used for tests and debugging with `toThrift` */ + private[loaders] lazy val serializer = new TSerializer() } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala index c3456b7d5..71479efec 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala @@ -41,7 +41,7 @@ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, ParseError = object ThriftLoader extends Loader[Array[Byte]] { private val thriftDeserializer = new TDeserializer - private val ExpectedSchema = + private[loaders] val ExpectedSchema = SchemaCriterion("com.snowplowanalytics.snowplow", "CollectorPayload", "thrift", 1, 0) /** Parse Error -> Collector Payload violation */ @@ -133,7 +133,7 @@ object ThriftLoader extends Loader[Array[Byte]] { val headers = Option(collectorPayload.headers).map(_.asScala.toList).getOrElse(Nil) - val ip = IpAddressExtractor.extractIpAddress(headers, collectorPayload.ipAddress).some // Required + val ip = Option(IpAddressExtractor.extractIpAddress(headers, collectorPayload.ipAddress)) // Required val api = Option(collectorPayload.path) match { case None => @@ -196,7 +196,7 @@ object ThriftLoader extends Loader[Array[Byte]] { val headers = Option(snowplowRawEvent.headers).map(_.asScala.toList).getOrElse(Nil) - val ip = IpAddressExtractor.extractIpAddress(headers, snowplowRawEvent.ipAddress).some // Required + val ip = Option(IpAddressExtractor.extractIpAddress(headers, snowplowRawEvent.ipAddress)) // Required (querystring.toValidatedNel, networkUserId).mapN { (q, nuid) => val timestamp = Some(new DateTime(snowplowRawEvent.timestamp, DateTimeZone.UTC)) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala index fabf5e819..9d5818bd0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala @@ -15,8 +15,9 @@ package utils import cats.data.Validated import cats.syntax.either._ - +import com.fasterxml.jackson.databind.ObjectMapper import io.circe._ +import io.circe.jackson.enrich.CirceJsonModule object CirceUtils { @@ -43,4 +44,19 @@ object CirceUtils { s"Could not extract $pathStr as $clas from supplied JSON due to ${e.getMessage}" } } + + /** + * A custom ObjectMapper specific to Circe JSON AST + * + * The only difference from the original mapper `io.circe.jackson.mapper` is + * how `Long` is deserialized. The original mapper maps a `Long` to `JsonBigDecimal` + * whereas this custom mapper deserializes a `Long` to `JsonLong`. + * + * This customization saves Snowplow events from failing when derived contexts are + * validated post-enrichment. 
If the output schema of the API Request Enrichment has an integer + * field, the `JsonBigDecimal` representation of a Long results in a bad row + * with the message `number found, integer expected` from the Iglu Scala Client, since Jackson + * treats `DecimalNode` as a number in all cases. + */ + final val mapper: ObjectMapper = (new ObjectMapper).registerModule(CirceJsonModule) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala index c114fdf89..728108240 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala @@ -465,16 +465,16 @@ object ConversionUtils { def booleanToJByte(bool: Boolean): JByte = (if (bool) 1 else 0).toByte - def parseUrlEncodedForm(s: String): Either[String, Map[String, String]] = + def parseUrlEncodedForm(s: String): Either[String, Map[String, Option[String]]] = for { r <- Either .catchNonFatal(URLEncodedUtils.parse(URI.create("http://localhost/?" + s), UTF_8)) .leftMap(_.getMessage) nvps = r.asScala.toList - pairs = nvps.map(p => p.getName() -> p.getValue()) + pairs = nvps.map(p => p.getName() -> Option(p.getValue())) } yield pairs.toMap /** Extract valid IP (v4 or v6) address from a string */ def extractInetAddress(arg: String): Option[InetAddress] = - Either.catchNonFatal(new HostName(arg).toInetAddress).toOption + Option(new HostName(arg).asInetAddress) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala index 9bce3385b..773dff448 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala @@ -14,7 +14,7 @@ package com.snowplowanalytics.snowplow.enrich.common.utils import scala.util.control.NonFatal -import cats.{Eval, Id} +import cats.Id import cats.effect.Sync import cats.syntax.either._ import scalaj.http._ @@ -32,12 +32,6 @@ object HttpClient { Sync[F].delay(getBody(request)) } - implicit def evalHttpClient: HttpClient[Eval] = - new HttpClient[Eval] { - override def getResponse(request: HttpRequest): Eval[Either[Throwable, String]] = - Eval.later(getBody(request)) - } - implicit def idHttpClient: HttpClient[Id] = new HttpClient[Id] { override def getResponse(request: HttpRequest): Id[Either[Throwable, String]] = getBody(request) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala index e8c1db21d..b445c1a7c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala @@ -24,10 +24,9 @@ import java.time.Instant import com.snowplowanalytics.iglu.client.{Client, ClientError} import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.iglu.core.circe.instances._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} +import
com.snowplowanalytics.iglu.core.circe.implicits._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -281,7 +280,7 @@ object IgluUtils { pee: Payload.PartiallyEnrichedEvent, re: Payload.RawEvent, processor: Processor - ) = + ): BadRow.SchemaViolations = BadRow.SchemaViolations( processor, Failure.SchemaViolations(Instant.now(), vs), diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala index fe5dcce26..9eef3e623 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala @@ -77,7 +77,7 @@ object JsonPath { * @param any raw JVM type representing JSON * @return Json */ - private def anyToJson(any: Any): Json = + private[utils] def anyToJson(any: Any): Json = if (any == null) Json.Null - else io.circe.jackson.mapper.convertValue(any, classOf[Json]) + else CirceUtils.mapper.convertValue(any, classOf[Json]) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala index 9bc32a1bc..0f124451d 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala @@ -117,19 +117,19 @@ object JsonUtils { */ def toJson( key: String, - value: String, + value: Option[String], bools: List[String], ints: List[String], dateTimes: DateTimeFields ): (String, Json) = { val v = (value, dateTimes) match { - case (null, _) => Json.Null - case ("", _) => Json.Null - case _ if bools.contains(key) => booleanToJson(value) - case _ if ints.contains(key) => integerToJson(value) - case (_, Some((nel, fmt))) if nel.toList.contains(key) => - Json.fromString(toJsonSchemaDateTime(value, fmt)) - case _ => Json.fromString(value) + case (Some(""), _) => Json.Null + case (None, _) => Json.Null + case (Some(bool), _) if bools.contains(key) => booleanToJson(bool) + case (Some(nb), _) if ints.contains(key) => integerToJson(nb) + case (Some(datetime), Some((nel, fmt))) if nel.toList.contains(key) => + Json.fromString(toJsonSchemaDateTime(datetime, fmt)) + case (Some(str), _) => Json.fromString(str) } (key, v) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala index 7366b2ac4..617a7c1b3 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala @@ -24,7 +24,7 @@ package object common { type EnrichmentMap = Map[String, Enrichment] /** Parameters inside of a raw event */ - type RawEventParameters = Map[String, String] + type RawEventParameters = Map[String, Option[String]] /** Parameters extracted from query string */ type QueryStringParameters = List[(String, Option[String])] diff --git a/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonDeserializer.scala b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonDeserializer.scala new file 
mode 100644 index 000000000..1c8edcfdc --- /dev/null +++ b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonDeserializer.scala @@ -0,0 +1,89 @@ +package io.circe.jackson.enrich + +import java.util + +import com.fasterxml.jackson.core.{JsonParser, JsonTokenId} +import com.fasterxml.jackson.databind.{DeserializationContext, JsonDeserializer} +import io.circe.jackson.{DeserializerContext, JacksonCompat, ReadingList, ReadingMap} +import io.circe.{Json, JsonBigDecimal, JsonLong} + +import scala.annotation.{switch, tailrec} +import scala.collection.JavaConverters._ + +private[jackson] final class CirceJsonDeserializer(klass: Class[_]) extends JsonDeserializer[Object] with JacksonCompat { + override def isCachable: Boolean = true + + override def deserialize(jp: JsonParser, ctxt: DeserializationContext): Json = { + val value = deserialize(jp, ctxt, List()) + if (!klass.isAssignableFrom(value.getClass)) handleUnexpectedToken(ctxt)(klass, jp) + + value + } + + @tailrec + def deserialize( + jp: JsonParser, + ctxt: DeserializationContext, + parserContext: List[DeserializerContext] + ): Json = { + if (jp.getCurrentToken == null) jp.nextToken() + + val (maybeValue, nextContext) = (jp.getCurrentToken.id(): @switch) match { + case JsonTokenId.ID_NUMBER_INT => (Some(Json.JNumber(JsonLong(jp.getLongValue))), parserContext) + case JsonTokenId.ID_NUMBER_FLOAT => (Some(Json.JNumber(JsonBigDecimal(jp.getDecimalValue))), parserContext) + case JsonTokenId.ID_STRING => (Some(Json.JString(jp.getText)), parserContext) + case JsonTokenId.ID_TRUE => (Some(Json.JBoolean(true)), parserContext) + case JsonTokenId.ID_FALSE => (Some(Json.JBoolean(false)), parserContext) + case JsonTokenId.ID_NULL => (Some(Json.JNull), parserContext) + case JsonTokenId.ID_START_ARRAY => (None, ReadingList(new util.ArrayList) +: parserContext) + + case JsonTokenId.ID_END_ARRAY => + parserContext match { + case ReadingList(content) :: stack => + (Some(Json.fromValues(content.asScala)), stack) + case _ => throw new IllegalStateException("Jackson read ']' but parser context is not an array") + } + + case JsonTokenId.ID_START_OBJECT => (None, ReadingMap(new util.ArrayList) +: parserContext) + + case JsonTokenId.ID_FIELD_NAME => + parserContext match { + case (c: ReadingMap) :: stack => (None, c.setField(jp.getCurrentName) +: stack) + case _ => + throw new IllegalStateException("Jackson read a String field name but parser context is not a json object") + } + + case JsonTokenId.ID_END_OBJECT => + parserContext match { + case ReadingMap(content) :: stack => + ( + Some(Json.fromFields(content.asScala)), + stack + ) + case _ => throw new IllegalStateException("Jackson read '}' but parser context is not a json object") + } + + case JsonTokenId.ID_NOT_AVAILABLE => + throw new IllegalStateException("Jackson can't return the json token yet") + + case JsonTokenId.ID_EMBEDDED_OBJECT => + throw new IllegalStateException("Jackson read embedded object but json object was expected") + } + + maybeValue match { + case Some(v) if nextContext.isEmpty => v + case maybeValue => + jp.nextToken() + val toPass = maybeValue + .map { v => + val previous :: stack = nextContext + previous.addValue(v) +: stack + } + .getOrElse(nextContext) + + deserialize(jp, ctxt, toPass) + } + } + + override def getNullValue = Json.JNull +} diff --git a/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala new file mode 100644 index 000000000..7a2f4d3a3 --- /dev/null +++ 
diff --git a/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala
new file mode 100644
index 000000000..7a2f4d3a3
--- /dev/null
+++ b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala
@@ -0,0 +1,46 @@
+package io.circe.jackson.enrich
+
+import com.fasterxml.jackson.core.Version
+import com.fasterxml.jackson.databind.Module.SetupContext
+import com.fasterxml.jackson.databind._
+import com.fasterxml.jackson.databind.deser.Deserializers
+import com.fasterxml.jackson.databind.module.SimpleModule
+import com.fasterxml.jackson.databind.ser.Serializers
+import io.circe.Json
+import io.circe.jackson.CirceJsonSerializer
+
+object CirceJsonModule extends SimpleModule("SPCirceJson", Version.unknownVersion()) {
+  override final def setupModule(context: SetupContext): Unit = {
+    context.addDeserializers(
+      new Deserializers.Base {
+        override final def findBeanDeserializer(
+          javaType: JavaType,
+          config: DeserializationConfig,
+          beanDesc: BeanDescription
+        ): CirceJsonDeserializer = {
+          val klass = javaType.getRawClass
+          if (classOf[Json].isAssignableFrom(klass) || klass == Json.JNull.getClass)
+            new CirceJsonDeserializer(klass)
+          else null
+        }
+      }
+    )
+
+    context.addSerializers(
+      new Serializers.Base {
+        override final def findSerializer(
+          config: SerializationConfig,
+          javaType: JavaType,
+          beanDesc: BeanDescription
+        ): JsonSerializer[Object] = {
+          val ser: Object =
+            if (classOf[Json].isAssignableFrom(beanDesc.getBeanClass))
+              CirceJsonSerializer
+            else null
+
+          ser.asInstanceOf[JsonSerializer[Object]]
+        }
+      }
+    )
+  }
+}
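A sketch of how the CirceJsonModule above could be mounted on a Jackson ObjectMapper. The JsonPath hunk earlier only shows anyToJson delegating to CirceUtils.mapper; the registration below is an assumption about what such a mapper does, not a copy of it:

    import com.fasterxml.jackson.databind.ObjectMapper
    import io.circe.Json
    import io.circe.jackson.enrich.CirceJsonModule

    // With the module registered, Jackson can materialise io.circe.Json
    // directly, which is what convertValue relies on.
    val mapper: ObjectMapper = new ObjectMapper().registerModule(CirceJsonModule)
    val payload = java.util.Collections.singletonMap("a", Integer.valueOf(1))
    val json: Json = mapper.convertValue(payload, classOf[Json]) // yields {"a":1}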
"http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for acme stuff", + "self": { + "vendor": "com.acme", + "name": "email_sent", + "format": "jsonschema", + "version": "1-1-0" + }, + "type": "object", + "properties": { + "emailAddress": { + "type": "string" + }, + "emailAddress2": { + "type": "string" + }, + "emailAddress3": { + "type": ["string", "null"] + } + }, + "required": ["emailAddress", "emailAddress2"], + "additionalProperties": false +} diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 new file mode 100644 index 000000000..b2310754d --- /dev/null +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 @@ -0,0 +1,39 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema to test scrambling of array in PII enrichment", + "self": { + "vendor": "com.test", + "name": "array", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "field": { + "type": "array", + "items": { + "type": ["string", "null" ] + } + }, + "field2": { + "type": ["string", "null"] + }, + "field3": { + "type": ["object", "null"], + "properties": { + "a": { + "type": "string" + }, + "b": { + "type": "string" + } + } + }, + "field4": { + "type": "string", + "maxLength": 64 + } + }, + "required": ["field"], + "additionalProperties": false +} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala index 2d8527e2f..7e71e3b46 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala @@ -22,6 +22,7 @@ import com.snowplowanalytics.iglu.client.resolver.registries.Registry import com.snowplowanalytics.iglu.client.validator.CirceValidator import com.snowplowanalytics.snowplow.badrows.Processor +import com.snowplowanalytics.snowplow.badrows.BadRow import org.apache.thrift.TSerializer @@ -44,6 +45,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { def is = s2""" EtlPipeline should always produce either bad or good row for each event of the payload $e1 Processing of events with malformed query string should be supported $e2 + Processing of invalid CollectorPayload (CPFormatViolation bad row) should be supported $e3 + Absence of CollectorPayload (None) should be supported $e4 """ val adapterRegistry = new AdapterRegistry() @@ -88,6 +91,33 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { case res => ko(s"[$res] doesn't contain one enriched event") } } + + def e3 = { + val invalidCollectorPayload = ThriftLoader.toCollectorPayload(Array(1.toByte), processor) + EtlPipeline.processEvents[Id]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + invalidCollectorPayload + ) must be like { + case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok + case other => ko(s"One invalid CPFormatViolation expected, got ${other}") + } + } + + def e4 = { + val collectorPayload: Option[CollectorPayload] = None + EtlPipeline.processEvents[Id]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + 
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala
index 2d8527e2f..7e71e3b46 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala
@@ -22,6 +22,7 @@ import com.snowplowanalytics.iglu.client.resolver.registries.Registry
 import com.snowplowanalytics.iglu.client.validator.CirceValidator

 import com.snowplowanalytics.snowplow.badrows.Processor
+import com.snowplowanalytics.snowplow.badrows.BadRow

 import org.apache.thrift.TSerializer
@@ -44,6 +45,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers {
   def is = s2"""
   EtlPipeline should always produce either bad or good row for each event of the payload $e1
   Processing of events with malformed query string should be supported $e2
+  Processing of invalid CollectorPayload (CPFormatViolation bad row) should be supported $e3
+  Absence of CollectorPayload (None) should be supported $e4
  """

   val adapterRegistry = new AdapterRegistry()
@@ -88,6 +91,33 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers {
       case res => ko(s"[$res] doesn't contain one enriched event")
     }
   }
+
+  def e3 = {
+    val invalidCollectorPayload = ThriftLoader.toCollectorPayload(Array(1.toByte), processor)
+    EtlPipeline.processEvents[Id](
+      adapterRegistry,
+      enrichmentReg,
+      client,
+      processor,
+      dateTime,
+      invalidCollectorPayload
+    ) must be like {
+      case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok
+      case other => ko(s"One invalid CPFormatViolation expected, got $other")
+    }
+  }
+
+  def e4 = {
+    val collectorPayload: Option[CollectorPayload] = None
+    EtlPipeline.processEvents[Id](
+      adapterRegistry,
+      enrichmentReg,
+      client,
+      processor,
+      dateTime,
+      collectorPayload.validNel[BadRow]
+    ) must beEqualTo(Nil)
+  }
 }

 object EtlPipelineSpec {
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala
index d0f147d5d..5df90e241 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala
@@ -14,14 +14,19 @@ package com.snowplowanalytics.snowplow.enrich.common

 import cats.Id
 import cats.implicits._
+
 import com.snowplowanalytics.iglu.client.Client
 import com.snowplowanalytics.iglu.core.SelfDescribingData
-import com.snowplowanalytics.iglu.core.circe.instances._
+import com.snowplowanalytics.iglu.core.circe.implicits._
+
 import com.snowplowanalytics.lrumap.CreateLruMap._
+
 import io.circe.Json
 import io.circe.literal._
+
 import org.apache.http.NameValuePair
 import org.apache.http.message.BasicNameValuePair
+
 import com.snowplowanalytics.snowplow.enrich.common.utils.JsonUtils

 object SpecHelpers {
@@ -98,4 +103,8 @@ object SpecHelpers {
       .leftMap(err => s"Can't parse [$rawJson] as Json, error: [$err]")
       .flatMap(SelfDescribingData.parse[Json])
       .leftMap(err => s"Can't parse Json [$rawJson] as a SelfDescribingData, error: [$err]")
+
+  implicit class MapOps[A, B](underlying: Map[A, B]) {
+    def toOpt: Map[A, Option[B]] = underlying.map { case (a, b) => (a, Option(b)) }
+  }
 }
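The MapOps.toOpt helper added to SpecHelpers above is what keeps the fixtures in the adapter specs below terse now that RawEventParameters wraps its values in Option. A minimal sketch, with hypothetical values:

    import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._

    Map("e" -> "ue", "p" -> "srv").toOpt
    // -> Map("e" -> Some("ue"), "p" -> Some("srv"))

    // Because it uses Option(_), a legacy null value becomes None, not Some(null):
    Map("nuid" -> (null: String)).toOpt // -> Map("nuid" -> None)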
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala
index 6328a83a8..3fb7dbc7a 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala
@@ -84,13 +84,13 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def e1 = {
     val pairs = toNameValuePairs("a" -> "1", "b" -> "2", "c" -> "3")
-    BaseAdapter.toMap(pairs) must_== Map("a" -> "1", "b" -> "2", "c" -> "3")
+    BaseAdapter.toMap(pairs) must_== Map("a" -> "1", "b" -> "2", "c" -> "3").toOpt
   }

   def e2 = {
     val params = BaseAdapter.toUnstructEventParams(
       "tv",
-      Map[String, String](),
+      Map.empty[String, Option[String]],
       SchemaKey("com.acme", "foo", "jsonschema", SchemaVer.Full(1, 0, 1)),
       _ => Json.fromJsonObject(JsonObject.empty),
       "app"
@@ -100,7 +100,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers {
       "e" -> "ue",
       "p" -> "app",
       "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/foo/jsonschema/1-0-1","data":{}}}"""
-    )
+    ).toOpt
   }

   def e3 = {
@@ -112,7 +112,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers {
       "eid" -> "321",
       "ttm" -> "2015-11-13T16:31:52.393Z",
       "url" -> "http://localhost"
-    )
+    ).toOpt
     val params = BaseAdapter.toUnstructEventParams(
       "tv",
       shared,
@@ -127,7 +127,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers {
       "ttm" -> "2015-11-13T16:31:52.393Z",
       "url" -> "http://localhost",
       "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/foo/jsonschema/1-0-1","data":{}}}"""
-    )
+    ).toOpt
   }

   def e4 = {
@@ -168,7 +168,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def e7 = {
     val rawEvent = RawEvent(
       Shared.api,
-      Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"),
+      Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt,
       Shared.contentType.some,
       Shared.cljSource,
       Shared.context
@@ -195,7 +195,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def e8 = {
     val rawEvent = RawEvent(
       Shared.api,
-      Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"),
+      Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt,
       Shared.contentType.some,
       Shared.cljSource,
       Shared.context
@@ -205,21 +205,21 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers {
     val expected = NonEmptyList.of(
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"),
+        Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt,
         Shared.contentType.some,
         Shared.cljSource,
         Shared.context
       ),
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"),
+        Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt,
         Shared.contentType.some,
         Shared.cljSource,
         Shared.context
       ),
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"),
+        Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt,
         Shared.contentType.some,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala
index 702213bde..b063077eb 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala
@@ -52,8 +52,8 @@ class CallrailAdapterSpec extends Specification with DataTables with ValidatedMa
       "tv" -> "com.callrail-v1",
       "e" -> "ue",
       "cv" -> "clj-0.6.0-tom-0.0.4"
-    )
+    ).toOpt
-    val static = staticNoPlatform + ("p" -> "srv")
+    val static = staticNoPlatform ++ Map("p" -> "srv").toOpt
   }

   def e1 = {
@@ -148,7 +148,7 @@ class CallrailAdapterSpec extends Specification with DataTables with ValidatedMa
         NonEmptyList.one(
           RawEvent(
             Shared.api,
-            Expected.static ++ Map("ue_pr" -> expectedJson, "nuid" -> "-"),
+            Expected.static ++ Map("ue_pr" -> expectedJson, "nuid" -> "-").toOpt,
             None,
             Shared.source,
             Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala
index aac2f9a7a..0ebf505e0 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala
@@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers}
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with ValidatedMatchers {

   val processor = Processor("CloudfrontAccessLogAdapterSpec", "v1")
@@ -72,10 +74,10 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with
with "tv" -> "com.amazon.aws.cloudfront/wd_access_log", "e" -> "ue", "url" -> url - ) + ).toOpt val static = staticNoPlatform ++ Map( "p" -> "srv" - ) + ).toOpt } def e1 = { @@ -115,7 +117,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -166,7 +168,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -220,7 +222,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -275,7 +277,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -334,7 +336,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -394,7 +396,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -456,7 +458,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala index 2fd29dd21..2eb9f0785 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala @@ -29,6 +29,8 @@ import loaders._ import GoogleAnalyticsAdapter._ import utils.Clock._ +import SpecHelpers._ + class GoogleAnalyticsAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" @@ -66,7 +68,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "tv" -> "com.google.analytics.measurement-protocol-v1", "e" -> "ue", "p" -> "srv" - ) + ).toOpt val hitContext = (hitType: String) => s""" |{ @@ -137,7 +139,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", |"data":[${hitContext("pageview")}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO) + val expectedParams = 
static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -167,7 +169,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"protocolVersion":"version"} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -193,7 +195,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"ipOverride":"ip"} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO, "ip" -> "ip") + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO, "ip" -> "ip").toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -229,7 +231,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "ti_pr" -> "12.228", "ti_qu" -> "12", "ti_nm" -> "name" - ) + ).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -254,7 +256,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"documentHostName":"host"} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -287,7 +289,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "co" -> expectedCO, "tr_cu" -> "EUR", "tr_id" -> "tr" - ) + ).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -315,7 +317,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"productIndex":42,"sku":"s","listIndex":12} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -340,7 +342,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"value":"dim","index":12} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -374,7 +376,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"sku":"s2","index":2} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -402,7 +404,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"index":12,"id":"id"} |}] 
|}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -427,7 +429,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", |"data":[${hitContext("pageview")}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO).toOpt val event = RawEvent(api, expectedParams, None, source, context) actual must beValid(NonEmptyList.of(event, event)) } @@ -467,7 +469,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "ue_pr" -> expectedJson, "co" -> expectedCO, "ti_cu" -> "EUR" - ) + ).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala index 460b98f3e..0b886b59b 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala @@ -26,6 +26,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" payloadBodyToEvents must return a Success list of event JSON's from a valid payload body $e1 @@ -91,7 +93,7 @@ class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMat "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.hubspot/contact_creation/jsonschema/1-0-0","data":{"eventId":1,"subscriptionId":25458,"portalId":4737818,"occurredAt":"2018-10-10T04:23:19.845Z","attemptNumber":0,"objectId":123,"changeSource":"CRM","changeFlag":"NEW","appId":177698}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala index 2c776553d..623b0aa11 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class IgluAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents should return a NEL containing one RawEvent if the CloudFront querystring is minimally populated $e1 @@ -63,10 +65,10 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche val staticNoPlatform = Map( "tv" -> 
"com.snowplowanalytics.iglu-v1", "e" -> "ue" - ) + ).toOpt val static = staticNoPlatform ++ Map( "p" -> "app" - ) + ).toOpt } def e1 = { @@ -102,7 +104,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.cfSource, Shared.context @@ -142,7 +144,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Map( "ue_pr" -> json, "aid" -> "webhooks" - ) + ).toOpt } actual must beValid( @@ -187,7 +189,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "aid" -> "my webhook project", "cv" -> "clj-0.5.0-tom-0.0.4", "nuid" -> "" - ) + ).toOpt } actual must beValid( @@ -223,7 +225,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche NonEmptyList.one( RawEvent( Shared.api, - Expected.staticNoPlatform ++ Map("p" -> "mob", "ue_pr" -> expectedJson), + Expected.staticNoPlatform ++ Map("p" -> "mob", "ue_pr" -> expectedJson).toOpt, None, Shared.cfSource, Shared.context @@ -304,7 +306,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "mob", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"key":"value","everwets":"processed"}}}""" - ), + ).toOpt, "application/json".some, Shared.cljSource, Shared.context @@ -409,7 +411,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "mob", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"some_param":"foo"}}}""" - ), + ).toOpt, "application/json".some, Shared.cljSource, Shared.context @@ -490,7 +492,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"some_param":"foo","hello":"world"}}}""" - ), + ).toOpt, "application/x-www-form-urlencoded".some, Shared.cljSource, Shared.context @@ -525,7 +527,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "mob", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"key":"value","everwets":"processed"}}}""" - ), + ).toOpt, "application/json".some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala index 88c92c9e2..17c880aa6 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala @@ -10,21 +10,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
  */
-package com.snowplowanalytics.snowplow.enrich.common
-package adapters
-package registry
+package com.snowplowanalytics.snowplow.enrich.common.adapters.registry

 import cats.data.NonEmptyList
 import cats.syntax.option._
+
 import com.snowplowanalytics.snowplow.badrows._
+
 import io.circe._
 import io.circe.literal._
+
 import org.joda.time.DateTime
+
 import org.specs2.Specification
 import org.specs2.matcher.{DataTables, ValidatedMatchers}

-import loaders._
-import utils.Clock._
+import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent
+import com.snowplowanalytics.snowplow.enrich.common.loaders._
+import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._
+
+import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers
+import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._

 class MailchimpAdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def is = s2"""
@@ -78,7 +84,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM
     val map = Map(
       "data[merges][LNAME]" -> "Beemster",
       "data[merges][FNAME]" -> "Joshua"
-    )
+    ).toOpt
     val expected = List(
       ("data", json"""{ "merges": { "LNAME": "Beemster" }}"""),
       ("data", json"""{ "merges": { "FNAME": "Joshua" }}""")
@@ -104,14 +110,14 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM
   def e5 =
     "SPEC NAME" || "PARAMS" | "EXPECTED OUTPUT" |
-      "Return Updated Params" !! Map("type" -> "subscribe", "fired_at" -> "2014-10-22 13:50:00") ! Map(
+      "Return Updated Params" !! Map("type" -> "subscribe", "fired_at" -> "2014-10-22 13:50:00").toOpt ! Map(
         "type" -> "subscribe",
         "fired_at" -> "2014-10-22T13:50:00.000Z"
-      ) |
-      "Return Same Params" !! Map("type" -> "subscribe", "id" -> "some_id") ! Map(
+      ).toOpt |
+      "Return Same Params" !! Map("type" -> "subscribe", "id" -> "some_id").toOpt ! Map(
Map( "type" -> "subscribe", "id" -> "some_id" - ) |> { (_, params, expected) => + ).toOpt |> { (_, params, expected) => val actual = MailchimpAdapter.reformatParameters(params) actual mustEqual expected } @@ -147,7 +153,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -188,7 +194,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -221,7 +227,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -305,7 +311,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM "p" -> "srv", "ue_pr" -> expectedJson, "nuid" -> "123" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala index 01209c1a9..fafb668b1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class MailgunAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a Success Nel if every event 'delivered' in the payload is successful $e1 @@ -90,7 +92,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -139,7 +141,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -189,7 +191,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, 
         Shared.context
@@ -239,7 +241,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat
     val expected = NonEmptyList.one(
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson),
+        Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
@@ -286,7 +288,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat
     val expected = NonEmptyList.one(
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson),
+        Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt,
         Some("multipart/form-data; boundary=353d603f-eede-4b49-97ac-724fbc54ea3c"),
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala
index 0996ccfca..30456c6cc 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala
@@ -25,6 +25,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers}
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class MandrillAdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def is = s2"""
   payloadBodyToEvents must return a Success List[JValue] for a valid events string $e1
@@ -110,7 +112,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_sent/jsonschema/1-0-0","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa","ts":"2014-11-06T09:49:26.000Z"}}}"""
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
@@ -122,7 +124,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_delayed/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"deferred","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa1","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","smtp_events":[{"size":0,"destination_ip":"127.0.0.1","diag":"451 4.3.5 Temporarily unavailable, try again later.","ts":"2013-04-04T21:31:51.000Z","source_ip":"127.0.0.1","type":"deferred"}],"clicks":[],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa1","ts":"2014-11-06T09:49:26.000Z"}}}"""
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
@@ -134,7 +136,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa
ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_bounced/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"bounced","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa2","tags":["webhook-example"],"diag":"smtp;550 5.1.1 The email account that you tried to reach does not exist. Please try double-checking the recipient's email address for typos or unnecessary spaces.","ts":"2013-04-04T21:13:19.000Z","metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","bounce_description":"bad_mailbox","bgtools_code":10},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa2","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -146,7 +148,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_soft_bounced/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"soft-bounced","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa3","tags":["webhook-example"],"diag":"smtp;552 5.2.2 Over Quota","ts":"2013-04-04T21:13:19.000Z","metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","bounce_description":"mailbox_full","bgtools_code":22},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa3","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -158,7 +160,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_opened/jsonschema/1-0-1","data":{"ip":"127.0.0.1","location":{"city":"Oklahoma City","latitude":35.4675598145,"timezone":"-05:00","country":"United States","longitude":-97.5164337158,"country_short":"US","postal_code":"73101","region":"Oklahoma"},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa4","ts":"2014-11-06T09:49:26.000Z","user_agent":"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.8) Gecko/20100317 Postbox/1.1.3","msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa4","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"user_agent_parsed":{"os_company_url":"http://www.apple.com/","os_family":"OS X","os_company":"Apple Computer, Inc.","os_url":"http://www.apple.com/osx/","ua_url":"http://www.postbox-inc.com/","ua_icon":"http://cdn.mandrill.com/img/email-client-icons/postbox.png","ua_version":"1.1.3","os_name":"OS X 10.6 Snow Leopard","ua_company":"Postbox, Inc.","ua_family":"Postbox","os_icon":"http://cdn.mandrill.com/img/email-client-icons/macosx.png","ua_company_url":"http://www.postbox-inc.com/","ua_name":"Postbox 1.1.3","type":"Email Client","mobile":false}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -170,7 +172,7 @@ class 
@@ -170,7 +172,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_clicked/jsonschema/1-0-1","data":{"ip":"127.0.0.1","location":{"city":"Oklahoma City","latitude":35.4675598145,"timezone":"-05:00","country":"United States","longitude":-97.5164337158,"country_short":"US","postal_code":"73101","region":"Oklahoma"},"url":"http://mandrill.com","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa5","ts":"2014-11-06T09:49:26.000Z","user_agent":"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.8) Gecko/20100317 Postbox/1.1.3","msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa5","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"user_agent_parsed":{"os_company_url":"http://www.apple.com/","os_family":"OS X","os_company":"Apple Computer, Inc.","os_url":"http://www.apple.com/osx/","ua_url":"http://www.postbox-inc.com/","ua_icon":"http://cdn.mandrill.com/img/email-client-icons/postbox.png","ua_version":"1.1.3","os_name":"OS X 10.6 Snow Leopard","ua_company":"Postbox, Inc.","ua_family":"Postbox","os_icon":"http://cdn.mandrill.com/img/email-client-icons/macosx.png","ua_company_url":"http://www.postbox-inc.com/","ua_name":"Postbox 1.1.3","type":"Email Client","mobile":false}}}}"""
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
@@ -182,7 +184,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_marked_as_spam/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa6","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa6","ts":"2014-11-06T09:49:26.000Z"}}}"""
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
@@ -194,7 +196,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/recipient_unsubscribed/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa7","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa7","ts":"2014-11-06T09:49:26.000Z"}}}"""
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
@@ -206,7 +208,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_rejected/jsonschema/1-0-0","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"rejected","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa8","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa8","ts":"2014-11-06T09:49:26.000Z"}}}"""
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala
index fa753f28d..4c848777a 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala
@@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers}
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class MarketoAdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def is = s2"""
   toRawEvents must return a success for a valid "event" type payload body being passed $e1
@@ -64,7 +66,7 @@ class MarketoAdapterSpec extends Specification with DataTables with ValidatedMat
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.marketo/event/jsonschema/2-0-0","data":{"lead":{"first_name":"the hulk","acquisition_date":"2010-11-11T11:11:11.000Z","black_listed":false,"last_interesting_moment_date":"2018-09-26T20:26:40.000Z","created_at":"2018-06-16T11:23:58.000Z","updated_at":""},"name":"webhook for A","step":6,"campaign":{"id":987,"name":"triggered event"},"datetime":"2018-03-07T14:28:16.000Z","company":{"name":"iron man","notes":"the something dog leapt over the lazy fox"}}}}"""
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala
index bb24ed646..d50431194 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala
@@ -27,6 +27,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers}
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def is = s2"""
   toRawEvents must return a Success Nel if the transcript event in the payload is successful $e1
@@ -134,7 +136,7 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch
     val expected = NonEmptyList.one(
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.olark-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson),
"srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -198,7 +200,7 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.olark-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.olark-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala index 61573c9d5..c0436b40c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala @@ -25,6 +25,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" reformatParameters must return an updated JSON whereby all null Strings have been replaced by null $e1 @@ -127,7 +129,7 @@ class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedM "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.pagerduty/incident/jsonschema/1-0-0","data":{"type":"trigger","data":{"incident":{"assigned_to_user":{"id":"P9L426X","name":"Yali Sassoon","email":"yali@snowplowanalytics.com","html_url":"https://snowplow.pagerduty.com/users/P9L426X"},"incident_key":"srv01/HTTP","trigger_summary_data":{"description":"FAILURE for production/HTTP on machine srv01.acme.com","client":"Sample Monitoring Service","client_url":"https://monitoring.service.com"},"last_status_change_by":null,"incident_number":139,"service":{"id":"PTHO4FF","name":"Webhooks Test","html_url":"https://snowplow.pagerduty.com/services/PTHO4FF","deleted_at":null},"trigger_details_html_url":"https://snowplow.pagerduty.com/incidents/P9WY9U9/log_entries/P5AWPTR","id":"P9WY9U9","assigned_to":[{"at":"2014-11-12T18:53:47Z","object":{"id":"P9L426X","name":"Yali Sassoon","email":"yali@snowplowanalytics.com","html_url":"https://snowplow.pagerduty.com/users/P9L426X","type":"user"}}],"number_of_escalations":0,"last_status_change_on":"2014-11-12T18:53:47Z","status":"triggered","escalation_policy":{"id":"P8ETVHU","name":"Default","deleted_at":null},"created_on":"2014-11-12T18:53:47+00:00","trigger_type":"trigger_svc_event","html_url":"https://snowplow.pagerduty.com/incidents/P9WY9U9"}},"id":"3c3e8ee0-6a9d-11e4-b3d5-22000ae31361","created_on":"2014-11-12T18:53:47Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala index 2272bf37a..535e0404a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala @@ -29,6 +29,8 @@ 
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class PingdomAdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def is = s2"""
   reformatParameters should return either an updated JSON without the 'action' field or the same JSON $e1
@@ -84,7 +86,7 @@ class PingdomAdapterSpec extends Specification with DataTables with ValidatedMat
           "e" -> "ue",
           "p" -> "apps",
           "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.pingdom/incident_assign/jsonschema/1-0-0","data":{"check":"1421338","checkname":"Webhooks_Test","host":"7eef51c2.ngrok.com","incidentid":3,"description":"down"}}}"""
-        ),
+        ).toOpt,
         None,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala
index 4ca4ef5da..605af85f1 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala
@@ -42,6 +42,8 @@ import org.specs2.specification.BeforeAfter
 import loaders.CollectorPayload
 import utils.Clock._

+import SpecHelpers._
+
 class RemoteAdapterSpec extends Specification with ValidatedMatchers {

   def is =
@@ -189,7 +191,7 @@ class RemoteAdapterSpec extends Specification with ValidatedMatchers {
           "e" -> "ue",
           "p" -> mockPlatform,
           "ue_pr" -> s"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:$mockSchemaVendor/$mockSchemaName/$mockSchemaFormat/$mockSchemaVersion","data":$evtJson}}"""
-        ),
+        ).toOpt,
         None,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala
index 0e59562e2..7d4156550 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala
@@ -24,6 +24,8 @@ import org.specs2.mutable.Specification
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class SendgridAdapterSpec extends Specification with ValidatedMatchers {
   object Shared {
     val api = CollectorPayload.Api("com.sendgrid", "v3")
@@ -459,7 +461,7 @@ class SendgridAdapterSpec extends Specification with ValidatedMatchers {
           "e" -> "ue",
           "p" -> "srv",
           "ue_pr" -> expectedJson // NB this includes removing the "event" keypair as redundant
-        ),
+        ).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala
index a26af86ab..02f518144 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala
@@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers}
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class StatusGatorAdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def is = s2"""
   toRawEvents must return a Success Nel if every event in the payload is successful $e1
@@ -80,7 +82,7 @@ class StatusGatorAdapterSpec extends Specification with DataTables with Validate
     val expected = NonEmptyList.one(
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.statusgator-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson),
+        Map("tv" -> "com.statusgator-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala
index 12f2ae232..f9164d046 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala
@@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers}
 import loaders._
 import utils.Clock._

+import SpecHelpers._
+
 class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMatchers {
   def is = s2"""
   toRawEvents must return a Success Nel if the query string is valid $e1
@@ -109,7 +111,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa
     val expected = NonEmptyList.one(
       RawEvent(
         Shared.api,
-        Map("tv" -> "com.unbounce-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson),
+        Map("tv" -> "com.unbounce-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt,
         ContentType.some,
         Shared.cljSource,
         Shared.context
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala
index d88228647..5d4fdd9ca 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala
@@ -113,7 +113,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers {
       correctType must be equalTo (Right("CLOSE"))

       val items = actual.toList.head.toList
-      val sentSchema = parse(items.head.parameters("ue_pr"))
+      val sentSchema = parse(items.head.parameters("ue_pr").getOrElse("{}"))
         .leftMap(_.getMessage)
         .flatMap(_.hcursor.downField("data").get[String]("schema").leftMap(_.getMessage))
       sentSchema must beRight("""iglu:com.urbanairship.connect/CLOSE/jsonschema/1-0-0""")
@@ -193,7 +193,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers {
       actual match {
         case Validated.Valid(successes) =>
           val event = successes.head
-          parse(event.parameters("ue_pr")) must beRight(expectedUnstructEventJson)
+          parse(event.parameters("ue_pr").getOrElse("{}")) must beRight(expectedUnstructEventJson)
         case _ => ko("payload was not accepted")
       }
     }
@@ -203,7 +203,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers {
UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { case Validated.Valid(successes) => val event = successes.head // "occurred" field value in ms past epoch (2015-11-13T16:31:52.393Z) - event.parameters("ttm") must beEqualTo("1447432312393") + event.parameters("ttm") must beEqualTo(Some("1447432312393")) case _ => ko("payload was not populated") } } @@ -213,7 +213,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { case Validated.Valid(successes) => val event = successes.head // id field value - event.parameters("eid") must beEqualTo("e3314efb-9058-dbaf-c4bb-b754fca73613") + event.parameters("eid") must beEqualTo(Some("e3314efb-9058-dbaf-c4bb-b754fca73613")) case _ => ko("payload was not populated") } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala index 980b19b84..e4de9aa76 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class VeroAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a success for a valid "sent" type payload body being passed $e1 @@ -72,7 +74,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/sent/jsonschema/1-0-0","data":{"event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"sent_at":"2015-06-22T23:37:18.000Z","campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -100,7 +102,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/delivered/jsonschema/1-0-0","data":{"event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"delivered_at":"2015-06-22T23:37:18.000Z","message_id":"20130920062934.21270.53268@vero.com","sender_ip":"127.0.0.1","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -128,7 +130,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> 
"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/opened/jsonschema/1-0-0","data":{"opened_at":"2015-06-22T23:37:18.000Z","event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"user_agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","message_id":"20130920062934.21270.53268@vero.com","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -156,7 +158,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/clicked/jsonschema/1-0-0","data":{"clicked_at":"2015-06-22T23:37:18.000Z","event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"user_agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","message_id":"20130920062934.21270.53268@vero.com","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -184,7 +186,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/bounced/jsonschema/1-0-0","data":{"bounce_message":"521 5.2.1 : AOL will not accept delivery of this message.","event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"bounced_at":"2015-06-22T23:37:18.000Z","campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"message_id":"20130920062934.21270.53268@vero.com","bounce_type":"hard","bounce_code":"521","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -212,7 +214,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/unsubscribed/jsonschema/1-0-0","data":{"unsubscribed_at":"2015-06-22T23:37:18.000Z","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -240,7 +242,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/created/jsonschema/1-0-0","data":{"role":"Bot","firstname":"Steve","company":"Vero","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, 
Shared.cljSource, Shared.context @@ -268,7 +270,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/updated/jsonschema/1-0-0","data":{"user":{"id":123,"email":"steve@getvero.com"},"changes":{"tags":{"add":["active-customer"],"remove":["unactive-180-days"]}}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -303,7 +305,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.getvero-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.getvero-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala index 3726c1a4a..5764127f3 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala @@ -35,6 +35,8 @@ import loaders._ import utils.{ConversionUtils => CU} import utils.Clock._ +import SpecHelpers._ + class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { def is = s2""" Tp1.toRawEvents should return a NEL containing one RawEvent if the querystring is populated $e1 @@ -56,6 +58,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Redirect.toRawEvents should return a Validation Failure if the querystring does not contain a u parameter $e17 Redirect.toRawEvents should return a Validation Failure if the event type is specified and the co JSON is corrupted $e18 Redirect.toRawEvents should return a Validation Failure if the event type is specified and the cx Base64 is corrupted $e19 + Redirect.toRawEvents should return a Validation Failure if the URI is null (&u param without a value) $e20 """ object Snowplow { @@ -93,7 +96,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa val actual = Tp1Adapter.toRawEvents(payload, SpecHelpers.client) actual must beValid( NonEmptyList - .one(RawEvent(Snowplow.Tp1, Map("aid" -> "test"), None, Shared.source, Shared.context)) + .one(RawEvent(Snowplow.Tp1, Map("aid" -> "test").toOpt, None, Shared.source, Shared.context)) ) } @@ -125,7 +128,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa NonEmptyList.one( RawEvent( Snowplow.Tp2, - Map("aid" -> "tp2", "e" -> "se"), + Map("aid" -> "tp2", "e" -> "se").toOpt, None, Shared.source, Shared.context @@ -151,7 +154,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa NonEmptyList.one( RawEvent( Snowplow.Tp2, - Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se"), + Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, ApplicationJsonWithCharset.some, Shared.source, Shared.context @@ -185,9 +188,9 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa ) actual must beValid( NonEmptyList.of( - rawEvent(Map("tv" -> "0", "p" -> "1", "e" -> "1", "nuid" -> 
"123")), - rawEvent(Map("tv" -> "0", "p" -> "2", "e" -> "2", "nuid" -> "123")), - rawEvent(Map("tv" -> "0", "p" -> "3", "e" -> "3", "nuid" -> "123")) + rawEvent(Map("tv" -> "0", "p" -> "1", "e" -> "1", "nuid" -> "123").toOpt), + rawEvent(Map("tv" -> "0", "p" -> "2", "e" -> "2", "nuid" -> "123").toOpt), + rawEvent(Map("tv" -> "0", "p" -> "3", "e" -> "3", "nuid" -> "123").toOpt) ) ) } @@ -208,7 +211,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa NonEmptyList.one( RawEvent( Snowplow.Tp2, - Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se"), + Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, ApplicationJsonWithCapitalCharset.some, Shared.source, Shared.context @@ -461,7 +464,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}}""", "p" -> "web", "cx" -> "dGVzdHRlc3R0ZXN0" - ), + ).toOpt, None, Shared.source, Shared.context @@ -494,7 +497,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "tv" -> "r-tp2", "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -528,7 +531,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "tv" -> "r-tp2", "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -560,7 +563,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "tv" -> "r-tp2", "co" -> """{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""", "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -597,7 +600,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa 
"""{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""" ), "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -689,4 +692,27 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa ) } + def e20 = { + val payload = CollectorPayload( + Snowplow.Tp2, + SpecHelpers.toNameValuePairs( + "u" -> null, // happens with &u in the query string + "cx" -> "dGVzdHRlc3R0ZXN0" + ), + None, + None, + Shared.source, + Shared.context + ) + val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) + actual must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation.InputData( + "querystring", + "u=null&cx=dGVzdHRlc3R0ZXN0".some, + "missing `u` parameter: not a valid URI redirect" + ) + ) + ) + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index eb152f316..070f835e1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -17,24 +17,28 @@ package enrichments import cats.Id import cats.implicits._ import cats.data.NonEmptyList - import io.circe.literal._ - import org.joda.time.DateTime - import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} - +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} import loaders._ import adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.{ + JsonMutators, + PiiJson, + PiiPseudonymizerEnrichment, + PiiStrategyPseudonymize +} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import utils.Clock._ import utils.ConversionUtils import enrichments.registry.{IabEnrichment, JavascriptScriptEnrichment, YauaaEnrichment} - +import org.apache.commons.codec.digest.DigestUtils import org.specs2.mutable.Specification import org.specs2.matcher.EitherMatchers +import SpecHelpers._ + class EnrichmentManagerSpec extends Specification with EitherMatchers { import EnrichmentManagerSpec._ @@ -58,7 +62,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """ - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -87,11 +91,11 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "data": { "emailAddress": "hello@world.com", "emailAddress2": "foo@bar.org", - "emailAddress3": "foo@bar.org" + "unallowedAdditionalField": "foo@bar.org" } } }""" - ) + ).toOpt val rawEvent = 
RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -133,7 +137,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "pp", "tv" -> "js-0.13.1", "p" -> "web" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -196,7 +200,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "pp", "tv" -> "js-0.13.1", "p" -> "web" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -255,8 +259,68 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" + ).toOpt + val rawEvent = RawEvent(api, parameters, None, source, context) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent ) + enriched.value must beRight + } + + "emit an EnrichedEvent if a PII value that needs to be hashed is an empty string" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": "" + } + } + }""" + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, client, @@ -267,6 +331,254 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { enriched.value must beRight } + "emit an EnrichedEvent if a PII value that needs to be hashed is null" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/2-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": null + } + } + }""" + ).toOpt + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + 
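// Note: these PII fixtures pass commons-codec's sha256Hex as the hash function plus a
// pepper ("pepper123"); the "MD5" label appears to be just the strategy's name string,
// not the digest actually applied. A tiny illustration of the hashing primitive, where
// appending the pepper to the value is an assumption about how the enrichment combines them:
import java.nio.charset.StandardCharsets
import org.apache.commons.codec.digest.DigestUtils
val pepper = "pepper123"
val hashed = DigestUtils.sha256Hex(("foo@bar.org" + pepper).getBytes(StandardCharsets.UTF_8))
// always 64 lowercase hex characters, stable across runs for an equal value and pepper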
hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beRight + } + + "fail to emit an EnrichedEvent if a PII value that needs to be hashed is an empty object" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + }""" + ).toOpt + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + + "fail to emit an EnrichedEvent if a context PII value that needs to be hashed is an empty object" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + }""" + ).toOpt + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + def enriched = + EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + + "fail to emit an EnrichedEvent if a PII value needs to be hashed in both co and ue and is invalid in one of them" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + ] + } + """, + "ue_pr" -> """ + { + 
"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": "" + } + } + }""" + ).toOpt + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ), + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + def enriched = + EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + "have a preference of 'ua' query string parameter over user agent of HTTP header" >> { val qs_ua = "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0" val parameters = Map( @@ -274,7 +586,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "tv" -> "js-0.13.1", "ua" -> qs_ua, "p" -> "web" - ) + ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) val enriched = EnrichmentManager.enrichEvent( @@ -293,7 +605,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "pp", "tv" -> "js-0.13.1", "p" -> "web" - ) + ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) val enriched = EnrichmentManager.enrichEvent( @@ -329,7 +641,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getIabContext(input, iabEnrichment) must beRight(None) } - "return None if user_ipaddress in invalid" >> { + "return None if user_ipaddress is invalid" >> { val input = new EnrichedEvent() input.setUser_ipaddress("invalid") input.setUseragent("Firefox") @@ -337,6 +649,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getIabContext(input, iabEnrichment) must beRight(None) } + "return None if user_ipaddress is hostname (don't try to resovle it)" >> { + val input = new EnrichedEvent() + input.setUser_ipaddress("localhost") + input.setUseragent("Firefox") + input.setDerived_tstamp("2010-06-30 01:20:01.000") + EnrichmentManager.getIabContext(input, iabEnrichment) must beRight(None) + } + "return Some if all arguments are valid" >> { val input = new EnrichedEvent() input.setUser_ipaddress("127.0.0.1") @@ -345,6 +665,27 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getIabContext(input, iabEnrichment) must beRight.like { case ctx => ctx must beSome } } } + + "getCollectorVersionSet" should { + "return an enrichment failure if v_collector is null or empty" >> { + val input = new EnrichedEvent() + EnrichmentManager.getCollectorVersionSet(input) must beLeft.like { + case _: FailureDetails.EnrichmentFailure => ok + case other => ko(s"expected EnrichmentFailure but got $other") + } + input.v_collector = "" + EnrichmentManager.getCollectorVersionSet(input) must beLeft.like { 
+ case _: FailureDetails.EnrichmentFailure => ok + case other => ko(s"expected EnrichmentFailure but got $other") + } + } + + "return Unit if v_collector is set" >> { + val input = new EnrichedEvent() + input.v_collector = "v42" + EnrichmentManager.getCollectorVersionSet(input) must beRight(()) + } + } } object EnrichmentManagerSpec { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala index f3cafa750..ebc89d353 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala @@ -10,8 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.Id import cats.data.{NonEmptyList, Validated, ValidatedNel} @@ -19,11 +18,15 @@ import cats.implicits._ import com.snowplowanalytics.forex.CreateForex._ import com.snowplowanalytics.forex.model._ + import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} -import com.snowplowanalytics.snowplow.badrows._ + +import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CurrencyConversionConf import org.joda.money.CurrencyUnit import org.joda.time.DateTime + import org.specs2.Specification import org.specs2.matcher.DataTables @@ -42,11 +45,10 @@ class CurrencyConversionEnrichmentSpec extends Specification with DataTables { """ lazy val validAppKey = sys.env - .get(OerApiKey) - .getOrElse( - throw new IllegalStateException( - s"No ${OerApiKey} environment variable found, test should have been skipped" - ) + .getOrElse(OerApiKey, + throw new IllegalStateException( + s"No $OerApiKey environment variable found, test should have been skipped" + ) ) type Result = ValidatedNel[ FailureDetails.EnrichmentFailure, diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala index 0282a4063..7dac5ca0f 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala @@ -14,12 +14,17 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import com.snowplowanalytics.forex.model._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} import io.circe.literal._ import io.circe.parser._ + import org.apache.commons.codec.binary.Base64 import org.joda.money.CurrencyUnit + +import com.snowplowanalytics.forex.model._ +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} + +import 
com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf._ + import org.specs2.matcher.{DataTables, ValidatedMatchers} import org.specs2.mutable.Specification @@ -42,7 +47,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 1) ) val result = AnonIpEnrichment.parse(ipAnonJson, schemaKey) - result must beValid(AnonIpConf(AnonIPv4Octets(2), AnonIPv6Segments(3))) + result must beValid(AnonIpConf(schemaKey, AnonIPv4Octets(2), AnonIPv6Segments(3))) } "successfully construct an AnonIpEnrichment case class with default value for IPv6" in { @@ -59,7 +64,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 0) ) val result = AnonIpEnrichment.parse(ipAnonJson, schemaKey) - result must beValid(AnonIpConf(AnonIPv4Octets(2), AnonIPv6Segments(2))) + result must beValid(AnonIpConf(schemaKey, AnonIPv4Octets(2), AnonIPv6Segments(2))) } } @@ -85,6 +90,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(2, 0, 0) ) val expected = IpLookupsConf( + schemaKey, Some( ( new URI( @@ -131,6 +137,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(2, 0, 0) ) val expected = RefererParserConf( + schemaKey, ( new URI( "http://snowplow-hosted-assets.s3.amazonaws.com/third-party/referer/referer.json" @@ -141,7 +148,6 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da ) val result = RefererParserEnrichment.parse(refererParserJson, schemaKey, false) result must beValid(expected) - } } @@ -174,6 +180,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 0) ) val expected = CampaignAttributionConf( + schemaKey, List("utm_medium", "medium"), List("utm_source", "source"), List("utm_term"), @@ -331,7 +338,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 0) ) val result = CookieExtractorEnrichment.parse(cookieExtractorEnrichmentJson, schemaKey) - result must beValid(CookieExtractorConf(List("foo", "bar"))) + result must beValid(CookieExtractorConf(schemaKey, List("foo", "bar"))) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala index 256b171f7..bfef628a6 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala @@ -14,6 +14,8 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import org.specs2.Specification +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ + class EventFingerprintEnrichmentSpec extends Specification { def is = s2""" getEventFingerprint should combine fields into a hash $e1 @@ -43,7 +45,7 @@ class EventFingerprintEnrichmentSpec extends Specification { "stm" -> "1000000000000", "e" -> "se", "se_ac" -> "buy" - ) + ).toOpt ) must_== "15" } @@ -53,14 +55,14 @@ class EventFingerprintEnrichmentSpec extends Specification { "se_ac" -> "action", "se_ca" -> "category", "se_pr" -> "property" - ) + ).toOpt val permutedVersion = Map( "se_ca" -> "category", "se_ac" 
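// Note: the fingerprint specs here pin down two properties: parameter order must not
// change the fingerprint (e2), and the volatile fields stm (sent timestamp) and eid
// (event id) must not contribute to it (e3). A hedged sketch of a fingerprint with
// those properties; MD5 and "&"-joining are illustrative choices, not the enrichment's
// confirmed algorithm:
import org.apache.commons.codec.digest.DigestUtils
def fingerprintSketch(params: Map[String, Option[String]]): String = {
  val excluded = Set("stm", "eid")
  val canonical = params.toSeq
    .collect { case (k, Some(v)) if !excluded(k) => s"$k=$v" }
    .sorted // sorting makes Map iteration order irrelevant
    .mkString("&")
  DigestUtils.md5Hex(canonical)
}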
-> "action", "se_pr" -> "property", "e" -> "se" - ) + ).toOpt standardConfig.getEventFingerprint(permutedVersion) must_== standardConfig.getEventFingerprint( initialVersion @@ -73,12 +75,12 @@ class EventFingerprintEnrichmentSpec extends Specification { "eid" -> "123e4567-e89b-12d3-a456-426655440000", "e" -> "se", "se_ac" -> "buy" - ) + ).toOpt val delayedVersion = Map( "stm" -> "9999999999999", "e" -> "se", "se_ac" -> "buy" - ) + ).toOpt standardConfig.getEventFingerprint(delayedVersion) must_== standardConfig.getEventFingerprint( initialVersion @@ -88,7 +90,7 @@ class EventFingerprintEnrichmentSpec extends Specification { def e4 = { val initialVersion = Map( "prefix" -> "suffix" - ) + ).toOpt standardConfig.getEventFingerprint(initialVersion) should not be standardConfig .getEventFingerprint(initialVersion) @@ -104,7 +106,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha1Config.getEventFingerprint(initialVersion).length() must_== 40 } @@ -119,7 +121,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha256Config.getEventFingerprint(initialVersion).length() must_== 64 } @@ -134,7 +136,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha384Config.getEventFingerprint(initialVersion).length() must_== 96 } @@ -149,7 +151,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha512Config.getEventFingerprint(initialVersion).length() must_== 128 } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala index 861869b11..cc570ba33 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala @@ -14,7 +14,8 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.InetAddress -import cats.Eval +import cats.Id +import cats.syntax.functor._ import io.circe.literal._ @@ -79,15 +80,14 @@ class IabEnrichmentSpec extends Specification with DataTables { expectedReason, expectedPrimaryImpact ) => - (for { - e <- validConfig.enrichment[Eval] - res = e.performCheck(userAgent, ipAddress, DateTime.now()) - } yield res).value must beRight.like { - case check => - check.spiderOrRobot must_== expectedSpiderOrRobot and - (check.category must_== expectedCategory) and - (check.reason must_== expectedReason) and - (check.primaryImpact must_== expectedPrimaryImpact) + validConfig.enrichment[Id].map { e => + e.performCheck(userAgent, ipAddress, DateTime.now()) must beRight.like { + case check => + check.spiderOrRobot must_== expectedSpiderOrRobot and + (check.category must_== expectedCategory) and + (check.reason must_== expectedReason) and + (check.primaryImpact must_== expectedPrimaryImpact) + } } } @@ -98,9 +98,8 @@ class IabEnrichmentSpec extends Specification with DataTables { json"""{"spiderOrRobot": false, "category": "BROWSER", "reason": "PASSED_ALL", "primaryImpact": "NONE"}""" ) validConfig - .enrichment[Eval] - 
.map(_.getIabContext("Xdroid", "192.168.0.1".ip, DateTime.now())) - .value must + .enrichment[Id] + .map(_.getIabContext("Xdroid", "192.168.0.1".ip, DateTime.now())) must beRight(responseJson) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala index 4cfdb2b33..7cf9f653f 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala @@ -14,12 +14,15 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.Eval +import cats.Id import cats.data.EitherT import cats.syntax.either._ + +import io.circe.literal._ + import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} import com.snowplowanalytics.refererparser._ -import io.circe.literal._ + import org.specs2.Specification import org.specs2.matcher.DataTables @@ -57,7 +60,7 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { Medium.Unknown ) |> { (_, refererUri, referer) => (for { - c <- EitherT.fromEither[Eval]( + c <- EitherT.fromEither[Id]( RefererParserEnrichment .parse( json"""{ @@ -81,16 +84,16 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { .toEither .leftMap(_.head) ) - e <- c.enrichment[Eval] + e <- c.enrichment[Id] res = e.extractRefererDetails(new URI(refererUri), PageHost) - } yield res).value.value must beRight.like { - case o => o must_== Some(referer) + } yield res).value must beRight.like { + case o => o must beSome(referer) } } def e2 = (for { - c <- EitherT.fromEither[Eval]( + c <- EitherT.fromEither[Id]( RefererParserEnrichment .parse( json"""{ @@ -114,16 +117,16 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { .toEither .leftMap(_.head) ) - e <- c.enrichment[Eval] + e <- c.enrichment[Id] res = e.extractRefererDetails( new URI( "http://www.google.com/search?q=%0Agateway%09oracle%09cards%09denise%09linn&hl=en&client=safari" ), PageHost ) - } yield res).value.value must beRight.like { + } yield res).value must beRight.like { case o => - o must_== Some( + o must beSome( SearchReferer( Medium.Search, "Google", diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala index 030fcf3d6..d51dc85ca 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala @@ -14,13 +14,15 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.Eval +import cats.Id import cats.data.EitherT import io.circe.literal._ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.UaParserConf + import org.specs2.matcher.DataTables import org.specs2.mutable.Specification @@ -75,10 +77,10 @@ class 
UaParserEnrichmentSpec extends Specification with DataTables { "Custom Rules" | "Input UserAgent" | "Parsed UserAgent" | Some(badRulefile) !! mobileSafariUserAgent !! "Failed to initialize ua parser" |> { (rules, input, errorPrefix) => (for { - c <- EitherT.rightT[Eval, String](UaParserConf(schemaKey, rules)) - e <- c.enrichment[Eval] + c <- EitherT.rightT[Id, String](UaParserConf(schemaKey, rules)) + e <- c.enrichment[Id] res = e.extractUserAgent(input) - } yield res).value.value must beLeft.like { + } yield res).value must beLeft.like { case a => a must startWith(errorPrefix) } } @@ -90,11 +92,11 @@ class UaParserEnrichmentSpec extends Specification with DataTables { None !! safariUserAgent !! safariJson | Some(customRules) !! mobileSafariUserAgent !! testAgentJson |> { (rules, input, expected) => val json = for { - c <- EitherT.rightT[Eval, String](UaParserConf(schemaKey, rules)) - e <- c.enrichment[Eval].leftMap(_.toString) - res <- EitherT.fromEither[Eval](e.extractUserAgent(input)).leftMap(_.toString) + c <- EitherT.rightT[Id, String](UaParserConf(schemaKey, rules)) + e <- c.enrichment[Id].leftMap(_.toString) + res <- EitherT.fromEither[Id](e.extractUserAgent(input)).leftMap(_.toString) } yield res - json.value.value must beRight(expected) + json.value must beRight(expected) } } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala index 4155b5c8f..a67398c56 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala @@ -14,12 +14,18 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.lang.{Float => JFloat} -import cats.Eval +import cats.Id import cats.data.EitherT -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + import io.circe.generic.auto._ import io.circe.literal._ + import org.joda.time.DateTime + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.WeatherConf + import org.specs2.Specification object WeatherEnrichmentSpec { @@ -43,11 +49,10 @@ class WeatherEnrichmentSpec extends Specification { val schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) lazy val validAppKey = sys.env - .get(OwmApiKey) - .getOrElse( - throw new IllegalStateException( - s"No $OwmApiKey environment variable found, test should have been skipped" - ) + .getOrElse(OwmApiKey, + throw new IllegalStateException( + s"No $OwmApiKey environment variable found, test should have been skipped" + ) ) object invalidEvent { @@ -65,7 +70,7 @@ class WeatherEnrichmentSpec extends Specification { def e1 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", "KEY", 10, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(invalidEvent.lat), @@ -74,7 +79,7 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beLeft.like { + res.value must beLeft.like { case e => e must contain("InputData(derived_tstamp,None,missing)") } @@ -83,7 +88,7 @@ class WeatherEnrichmentSpec extends Specification { def e2 = { val res 
= for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 10, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -92,13 +97,13 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beRight + res.value must beRight } def e3 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", "KEY", 10, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -107,13 +112,13 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beLeft.like { case e => e must contain("Check your API key") } + res.value must beLeft.like { case e => e must contain("Check your API key") } } def e4 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 15, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -122,7 +127,7 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beRight.like { + res.value must beRight.like { case weather => val temp = weather.data.hcursor.downField("main").get[Double]("humidity") temp must beRight(69.0d) @@ -164,7 +169,7 @@ class WeatherEnrichmentSpec extends Specification { def e6 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 15, 2, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -173,7 +178,7 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beRight.like { // successful request + res.value must beRight.like { // successful request case weather => weather.data.hcursor.as[TransformedWeather] must beRight.like { case w => diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala index 11275d358..fef14842f 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala @@ -10,16 +10,12 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} - -import io.circe.parser._ import io.circe.literal._ import nl.basjes.parse.useragent.UserAgent +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import org.specs2.matcher.ValidatedMatchers import org.specs2.mutable.Specification @@ -28,6 +24,7 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { import YauaaEnrichment.decapitalize + /** Default enrichment with 1-0-0 context */ val yauaaEnrichment = YauaaEnrichment(None) // Devices @@ -68,11 +65,11 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { "YAUAA enrichment should" >> { "return default value for null" >> { - yauaaEnrichment.parseUserAgent(null) shouldEqual yauaaEnrichment.defaultResult + yauaaEnrichment.parseUserAgent(null) shouldEqual YauaaEnrichment.DefaultResult } "return default value for empty user agent" >> { - yauaaEnrichment.parseUserAgent("") shouldEqual yauaaEnrichment.defaultResult + yauaaEnrichment.parseUserAgent("") shouldEqual YauaaEnrichment.DefaultResult } "detect correctly DeviceClass" >> { @@ -185,18 +182,41 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { ) } - "create a JSON with the schema and the data" >> { + "create a JSON with the schema 1-0-0 and the data" >> { val expected = SelfDescribingData( - yauaaEnrichment.outputSchema, - json"""{"deviceBrand":"Samsung","deviceName":"Samsung SM-G960F","layoutEngineNameVersion":"Blink 62.0","operatingSystemNameVersion":"Android 8.0.0","operatingSystemVersionBuild":"R16NW","layoutEngineNameVersionMajor":"Blink 62","operatingSystemName":"Android","agentVersionMajor":"62","layoutEngineVersionMajor":"62","deviceClass":"Phone","agentNameVersionMajor":"Chrome 62","operatingSystemClass":"Mobile","layoutEngineName":"Blink","agentName":"Chrome","agentVersion":"62.0.3202.84","layoutEngineClass":"Browser","agentNameVersion":"Chrome 62.0.3202.84","operatingSystemVersion":"8.0.0","agentClass":"Browser","layoutEngineVersion":"62.0"}""" + YauaaEnrichment.outputSchema, + json"""{ + "deviceBrand":"Samsung", + "deviceName":"Samsung SM-G960F", + "layoutEngineNameVersion":"Blink 62.0", + "operatingSystemNameVersion":"Android 8.0.0", + "operatingSystemVersionBuild":"R16NW", + "layoutEngineNameVersionMajor":"Blink 62", + "operatingSystemName":"Android", + "agentVersionMajor":"62", + "layoutEngineVersionMajor":"62", + "deviceClass":"Phone", + "agentNameVersionMajor":"Chrome 62", + "operatingSystemClass":"Mobile", + "layoutEngineName":"Blink", + "agentName":"Chrome", + "agentVersion":"62.0.3202.84", + "layoutEngineClass":"Browser", + "agentNameVersion":"Chrome 62.0.3202.84", + "operatingSystemVersion":"8.0.0", + "agentClass":"Browser", + "layoutEngineVersion":"62.0", + "operatingSystemNameVersionMajor":"Android 8", + "operatingSystemVersionMajor":"8" + }""" ) val actual = yauaaEnrichment.getYauaaContext(uaGalaxyS9) actual shouldEqual expected val defaultJson = SelfDescribingData( - yauaaEnrichment.outputSchema, + YauaaEnrichment.outputSchema, json"""{"deviceClass":"Unknown"}""" ) yauaaEnrichment.getYauaaContext("") shouldEqual defaultJson @@ -239,24 +259,22 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { "successfully construct a YauaaEnrichment case class with the right cache size if specified" in { val 
cacheSize = 42 - val yauaaConfigJson = parse(s"""{ + val yauaaConfigJson = json"""{ "enabled": true, "parameters": { "cacheSize": $cacheSize } - }""").toOption.get + }""" - val expected = YauaaConf(Some(cacheSize)) + val expected = EnrichmentConf.YauaaConf(schemaKey, Some(cacheSize)) val actual = YauaaEnrichment.parse(yauaaConfigJson, schemaKey) actual must beValid(expected) } "successfully construct a YauaaEnrichment case class with a default cache size if none specified" in { - val yauaaConfigJson = parse(s"""{ - "enabled": true - }""").toOption.get + val yauaaConfigJson = json"""{"enabled": true }""" - val expected = YauaaConf(None) + val expected = EnrichmentConf.YauaaConf(schemaKey, None) val actual = YauaaEnrichment.parse(yauaaConfigJson, schemaKey) actual must beValid(expected) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala index 703f8db17..eaf36ba21 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import cats.Id import cats.syntax.either._ @@ -21,16 +19,17 @@ import io.circe.Json import io.circe.literal._ import io.circe.parser._ -import org.specs2.Specification -import org.specs2.matcher.ValidatedMatchers -import org.specs2.mock.Mockito - import scalaj.http.HttpRequest import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} -import outputs.EnrichedEvent -import utils.HttpClient +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient + +import org.specs2.Specification +import org.specs2.matcher.ValidatedMatchers +import org.specs2.mock.Mockito class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with Mockito { def is = s2""" diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala index 93568490e..9f233f676 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala @@ -10,12 +10,15 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
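// Note: the YAUAA spec above asserts both the parsed context fields and the cacheSize
// configuration. For orientation, a minimal standalone use of the wrapped library
// (nl.basjes yauaa); builder flags beyond withCache are omitted here and may differ
// from the enrichment's real setup:
import nl.basjes.parse.useragent.UserAgentAnalyzer
val analyzer = UserAgentAnalyzer.newBuilder().withCache(42).build()
val agent = analyzer.parse(
  "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36"
)
val deviceClass = agent.getValue("DeviceClass") // "Phone" for this Galaxy S9 UA
// the enrichment then decapitalizes the keys ("DeviceClass" -> "deviceClass")
// to build the context JSON shown in the spec's expectations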
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import cats.Id + import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers import org.specs2.mock.Mockito @@ -49,7 +52,6 @@ class HttpApiSpec extends Specification with ValidatedMatchers with Mockito { request must beSome("http://thishostdoesntexist31337:8123/admin/foo/November+2015/admin") } - // This one uses real actor system def e3 = { val schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) val enrichment = ApiRequestConf( @@ -60,7 +62,7 @@ class HttpApiSpec extends Specification with ValidatedMatchers with Mockito { Cache(1, 1) ).enrichment[Id] - val event = new outputs.EnrichedEvent + val event = new EnrichedEvent val request = enrichment.lookup(event, Nil, Nil, None) request must beInvalid } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala index 887da6e64..96e8758a4 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
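// Note: HttpApiSpec above expects the built lookup URL
// "http://thishostdoesntexist31337:8123/admin/foo/November+2015/admin": the api_request
// enrichment substitutes template placeholders with URL-encoded input values. A hedged
// sketch of that substitution ({{key}} syntax and the exact escaping are assumptions):
import java.net.URLEncoder
def buildUrl(template: String, inputs: Map[String, String]): String =
  inputs.foldLeft(template) { case (url, (key, value)) =>
    url.replace(s"{{$key}}", URLEncoder.encode(value, "UTF-8"))
  }
// buildUrl("http://host:8123/admin/{{user}}/{{period}}/admin",
//          Map("user" -> "foo", "period" -> "November 2015"))
//   == "http://host:8123/admin/foo/November+2015/admin"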
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import cats.Id import cats.data.ValidatedNel @@ -22,9 +20,11 @@ import io.circe.literal._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers -import outputs.EnrichedEvent class InputSpec extends Specification with ValidatedMatchers { def is = s2""" @@ -253,7 +253,7 @@ class InputSpec extends Specification with ValidatedMatchers { List(Output("iglu:someschema", JsonOutput("$").some)), Cache(10, 5) ).enrichment[Id] - val event = new outputs.EnrichedEvent + val event = new EnrichedEvent event.setUser_id("chuwy") // time in true_tstamp won't be found val request = enrichment.lookup(event, Nil, Nil, None) @@ -276,7 +276,7 @@ class InputSpec extends Specification with ValidatedMatchers { json"""{ "somekey": "somevalue" }""" ) - input.pull(new outputs.EnrichedEvent, Nil, List(obj), None) must beValid.like { + input.pull(new EnrichedEvent, Nil, List(obj), None) must beValid.like { case Some(context) => context must beEqualTo(Map("permissive" -> "somevalue")) case None => diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ValidatorSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ValidatorSpec.scala new file mode 100644 index 000000000..1dd0beb89 --- /dev/null +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ValidatorSpec.scala @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ + +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest + +import com.snowplowanalytics.iglu.client.CirceValidator +import com.snowplowanalytics.snowplow.enrich.common.utils.JsonPath.query +import io.circe.Json +import io.circe.literal.JsonStringContext +import org.specs2.Specification +import org.specs2.matcher.ValidatedMatchers +import org.specs2.specification.core.SpecStructure + +class ValidatorSpec extends Specification with ValidatedMatchers { + override def is: SpecStructure = s2""" + validate integer field using a valid long value (maximum long) $e1 + validate integer field using a valid long value (minimum long) $e2 + validate number field using a positive float value $e3 + validate number field using a negative float value $e4 + validate number field using a negative double value $e5 + validate number field using a positive double value $e6 + invalidate integer field using a positive double value $e7 + """ + + val schema = + json"""{ "type": "object", "properties": { "orderID": { "type": "integer" }, "price": { "type": "number" } }, "additionalProperties": false }""" + + def e1 = + query("$", json"""{"orderID": 9223372036854775807 }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e2 = + query("$", json"""{"orderID": -9223372036854775808 }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e3 = + query("$", json"""{"price": ${Json.fromFloatOrString(88.92f)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e4 = + query("$", json"""{"price": ${Json.fromFloatOrString(-34345328.72f)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e5 = + query("$", json"""{"price": ${Json.fromDoubleOrString(-34345488.72)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e6 = + query("$", json"""{"price": ${Json.fromDoubleOrString(32488.72)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e7 = + query("$", json"""{"orderID": ${Json.fromDoubleOrString(32488.72)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beLeft +} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 60d14e364..32afa53a5 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -37,6 +37,7 @@ import com.snowplowanalytics.snowplow.badrows.{BadRow, Processor} import com.snowplowanalytics.snowplow.enrich.common.{EtlPipeline, SpecHelpers} import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.IpLookupsEnrichment +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CampaignAttributionEnrichment import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -55,6 +56,7 @@ class PiiPseudonymizerEnrichmentSpec extends 
Specification with ValidatedMatcher Hashing configured JSON fields in POJO should silently ignore unsupported types $e6 Hashing configured JSON and scalar fields in POJO emits a correct pii_transformation event $e7 Hashing configured JSON fields in POJO should not create new fields $e8 + removeAddedFields should remove fields added by PII enrichment $e9 """ def commonSetup(enrichmentReg: EnrichmentRegistry[Id]): List[Validated[BadRow, EnrichedEvent]] = { @@ -76,6 +78,17 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher "uid" -> "john@acme.com", "ip" -> "70.46.123.145", "fp" -> "its_you_again!", + "url" -> "http://foo.bar?utm_term=hello&utm_content=world&msclkid=500&_sp=duid", + "dnuid" -> "gfhdgjfgndf", + "nuid" -> "kuykyfkfykukfuy", + "tr_id" -> "t5465463", + "ti_id" -> "6546b56356b354bbv", + "se_ca" -> "super category", + "se_ac" -> "great action", + "se_la" -> "awesome label", + "se_pr" -> "good property", + "duid" -> "786d1b69-a603-4eb8-9178-fed2a195a1ed", + "sid" -> "87857856-a603-4eb8-9178-fed2a195a1ed", "co" -> """ |{ @@ -100,6 +113,15 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher | "someInt": 1 | }, | "schema": "iglu:com.acme/email_sent/jsonschema/1-1-0" + | }, + | { + | "schema": "iglu:com.test/array/jsonschema/1-0-0", + | "data": { + | "field" : ["hello", "world"], + | "field2" : null, + | "field3": null, + | "field4": "" + | } | } | ] |} @@ -178,19 +200,75 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher IpLookupsEnrichment.parse(js, schemaKey, true).toOption.get.enrichment[Id] } + private val campaignAttributionEnrichment = { + val js = json"""{ + "enabled": true, + "parameters": { + "mapping": "static", + "fields": { + "mktMedium": ["utm_medium"], + "mktSource": ["utm_source"], + "mktTerm": ["utm_term"], + "mktContent": ["utm_content"], + "mktCampaign": ["utm_campaign"] + } + } + }""" + val schemaKey = SchemaKey( + "com.snowplowanalytics.snowplow", + "campaign_attribution", + "jsonschema", + SchemaVer.Full(1, 0, 1) + ) + CampaignAttributionEnrichment.parse(js, schemaKey).toOption.get.enrichment + } + def e1 = { val enrichmentReg = EnrichmentRegistry[Id]( ipLookups = ipEnrichment.some, + campaignAttribution = campaignAttributionEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiScalar(fieldMutator = ScalarMutators("user_id")), PiiScalar( fieldMutator = ScalarMutators("user_ipaddress") ), - PiiScalar(fieldMutator = ScalarMutators("ip_domain")), PiiScalar( fieldMutator = ScalarMutators("user_fingerprint") - ) + ), + PiiScalar( + fieldMutator = ScalarMutators("domain_userid") + ), + PiiScalar( + fieldMutator = ScalarMutators("network_userid") + ), + PiiScalar( + fieldMutator = ScalarMutators("ip_organization") + ), + PiiScalar( + fieldMutator = ScalarMutators("ip_domain") + ), + PiiScalar( + fieldMutator = ScalarMutators("tr_orderid") + ), + PiiScalar( + fieldMutator = ScalarMutators("ti_orderid") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_term") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_clickid") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_content") + ), + PiiScalar(fieldMutator = ScalarMutators("se_category")), + PiiScalar(fieldMutator = ScalarMutators("se_action")), + PiiScalar(fieldMutator = ScalarMutators("se_label")), + PiiScalar(fieldMutator = ScalarMutators("se_property")), + PiiScalar(fieldMutator = ScalarMutators("refr_domain_userid")), + PiiScalar(fieldMutator = ScalarMutators("domain_sessionid")) ), false, 
PiiStrategyPseudonymize( @@ -203,24 +281,52 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher val output = commonSetup(enrichmentReg) val expected = new EnrichedEvent() expected.app_id = "ads" - expected.user_id = "7d8a4beae5bc9d314600667d2f410918f9af265017a6ade99f60a9c8f3aac6e9" - expected.user_ipaddress = "dd9720903c89ae891ed5c74bb7a9f2f90f6487927ac99afe73b096ad0287f3f5" - expected.ip_domain = null - expected.user_fingerprint = "27abac60dff12792c6088b8d00ce7f25c86b396b8c3740480cd18e21068ecff4" expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" + expected.user_id = "7d8a4beae5bc9d314600667d2f410918f9af265017a6ade99f60a9c8f3aac6e9" + expected.user_ipaddress = "dd9720903c89ae891ed5c74bb7a9f2f90f6487927ac99afe73b096ad0287f3f5" + expected.user_fingerprint = "27abac60dff12792c6088b8d00ce7f25c86b396b8c3740480cd18e21068ecff4" + expected.domain_userid = "e97d86d49b16397e8fd654b32a0ed03cfe3a4d8d867d913620ce08e3ca855d6d" + expected.network_userid = "47453d3c4428207d22005463bb3d945b137f9342d445b7114776e88311bbe648" + expected.ip_organization = "4d5dd7eebeb9d47f9ebff5993502c0380a110c34711ef5062fdb84a563759f3b" + expected.ip_domain = null + expected.tr_orderid = "5139219b15f3d1ab0c5056296cf5246eeb0b934ee5d1c96cb2027e694005bbce" + expected.ti_orderid = "326c0bfc5857f21695406ebd93068341c9f2d975cf00d117479e01e9012e196c" + expected.mkt_term = "b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb" + expected.mkt_clickid = "fae3733fa03cdf57d82e89ac63026afd8782d07ba3c918acb415a4343457785f" + expected.mkt_content = "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" + expected.se_category = "f33daec1ed4cb688f4f1762390735fd78f6a06083f855422a7303ed63707c962" + expected.se_action = "53f3e1ca4a0dccce4a1b2900a6bcfd21b22a0f444253067e2fe022948a0b3be7" + expected.se_label = "b243defc0d3b86333a104fb2b3a2f43371b8d73359c429b9177dfc5bb3840efd" + expected.se_property = "eb19004c52cd4557aacfa0b30035160c417c3a6a5fad44b96f03c9e2bebaf0b3" + expected.refr_domain_userid = "f3e68fd96eaef0cafc1257ec7132b4b3dbae20b1073155531f909999e5da9b2c" + expected.domain_sessionid = "7378a72b0183f456df98453b2ff9ed5685206a67f312edb099dc74aed76e1b34" val size = output.size must_== 1 val validOut = output.head must beValid.like { case enrichedEvent => (enrichedEvent.app_id must_== expected.app_id) and + (enrichedEvent.geo_city must_== expected.geo_city) and + (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and + (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) and (enrichedEvent.user_id must_== expected.user_id) and (enrichedEvent.user_ipaddress must_== expected.user_ipaddress) and - (enrichedEvent.ip_domain must_== expected.ip_domain) and (enrichedEvent.user_fingerprint must_== expected.user_fingerprint) and - (enrichedEvent.geo_city must_== expected.geo_city) and - (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and - (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) + (enrichedEvent.domain_userid must_== expected.domain_userid) and + (enrichedEvent.network_userid must_== expected.network_userid) and + (enrichedEvent.ip_organization must_== expected.ip_organization) and + (enrichedEvent.ip_domain must_== expected.ip_domain) and + (enrichedEvent.tr_orderid must_== expected.tr_orderid) and + (enrichedEvent.ti_orderid must_== expected.ti_orderid) and + (enrichedEvent.mkt_term must_== expected.mkt_term) and + (enrichedEvent.mkt_clickid must_== expected.mkt_clickid) 
and + (enrichedEvent.mkt_content must_== expected.mkt_content) and + (enrichedEvent.se_category must_== expected.se_category) and + (enrichedEvent.se_action must_== expected.se_action) and + (enrichedEvent.se_label must_== expected.se_label) and + (enrichedEvent.se_property must_== expected.se_property) and + (enrichedEvent.refr_domain_userid must_== expected.refr_domain_userid) and + (enrichedEvent.domain_sessionid must_== expected.domain_sessionid) } size and validOut } @@ -240,15 +346,35 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 1, 0), jsonPath = "$.data.emailAddress2" ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field" + ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field2" + ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field3.a" + ), PiiJson( fieldMutator = JsonMutators("unstruct_event"), schemaCriterion = SchemaCriterion("com.mailgun", "message_clicked", "jsonschema", 1, 0, 0), jsonPath = "$.ip" + ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field4" + ) ), false, PiiStrategyPseudonymize( "SHA-256", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -271,10 +397,10 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor val contextJFirstElement = contextJ.downField("data").downArray val contextJSecondElement = contextJFirstElement.right + val contextJThirdElement = contextJSecondElement.right val unstructEventJ = parse(enrichedEvent.unstruct_event).toOption.get.hcursor .downField("data") .downField("data") - val first = (contextJFirstElement .downField("data") .get[String]("emailAddress") must beRight( @@ -311,7 +437,29 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher )) and (unstructEventJ.get[String]("myVar2") must beRight("awesome")) - first and second + val third = (contextJThirdElement + .downField("data") + .get[List[String]]("field") must + beRight( + List[String]("b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb", + "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" + ) + )) and + (contextJThirdElement + .downField("data") + .downField("field2") + .focus must beSome.like { case json => json.isNull }) and + (contextJThirdElement + .downField("data") + .downField("field3") + .focus must beSome.like { case json => json.isNull }) + + // Test that empty string in Pii field gets hashed + val fourth = contextJThirdElement + .downField("data") + .get[String]("field4") must beRight("7a3477dad66e666bd203b834c54b6dfe8b546bdbc5283462ad14052abfb06600") + + first and second and third and fourth } size and validOut @@ -330,7 +478,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( "SHA-256", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -375,7 +523,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false,
PiiStrategyPseudonymize( "SHA-256", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -423,7 +571,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( "SHA-256", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -593,30 +741,68 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ).some ) val output = commonSetup(enrichmentReg) - val expected = new EnrichedEvent() - expected.app_id = "ads" - expected.user_id = "john@acme.com" - expected.user_ipaddress = "70.46.123.145" - expected.ip_domain = null - expected.user_fingerprint = "its_you_again!" - expected.geo_city = "Delray Beach" - expected.etl_tstamp = "1970-01-18 08:40:00.000" - expected.collector_tstamp = "2017-07-14 03:39:39.000" val size = output.size must_== 1 val validOut = output.head must beValid.like { case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") + val context = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data").downArray + val data = context.downField("data") - (firstElem.get[String]("emailAddress") must beRight( - "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" - )) and - (firstElem.downField("data").get[String]("nonExistentEmailAddress") must beLeft) and - (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and - (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) + val one = data.get[String]("emailAddress") must beRight("72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6") + val two = data.get[String]("emailAddress2") must beRight("bob@acme.com") + val three = data.downField("nonExistentEmailAddress").focus must beNone + + one and two and three } size and validOut } + + def e9 = { + val orig = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "foo@bar.com", + "emailAddress2" : "bob@acme.com" + } + } + ] + } + """ + + val hashed = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6", + "emailAddress2" : "bob@acme.com", + "nonExistentEmailAddress" : {} + } + } + ] + } + """ + + val expected = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6", + "emailAddress2" : "bob@acme.com" + } + } + ] + } + """ + + PiiPseudonymizerEnrichment.removeAddedFields(hashed, orig) must beEqualTo(expected) + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala index fb299c5da..ccfa2f559 100644 ---
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala @@ -10,21 +10,21 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry.sqlquery +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery import scala.collection.immutable.IntMap +import io.circe.DecodingFailure import io.circe.literal._ import io.circe.parser._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers -import outputs.EnrichedEvent - class InputSpec extends Specification with ValidatedMatchers { def is = s2""" create template context from POJO inputs $e1 @@ -39,6 +39,11 @@ class InputSpec extends Specification with ValidatedMatchers { check all EnrichedEvent properties can be handled $e10 extract correct path-dependent values from EnrichedEvent $e11 getBySchemaCriterion should return a data payload $e12 + inputsToIntmap assigns inputs to proper positions $e13 + JSON decoding when missing placeholders $e14 + JSON decoding when placeholder number is lt 1 $e15 + JSON decoding when both pojo and json is provided $e16 + JSON decoding when neither pojo nor json is provided $e17 """ object ContextCase { @@ -302,13 +307,17 @@ class InputSpec extends Specification with ValidatedMatchers { val jsonBool = Input.extractFromJson(json"true") val jsonBigInt = Input.extractFromJson(parse((java.lang.Long.MAX_VALUE - 1).toString).toOption.get) + val jsonDouble = Input.extractFromJson(json"12.6") + val jsonArray = Input.extractFromJson(json"[4,8,16]") val o = jsonObject must beNone val n = jsonNull must beNone val b = jsonBool must beSome(Input.BooleanPlaceholder.Value(true)) val l = jsonBigInt must beSome(Input.LongPlaceholder.Value(java.lang.Long.MAX_VALUE - 1)) + val d = jsonDouble must beSome(Input.DoublePlaceholder.Value(12.6)) + val a = jsonArray must beNone - o.and(n).and(b).and(l) + o.and(n).and(b).and(l).and(d).and(a) } def e11 = { @@ -344,4 +353,56 @@ class InputSpec extends Specification with ValidatedMatchers { result must beSome(ContextCase.overriderContext.data) } + + def e13 = { + val result = Input.inputsToIntmap(List(ContextCase.ccInput, ContextCase.derInput)) + result ==== IntMap(1 -> ContextCase.ccInput, 2 -> ContextCase.derInput) + } + + def e14 = { + val in = + json"""{ + "placeholder_wrong": 1, + "pojo": { + "field": "user_id" + } + }""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Placeholder is missing", Nil)) + } + + def e15 = { + val in = + json"""{ + "placeholder": 0, + "pojo": { + "field": "user_id" + } + }""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Placeholder must be greater than 1", Nil)) + } + + def e16 = { + val in = + json"""{ + "placeholder": 1, + "pojo": { + "field": "user_id" + }, + "json": { + "field": "derived_contexts", + "schemaCriterion": "iglu:org.openweathermap/weather/jsonschema/*-*-*", + "jsonPath": "$$.dt" + } + }""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Either json or pojo 
input must be specified, both provided", Nil)) + } + + def e17 = { + val in = json"""{"placeholder": 1}""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Either json or pojo input must be specified", Nil)) + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala index ebaa0c714..dc3365a1a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala @@ -10,14 +10,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -package sqlquery +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery import io.circe.parser._ import io.circe.literal._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils import org.specs2.Specification diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala index 0b6da214f..253f0494a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala @@ -12,10 +12,23 @@ */ package com.snowplowanalytics.snowplow.enrich.common.loaders +import cats.syntax.option._ + +import org.apache.http.NameValuePair +import org.apache.http.message.BasicNameValuePair +import org.apache.thrift.TSerializer + +import org.joda.time.{DateTimeZone, LocalDate} + +import org.scalacheck.{Arbitrary, Gen} + +import org.specs2.ScalaCheck import org.specs2.mutable.Specification -import org.specs2.matcher.DataTables +import org.specs2.matcher.{DataTables, ValidatedMatchers} + +import com.snowplowanalytics.snowplow.badrows.Processor -class CollectorPayloadSpec extends Specification with DataTables { +class CollectorPayloadSpec extends Specification with DataTables with ScalaCheck with ValidatedMatchers { // TODO: let's abstract this up to a CollectorApi.parse test // (then we can make isIceRequest private again). 
@@ -31,4 +44,74 @@ class CollectorPayloadSpec extends Specification with DataTables { } } } + + "toThrift" should { + implicit val arbitraryPayload: Arbitrary[CollectorPayload] = + Arbitrary(CollectorPayloadSpec.collectorPayloadGen) + + "be isomorphic to ThriftLoader.toCollectorPayload" >> { + prop { payload: CollectorPayload => + val bytes = CollectorPayloadSpec.thriftSerializer.serialize(payload.toThrift) + val result = ThriftLoader.toCollectorPayload(bytes, Processor("test", "0.0.1")) + result must beValid(Some(payload)) + } + } + } +} + +object CollectorPayloadSpec { + + val thriftSerializer = new TSerializer() + + val apiGen = Gen.oneOf( + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1"), + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp2"), + CollectorPayload.Api("r", "tp2"), + CollectorPayload.Api("com.snowplowanalytics.iglu", "v1"), + CollectorPayload.Api("com.mailchimp", "v1") + ) + + val nameValuePair = for { + k <- Gen.oneOf("qkey", "key2", "key_3", "key-4", "key 5") + v <- Gen.option(Gen.oneOf("iglu:com.acme/under_score/jsonschema/1-0-3", "foo", "1", "null")) + } yield new BasicNameValuePair(k, v.orNull) + val queryParametersGen: Gen[List[NameValuePair]] = + for { + n <- Gen.chooseNum(0, 4) + list <- Gen.listOfN[NameValuePair](n, nameValuePair) + } yield list + + val contentTypeGen: Gen[String] = Gen.oneOf("text/plain", "application/json", "application/json; encoding=utf-8") + + val source: CollectorPayload.Source = CollectorPayload.Source("host", "UTF-8", "localhost".some) + + val localDateGen: Gen[LocalDate] = Gen.calendar.map(LocalDate.fromCalendarFields) + val ipGen: Gen[String] = for { + part1 <- Gen.choose(2, 255) + part2 <- Gen.choose(0, 255) + part3 <- Gen.choose(0, 255) + part4 <- Gen.choose(0, 255) + } yield s"$part1.$part2.$part3.$part4" + val headerGen: Gen[String] = for { + first <- Gen.asciiPrintableStr.map(_.capitalize) + second <- Gen.option(Gen.asciiPrintableStr.map(_.capitalize)) + key = second.fold(first)(s => s"$first-$s") + value <- Gen.identifier + } yield s"$key: $value" + val contextGen: Gen[CollectorPayload.Context] = for { + timestamp <- localDateGen.map(_.toDateTimeAtStartOfDay(DateTimeZone.UTC)).map(Option.apply) + ip <- Gen.option(ipGen) + userAgent <- Gen.option(Gen.identifier) + headersN <- Gen.chooseNum(0, 8) + headers <- Gen.listOfN(headersN, headerGen) + userId <- Gen.option(Gen.uuid) + } yield CollectorPayload.Context(timestamp, ip, userAgent, None, headers, userId) + + val collectorPayloadGen: Gen[CollectorPayload] = for { + api <- apiGen + kvlist <- queryParametersGen + contentType <- Gen.option(contentTypeGen) + body <- Gen.option(Gen.asciiPrintableStr.suchThat(_.nonEmpty)) + context <- contextGen + } yield CollectorPayload(api, kvlist, contentType, body, source, context) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala index cc67ccd9c..13e8817dd 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala @@ -10,189 +10,261 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package loaders +package com.snowplowanalytics.snowplow.enrich.common.loaders import java.util.UUID import cats.data.NonEmptyList import cats.syntax.option._ -import com.snowplowanalytics.snowplow.badrows._ - import org.apache.commons.codec.binary.Base64 - import org.joda.time.DateTime -import org.specs2.{ScalaCheck, Specification} -import org.specs2.matcher.{DataTables, ValidatedMatchers} +import com.snowplowanalytics.snowplow.badrows.{BadRow, Failure, FailureDetails, Payload, Processor} -import SpecHelpers._ +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification +import org.specs2.matcher.ValidatedMatchers -class ThriftLoaderSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { - val Process = Processor("ThriftLoaderSpec", "v1") +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers.toNameValuePairs +import com.snowplowanalytics.snowplow.enrich.common.loaders.ThriftLoaderSpec._ - def is = s2""" - toCollectorPayload should return a CollectorPayload for a valid Thrift CollectorPayload (even if parameterless) $e1 - toCollectorPayload should return a Validation Failure for an invalid or corrupted Thrift CollectorPayload $e2 - """ +class ThriftLoaderSpec extends Specification with ValidatedMatchers with ScalaCheck { + "toCollectorPayload" should { + "tolerate fake tracker protocol GET parameters" >> { + val raw = + "CgABAAABQ5iGqAYLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAkxMjcuMC4wLjEMACkIAAEAAAABCAACAAAAAQsAAwAAABh0ZXN0UGFyYW09MyZ0ZXN0UGFyYW0yPTQACwAtAAAACTEyNy4wLjAuMQsAMgAAAGhNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNg8ARgsAAAAIAAAAL0Nvb2tpZTogc3A9YzVmM2EwOWYtNzVmOC00MzA5LWJlYzUtZmVhNTYwZjc4NDU1AAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAJEFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZSwgc2RjaAAAAHRVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNgAAAFZBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksIGltYWdlL3dlYnAsICovKjtxPTAuOAAAABhDYWNoZS1Db250cm9sOiBtYXgtYWdlPTAAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAABRIb3N0OiAxMjcuMC4wLjE6ODA4MAsAUAAAACRjNWYzYTA5Zi03NWY4LTQzMDktYmVjNS1mZWE1NjBmNzg0NTUA" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) - object Expected { - val encoding = "UTF-8" - val collector = "ssc-0.0.1-Stdout" // Note we have since fixed -stdout to be lowercase - val api = CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1") - } + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-16T00:49:58.278+00:00").some, + ipAddress = "127.0.0.1".some, + useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36".some, + refererUri = None, + headers = List( + "Cookie: sp=c5f3a09f-75f8-4309-bec5-fea560f78455", + "Accept-Language: en-US, en", + "Accept-Encoding: gzip, deflate, sdch", + "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36", + "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, */*;q=0.8", + "Cache-Control: max-age=0", + "Connection: keep-alive", + "Host: 127.0.0.1:8080" + ), + userId = UUID.fromString("c5f3a09f-75f8-4309-bec5-fea560f78455").some + ) + val expected = CollectorPayload( + api = 
ThriftLoaderSpec.Api, + querystring = toNameValuePairs("testParam" -> "3", "testParam2" -> "4"), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "127.0.0.1".some), + context = context + ) - def e1 = - "SPEC NAME" || "RAW" | "EXP. TIMESTAMP" | "EXP. PAYLOAD" | "EXP. HOSTNAME" | "EXP. IP ADDRESS" | "EXP. USER AGENT" | "EXP. REFERER URI" | "EXP. HEADERS" | "EXP. USER ID" | - "Fake params" !! "CgABAAABQ5iGqAYLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAkxMjcuMC4wLjEMACkIAAEAAAABCAACAAAAAQsAAwAAABh0ZXN0UGFyYW09MyZ0ZXN0UGFyYW0yPTQACwAtAAAACTEyNy4wLjAuMQsAMgAAAGhNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNg8ARgsAAAAIAAAAL0Nvb2tpZTogc3A9YzVmM2EwOWYtNzVmOC00MzA5LWJlYzUtZmVhNTYwZjc4NDU1AAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAJEFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZSwgc2RjaAAAAHRVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNgAAAFZBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksIGltYWdlL3dlYnAsICovKjtxPTAuOAAAABhDYWNoZS1Db250cm9sOiBtYXgtYWdlPTAAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAABRIb3N0OiAxMjcuMC4wLjE6ODA4MAsAUAAAACRjNWYzYTA5Zi03NWY4LTQzMDktYmVjNS1mZWE1NjBmNzg0NTUA" ! - DateTime.parse("2014-01-16T00:49:58.278+00:00") ! toNameValuePairs( - "testParam" -> "3", - "testParam2" -> "4" - ) ! "127.0.0.1".some ! "127.0.0.1".some ! "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36".some ! None ! List( - "Cookie: sp=c5f3a09f-75f8-4309-bec5-fea560f78455", - "Accept-Language: en-US, en", - "Accept-Encoding: gzip, deflate, sdch", - "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36", - "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, */*;q=0.8", - "Cache-Control: max-age=0", - "Connection: keep-alive", - "Host: 127.0.0.1:8080" - ) ! UUID.fromString("c5f3a09f-75f8-4309-bec5-fea560f78455").some | - "Page ping" !! 
"CgABAAABQ9pNXggLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACZmU9cHAmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJnBwX21peD0wJnBwX21heD0wJnBwX21peT0wJnBwX21heT0wJmNvPSU3QiUyMnBhZ2UlMjI6JTdCJTIycGFnZV90eXBlJTIyOiUyMnRlc3QlMjIsJTIybGFzdF91cGRhdGVkJHRtcyUyMjoxMzkzMzcyODAwMDAwJTdELCUyMnVzZXIlMjI6JTdCJTIydXNlcl90eXBlJTIyOiUyMnRlc3RlciUyMiU3RCU3RCZkdG09MTM5MDkzNjkzODg1NSZ0aWQ9Nzk3NzQzJnZwPTI1NjB4OTYxJmRzPTI1NjB4OTYxJnZpZD03JmR1aWQ9M2MxNzU3NTQ0ZTM5YmNhNCZwPW1vYiZ0dj1qcy0wLjEzLjEmZnA9MjY5NTkzMDgwMyZhaWQ9Q0ZlMjNhJmxhbmc9ZW4tVVMmY3M9VVRGLTgmdHo9RXVyb3BlL0xvbmRvbiZ1aWQ9YWxleCsxMjMmZl9wZGY9MCZmX3F0PTEmZl9yZWFscD0wJmZfd21hPTAmZl9kaXI9MCZmX2ZsYT0xJmZfamF2YT0wJmZfZ2VhcnM9MCZmX2FnPTAmcmVzPTI1NjB4MTQ0MCZjZD0yNCZjb29raWU9MSZ1cmw9ZmlsZTovL2ZpbGU6Ly8vVXNlcnMvYWxleC9EZXZlbG9wbWVudC9kZXYtZW52aXJvbm1lbnQvZGVtby8xLXRyYWNrZXIvZXZlbnRzLmh0bWwvb3ZlcnJpZGRlbi11cmwvAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAcAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAAnBDb29raWU6IF9fdXRtYT0xMTE4NzIyODEuODc4MDg0NDg3LjEzOTAyMzcxMDcuMTM5MDg0ODQ4Ny4xMzkwOTMxNTIxLjY7IF9fdXRtej0xMTE4NzIyODEuMTM5MDIzNzEwNy4xLjEudXRtY3NyPShkaXJlY3QpfHV0bWNjbj0oZGlyZWN0KXx1dG1jbWQ9KG5vbmUpOyBfc3BfaWQuMWZmZj1iODlhNmZhNjMxZWVmYWMyLjEzOTAyMzcxMDcuNi4xMzkwOTMxNTQ1LjEzOTA4NDg2NDE7IGhibGlkPUNQamp1aHZGMDV6a3RQN0o3TTVWbzNOSUdQTEp5MVNGOyBvbGZzaz1vbGZzazU2MjkyMzYzNTYxNzU1NDsgX191dG1jPTExMTg3MjI4MTsgd2NzaWQ9dU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9va2x2PTEzOTA5MzE1ODU0NDUlMkN1TWxvZzFRSlZEN2p1aEZaN001Vm9CQ3lQUHlpQnlTUzsgX29rPTk3NTItNTAzLTEwLTUyMjc7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MDkzMTUyMTEyMyUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQAAAAeQWNjZXB0LUVuY29kaW5nOiBnemlwLCBkZWZsYXRlAAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAK0FjY2VwdDogaW1hZ2UvcG5nLCBpbWFnZS8qO3E9MC44LCAqLyo7cT0wLjUAAABdVXNlci1BZ2VudDogTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wAAAAFEhvc3Q6IGxvY2FsaG9zdDo0MDAxCwBQAAAAJDc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NAA=" ! - DateTime.parse("2014-01-28T19:22:20.040+00:00") ! toNameValuePairs( - "e" -> "pp", - "page" -> "Asynchronous website/webapp examples for snowplow.js", - "pp_mix" -> "0", - "pp_max" -> "0", - "pp_miy" -> "0", - "pp_may" -> "0", - "co" -> """{"page":{"page_type":"test","last_updated$tms":1393372800000},"user":{"user_type":"tester"}}""", - "dtm" -> "1390936938855", - "tid" -> "797743", - "vp" -> "2560x961", - "ds" -> "2560x961", - "vid" -> "7", - "duid" -> "3c1757544e39bca4", - "p" -> "mob", - "tv" -> "js-0.13.1", - "fp" -> "2695930803", - "aid" -> "CFe23a", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "Europe/London", - "uid" -> "alex 123", - "f_pdf" -> "0", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "0", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "2560x1440", - "cd" -> "24", - "cookie" -> "1", - "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" - ) ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! 
List( - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: image/png, image/*;q=0.8, */*;q=0.5", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some | - "Unstructured event" !! "CgABAAABQ9qNGa4LABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACeWU9dWUmdWVfbmE9Vmlld2VkK1Byb2R1Y3QmdWVfcHI9JTdCJTIycHJvZHVjdF9pZCUyMjolMjJBU08wMTA0MyUyMiwlMjJjYXRlZ29yeSUyMjolMjJEcmVzc2VzJTIyLCUyMmJyYW5kJTIyOiUyMkFDTUUlMjIsJTIycmV0dXJuaW5nJTIyOnRydWUsJTIycHJpY2UlMjI6NDkuOTUsJTIyc2l6ZXMlMjI6JTVCJTIyeHMlMjIsJTIycyUyMiwlMjJsJTIyLCUyMnhsJTIyLCUyMnh4bCUyMiU1RCwlMjJhdmFpbGFibGVfc2luY2UkZHQlMjI6MTU4MDElN0QmZHRtPTEzOTA5NDExMTUyNjMmdGlkPTY0NzYxNSZ2cD0yNTYweDk2MSZkcz0yNTYweDk2MSZ2aWQ9OCZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD1tb2ImdHY9anMtMC4xMy4xJmZwPTI2OTU5MzA4MDMmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0yNTYweDE0NDAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" ! - DateTime.parse("2014-01-28T20:31:56.846+00:00") ! 
toNameValuePairs( - "e" -> "ue", - "ue_na" -> "Viewed Product", - "ue_pr" -> """{"product_id":"ASO01043","category":"Dresses","brand":"ACME","returning":true,"price":49.95,"sizes":["xs","s","l","xl","xxl"],"available_since$dt":15801}""", - "dtm" -> "1390941115263", - "tid" -> "647615", - "vp" -> "2560x961", - "ds" -> "2560x961", - "vid" -> "8", - "duid" -> "3c1757544e39bca4", - "p" -> "mob", - "tv" -> "js-0.13.1", - "fp" -> "2695930803", - "aid" -> "CFe23a", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "Europe/London", - "uid" -> "alex 123", - "f_pdf" -> "0", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "0", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "2560x1440", - "cd" -> "24", - "cookie" -> "1", - "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" - ) ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! List( - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: image/png, image/*;q=0.8, */*;q=0.5", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some | - "Parameterless" !! 
"CgABAAABQ9o8zYULABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAgAAAAYQ2FjaGUtQ29udHJvbDogbWF4LWFnZT0wAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAAEpBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksICovKjtxPTAuOAAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" ! - DateTime.parse( - "2014-01-28T19:04:14.469+00:00" - ) ! toNameValuePairs() ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! List( - "Cache-Control: max-age=0", - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some |> { - - (_, raw, timestamp, payload, hostname, ipAddress, userAgent, refererUri, headers, userId) => - val canonicalEvent = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) - - val expected = CollectorPayload( - api = Expected.api, - querystring = payload, - body = None, - contentType = None, - source = CollectorPayload.Source(Expected.collector, Expected.encoding, hostname), - context = CollectorPayload - .Context(timestamp.some, ipAddress, userAgent, refererUri, headers, userId) - ) - - canonicalEvent must beValid(expected.some) + result must beValid(expected.some) } - val msg = - "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. 
(This is often indicative of an internal error on the server side. Please check your server logs.)" + "parse valid page ping GET payload" >> { + val raw = + "CgABAAABQ9pNXggLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACZmU9cHAmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJnBwX21peD0wJnBwX21heD0wJnBwX21peT0wJnBwX21heT0wJmNvPSU3QiUyMnBhZ2UlMjI6JTdCJTIycGFnZV90eXBlJTIyOiUyMnRlc3QlMjIsJTIybGFzdF91cGRhdGVkJHRtcyUyMjoxMzkzMzcyODAwMDAwJTdELCUyMnVzZXIlMjI6JTdCJTIydXNlcl90eXBlJTIyOiUyMnRlc3RlciUyMiU3RCU3RCZkdG09MTM5MDkzNjkzODg1NSZ0aWQ9Nzk3NzQzJnZwPTI1NjB4OTYxJmRzPTI1NjB4OTYxJnZpZD03JmR1aWQ9M2MxNzU3NTQ0ZTM5YmNhNCZwPW1vYiZ0dj1qcy0wLjEzLjEmZnA9MjY5NTkzMDgwMyZhaWQ9Q0ZlMjNhJmxhbmc9ZW4tVVMmY3M9VVRGLTgmdHo9RXVyb3BlL0xvbmRvbiZ1aWQ9YWxleCsxMjMmZl9wZGY9MCZmX3F0PTEmZl9yZWFscD0wJmZfd21hPTAmZl9kaXI9MCZmX2ZsYT0xJmZfamF2YT0wJmZfZ2VhcnM9MCZmX2FnPTAmcmVzPTI1NjB4MTQ0MCZjZD0yNCZjb29raWU9MSZ1cmw9ZmlsZTovL2ZpbGU6Ly8vVXNlcnMvYWxleC9EZXZlbG9wbWVudC9kZXYtZW52aXJvbm1lbnQvZGVtby8xLXRyYWNrZXIvZXZlbnRzLmh0bWwvb3ZlcnJpZGRlbi11cmwvAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAcAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAAnBDb29raWU6IF9fdXRtYT0xMTE4NzIyODEuODc4MDg0NDg3LjEzOTAyMzcxMDcuMTM5MDg0ODQ4Ny4xMzkwOTMxNTIxLjY7IF9fdXRtej0xMTE4NzIyODEuMTM5MDIzNzEwNy4xLjEudXRtY3NyPShkaXJlY3QpfHV0bWNjbj0oZGlyZWN0KXx1dG1jbWQ9KG5vbmUpOyBfc3BfaWQuMWZmZj1iODlhNmZhNjMxZWVmYWMyLjEzOTAyMzcxMDcuNi4xMzkwOTMxNTQ1LjEzOTA4NDg2NDE7IGhibGlkPUNQamp1aHZGMDV6a3RQN0o3TTVWbzNOSUdQTEp5MVNGOyBvbGZzaz1vbGZzazU2MjkyMzYzNTYxNzU1NDsgX191dG1jPTExMTg3MjI4MTsgd2NzaWQ9dU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9va2x2PTEzOTA5MzE1ODU0NDUlMkN1TWxvZzFRSlZEN2p1aEZaN001Vm9CQ3lQUHlpQnlTUzsgX29rPTk3NTItNTAzLTEwLTUyMjc7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MDkzMTUyMTEyMyUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQAAAAeQWNjZXB0LUVuY29kaW5nOiBnemlwLCBkZWZsYXRlAAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAK0FjY2VwdDogaW1hZ2UvcG5nLCBpbWFnZS8qO3E9MC44LCAqLyo7cT0wLjUAAABdVXNlci1BZ2VudDogTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wAAAAFEhvc3Q6IGxvY2FsaG9zdDo0MDAxCwBQAAAAJDc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NAA=" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T19:22:20.040+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, 
deflate", + "Accept-Language: en-US, en", + "Accept: image/png, image/*;q=0.8, */*;q=0.5", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) - // A bit of fun: the chances of generating a valid Thrift CollectorPayload at random are - // so low that we can just use ScalaCheck here - def e2 = - prop { (raw: String) => - ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "thrift", f), - Payload.RawPayload(_) - ), - List() - ) => - f must_== FailureDetails.CPFormatViolationMessage.Fallback(msg) + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs( + "e" -> "pp", + "page" -> "Asynchronous website/webapp examples for snowplow.js", + "pp_mix" -> "0", + "pp_max" -> "0", + "pp_miy" -> "0", + "pp_may" -> "0", + "co" -> """{"page":{"page_type":"test","last_updated$tms":1393372800000},"user":{"user_type":"tester"}}""", + "dtm" -> "1390936938855", + "tid" -> "797743", + "vp" -> "2560x961", + "ds" -> "2560x961", + "vid" -> "7", + "duid" -> "3c1757544e39bca4", + "p" -> "mob", + "tv" -> "js-0.13.1", + "fp" -> "2695930803", + "aid" -> "CFe23a", + "lang" -> "en-US", + "cs" -> "UTF-8", + "tz" -> "Europe/London", + "uid" -> "alex 123", + "f_pdf" -> "0", + "f_qt" -> "1", + "f_realp" -> "0", + "f_wma" -> "0", + "f_dir" -> "0", + "f_fla" -> "1", + "f_java" -> "0", + "f_gears" -> "0", + "f_ag" -> "0", + "res" -> "2560x1440", + "cd" -> "24", + "cookie" -> "1", + "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" + ), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "parse valid unstructured event GET payload" >> { + val raw = + 
"CgABAAABQ9qNGa4LABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACeWU9dWUmdWVfbmE9Vmlld2VkK1Byb2R1Y3QmdWVfcHI9JTdCJTIycHJvZHVjdF9pZCUyMjolMjJBU08wMTA0MyUyMiwlMjJjYXRlZ29yeSUyMjolMjJEcmVzc2VzJTIyLCUyMmJyYW5kJTIyOiUyMkFDTUUlMjIsJTIycmV0dXJuaW5nJTIyOnRydWUsJTIycHJpY2UlMjI6NDkuOTUsJTIyc2l6ZXMlMjI6JTVCJTIyeHMlMjIsJTIycyUyMiwlMjJsJTIyLCUyMnhsJTIyLCUyMnh4bCUyMiU1RCwlMjJhdmFpbGFibGVfc2luY2UkZHQlMjI6MTU4MDElN0QmZHRtPTEzOTA5NDExMTUyNjMmdGlkPTY0NzYxNSZ2cD0yNTYweDk2MSZkcz0yNTYweDk2MSZ2aWQ9OCZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD1tb2ImdHY9anMtMC4xMy4xJmZwPTI2OTU5MzA4MDMmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0yNTYweDE0NDAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T20:31:56.846+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: image/png, image/*;q=0.8, */*;q=0.5", + "User-Agent: Mozilla/5.0 (Macintosh; 
Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs( + "e" -> "ue", + "ue_na" -> "Viewed Product", + "ue_pr" -> """{"product_id":"ASO01043","category":"Dresses","brand":"ACME","returning":true,"price":49.95,"sizes":["xs","s","l","xl","xxl"],"available_since$dt":15801}""", + "dtm" -> "1390941115263", + "tid" -> "647615", + "vp" -> "2560x961", + "ds" -> "2560x961", + "vid" -> "8", + "duid" -> "3c1757544e39bca4", + "p" -> "mob", + "tv" -> "js-0.13.1", + "fp" -> "2695930803", + "aid" -> "CFe23a", + "lang" -> "en-US", + "cs" -> "UTF-8", + "tz" -> "Europe/London", + "uid" -> "alex 123", + "f_pdf" -> "0", + "f_qt" -> "1", + "f_realp" -> "0", + "f_wma" -> "0", + "f_dir" -> "0", + "f_fla" -> "1", + "f_java" -> "0", + "f_gears" -> "0", + "f_ag" -> "0", + "res" -> "2560x1440", + "cd" -> "24", + "cookie" -> "1", + "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" + ), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "parse valid parameterless payload" >> { + val raw = + "CgABAAABQ9o8zYULABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAgAAAAYQ2FjaGUtQ29udHJvbDogbWF4LWFnZT0wAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAAEpBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksICovKjtxPTAuOAAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T19:04:14.469+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Cache-Control: max-age=0", + "Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; 
__utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs(), + contentType = None, + body = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "fail to parse random bytes" >> { + prop { (raw: String) => + ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) must beInvalid.like { + case NonEmptyList( + BadRow.CPFormatViolation( + Process, + Failure.CPFormatViolation(_, "thrift", f), + Payload.RawPayload(_) + ), + List() + ) => + (f must beEqualTo(violation1byte)) or (f must beEqualTo(violation2bytes)) + } } } + } +} + +object ThriftLoaderSpec { + val Encoding = "UTF-8" + val Collector = "ssc-0.0.1-Stdout" // Note we have since fixed -stdout to be lowercase + val Api = CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1") + val Process = Processor("ThriftLoaderSpec", "v1") + val DeserializeMessage = + "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" + val violation1byte: FailureDetails.CPFormatViolationMessage = + FailureDetails.CPFormatViolationMessage.Fallback( + "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" + ) + val violation2bytes: FailureDetails.CPFormatViolationMessage = + FailureDetails.CPFormatViolationMessage.Fallback( + "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. 
Please check your server logs.)" + ) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala index 5c118448c..74cd42d4b 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala @@ -56,14 +56,14 @@ class EnrichedEventSpec extends Specification { testField(_.user_ipaddress = "user_ipaddress", _.user_ipaddress) testField(_.user_fingerprint = "user_fingerprint", _.user_fingerprint) testField(_.domain_userid = "domain_userid", _.domain_userid) - testField(_.domain_sessionidx = new JInteger(0), _.domain_sessionidx) + testField(_.domain_sessionidx = JInteger.valueOf(0), _.domain_sessionidx) testField(_.network_userid = "network_userid", _.network_userid) testField(_.geo_country = "geo_country", _.geo_country) testField(_.geo_region = "geo_region", _.geo_region) testField(_.geo_city = "geo_city", _.geo_city) testField(_.geo_zipcode = "geo_zipcode", _.geo_zipcode) - testField(_.geo_latitude = new JFloat(0.0), _.geo_latitude) - testField(_.geo_longitude = new JFloat(0.0), _.geo_longitude) + testField(_.geo_latitude = JFloat.valueOf("0.0"), _.geo_latitude) + testField(_.geo_longitude = JFloat.valueOf("0.0"), _.geo_longitude) testField(_.geo_region_name = "geo_region_name", _.geo_region_name) testField(_.ip_isp = "ip_isp", _.ip_isp) testField(_.ip_organization = "ip_organization", _.ip_organization) @@ -74,13 +74,13 @@ class EnrichedEventSpec extends Specification { testField(_.page_referrer = "page_referrer", _.page_referrer) testField(_.page_urlscheme = "page_urlscheme", _.page_urlscheme) testField(_.page_urlhost = "page_urlhost", _.page_urlhost) - testField(_.page_urlport = new JInteger(0), _.page_urlport) + testField(_.page_urlport = JInteger.valueOf(0), _.page_urlport) testField(_.page_urlpath = "page_urlpath", _.page_urlpath) testField(_.page_urlquery = "page_urlquery", _.page_urlquery) testField(_.page_urlfragment = "page_urlfragment", _.page_urlfragment) testField(_.refr_urlscheme = "refr_urlscheme", _.refr_urlscheme) testField(_.refr_urlhost = "refr_urlhost", _.refr_urlhost) - testField(_.refr_urlport = new JInteger(0), _.refr_urlport) + testField(_.refr_urlport = JInteger.valueOf(0), _.refr_urlport) testField(_.refr_urlpath = "refr_urlpath", _.refr_urlpath) testField(_.refr_urlquery = "refr_urlquery", _.refr_urlquery) testField(_.refr_urlfragment = "refr_urlfragment", _.refr_urlfragment) @@ -112,11 +112,11 @@ class EnrichedEventSpec extends Specification { testField(_.ti_name = "ti_name", _.ti_name) testField(_.ti_category = "ti_category", _.ti_category) testField(_.ti_price = "ti_price", _.ti_price) - testField(_.ti_quantity = new JInteger(0), _.ti_quantity) - testField(_.pp_xoffset_min = new JInteger(0), _.pp_xoffset_min) - testField(_.pp_xoffset_max = new JInteger(0), _.pp_xoffset_max) - testField(_.pp_yoffset_min = new JInteger(0), _.pp_yoffset_min) - testField(_.pp_yoffset_max = new JInteger(0), _.pp_yoffset_max) + testField(_.ti_quantity = JInteger.valueOf(0), _.ti_quantity) + testField(_.pp_xoffset_min = JInteger.valueOf(0), _.pp_xoffset_min) + testField(_.pp_xoffset_max = JInteger.valueOf(0), _.pp_xoffset_max) + testField(_.pp_yoffset_min = JInteger.valueOf(0), _.pp_yoffset_min) + testField(_.pp_yoffset_max = JInteger.valueOf(0), 
_.pp_yoffset_max) testField(_.useragent = "useragent", _.useragent) testField(_.br_name = "br_name", _.br_name) testField(_.br_family = "br_family", _.br_family) @@ -124,30 +124,30 @@ class EnrichedEventSpec extends Specification { testField(_.br_type = "br_type", _.br_type) testField(_.br_renderengine = "br_renderengine", _.br_renderengine) testField(_.br_lang = "br_lang", _.br_lang) - testField(_.br_features_pdf = new JByte(Byte.MinValue), _.br_features_pdf) - testField(_.br_features_flash = new JByte(Byte.MinValue), _.br_features_flash) - testField(_.br_features_java = new JByte(Byte.MinValue), _.br_features_java) - testField(_.br_features_director = new JByte(Byte.MinValue), _.br_features_director) - testField(_.br_features_quicktime = new JByte(Byte.MinValue), _.br_features_quicktime) - testField(_.br_features_realplayer = new JByte(Byte.MinValue), _.br_features_realplayer) - testField(_.br_features_windowsmedia = new JByte(Byte.MinValue), _.br_features_windowsmedia) - testField(_.br_features_gears = new JByte(Byte.MinValue), _.br_features_gears) - testField(_.br_features_silverlight = new JByte(Byte.MinValue), _.br_features_silverlight) - testField(_.br_cookies = new JByte(Byte.MinValue), _.br_cookies) + testField(_.br_features_pdf = JByte.valueOf(Byte.MinValue), _.br_features_pdf) + testField(_.br_features_flash = JByte.valueOf(Byte.MinValue), _.br_features_flash) + testField(_.br_features_java = JByte.valueOf(Byte.MinValue), _.br_features_java) + testField(_.br_features_director = JByte.valueOf(Byte.MinValue), _.br_features_director) + testField(_.br_features_quicktime = JByte.valueOf(Byte.MinValue), _.br_features_quicktime) + testField(_.br_features_realplayer = JByte.valueOf(Byte.MinValue), _.br_features_realplayer) + testField(_.br_features_windowsmedia = JByte.valueOf(Byte.MinValue), _.br_features_windowsmedia) + testField(_.br_features_gears = JByte.valueOf(Byte.MinValue), _.br_features_gears) + testField(_.br_features_silverlight = JByte.valueOf(Byte.MinValue), _.br_features_silverlight) + testField(_.br_cookies = JByte.valueOf(Byte.MinValue), _.br_cookies) testField(_.br_colordepth = "br_colordepth", _.br_colordepth) - testField(_.br_viewwidth = new JInteger(0), _.br_viewwidth) - testField(_.br_viewheight = new JInteger(0), _.br_viewheight) + testField(_.br_viewwidth = JInteger.valueOf(0), _.br_viewwidth) + testField(_.br_viewheight = JInteger.valueOf(0), _.br_viewheight) testField(_.os_name = "os_name", _.os_name) testField(_.os_family = "os_family", _.os_family) testField(_.os_manufacturer = "os_manufacturer", _.os_manufacturer) testField(_.os_timezone = "os_timezone", _.os_timezone) testField(_.dvce_type = "dvce_type", _.dvce_type) - testField(_.dvce_ismobile = new JByte(Byte.MinValue), _.dvce_ismobile) - testField(_.dvce_screenwidth = new JInteger(0), _.dvce_screenwidth) - testField(_.dvce_screenheight = new JInteger(0), _.dvce_screenheight) + testField(_.dvce_ismobile = JByte.valueOf(Byte.MinValue), _.dvce_ismobile) + testField(_.dvce_screenwidth = JInteger.valueOf(0), _.dvce_screenwidth) + testField(_.dvce_screenheight = JInteger.valueOf(0), _.dvce_screenheight) testField(_.doc_charset = "doc_charset", _.doc_charset) - testField(_.doc_width = new JInteger(0), _.doc_width) - testField(_.doc_height = new JInteger(0), _.doc_height) + testField(_.doc_width = JInteger.valueOf(0), _.doc_width) + testField(_.doc_height = JInteger.valueOf(0), _.doc_height) testField(_.tr_currency = "tr_currency", _.tr_currency) testField(_.tr_total_base = "tr_total_base", _.tr_total_base) 
testField(_.tr_tax_base = "tr_tax_base", _.tr_tax_base) @@ -171,7 +171,6 @@ class EnrichedEventSpec extends Specification { testField(_.event_version = "event_version", _.event_version) testField(_.event_fingerprint = "event_fingerprint", _.event_fingerprint) testField(_.true_tstamp = "true_tstamp", _.true_tstamp) - } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala index 4cdbc9cf0..c5c0574a4 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala @@ -15,17 +15,10 @@ package utils import java.util.concurrent.TimeUnit -import cats.{Eval, Id} +import cats.Id import cats.effect.{Clock => CEClock} object Clock { - implicit val evalClock: CEClock[Eval] = new CEClock[Eval] { - final def realTime(unit: TimeUnit): Eval[Long] = - Eval.later(unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS)) - final def monotonic(unit: TimeUnit): Eval[Long] = - Eval.later(unit.convert(System.nanoTime(), TimeUnit.NANOSECONDS)) - } - implicit val idClock: CEClock[Id] = new CEClock[Id] { final def realTime(unit: TimeUnit): Id[Long] = unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala index 6ac16b54b..7bf6b20f0 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala @@ -34,7 +34,7 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { val raw = RawEvent( CollectorPayload.Api("vendor", "version"), - Map.empty[String, String], + Map.empty[String, Option[String]], None, CollectorPayload.Source("source", "enc", None), CollectorPayload.Context(None, None, None, None, Nil, None) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala index 951e9a491..7ac889c9e 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala @@ -13,6 +13,7 @@ package com.snowplowanalytics.snowplow.enrich.common.utils import io.circe._ +import io.circe.literal.JsonStringContext import io.circe.syntax._ import org.specs2.Specification @@ -21,9 +22,14 @@ class JsonPathSpec extends Specification { test JSONPath query $e1 test query of non-exist value $e2 test query of empty array $e3 - test primtive JSON type (JString) $e6 + test primitive JSON type (JString) $e6 invalid JSONPath (JQ syntax) must fail $e4 invalid JSONPath must fail $e5 + test query of long $e7 + test query of integer $e8 + test query of string $e9 + test query of double $e10 + test query of big decimal $e11 """ val someJson = Json.obj( @@ -88,4 +94,34 @@ class JsonPathSpec extends Specification { def e6 = JsonPath.query("$.store.book[2]", Json.fromString("somestring")) must beRight(List()) + + def e7 = { + val q1 = JsonPath.query("$.empId", json"""{ "empId": 
2147483649 }""") must beRight(List(Json.fromLong(2147483649L))) + val q2 = JsonPath.query("$.empId", json"""{ "empId": ${Json.fromLong(2147483649L)} }""") must beRight(List(Json.fromLong(2147483649L))) + q1 and q2 + } + + def e8 = { + val q1 = JsonPath.query("$.empId", json"""{ "empId": 1086 }""") must beRight(List(Json.fromInt(1086))) + val q2 = JsonPath.query("$.empId", json"""{ "empId": ${Json.fromInt(-1086)} }""") must beRight(List(Json.fromInt(-1086))) + q1 and q2 + } + + def e9 = { + val q1 = JsonPath.query("$.empName", json"""{ "empName": "ABC" }""") must beRight(List(Json.fromString("ABC"))) + val q2 = JsonPath.query("$.empName", json"""{ "empName": ${Json.fromString("XYZ")} }""") must beRight(List(Json.fromString("XYZ"))) + q1 and q2 + } + + def e10 = { + val q1 = JsonPath.query("$.id", json"""{ "id": ${Json.fromDouble(44.54)} }""") must beRight(List(Json.fromDoubleOrNull(44.54))) + val q2 = JsonPath.query("$.id", json"""{ "id": ${Json.fromDouble(20.20)} }""") must beRight(List(Json.fromDoubleOrString(20.20))) + q1 and q2 + } + + def e11 = { + val q1 = JsonPath.query("$.id", json"""{ "id": ${Json.fromBigDecimal(44.54)} }""") must beRight(List(Json.fromBigDecimal(44.54))) + val q2 = JsonPath.query("$.id", json"""{ "id": ${Json.fromBigDecimal(20.20)} }""") must beRight(List(Json.fromBigDecimal(20.20))) + q1 and q2 + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala index 5586b55b8..9b004e11c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala @@ -15,25 +15,80 @@ package utils import org.specs2.Specification +import org.joda.time.format.DateTimeFormat + import io.circe.Json +import cats.data.NonEmptyList + class JsonUtilsSpec extends Specification { def is = s2""" toJson can deal with non-null String $e1 toJson can deal with null String $e2 + toJson can deal with booleans $e3 + toJson can deal with integers $e4 + toJson can deal with dates $e5 """ def e1 = { val key = "key" val value = "value" - JsonUtils.toJson(key, value, Nil, Nil, None) must + JsonUtils.toJson(key, Option(value), Nil, Nil, None) must beEqualTo((key, Json.fromString(value))) } def e2 = { val key = "key" val value: String = null - JsonUtils.toJson(key, value, Nil, Nil, None) must + JsonUtils.toJson(key, Option(value), Nil, Nil, None) must beEqualTo((key, Json.Null)) } + + def e3 = { + val key = "field" + + val truE = "true" + val exp1 = JsonUtils.toJson(key, Option(truE), List(key), Nil, None) must + beEqualTo(key -> Json.True) + + val falsE = "false" + val exp2 = JsonUtils.toJson(key, Option(falsE), List(key), Nil, None) must + beEqualTo(key -> Json.False) + + val foo = "foo" + val exp3 = JsonUtils.toJson(key, Option(foo), List(key), Nil, None) must + beEqualTo(key -> Json.fromString(foo)) + + exp1 and exp2 and exp3 + } + + def e4 = { + val key = "field" + + val number = 123 + val exp1 = JsonUtils.toJson(key, Option(number.toString()), Nil, List(key), None) must + beEqualTo(key -> Json.fromBigInt(number)) + + val notNumber = "abc" + val exp2 = JsonUtils.toJson(key, Option(notNumber), Nil, List(key), None) must + beEqualTo(key -> Json.fromString(notNumber)) + + exp1 and exp2 + } + + def e5 = { + val key = "field" + + val formatter = DateTimeFormat.forPattern("yyyy-MM-dd") + val 
malformedDate = "2020-09-02" + val correctDate = "2020-09-02T22:00:00.000Z" + + val exp1 = JsonUtils.toJson(key, Option(malformedDate), Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + be !== (key -> Json.fromString(malformedDate)) + + val exp2 = JsonUtils.toJson(key, Option(correctDate), Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + beEqualTo(key -> Json.fromString(correctDate)) + + exp1 and exp2 + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala index be3ea91db..5cf98ed60 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala @@ -15,6 +15,7 @@ package utils import java.net.{Inet6Address, InetAddress, URI} import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets import cats.syntax.either._ import cats.syntax.option._ @@ -275,6 +276,49 @@ class ValidateUuidSpec extends Specification with DataTables with ScalaCheck { } } +class ValidateIntegerSpec extends Specification { + def is = s2""" + validateInteger should return the original string if it contains an integer $e1 + validateInteger should return an enrichment failure for a string not containing a valid integer $e2 + """ + + val FieldName = "integer" + + def e1 = ConversionUtils.validateInteger(FieldName, "123") must beRight("123") + + def e2 = { + val str = "abc" + ConversionUtils.validateInteger(FieldName, str) must beLeft( + FailureDetails.EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.InputData( + FieldName, + Some(str), + "not a valid integer" + ) + ) + ) + } +} + +class DecodeStringSpec extends Specification { + def is = s2""" + decodeString should decode a correctly URL-encoded string $e1 + decodeString should fail decoding a string not correctly URL-encoded $e2 + """ + + val utf8 = StandardCharsets.UTF_8 + + def e1 = { + val clear = "12 ++---=&&3abc%%%34%2234%$#@%^PLLPbgfxbf$#%$@#@^" + val encoded = ConversionUtils.encodeString(utf8.toString(), clear) + ConversionUtils.decodeString(utf8, encoded) must beRight(clear) + } + + def e2 = + ConversionUtils.decodeString(utf8, "%%23") must beLeft +} + class StringToDoubleLikeSpec extends Specification with DataTables { def is = s2""" stringToDoublelike should fail if the supplied String is not parseable as a number $e1 diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Assets.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Assets.scala new file mode 100644 index 000000000..78453adf3 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Assets.scala @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.enrich.fs2
+
+import java.net.URI
+import java.nio.file.{Path, Paths}
+
+import scala.concurrent.duration._
+import scala.util.control.NonFatal
+
+import cats.{Applicative, Parallel}
+import cats.implicits._
+
+import cats.effect.{Blocker, Concurrent, ConcurrentEffect, ContextShift, Resource, Sync, Timer}
+import cats.effect.concurrent.Ref
+
+import retry.{RetryDetails, RetryPolicies, RetryPolicy, retryingOnSomeErrors}
+
+import fs2.Stream
+import fs2.hash.md5
+import fs2.io.file.{copy, deleteIfExists, exists, readAll, tempFileResource, writeAll}
+
+import _root_.io.chrisdavenport.log4cats.Logger
+import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger
+
+import com.snowplowanalytics.snowplow.enrich.fs2.io.Clients
+
+/**
+ * Functions responsible for periodic updates of assets (such as MaxMind/IAB DBs).
+ * The common logic is to periodically invoke a function that:
+ * 1. Downloads a file (in background) to a temp location
+ * 2. Compares the file's checksum with the existing one (stored in a mutable hashmap)
+ * 3. If the checksums match - deletes the temp file and returns
+ * 4. If the checksums don't match - sends a signal to stop the raw stream
+ *    (via `SignallingRef` in [[Environment]])
+ * 5. Once the raw stream is stopped - deletes the old file and moves
+ *    the temp file to the old file's location
+ * If any of those URIs has been updated and the raw stream has been stopped,
+ * the stream is resumed immediately once the above procedure has traversed all files
+ */
+object Assets {
+
+  private implicit def unsafeLogger[F[_]: Sync]: Logger[F] =
+    Slf4jLogger.getLogger[F]
+
+  /**
+   * State of the [[updateStream]], containing information about tracked URIs
+   * and the `stop` signal from [[Environment]], as well as all clients necessary
+   * to download URIs
+   *
+   * @param files mutable hash map of URIs and their latest known state
+   * @param pauseEnrich stop signal coming from [[Environment]] that can be used
+   *                    to stop the raw stream consumption
+   * @param clients HTTP, GCS, S3 clients if necessary
+   */
+  final case class State[F[_]](
+    files: Ref[F, Map[URI, Hash]],
+    pauseEnrich: Ref[F, Boolean],
+    clients: Clients[F]
+  )
+
+  object State {
+
+    /** The test pair is used in tests to initialize the HTTP client; it is ignored during initialization */
+    private val TestPair: Asset = URI.create("http://localhost:8080") -> "index"
+
+    /**
+     * Initialize the assets state. Try to find the assets on the local FS
+     * or download them if they're missing. Also initializes all necessary
+     * clients (S3, GCP, HTTP etc)
+     * @param blocker thread pool for downloading and reading files
+     * @param stop global stop signal from [[Environment]]
+     * @param assets all assets that have to be tracked
+     */
+    def make[F[_]: ConcurrentEffect: Timer: ContextShift](
+      blocker: Blocker,
+      stop: Ref[F, Boolean],
+      assets: List[Asset]
+    ): Resource[F, State[F]] =
+      for {
+        clients <- Clients.make[F](blocker, assets.map(_._1))
+        map <- Resource.liftF(build[F](blocker, clients, assets.filterNot(asset => asset == TestPair)))
+        files <- Resource.liftF(Ref.of[F, Map[URI, Hash]](map))
+      } yield State(files, stop, clients)
+
+    def build[F[_]: Concurrent: Timer: ContextShift](
+      blocker: Blocker,
+      clients: Clients[F],
+      assets: List[Asset]
+    ): F[Map[URI, Hash]] =
+      Logger[F].info("Preparing enrichment assets") *>
+        buildFromLocal(blocker, assets)
+          .flatMap { hashes =>
+            hashes.traverse {
+              case (uri, path, Some(hash)) =>
+                Logger[F].info(s"Asset from $uri is found on local system at $path").as(uri -> hash)
+              case (uri, path, None) =>
+                downloadAndHash[F](clients, blocker, uri, Paths.get(path)).map(hash => uri -> hash)
+            }
+          }
+          .map(_.toMap)
+
+    def buildFromLocal[F[_]: Sync: ContextShift](blocker: Blocker, assets: List[Asset]): F[List[(URI, String, Option[Hash])]] =
+      assets.traverse { case (uri, path) => local[F](blocker, path).map(hash => (uri, path, hash)) }
+
+    /** Check if the file already exists on the local FS and compute its hash if it does */
+    def local[F[_]: Sync: ContextShift](blocker: Blocker, path: String): F[Option[Hash]] = {
+      val fpath = Paths.get(path)
+      exists(blocker, fpath).ifM(
+        Hash.fromStream(readAll(fpath, blocker, 1024)).map(_.some),
+        Sync[F].pure(none)
+      )
+    }
+  }
+
+  /** Valid MD5 hash */
+  final case class Hash private (s: String) extends AnyVal
+
+  object Hash {
+    private[this] def fromBytes(bytes: Array[Byte]): Hash = {
+      val bi = new java.math.BigInteger(1, bytes)
+      Hash(String.format("%0" + (bytes.length << 1) + "x", bi))
+    }
+
+    def fromStream[F[_]: Sync](stream: Stream[F, Byte]): F[Hash] =
+      stream.through(md5).compile.to(Array).map(fromBytes)
+  }
+
+  /** Pair of a tracked `URI` and destination path on local FS (`java.nio.file.Path` is not serializable) */
+  type Asset = (URI, String)
+
+  /** Initialise the [[updateStream]] with all necessary resources if a refresh period is specified */
+  def run[F[_]: ConcurrentEffect: ContextShift: Timer: Parallel](env: Environment[F]): Stream[F, Unit] =
+    env.assetsUpdatePeriod match {
+      case Some(duration) =>
+        val init = for {
+          curDir <- getCurDir
+          _ <- Logger[F].info(s"Initializing assets refresh stream in $curDir, ticking every $duration")
+          assets <- env.enrichments.get.map(_.configs.flatMap(_.filesToCache))
+        } yield updateStream[F](env.blocker, env.assetsState, env.enrichments, curDir, duration, assets)
+        Stream.eval(init).flatten
+      case None =>
+        Stream.empty.covary[F]
+    }
+
+  def getCurDir[F[_]: Sync]: F[Path] =
+    Sync[F].delay(Paths.get("").toAbsolutePath)
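+  /**
+   * Editor's illustrative sketch, not part of the original change: the essence of the
+   * check-and-swap described in steps 1-5 of the file-level scaladoc, written against
+   * plain JDK APIs instead of fs2/Ref. `knownHashes` is a hypothetical mutable registry
+   * standing in for `State.files`; the real code also pauses the raw stream before swapping.
+   */
+  def updateOnceSketch(
+    uri: URI,
+    tmp: Path,
+    dest: Path,
+    knownHashes: scala.collection.mutable.Map[URI, String]
+  ): Boolean = {
+    // Step 2: hash the freshly downloaded temp file
+    val digest = java.security.MessageDigest.getInstance("MD5")
+    val fresh = digest.digest(java.nio.file.Files.readAllBytes(tmp)).map("%02x".format(_)).mkString
+    if (knownHashes.get(uri).contains(fresh)) {
+      // Step 3: checksums match - drop the temp file, nothing else to do
+      java.nio.file.Files.delete(tmp)
+      false
+    } else {
+      // Steps 4-5: (after pausing the raw stream) replace the old file and record the new hash
+      java.nio.file.Files.move(tmp, dest, java.nio.file.StandardCopyOption.REPLACE_EXISTING)
+      knownHashes.update(uri, fresh)
+      true
+    }
+  }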
+  /**
+   * Create an update stream that ticks periodically and can invoke an update action,
+   * which will download a URI and check whether it has been updated. If it has, the
+   * raw stream will be stopped via the `stop` signal from [[Environment]] and the
+   * assets updated. At the end of every update, the stop signal is reset to `false`
+   */
+  def updateStream[F[_]: ConcurrentEffect: ContextShift: Parallel: Timer](
+    blocker: Blocker,
+    state: State[F],
+    enrichments: Ref[F, Environment.Enrichments[F]],
+    curDir: Path,
+    duration: FiniteDuration,
+    assets: List[Asset]
+  ): Stream[F, Unit] =
+    Stream.fixedDelay[F](duration).evalMap { _ =>
+      val log = Logger[F].debug(show"Checking remote assets: ${assets.map(_._1).mkString(", ")}")
+      val reinitialize: F[Unit] =
+        for {
+          // side-effecting get-set is inherently not thread-safe
+          // we need to be sure the state.stop is set to true
+          // before re-initializing enrichments
+          _ <- Logger[F].info("Resuming enrich stream")
+          old <- enrichments.get
+          _ <- Logger[F].info(show"Reinitializing enrichments: ${old.configs.map(_.schemaKey.name).mkString(", ")}")
+          fresh <- old.reinitialize
+          _ <- enrichments.set(fresh)
+          _ <- state.pauseEnrich.set(false)
+        } yield ()
+
+      val updated = downloadAndPause[F](blocker, state, curDir, assets)
+      log *> updated.ifM(reinitialize, Logger[F].debug("No assets have been updated since last check"))
+    }
+
+  /**
+   * Download the list of assets and return false if none has been downloaded
+   * It can also set `pauseEnrich` to `true` - the caller should make sure it gets unpaused
+   */
+  def downloadAndPause[F[_]: ConcurrentEffect: ContextShift: Timer](
+    blocker: Blocker,
+    state: State[F],
+    dir: Path,
+    assets: List[Asset]
+  ): F[Boolean] =
+    assets
+      .traverse {
+        case (uri, path) =>
+          update(blocker, state, dir, uri, Paths.get(path))
+      }
+      .map(_.contains(true))
+
+  /**
+   * Update a file in the current directory if it has been updated on remote storage
+   * If a new file has been discovered - stop the enriching streams (signal in `state`)
+   * Do nothing if the file hasn't been updated
+   *
+   * Note: this function is potentially thread-unsafe if the download time
+   * exceeds the tick period. We assume that no two threads will be downloading the same URI
+   *
+   * @param blocker a thread pool to execute download/copy operations
+   * @param state a map of URI to MD5 hash to keep track of the latest state of remote files
+   * @param curDir a local FS destination for temporary files
+   * @param uri a remote file (S3, GCS or HTTP), the URI is used as an identifier
+   * @param path a static file name that enrich clients will access;
+   *             the file itself is placed in the current dir (`dir`)
+   * @return true if the file has been updated
+   */
+  def update[F[_]: ConcurrentEffect: ContextShift: Timer](
+    blocker: Blocker,
+    state: State[F],
+    curDir: Path,
+    uri: URI,
+    path: Path
+  ): F[Boolean] =
+    tempFileResource[F](blocker, curDir).use { tmp =>
+      // Set the stop signal and replace the old file with the temporary one
+      def stopAndCopy(hash: Hash, delete: Boolean): F[Unit] =
+        for {
+          _ <- Logger[F].info(s"An asset at $uri has been updated since last check, pausing the enrich stream to reinitialize")
+          _ <- state.pauseEnrich.set(true)
+          _ <- if (delete) {
+                 val deleted = Logger[F].info(s"Deleted outdated asset $path")
+                 val notDeleted = Logger[F].warn(s"Couldn't delete $path, file didn't exist")
+                 deleteIfExists(blocker, path).ifM(deleted, notDeleted)
+               } else Sync[F].unit
+          _ <- copy(blocker, tmp, path)
+          _ <- state.files.update(_.updated(uri, hash))
+          _ <- Logger[F].debug(s"Replaced $uri in Assets.State")
+        } yield ()
+
+      for {
+        hash <- downloadAndHash(state.clients, blocker, uri, tmp)
+        localFiles <- state.files.get
+        updated <- localFiles.get(uri) match {
+                     case Some(known) if known == hash =>
+                       Sync[F].pure(false)
+                     case Some(_) =>
+                       stopAndCopy(hash, true).as(true)
+                     case None =>
+                       stopAndCopy(hash, false).as(true)
+                   }
+      } yield updated
+    }
+
+  def downloadAndHash[F[_]: Concurrent: ContextShift: Timer](
+    clients: Clients[F],
+    blocker: Blocker,
+    uri: URI,
+    destination: Path
+  ): F[Hash] = {
+    val stream = clients.download(uri).observe(writeAll[F](destination, blocker))
+    Logger[F].info(s"Downloading $uri") *> retryDownload(Hash.fromStream(stream))
+  }
+
+  def retryDownload[F[_]: Sync: Timer, A](download: F[A]): F[A] =
+    retryingOnSomeErrors[A](retryPolicy[F], worthRetrying, onError[F])(download)
+
+  def retryPolicy[F[_]: Applicative]: RetryPolicy[F] =
+    RetryPolicies.fullJitter[F](1500.milliseconds).join(RetryPolicies.limitRetries[F](5))
+
+  def worthRetrying(e: Throwable): Boolean =
+    e match {
+      case _: Clients.DownloadingFailure => true
+      case _: IllegalArgumentException => false
+      case NonFatal(_) => false
+    }
+
+  def onError[F[_]: Sync](error: Throwable, details: RetryDetails): F[Unit] =
+    if (details.givingUp)
+      Logger[F].error(show"Failed to download an asset after ${details.retriesSoFar} retries. ${error.getMessage}. Aborting the job")
+    else if (details.retriesSoFar == 0)
+      Logger[F].warn(show"Failed to download an asset. ${error.getMessage}. Keep retrying")
+    else
+      Logger[F].warn(
+        show"Failed to download an asset after ${details.retriesSoFar} retries, " +
+          show"waiting for ${details.cumulativeDelay.toMillis} ms. ${error.getMessage}. " +
+          show"Keep retrying"
+      )
+}
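+// Editor's note, illustrative arithmetic only: the retry policy above combines full-jitter
+// backoff with a retry cap, i.e. the n-th delay is drawn uniformly from [0, 1500ms * 2^n]
+// and at most 5 retries are attempted, so the worst-case cumulative wait is bounded by
+// roughly 1.5s * (1 + 2 + 4 + 8 + 16) = 46.5s before the download is abandoned.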
diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Enrich.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Enrich.scala
new file mode 100644
index 000000000..4983aca9c
--- /dev/null
+++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Enrich.scala
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.enrich.fs2
+
+import java.time.Instant
+import java.util.Base64
+import java.util.concurrent.TimeUnit
+
+import org.joda.time.DateTime
+import cats.data.{NonEmptyList, ValidatedNel}
+import cats.implicits._
+
+import cats.effect.{Blocker, Clock, Concurrent, ContextShift, Sync}
+
+import fs2.Stream
+
+import _root_.io.sentry.SentryClient
+import _root_.io.circe.Json
+import _root_.io.circe.syntax._
+
+import _root_.io.chrisdavenport.log4cats.Logger
+import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger
+import com.snowplowanalytics.iglu.client.Client
+
+import com.snowplowanalytics.snowplow.badrows.{Processor, BadRow, Failure, Payload => BadRowPayload}
+import com.snowplowanalytics.snowplow.enrich.common.EtlPipeline
+import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
+import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry
+import com.snowplowanalytics.snowplow.enrich.common.loaders.{CollectorPayload, ThriftLoader}
+import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry
+
+object Enrich {
+
+  /**
+   * Parallelism of an enrich stream.
+   * Unlike for thread pools it doesn't make much sense to use the `CPUs x 2` formula
+   * here, as we're sizing fibers rather than threads, and memory is their only cost
+   */
+  val ConcurrencyLevel = 64
+
+  /** Default adapter registry, can be constructed dynamically in future */
+  val adapterRegistry = new AdapterRegistry()
+
+  val processor: Processor = Processor(generated.BuildInfo.name, generated.BuildInfo.version)
+
+  private implicit def unsafeLogger[F[_]: Sync]: Logger[F] =
+    Slf4jLogger.getLogger[F]
+
+  /**
+   * Run a primary enrichment stream, reading from the [[Environment]] source, enriching
+   * via [[enrichWith]] and sinking into [[GoodSink]] and [[BadSink]] respectively.
+   * Can be stopped via the _stop signal_ from [[Environment]]
+   *
+   * The stream won't download any enrichment DBs, that is the responsibility of [[Assets]]
+   * [[Assets.State.make]] downloads assets for the first time unconditionally during
+   * [[Environment]] initialisation, then if `assetsUpdatePeriod` has been specified -
+   * they'll be refreshed periodically by [[Assets.updateStream]]
+   */
+  def run[F[_]: Concurrent: ContextShift: Clock](env: Environment[F]): Stream[F, Unit] = {
+    val registry: F[EnrichmentRegistry[F]] = env.enrichments.get.map(_.registry)
+    val enrich: Enrich[F] = enrichWith[F](registry, env.blocker, env.igluClient, env.sentry, env.metrics.enrichLatency)
+    val badSink: BadSink[F] = _.evalTap(_ => env.metrics.badCount).through(env.bad)
+    val goodSink: GoodSink[F] = _.evalTap(_ => env.metrics.goodCount).through(env.good)
+
+    env.source
+      .pauseWhen(env.pauseEnrich)
+      .evalTap(_ => env.metrics.rawCount)
+      .parEvalMapUnordered(ConcurrencyLevel)(enrich)
+      .flatMap(_.decompose[BadRow, EnrichedEvent])
+      .observeEither(badSink, goodSink)
+      .void
+  }
+
+  /**
+   * Enrich a single `CollectorPayload` to get a list of bad rows and/or enriched events
+   *
+   * Along with the actual `ack`, the `enrichLatency` gauge will be updated
+   */
+  def enrichWith[F[_]: Clock: Sync: ContextShift](
+    enrichRegistry: F[EnrichmentRegistry[F]],
+    blocker: Blocker,
+    igluClient: Client[F, Json],
+    sentry: Option[SentryClient],
+    enrichLatency: Option[Long] => F[Unit]
+  )(
+    row: Payload[F, Array[Byte]]
+  ): F[Result[F]] = {
+    val payload = ThriftLoader.toCollectorPayload(row.data, processor)
+    val collectorTstamp = payload.toOption.flatMap(_.flatMap(_.context.timestamp).map(_.getMillis))
+
+    val result =
+      for {
+        _ <- Logger[F].debug(payloadToString(payload))
+        etlTstamp <- Clock[F].realTime(TimeUnit.MILLISECONDS).map(millis => new DateTime(millis))
+        registry <- enrichRegistry
+        enrich = EtlPipeline.processEvents[F](adapterRegistry, registry, igluClient, processor, etlTstamp, payload)
+        enriched <- blocker.blockOn(enrich)
+        trackLatency = enrichLatency(collectorTstamp)
+      } yield Payload(enriched, trackLatency *> row.finalise)
+
+    result.handleErrorWith(sendToSentry[F](row, sentry))
+  }
+
+  /** Stringify the `ThriftLoader` result for debugging purposes */
+  def payloadToString(payload: ValidatedNel[BadRow.CPFormatViolation, Option[CollectorPayload]]): String =
+    payload.fold(_.asJson.noSpaces, _.map(_.toBadRowPayload.asJson.noSpaces).getOrElse("None"))
+
+  private val EnrichedFields =
+    classOf[EnrichedEvent].getDeclaredFields
+      .filterNot(_.getName.equals("pii"))
+      .map { field => field.setAccessible(true); field }
+      .toList
+
+  /** Transform an enriched event into the canonical TSV */
+  def encodeEvent(enrichedEvent: EnrichedEvent): String =
+    EnrichedFields
+      .map { field =>
+        val prop = field.get(enrichedEvent)
+        if (prop == null) "" else prop.toString
+      }
+      .mkString("\t")
+
+  /** Log an error, turn the problematic `CollectorPayload` into a `BadRow` and notify Sentry if configured */
+  def sendToSentry[F[_]: Sync: Clock](original: Payload[F, Array[Byte]], sentry: Option[SentryClient])(error: Throwable): F[Result[F]] =
+    for {
+      _ <- Logger[F].error("Runtime exception during payload enrichment. CollectorPayload converted to generic_error and ack'ed")
+      now <- Clock[F].realTime(TimeUnit.MILLISECONDS).map(Instant.ofEpochMilli)
+      _ <- original.finalise
+      badRow = genericBadRow(original.data, now, error)
+      _ <- sentry match {
+             case Some(client) =>
+               Sync[F].delay(client.sendException(error))
+             case None =>
+               Sync[F].unit
+           }
+    } yield Payload(List(badRow.invalid), Sync[F].unit)
+
+  /** Build a `generic_error` bad row for unhandled runtime errors */
+  def genericBadRow(
+    row: Array[Byte],
+    time: Instant,
+    error: Throwable
+  ): BadRow.GenericError = {
+    val base64 = new String(Base64.getEncoder.encode(row))
+    val rawPayload = BadRowPayload.RawPayload(base64)
+    val failure = Failure.GenericFailure(time, NonEmptyList.one(error.toString))
+    BadRow.GenericError(processor, failure, rawPayload)
+  }
+}
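+// Editor's illustrative sketch, not part of the original change: what `encodeEvent`
+// above produces - one tab-separated column per declared `EnrichedEvent` field
+// (the `pii` field is excluded), with nulls rendered as empty strings.
+// Hypothetical usage:
+//
+//   val event = new EnrichedEvent()
+//   event.app_id = "CFe23a"
+//   event.platform = "mob"
+//   Enrich.encodeEvent(event)  // "CFe23a\tmob\t\t\t..." - unset fields stay empty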
diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Environment.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Environment.scala
new file mode 100644
index 000000000..1a08c1519
--- /dev/null
+++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Environment.scala
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved.
+ *
+ * This program is licensed to you under the Apache License Version 2.0,
+ * and you may not use this file except in compliance with the Apache License Version 2.0.
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the Apache License Version 2.0 is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.enrich.fs2
+
+import scala.concurrent.duration.FiniteDuration
+
+import cats.Show
+import cats.data.EitherT
+import cats.implicits._
+
+import cats.effect.{Async, Blocker, Clock, Concurrent, ConcurrentEffect, ContextShift, Resource, Sync, Timer}
+import cats.effect.concurrent.Ref
+
+import fs2.concurrent.SignallingRef
+
+import _root_.io.circe.Json
+import _root_.io.circe.syntax._
+
+import _root_.io.sentry.{Sentry, SentryClient}
+import _root_.io.chrisdavenport.log4cats.Logger
+import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger
+import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData}
+import com.snowplowanalytics.iglu.core.circe.implicits._
+
+import com.snowplowanalytics.iglu.client.Client
+
+import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry
+import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf
+import com.snowplowanalytics.snowplow.enrich.fs2.config.{CliConfig, ConfigFile}
+import com.snowplowanalytics.snowplow.enrich.fs2.io.{FileSystem, Metrics, Sinks, Source}
+
+/**
+ * All allocated resources, configs and mutable variables necessary for running the Enrich process
+ * Also responsible for the initial assets download (during `assetsState` initialisation)
+ *
+ * @param igluClient Iglu Client
+ * @param enrichments enrichment registry with all clients and parsed configuration files;
+ *                    it's wrapped in a mutable variable because all resources need to be
+ *                    reinitialized after DB assets are updated via the [[Assets]] stream
+ * @param pauseEnrich a signalling reference that can pause the raw stream and enrichment,
+ *                    should be used only by [[Assets]]
+ * @param assetsState the main entity of the [[Assets]] stream, controlling when assets
+ *                    have to be replaced with newer ones
+ * @param blocker thread pool for blocking operations and enrichments themselves
+ * @param source a stream of raw collector payloads
+ * @param good a sink for successfully enriched events
+ * @param bad a sink for events that failed validation or enrichment
+ * @param sentry optional sentry client
+ * @param metrics common counters
+ * @param assetsUpdatePeriod period after which enrich assets should be refreshed
+ * @param metricsReportPeriod period after which metrics are updated
+ */
+final case class Environment[F[_]](
+  igluClient: Client[F, Json],
+  enrichments: Ref[F, Environment.Enrichments[F]],
+  pauseEnrich: SignallingRef[F, Boolean],
+  assetsState: Assets.State[F],
+  blocker: Blocker,
+  source: RawSource[F],
+  good: GoodSink[F],
+  bad: BadSink[F],
+  sentry: Option[SentryClient],
+  metrics: Metrics[F],
+  assetsUpdatePeriod: Option[FiniteDuration],
+  metricsReportPeriod: Option[FiniteDuration]
+)
+
+object Environment {
+
+  private implicit def unsafeLogger[F[_]: Sync]: Logger[F] =
+    Slf4jLogger.getLogger[F]
+
+  type Parsed[F[_], A] = EitherT[F, String, A]
+
+  type Allocated[F[_]] = Parsed[F, Resource[F, Environment[F]]]
+
+  /** Registry with all allocated clients (MaxMind, IAB etc) and their original configs */
+  final case class Enrichments[F[_]](registry: EnrichmentRegistry[F], configs: List[EnrichmentConf]) {
+
+    /** Initialize the same enrichments, specified by configs (in case DB files were updated) */
+    def reinitialize(implicit A: Async[F]): F[Enrichments[F]] =
+      Enrichments.buildRegistry(configs).map(registry => Enrichments(registry, configs))
+  }
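+  // Editor's illustrative sketch, not part of the original change: the pause mechanism
+  // that `pauseEnrich` relies on - fs2's `pauseWhen` driven by a `SignallingRef`.
+  // Hypothetical, self-contained example:
+  //
+  //   import cats.effect.{ContextShift, IO, Timer}
+  //   import fs2.Stream
+  //
+  //   def demo(implicit cs: ContextShift[IO], t: Timer[IO]): IO[Unit] =
+  //     for {
+  //       pause <- SignallingRef[IO, Boolean](false)
+  //       _ <- Stream.emits(1 to 10)
+  //              .covary[IO]
+  //              .pauseWhen(pause)  // nothing is emitted while `pause` holds true
+  //              .evalMap(i => IO(println(i)))
+  //              .compile
+  //              .drain
+  //     } yield ()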
+  object Enrichments {
+    def make[F[_]: Async: Clock](configs: List[EnrichmentConf]): Resource[F, Ref[F, Enrichments[F]]] =
+      Resource.liftF {
+        for {
+          registry <- buildRegistry[F](configs)
+          ref <- Ref.of(Enrichments[F](registry, configs))
+        } yield ref
+      }
+
+    def buildRegistry[F[_]: Async](configs: List[EnrichmentConf]) =
+      EnrichmentRegistry.build[F](configs).value.flatMap {
+        case Right(reg) => Async[F].pure(reg)
+        case Left(error) => Async[F].raiseError[EnrichmentRegistry[F]](new RuntimeException(error))
+      }
+  }
+
+  /** Schema for all enrichments combined */
+  val EnrichmentsKey: SchemaKey =
+    SchemaKey("com.snowplowanalytics.snowplow", "enrichments", "jsonschema", SchemaVer.Full(1, 0, 0))
+
+  /** Initialize and allocate all necessary resources */
+  def make[F[_]: ConcurrentEffect: ContextShift: Clock: Timer](config: CliConfig): Allocated[F] =
+    parse[F](config).map { parsedConfigs =>
+      val file = parsedConfigs.configFile
+      for {
+        client <- Client.parseDefault[F](parsedConfigs.igluJson).resource
+        blocker <- Blocker[F]
+        metrics <- Metrics.resource[F]
+        rawSource = Source.read[F](blocker, file.auth, file.input)
+        goodSink <- Sinks.goodSink[F](blocker, file.auth, file.good)
+        badSink <- Sinks.badSink[F](blocker, file.auth, file.bad)
+        assets = parsedConfigs.enrichmentConfigs.flatMap(_.filesToCache)
+        pauseEnrich <- makePause[F]
+        assets <- Assets.State.make[F](blocker, pauseEnrich, assets)
+        enrichments <- Enrichments.make[F](parsedConfigs.enrichmentConfigs)
+        sentry <- file.sentry.map(_.dsn) match {
+                    case Some(dsn) => Resource.liftF[F, Option[SentryClient]](Sync[F].delay(Sentry.init(dsn.toString).some))
+                    case None => Resource.pure[F, Option[SentryClient]](none[SentryClient])
+                  }
+        _ <- Resource.liftF(pauseEnrich.set(false) *> Logger[F].info("Enrich environment initialized"))
+      } yield Environment[F](
+        client,
+        enrichments,
+        pauseEnrich,
+        assets,
+        blocker,
+        rawSource,
+        goodSink,
+        badSink,
+        sentry,
+        metrics,
+        file.assetsUpdatePeriod,
+        file.metricsReportPeriod
+      )
+    }
+
+  /**
+   * Make sure `pauseEnrich` gets into the paused state before destroying pipes
+   * Initialised to `true` because the enrich stream should not start until
+   * [[Assets.State]] is constructed - it will download all assets
+   */
+  def makePause[F[_]: Concurrent]: Resource[F, SignallingRef[F, Boolean]] =
+    Resource.make(SignallingRef(true))(_.set(true))
+
+  /** Decode base64-encoded configs passed via CLI. Read files, validate and parse */
+  def parse[F[_]: Async: Clock: ContextShift](config: CliConfig): Parsed[F, ParsedConfigs] =
+    for {
+      igluJson <- config.resolver.fold(b => EitherT.rightT[F, String](b.value), p => FileSystem.readJson[F](p))
+      enrichmentJsons <- config.enrichments match {
+                           case Left(base64) =>
+                             EitherT.rightT[F, String](base64.value)
+                           case Right(path) =>
+                             FileSystem
+                               .readJsonDir[F](path)
+                               .map(jsons => Json.arr(jsons: _*))
+                               .map(json => SelfDescribingData(EnrichmentsKey, json).asJson)
+                         }
+      configFile <- ConfigFile.parse[F](config.config)
+      client <- Client.parseDefault[F](igluJson).leftMap(x => show"Cannot decode Iglu Client.
$x") + _ <- EitherT.liftF( + Logger[F].info(show"Parsed Iglu Client with following registries: ${client.resolver.repos.map(_.config.name).mkString(", ")}") + ) + configs <- EitherT(EnrichmentRegistry.parse[F](enrichmentJsons, client, false).map(_.toEither)).leftMap { x => + show"Cannot decode enrichments ${x.mkString_(", ")}" + } + _ <- EitherT.liftF(Logger[F].info(show"Parsed following enrichments: ${configs.map(_.schemaKey.name).mkString(", ")}")) + } yield ParsedConfigs(igluJson, configs, configFile) + + private[fs2] final case class ParsedConfigs( + igluJson: Json, + enrichmentConfigs: List[EnrichmentConf], + configFile: ConfigFile + ) + + private implicit class EitherTOps[F[_], E: Show, A](eitherT: EitherT[F, E, A]) { + def resource(implicit F: Sync[F]): Resource[F, A] = { + val action: F[A] = eitherT.value.flatMap { + case Right(a) => Sync[F].pure(a) + case Left(error) => Sync[F].raiseError(new RuntimeException(error.show)) // Safe since we already parsed it + } + Resource.liftF[F, A](action) + } + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Main.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Main.scala new file mode 100644 index 000000000..497192676 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Main.scala @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import cats.syntax.flatMap._ +import cats.effect.{ExitCode, IO, IOApp} + +import _root_.io.sentry.SentryClient + +import _root_.io.chrisdavenport.log4cats.Logger +import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +import com.snowplowanalytics.snowplow.enrich.fs2.io.Metrics + +object Main extends IOApp { + + private implicit val logger: Logger[IO] = + Slf4jLogger.getLogger[IO] + + def run(args: List[String]): IO[ExitCode] = + config.CliConfig.command.parse(args) match { + case Right(cfg) => + for { + _ <- logger.info("Initialising resources for Enrich job") + environment <- Environment.make[IO](cfg).value + exit <- environment match { + case Right(e) => + e.use { env => + val log = logger.info("Running enrichment stream") + val enrich = Enrich.run[IO](env) + val updates = Assets.run[IO](env) + val reporting = Metrics.run[IO](env) + val flow = enrich.merge(updates).merge(reporting) + log >> flow.compile.drain.attempt.flatMap { + case Left(exception) => + unsafeSendSentry(exception, env.sentry) + IO.raiseError[ExitCode](exception).as(ExitCode.Error) + case Right(_) => + IO.pure(ExitCode.Success) + } + } + case Left(error) => + logger.error(s"Cannot initialise enrichment resources\n$error").as(ExitCode.Error) + } + } yield exit + case Left(error) => + IO(System.err.println(error)).as(ExitCode.Error) + } + + /** Last attempt to notify about an exception (possibly just interruption) */ + private def unsafeSendSentry(error: Throwable, sentry: Option[SentryClient]): Unit = { + sentry match { + case Some(client) => + client.sendException(error) + case None => () + } + logger.error(s"The Enrich job has stopped ${sentry.fold("")(_ => "Sentry report has been sent")}").unsafeRunSync() + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Payload.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Payload.scala new file mode 100644 index 000000000..10bd2b725 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Payload.scala @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */
+package com.snowplowanalytics.snowplow.enrich.fs2
+
+import scala.annotation.tailrec
+
+import cats.Applicative
+import cats.syntax.either._
+import cats.data.Validated
+
+import fs2.{Pure, Stream}
+
+import com.snowplowanalytics.snowplow.enrich.fs2.Payload.Parsed
+
+/**
+ * Anything that has been read from [[RawSource]] and needs to be acknowledged,
+ * or a derivative (parsed `A`) that can be used to acknowledge the original message
+ * @param data original data or anything it has been transformed to
+ * @param finalise a side-effect to acknowledge (commit, log on-finish) the message,
+ *                 or a no-op in case the original message has been flattened into
+ *                 multiple rows and only the last row contains the actual side-effect
+ */
+case class Payload[F[_], A](data: A, finalise: F[Unit]) {
+
+  /**
+   * Flatten all payloads from a list and replace the `ack` action with a no-op everywhere
+   * except the last message, so that the original collector payload (with multiple events)
+   * will be ack'ed only when the last event has sunk into the good or bad sink
+   */
+  def decompose[L, R](implicit ev: A <:< List[Validated[L, R]], F: Applicative[F]): Stream[F, Parsed[F, L, R]] = {
+    val _ = ev
+    val noop: F[Unit] = Applicative[F].unit
+    def use(op: F[Unit])(v: Validated[L, R]): Parsed[F, L, R] =
+      v.fold(a => Payload(a, op).asLeft, b => Payload(b, op).asRight)
+
+    Payload.mapWithLast(use(noop), use(finalise))(data)
+  }
+}
+
+object Payload {
+
+  /**
+   * Original [[Payload]] that has been transformed into either `A` or `B`
+   * Regardless of the result (`A` or `B`), the original one still has to be acknowledged
+   *
+   * If the original contained only one row (good or bad), the `Parsed` must have a real
+   * `ack` action; otherwise, if it has been accompanied by other rows, only the last
+   * element from the original will contain the `ack`, all others just `noop`
+   */
+  type Parsed[F[_], A, B] = Either[Payload[F, A], Payload[F, B]]
+
+  /** Apply `f` to all elements of a list except the last one, to which `lastF` is applied */
+  def mapWithLast[A, B](f: A => B, lastF: A => B)(as: List[A]): Stream[Pure, B] = {
+    @tailrec
+    def go(aas: List[A], accum: Vector[B]): Vector[B] =
+      aas match {
+        case Nil =>
+          accum
+        case last :: Nil =>
+          accum :+ lastF(last)
+        case a :: remaining =>
+          go(remaining, accum :+ f(a))
+      }
+
+    Stream.emits(go(as, Vector.empty))
+  }
+}
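+// Editor's illustrative sketch, not part of the original change: what `decompose` does
+// to the ack action. Values are hypothetical; `ack` stands for a real commit effect.
+//
+//   import cats.Id
+//   import cats.data.Validated
+//
+//   val ack: Id[Unit] = ()  // imagine a real commit side-effect here
+//   val payload = Payload[Id, List[Validated[String, Int]]](
+//     List(Validated.valid(1), Validated.invalid("bad"), Validated.valid(2)),
+//     ack
+//   )
+//   // payload.decompose[String, Int] emits:
+//   //   Right(Payload(1, noop)), Left(Payload("bad", noop)), Right(Payload(2, ack))
+//   // - only the last element carries the real ack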
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.util.Base64 + +import cats.data.ValidatedNel +import cats.syntax.either._ + +import com.typesafe.config.{ConfigException, ConfigFactory} + +import _root_.io.circe.Json + +import pureconfig.syntax._ +import pureconfig.module.circe._ + +import com.monovore.decline.Argument + +final case class Base64Hocon(value: Json) extends AnyVal + +object Base64Hocon { + + private val base64 = Base64.getDecoder + + implicit val base64Hocon: Argument[Base64Hocon] = + new Argument[Base64Hocon] { + def read(string: String): ValidatedNel[String, Base64Hocon] = { + val result = for { + bytes <- Either.catchOnly[IllegalArgumentException](base64.decode(string)).leftMap(_.getMessage) + hocon <- parseHocon(new String(bytes)) + } yield hocon + result.toValidatedNel + } + + def defaultMetavar: String = "base64" + } + + def parseHocon(str: String): Either[String, Base64Hocon] = + for { + configValue <- Either.catchOnly[ConfigException](ConfigFactory.parseString(str)).leftMap(_.toString).map(_.toConfig) + json <- configValue.to[Json].leftMap(_.toString) + } yield Base64Hocon(json) +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Json.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Json.scala new file mode 100644 index 000000000..ab8fc4879 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Json.scala @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.util.Base64 + +import cats.data.ValidatedNel +import cats.syntax.show._ +import cats.syntax.either._ + +import _root_.io.circe.Json +import _root_.io.circe.parser.parse + +import com.monovore.decline.Argument + +final case class Base64Json(value: Json) extends AnyVal + +object Base64Json { + + private val base64 = Base64.getDecoder + + implicit val base64Json: Argument[Base64Json] = + new Argument[Base64Json] { + + def read(string: String): ValidatedNel[String, Base64Json] = { + val result = for { + bytes <- Either.catchOnly[IllegalArgumentException](base64.decode(string)).leftMap(_.getMessage) + str = new String(bytes) + json <- parse(str).leftMap(_.show) + } yield Base64Json(json) + result.toValidatedNel + } + + def defaultMetavar: String = "base64" + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfig.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfig.scala new file mode 100644 index 000000000..d15a00dab --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfig.scala @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. 
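A quick sketch of how these two `Argument` instances behave (all inputs hypothetical): a base64-encoded payload decodes to circe `Json`, while anything that is not valid base64, JSON or HOCON comes back as an `Invalid` with the decoder's error message.

```scala
import java.util.Base64

import com.monovore.decline.Argument

import com.snowplowanalytics.snowplow.enrich.fs2.config.{Base64Hocon, Base64Json}

// Hypothetical inputs round-tripped through the instances above
val jsonArg  = Base64.getEncoder.encodeToString("""{"cacheSize": 500}""".getBytes("UTF-8"))
val hoconArg = Base64.getEncoder.encodeToString("input { type: PubSub }".getBytes("UTF-8"))

Argument[Base64Json].read(jsonArg)       // Valid(Base64Json({"cacheSize": 500}))
Argument[Base64Hocon].read(hoconArg)     // Valid(Base64Hocon({"input": {"type": "PubSub"}}))
Argument[Base64Json].read("not base64!") // Invalid: illegal base64 character
```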
+ * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.nio.file.Path + +import cats.data.{NonEmptyList, ValidatedNel} +import cats.implicits._ + +import com.monovore.decline.{Argument, Command, Opts} + +import com.snowplowanalytics.snowplow.enrich.fs2.generated.BuildInfo + +final case class CliConfig( + config: EncodedHoconOrPath, + resolver: EncodedOrPath, + enrichments: EncodedOrPath +) + +object CliConfig { + + implicit val encodedOrPathArgument: Argument[EncodedOrPath] = + new Argument[EncodedOrPath] { + def read(string: String): ValidatedNel[String, EncodedOrPath] = { + val encoded = Argument[Base64Json].read(string).map(_.asLeft) + val path = Argument[Path].read(string).map(_.asRight) + val error = show"Value $string cannot be parsed as Base64-encoded JSON nor as a filesystem path" + encoded.orElse(path).leftMap(_ => NonEmptyList.one(error)) + } + + def defaultMetavar: String = "input" + } + + implicit val encodedHoconOrPathArgument: Argument[EncodedHoconOrPath] = + new Argument[EncodedHoconOrPath] { + def read(string: String): ValidatedNel[String, EncodedHoconOrPath] = { + val encoded = Argument[Base64Hocon].read(string).map(_.asLeft) + val path = Argument[Path].read(string).map(_.asRight) + val error = show"Value $string cannot be parsed as Base64-encoded HOCON nor as a filesystem path" + encoded.orElse(path).leftMap(_ => NonEmptyList.one(error)) + } + + def defaultMetavar: String = "input" + } + + val configFile: Opts[EncodedHoconOrPath] = + Opts.option[EncodedHoconOrPath]("config", "Base64-encoded HOCON string with the application configuration", "c", "base64") + + val enrichments: Opts[EncodedOrPath] = + Opts.option[EncodedOrPath]("enrichments", "Base64-encoded JSON string with enrichment configurations", "e", "base64") + + val igluConfig: Opts[EncodedOrPath] = + Opts.option[EncodedOrPath]("iglu-config", "Iglu resolver configuration JSON", "r", "base64") + + val enrichedJobConfig: Opts[CliConfig] = + (configFile, igluConfig, enrichments).mapN(CliConfig.apply) + + val command: Command[CliConfig] = + Command(show"${BuildInfo.name}", show"${BuildInfo.name} ${BuildInfo.version}\n${BuildInfo.description}")(enrichedJobConfig) +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFile.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFile.scala new file mode 100644 index 000000000..21ba8d8e6 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFile.scala @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
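Putting the options together, a hypothetical invocation could look like the sketch below: `--config` takes base64-encoded HOCON, while `--iglu-config` and `--enrichments` accept either base64-encoded JSON or a filesystem path (the paths here are made up).

```scala
import java.util.Base64

import com.snowplowanalytics.snowplow.enrich.fs2.config.CliConfig

val hocon = Base64.getEncoder.encodeToString(
  """input = { type: FileSystem, dir: "/tmp/raw" }""".getBytes("UTF-8")
)
val args = List(
  "--config", hocon,
  "--iglu-config", "/etc/snowplow/resolver.json",   // not base64, so it falls back to a Path
  "--enrichments", "/etc/snowplow/enrichments.json"
)

CliConfig.command.parse(args) match {
  case Right(cfg) => println(s"Parsed CLI configuration: $cfg")
  case Left(help) => System.err.println(help)
}
```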
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import scala.concurrent.duration.FiniteDuration + +import cats.data.EitherT +import cats.implicits._ + +import cats.effect.{Blocker, ContextShift, Sync} + +import _root_.io.circe.{Decoder, Encoder, Json} +import _root_.io.circe.config.syntax._ +import _root_.io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder} + +import com.snowplowanalytics.snowplow.enrich.fs2.config.io.{Authentication, Input, Output} + +import pureconfig.ConfigSource +import pureconfig.module.catseffect.syntax._ +import pureconfig.module.circe._ + +/** + * Parsed HOCON configuration file + * + * @param auth authentication details, such as credentials + * @param input input (PubSub, Kinesis etc) + * @param good good enriched output (PubSub, Kinesis, FS etc) + * @param bad bad rows output (PubSub, Kinesis, FS etc) + * @param assetsUpdatePeriod period after which enrichment assets should be checked for updates + * @param sentry optional Sentry configuration for internal error reporting + * @param metricsReportPeriod period after which metrics are reported; reporting is disabled if not set + */ +final case class ConfigFile( + auth: Authentication, + input: Input, + good: Output, + bad: Output, + assetsUpdatePeriod: Option[FiniteDuration], + sentry: Option[Sentry], + metricsReportPeriod: Option[FiniteDuration] +) + +object ConfigFile { + + // Missing in circe-config + implicit val finiteDurationEncoder: Encoder[FiniteDuration] = + implicitly[Encoder[String]].contramap(_.toString) + + implicit val configFileDecoder: Decoder[ConfigFile] = + deriveConfiguredDecoder[ConfigFile].emap { + case ConfigFile(_, _, _, _, Some(aup), _, _) if aup.length <= 0L => + "assetsUpdatePeriod in config file must be positive".asLeft // TODO: use newtype + case ConfigFile(_, _, _, _, _, _, Some(mrp)) if mrp.length <= 0L => + "metricsReportPeriod in config file must be positive".asLeft + case other => other.asRight + } + implicit val configFileEncoder: Encoder[ConfigFile] = + deriveConfiguredEncoder[ConfigFile] + + def parse[F[_]: Sync: ContextShift](in: EncodedHoconOrPath): EitherT[F, String, ConfigFile] = + in match { + case Right(path) => + val result = Blocker[F].use { blocker => + ConfigSource + .default(ConfigSource.file(path)) + .loadF[F, Json](blocker) + .map(_.as[ConfigFile].leftMap(f => show"Couldn't parse the config $f")) + } + result.attemptT.leftMap(_.getMessage).subflatMap(identity) + case Left(encoded) => + EitherT.fromEither[F](encoded.value.as[ConfigFile].leftMap(failure => show"Couldn't parse a base64-encoded config file:\n$failure")) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Sentry.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Sentry.scala new file mode 100644 index 000000000..3dbe4e6fc --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Sentry.scala @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
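For illustration, a HOCON file along these lines (every value hypothetical) satisfies the decoder above: the `type` fields come from the discriminator set in the `config` package object, and the durations rely on the HOCON duration syntax that circe-config understands.

```scala
import cats.syntax.either._

import com.snowplowanalytics.snowplow.enrich.fs2.config.{Base64Hocon, ConfigFile}

// Hypothetical configuration; sentry is optional and omitted here
val exampleHocon =
  """
    |auth = { type: Gcp }
    |input = { type: PubSub, subscription: "projects/acme/subscriptions/raw" }
    |good = { type: PubSub, topic: "projects/acme/topics/enriched" }
    |bad = { type: FileSystem, dir: "/var/enrich/bad" }
    |assetsUpdatePeriod = 7 days
    |metricsReportPeriod = 1 minute
    |""".stripMargin

val decoded: Either[String, ConfigFile] =
  Base64Hocon.parseHocon(exampleHocon).flatMap(_.value.as[ConfigFile].leftMap(_.toString))
```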
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.net.URI + +import cats.syntax.either._ + +import _root_.io.circe.{Decoder, Encoder} +import _root_.io.circe.generic.extras.semiauto._ + +case class Sentry(dsn: URI) + +object Sentry { + + implicit val javaNetUriDecoder: Decoder[URI] = + Decoder[String].emap { str => + Either.catchOnly[IllegalArgumentException](URI.create(str)).leftMap(_.getMessage) + } + + implicit val javaNetUriEncoder: Encoder[URI] = + Encoder[String].contramap(_.toString) + + implicit val sentryDecoder: Decoder[Sentry] = + deriveConfiguredDecoder[Sentry] + implicit val sentryEncoder: Encoder[Sentry] = + deriveConfiguredEncoder[Sentry] +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/io.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/io.scala new file mode 100644 index 000000000..7cb28bcb6 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/io.scala @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
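A small sketch of the codec above (DSN made up): a well-formed URI decodes, while a string `URI.create` rejects surfaces as a `DecodingFailure` rather than an exception.

```scala
import _root_.io.circe.literal._

import com.snowplowanalytics.snowplow.enrich.fs2.config.Sentry

json"""{ "dsn": "https://public@sentry.example.com/42" }""".as[Sentry]
// Right(Sentry(https://public@sentry.example.com/42))

json"""{ "dsn": "not a uri" }""".as[Sentry]
// Left(DecodingFailure(...)) because URI.create cannot parse the string
```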
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.nio.file.{InvalidPathException, Path, Paths} + +import cats.syntax.either._ + +import _root_.io.circe.{Decoder, Encoder} +import _root_.io.circe.generic.extras.semiauto._ + +object io { + + implicit val javaPathDecoder: Decoder[Path] = + Decoder[String].emap { s => + Either.catchOnly[InvalidPathException](Paths.get(s)).leftMap(_.getMessage) + } + implicit val javaPathEncoder: Encoder[Path] = + Encoder[String].contramap(_.toString) + + sealed trait Authentication extends Product with Serializable + + object Authentication { + case object Gcp extends Authentication + + implicit val authenticationDecoder: Decoder[Authentication] = + deriveConfiguredDecoder[Authentication] + implicit val authenticationEncoder: Encoder[Authentication] = + deriveConfiguredEncoder[Authentication] + } + + /** Source of raw collector data (only PubSub supported at the moment) */ + sealed trait Input + + object Input { + + case class PubSub private (subscription: String) extends Input { + // Lazy, so that the derived decoder can reject a malformed subscription via `emap` instead of throwing here + lazy val (project, name) = + subscription.split("/").toList match { + case List("projects", project, "subscriptions", name) => + (project, name) + case _ => + throw new IllegalArgumentException(s"Cannot construct Input.PubSub from $subscription") + } + } + case class FileSystem(dir: Path) extends Input + + implicit val inputDecoder: Decoder[Input] = + deriveConfiguredDecoder[Input].emap { + case s @ PubSub(sub) => + sub.split("/").toList match { + case List("projects", _, "subscriptions", _) => + s.asRight + case _ => + s"Subscription must conform to the projects/project-name/subscriptions/subscription-name format, $s given".asLeft + } + case other => other.asRight + } + implicit val inputEncoder: Encoder[Input] = + deriveConfiguredEncoder[Input] + } + + sealed trait Output + + object Output { + case class PubSub private (topic: String) extends Output { + // Lazy, so that the derived decoder can reject a malformed topic via `emap` instead of throwing here + lazy val (project, name) = + topic.split("/").toList match { + case List("projects", project, "topics", name) => + (project, name) + case _ => + throw new IllegalArgumentException(s"Cannot construct Output.PubSub from $topic") + } + } + case class FileSystem(dir: Path) extends Output + + implicit val outputDecoder: Decoder[Output] = + deriveConfiguredDecoder[Output].emap { + case s @ PubSub(top) => + top.split("/").toList match { + case List("projects", _, "topics", _) => + s.asRight + case _ => + s"Topic must conform to the projects/project-name/topics/topic-name format, $s given".asLeft + } + case other => other.asRight + } + implicit val outputEncoder: Encoder[Output] = + deriveConfiguredEncoder[Output] + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/package.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/package.scala new file mode 100644 index 000000000..543b9214b --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/package.scala @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.nio.file.Path + +import _root_.io.circe.generic.extras.Configuration + +package object config { + + type EncodedOrPath = Either[Base64Json, Path] + type EncodedHoconOrPath = Either[Base64Hocon, Path] + + private[config] implicit def customCodecConfig: Configuration = + Configuration.default.withDiscriminator("type") + +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Clients.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Clients.scala new file mode 100644 index 000000000..1496217ce --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Clients.scala @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
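The `type` discriminator configured here is what links the HOCON/JSON representation to the sealed traits in `io`; a short sketch with a hypothetical subscription:

```scala
import _root_.io.circe.literal._

import com.snowplowanalytics.snowplow.enrich.fs2.config.io.Input

// "type" selects the Input subtype; emap then enforces the subscription format
json"""{ "type": "PubSub", "subscription": "projects/acme/subscriptions/raw" }"""
  .as[Input]
  .map { case p: Input.PubSub => (p.project, p.name) } // Right(("acme", "raw"))

json"""{ "type": "PubSub", "subscription": "acme/raw" }""".as[Input]
// Left(...): the subscription must match projects/<project>/subscriptions/<name>
```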
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import java.net.URI + +import cats.syntax.option._ +import cats.syntax.functor._ +import cats.syntax.flatMap._ + +import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource, Sync} + +import fs2.{RaiseThrowable, Stream} + +import blobstore.Path +import blobstore.s3.S3Store +import blobstore.gcs.GcsStore + +import com.google.cloud.storage.StorageOptions + +import org.http4s.{Request, Uri} +import org.http4s.client.{Client => HttpClient} +import org.http4s.client.blaze.BlazeClientBuilder + +import software.amazon.awssdk.services.s3.S3AsyncClient + +case class Clients[F[_]]( + s3Store: Option[S3Store[F]], + gcsStore: Option[GcsStore[F]], + http: Option[HttpClient[F]] +) { + + /** Download a `uri` as a stream of bytes, using the appropriate client */ + def download(uri: URI)(implicit RT: RaiseThrowable[F]): Stream[F, Byte] = + Clients.Client.getByUri(uri) match { + case Some(Clients.Client.S3) => + for { + s3 <- s3Store match { + case Some(c) => Stream.emit(c) + case None => Stream.raiseError(new IllegalStateException(s"S3 client is not initialized to download $uri")) + } + data <- s3.get(Path(uri.toString), 16 * 1024) + } yield data + case Some(Clients.Client.GCS) => + for { + gcs <- gcsStore match { + case Some(c) => Stream.emit(c) + case None => Stream.raiseError(new IllegalStateException(s"GCS client is not initialized to download $uri")) + } + data <- gcs.get(Path(uri.toString), 16 * 1024) + } yield data + case Some(Clients.Client.HTTP) => + http match { + case Some(c) => + val request = Request[F](uri = Uri.unsafeFromString(uri.toString)) + for { + response <- c.stream(request) + body <- if (response.status.isSuccess) response.body + else Stream.raiseError[F](Clients.DownloadingFailure(uri)) + } yield body + case None => + Stream.raiseError(new IllegalStateException(s"HTTP client is not initialized to download $uri")) + } + case None => + Stream.raiseError(new IllegalStateException(s"No client initialized to download $uri")) + } +} + +object Clients { + + sealed trait Client + object Client { + case object S3 extends Client + case object GCS extends Client + case object HTTP extends Client + + def getByUri(uri: URI): Option[Client] = + uri.getScheme match { + case "http" | "https" => + Some(HTTP) + case "gs" => + Some(GCS) + case "s3" => + Some(S3) + case _ => + None + } + + def required(uris: List[URI]): Set[Client] = + uris.foldLeft(Set.empty[Client]) { (acc, uri) => + getByUri(uri) match { + case Some(client) => acc + client + case None => acc // This should short-circuit on initialisation + } + } + } + + def mkS3[F[_]: ConcurrentEffect]: F[S3Store[F]] = + Sync[F].delay(S3AsyncClient.builder().build()).flatMap(client => S3Store[F](client)) + + def mkGCS[F[_]: ConcurrentEffect: ContextShift](blocker: Blocker): F[GcsStore[F]] = + Sync[F].delay(StorageOptions.getDefaultInstance.getService).map { storage => + GcsStore(storage, blocker, List.empty) + } + + def mkHTTP[F[_]: ConcurrentEffect]: Resource[F, HttpClient[F]] = + BlazeClientBuilder[F](concurrent.ExecutionContext.global).resource + + /** Initialise all necessary clients capable of fetching the provided `uris` */ + def make[F[_]: ConcurrentEffect: ContextShift](blocker: Blocker, uris: List[URI]): Resource[F, Clients[F]] = { + val toInit = Client.required(uris) + for { + s3 <- if (toInit.contains(Client.S3)) Resource.liftF(mkS3[F]).map(_.some) else Resource.pure[F, Option[S3Store[F]]](none) + gcs <- if (toInit.contains(Client.GCS))
Resource.liftF(mkGCS[F](blocker).map(_.some)) else Resource.pure[F, Option[GcsStore[F]]](none) + http <- if (toInit.contains(Client.HTTP)) mkHTTP[F].map(_.some) else Resource.pure[F, Option[HttpClient[F]]](none) + } yield Clients(s3, gcs, http) + } + + case class DownloadingFailure(uri: URI) extends Throwable { + override def getMessage: String = s"Cannot download $uri" + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/FileSystem.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/FileSystem.scala new file mode 100644 index 000000000..58a80b3aa --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/FileSystem.scala @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import java.nio.file.{Files, Path} + +import scala.collection.JavaConverters._ + +import cats.data.EitherT + +import cats.effect.Sync +import cats.implicits._ + +import fs2.Stream + +import _root_.io.circe.Json +import _root_.io.circe.parser.parse + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object FileSystem { + + private implicit def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + + def list[F[_]: Sync](dir: Path): Stream[F, Path] = + for { + paths <- Stream.eval(Sync[F].delay(Files.list(dir))) + path <- Stream.fromIterator(paths.iterator().asScala) + } yield path + + def readJson[F[_]: Sync](path: Path): EitherT[F, String, Json] = + Sync[F] + .delay[String](Files.readString(path)) + .attemptT + .leftMap(e => show"Error reading ${path.toAbsolutePath.toString} JSON file from filesystem: ${e.getMessage}") + .subflatMap(str => parse(str).leftMap(e => show"Cannot parse JSON in ${path.toAbsolutePath.toString}: ${e.getMessage()}")) + + def readJsonDir[F[_]: Sync](dir: Path): EitherT[F, String, List[Json]] = + list(dir).compile.toList.attemptT + .leftMap(e => show"Cannot list ${dir.toAbsolutePath.toString} directory with JSON: ${e.getMessage}") + .flatMap { paths => + EitherT.liftF[F, String, Unit](Logger[F].info(s"Files found in $dir: ${paths.mkString(", ")}")) *> + paths.filter(_.toString.endsWith(".json")).traverse(readJson[F]) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Metrics.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Metrics.scala new file mode 100644 index 000000000..18f189858 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Metrics.scala @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. 
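A usage sketch for the factory above (bucket, URL and implicits hypothetical; in the real application `ConcurrentEffect[IO]` and `ContextShift[IO]` come from `IOApp`): `make` initialises only the clients that the URI schemes actually require, and `download` dispatches on the scheme.

```scala
import java.net.URI

import cats.effect.{Blocker, IO}

// Hypothetical asset URIs: one GCS object, one plain HTTPS download
val uris = List(
  URI.create("gs://acme-enrich-assets/GeoIP2-City.mmdb"),
  URI.create("https://example.com/referer-tests.json")
)

// Assumes implicit ConcurrentEffect[IO] and ContextShift[IO] in scope (e.g. from IOApp)
val downloadFirst: IO[Unit] =
  Blocker[IO].use { blocker =>
    Clients.make[IO](blocker, uris).use { clients =>
      clients.download(uris.head).compile.drain // streamed through the GCS store
    }
  }
```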
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import cats.syntax.applicativeError._ +import cats.effect.{Resource, Sync, Timer} + +import fs2.Stream + +import com.codahale.metrics.{Gauge, MetricRegistry, Slf4jReporter} + +import org.slf4j.LoggerFactory + +import com.snowplowanalytics.snowplow.enrich.fs2.Environment + +trait Metrics[F[_]] { + + /** Send latest metrics to reporter */ + def report: F[Unit] + + /** + * Track latency between collector hit and enrichment + * This function gets current timestamp by itself + */ + def enrichLatency(collectorTstamp: Option[Long]): F[Unit] + + /** Increment raw payload count */ + def rawCount: F[Unit] + + /** Increment good enriched events */ + def goodCount: F[Unit] + + /** Increment bad events */ + def badCount: F[Unit] +} + +object Metrics { + + val LoggerName = "enrich.metrics" + val LatencyGaugeName = "enrich.metrics.latency" + val RawCounterName = "enrich.metrics.raw.count" + val GoodCounterName = "enrich.metrics.good.count" + val BadCounterName = "enrich.metrics.bad.count" + + def run[F[_]: Sync: Timer](env: Environment[F]): Stream[F, Unit] = + env.metricsReportPeriod match { + case Some(period) => + Stream.fixedRate[F](period).evalMap(_ => env.metrics.report) + case None => + Stream.empty.covary[F] + } + + /** + * Technically `Resource` doesn't give us much as we don't allocate a thread pool, + * but it will make sure the last report is issued + */ + def resource[F[_]: Sync]: Resource[F, Metrics[F]] = + Resource + .make(init) { case (res, _) => Sync[F].delay(res.close()) } + .map { case (res, reg) => make[F](res, reg) } + + /** Initialise backend resources */ + def init[F[_]: Sync]: F[(Slf4jReporter, MetricRegistry)] = + Sync[F].delay { + val registry = new MetricRegistry() + val logger = LoggerFactory.getLogger(LoggerName) + val reporter = Slf4jReporter.forRegistry(registry).outputTo(logger).build() + (reporter, registry) + } + + def make[F[_]: Sync](reporter: Slf4jReporter, registry: MetricRegistry): Metrics[F] = + new Metrics[F] { + val rawCounter = registry.counter(RawCounterName) + val goodCounter = registry.counter(GoodCounterName) + val badCounter = registry.counter(BadCounterName) + + def report: F[Unit] = + Sync[F].delay(reporter.report()) + + def enrichLatency(collectorTstamp: Option[Long]): F[Unit] = + collectorTstamp match { + case Some(tstamp) => + Sync[F] + .delay { + registry.remove(LatencyGaugeName) + val now = System.currentTimeMillis() + val _ = registry.register(LatencyGaugeName, getGauge(now, tstamp)) + } + .handleError { + // Two threads can run into a race condition registering a gauge + case _: IllegalArgumentException => () + } + case None => + Sync[F].unit + } + + def rawCount: F[Unit] = + Sync[F].delay(rawCounter.inc()) + + def goodCount: F[Unit] = + Sync[F].delay(goodCounter.inc()) + + def badCount: F[Unit] = + Sync[F].delay(badCounter.inc()) + + private def getGauge(now: Long, collectorTstamp: Long): Gauge[Long] = + new Gauge[Long] { + def getValue: Long = now - collectorTstamp + } + } +} diff --git 
a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Sinks.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Sinks.scala new file mode 100644 index 000000000..230c9dd6d --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Sinks.scala @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import java.nio.file.{Path, StandardOpenOption} + +import scala.concurrent.duration._ + +import cats.syntax.flatMap._ +import cats.syntax.functor._ + +import cats.effect.{Blocker, Concurrent, ContextShift, Resource, Sync} + +import fs2.{Pipe, Stream, text} +import fs2.io.file.writeAll + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +import com.permutive.pubsub.producer.Model.{ProjectId, Topic} +import com.permutive.pubsub.producer.encoder.MessageEncoder +import com.permutive.pubsub.producer.grpc.{GooglePubsubProducer, PubsubProducerConfig} + +import com.snowplowanalytics.snowplow.badrows.BadRow + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +import com.snowplowanalytics.snowplow.enrich.fs2.{BadSink, Enrich, GoodSink, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.config.io.{Authentication, Output} + +object Sinks { + + /** + * Set the delay threshold to use for batching. After this amount of time has elapsed (counting + * from the first element added), the elements will be wrapped up in a batch and sent. This + * value should not be set too high, usually on the order of milliseconds. Otherwise, calls + * might appear to never complete. 
+ */ + val DelayThreshold: FiniteDuration = 200.milliseconds + + private implicit def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + + def goodSink[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + auth: Authentication, + output: Output + ): Resource[F, GoodSink[F]] = + (auth, output) match { + case (Authentication.Gcp, o: Output.PubSub) => + pubsubSink[F, EnrichedEvent](o) + case (_, o: Output.FileSystem) => + Resource.pure(goodFileSink(o.dir, blocker)) + } + + def badSink[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + auth: Authentication, + output: Output + ): Resource[F, BadSink[F]] = + (auth, output) match { + case (Authentication.Gcp, o: Output.PubSub) => + pubsubSink[F, BadRow](o) + case (_, o: Output.FileSystem) => + Resource.pure(badFileSink(o.dir, blocker)) + } + + def pubsubSink[F[_]: Concurrent, A: MessageEncoder]( + output: Output.PubSub + ): Resource[F, Pipe[F, Payload[F, A], Unit]] = { + val config = PubsubProducerConfig[F]( + batchSize = 5, + delayThreshold = DelayThreshold, + onFailedTerminate = err => Logger[F].error(err)("PubSub sink termination error") + ) + + GooglePubsubProducer + .of[F, A](ProjectId(output.project), Topic(output.name), config) + .map(producer => + (s: Stream[F, Payload[F, A]]) => s.parEvalMapUnordered(Enrich.ConcurrencyLevel)(row => producer.produce(row.data) >> row.finalise) + ) + } + + def goodFileSink[F[_]: Sync: ContextShift](goodOut: Path, blocker: Blocker): GoodSink[F] = + goodStream => + goodStream + .evalMap(p => p.finalise.as(Enrich.encodeEvent(p.data))) + .intersperse("\n") + .through(text.utf8Encode) + .through(writeAll[F](goodOut, blocker, List(StandardOpenOption.CREATE_NEW))) + + def badFileSink[F[_]: Sync: ContextShift](badOut: Path, blocker: Blocker): BadSink[F] = + badStream => + badStream + .evalMap(p => p.finalise.as(p.data.compact)) + .intersperse("\n") + .through(text.utf8Encode) + .through(writeAll[F](badOut, blocker, List(StandardOpenOption.CREATE_NEW))) +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Source.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Source.scala new file mode 100644 index 000000000..ce9b734bb --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Source.scala @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
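As a sketch of the filesystem path above (output file and event hypothetical; `ContextShift[IO]` assumed from `IOApp`): each payload is finalised and then encoded as one line of output; the PubSub path is wired the same way through `pubsubSink`.

```scala
import java.nio.file.Paths

import cats.effect.{Blocker, IO}
import fs2.Stream

import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
import com.snowplowanalytics.snowplow.enrich.fs2.Payload

// Writes a single enriched event to a local file (CREATE_NEW: the file must not exist yet)
def writeOne(event: EnrichedEvent): IO[Unit] =
  Blocker[IO].use { blocker =>
    val sink = Sinks.goodFileSink[IO](Paths.get("/tmp/enriched.tsv"), blocker)
    Stream.emit(Payload(event, IO.unit)).through(sink).compile.drain
  }
```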
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import cats.effect.{Blocker, Concurrent, ContextShift, Sync} +import cats.implicits._ + +import fs2.Stream +import fs2.io.file.{directoryStream, readAll} + +import com.permutive.pubsub.consumer.Model +import com.permutive.pubsub.consumer.grpc.{PubsubGoogleConsumer, PubsubGoogleConsumerConfig} + +import com.snowplowanalytics.snowplow.enrich.fs2.{Payload, RawSource} +import com.snowplowanalytics.snowplow.enrich.fs2.config.io.{Authentication, Input} + +import com.google.pubsub.v1.PubsubMessage + +object Source { + + def read[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + auth: Authentication, + input: Input + ): RawSource[F] = + (auth, input) match { + case (Authentication.Gcp, p: Input.PubSub) => + pubSub(blocker, p) + case (_, p: Input.FileSystem) => + directoryStream(blocker, p.dir).evalMap { file => + readAll[F](file, blocker, 4096).compile + .to(Array) + .map(bytes => Payload(bytes, Sync[F].unit)) + } + } + + def pubSub[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + input: Input.PubSub + ): Stream[F, Payload[F, Array[Byte]]] = { + val onFailedTerminate: Throwable => F[Unit] = + e => Sync[F].delay(System.err.println(s"Cannot terminate: ${e.getMessage}")) + val pubSubConfig = PubsubGoogleConsumerConfig(onFailedTerminate = onFailedTerminate) + val projectId = Model.ProjectId(input.project) + val subscriptionId = Model.Subscription(input.name) + val errorHandler: (PubsubMessage, Throwable, F[Unit], F[Unit]) => F[Unit] = // Should never be called, as the Array[Byte] decoder below cannot fail + (message, error, _, _) => + Sync[F].delay(System.err.println(s"Cannot decode message ${message.getMessageId} into array of bytes. ${error.getMessage}")) + PubsubGoogleConsumer + .subscribe[F, Array[Byte]](blocker, projectId, subscriptionId, errorHandler, pubSubConfig) + .map(record => Payload(record.value, record.ack)) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/package.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/package.scala new file mode 100644 index 000000000..9ad3e5eca --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/package.scala @@ -0,0 +1,31 @@ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import cats.syntax.either._ + +import com.permutive.pubsub.consumer.decoder.MessageDecoder +import com.permutive.pubsub.producer.encoder.MessageEncoder + +import com.snowplowanalytics.snowplow.badrows.BadRow + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +package object io { + + implicit val badRowEncoder: MessageEncoder[BadRow] = + new MessageEncoder[BadRow] { + def encode(a: BadRow): Either[Throwable, Array[Byte]] = + a.compact.getBytes.asRight + } + + implicit val enrichedEventEncoder: MessageEncoder[EnrichedEvent] = + new MessageEncoder[EnrichedEvent] { + def encode(enrichedEvent: EnrichedEvent): Either[Throwable, Array[Byte]] = + Enrich.encodeEvent(enrichedEvent).getBytes.asRight + } + + implicit val byteArrayMessageDecoder: MessageDecoder[Array[Byte]] = + new MessageDecoder[Array[Byte]] { + def decode(message: Array[Byte]): Either[Throwable, Array[Byte]] = + message.asRight + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/package.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/package.scala new file mode 100644 index 000000000..57d85526b --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/package.scala @@ -0,0
+1,36 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich + +import cats.data.Validated + +import _root_.fs2.{Pipe, Stream} + +import com.snowplowanalytics.snowplow.badrows.BadRow + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +package object fs2 { + + /** Raw Thrift payloads coming from a collector */ + type RawSource[F[_]] = Stream[F, Payload[F, Array[Byte]]] + + type BadSink[F[_]] = Pipe[F, Payload[F, BadRow], Unit] + type GoodSink[F[_]] = Pipe[F, Payload[F, EnrichedEvent], Unit] + + /** Enrichment result, containing a list of (valid and invalid) results */ + type Result[F[_]] = Payload[F, List[Validated[BadRow, EnrichedEvent]]] + + /** Function to transform an original raw payload into good and/or bad rows */ + type Enrich[F[_]] = Payload[F, Array[Byte]] => F[Result[F]] +} diff --git a/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-1.mmdb b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-1.mmdb new file mode 100644 index 000000000..902c8d78d Binary files /dev/null and b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-1.mmdb differ diff --git a/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-2.mmdb b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-2.mmdb new file mode 100644 index 000000000..e76492b80 Binary files /dev/null and b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-2.mmdb differ diff --git a/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb new file mode 100644 index 000000000..0a9098648 Binary files /dev/null and b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb differ diff --git a/modules/fs2/src/test/resources/simplelogger.properties b/modules/fs2/src/test/resources/simplelogger.properties new file mode 100644 index 000000000..c4ed0bdd7 --- /dev/null +++ b/modules/fs2/src/test/resources/simplelogger.properties @@ -0,0 +1,13 @@ +org.slf4j.simpleLogger.showThreadName=false +org.slf4j.simpleLogger.showDateTime=true + +org.slf4j.simpleLogger.log.org.http4s.blaze.channel.ServerChannel=off +org.slf4j.simpleLogger.log.org.http4s.blaze.channel.nio1.SelectorLoop=off +org.slf4j.simpleLogger.log.org.http4s.blaze.channel.nio1.NIO1SocketServerGroup=off +org.slf4j.simpleLogger.log.org.http4s.client.PoolManager=off +org.slf4j.simpleLogger.log.org.http4s.server.blaze.BlazeServerBuilder=off +
+org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.Enrich=info +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.Assets=warn +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.TestEnvironment=info +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.test.HttpServer=info diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/AssetsSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/AssetsSpec.scala new file mode 100644 index 000000000..12c1e6317 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/AssetsSpec.scala @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.net.URI +import java.nio.file.Paths + +import scala.concurrent.duration._ + +import fs2.Stream +import fs2.io.file.{exists, readAll} + +import cats.effect.{Blocker, Concurrent, ContextShift, IO, Resource, Timer} + +import com.snowplowanalytics.snowplow.enrich.fs2.test._ +import com.snowplowanalytics.snowplow.enrich.fs2.Assets.Asset + +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class AssetsSpec extends Specification with CatsIO with ScalaCheck { + + sequential + + "updateStream" should { + "not set stop signal if no updates required" in + AssetsSpec.run(1.second) { (state, run) => + run(100.millis, List.empty) *> state.pauseEnrich.get.map { pause => + pause must beFalse + } + } + + "download an asset and leave the pauseEnrich signal set to false" in { + val path = Paths.get("asset") + val input = List( + (URI.create("http://localhost:8080/asset"), path.toString) + ) + AssetsSpec.run(1500.millis) { (state, run) => + for { + assetExistsBefore <- Blocker[IO].use(b => exists[IO](b, path)) + _ <- run(100.millis, input) + pauseEnrich <- state.pauseEnrich.get + assetExists <- Blocker[IO].use(b => exists[IO](b, path)) + } yield { + assetExistsBefore must beFalse // Otherwise previous execution left the file + pauseEnrich must beFalse + assetExists must beTrue + } + } + } + + "set stop signal to true when long downloads are performed" in { + val input = List( + (URI.create("http://localhost:8080/slow"), "asset1"), // First sets stop to true + (URI.create("http://localhost:8080/slow"), "asset2") // Second doesn't allow update to return prematurely + ) + AssetsSpec.run(3.seconds) { (state, run) => + for { + fiber <- (IO.sleep(2.seconds) *> state.pauseEnrich.get).start + _ <- run(500.milliseconds, input) + stop <- fiber.join + } yield stop must beTrue + } + } + + "attempt to re-download non-existing file" in { + val path = Paths.get("flaky-asset") + val input = List( + (URI.create("http://localhost:8080/flaky"), path.toString) + ) + AssetsSpec.run(5.seconds) { (state, run) =>
for { + _ <- run(800.millis, input) + stop <- state.pauseEnrich.get + assetContent <- Blocker[IO].use { b => + readAll[IO](path, b, 8).compile.to(Array).map(arr => new String(arr)) + } + } yield { + stop must beFalse + assetContent must beEqualTo("3") + } + } + } + } + + "Hash.fromStream" should { + "always create a valid MD5 hash" in { + prop { (bytes: Array[Byte]) => + val input = Stream.emits(bytes).covary[IO] + Assets.Hash.fromStream(input).map { hash => + hash.s.matches("^[a-f0-9]{32}$") must beTrue + } + } + } + } +} + +object AssetsSpec { + + /** Run the assets refresh function with the specified refresh interval and list of assets */ + type Run = (FiniteDuration, List[Asset]) => IO[Unit] + + /** + * User-written function to test effects of the [[Assets]] stream + * * First argument - state initialised to empty, can be inspected after + * * Second argument - [[Run]] function to specify custom refresh interval and list of assets + */ + type Test[A] = (Assets.State[IO], Run) => IO[A] + + /** + * Run a test with resources allocated specifically for it + * It will allocate a thread pool, empty state and an HTTP server, and will + * automatically remove all files after the test is over + * + * @param time timeout after which the test will be forced to exit + * @param test the actual test suite function + */ + def run[A]( + time: FiniteDuration + )( + test: Test[A] + )( + implicit C: Concurrent[IO], + T: Timer[IO], + CS: ContextShift[IO] + ): IO[A] = { + val resources = for { + blocker <- Blocker[IO] + state <- SpecHelpers.refreshState(List(URI.create("http://localhost:8080") -> "index")) + enrichments <- Environment.Enrichments.make[IO](List()) + path <- Resource.liftF(Assets.getCurDir[IO]) + _ <- SpecHelpers.filesResource(blocker, TestFiles) + } yield (blocker, state, enrichments, path) + + resources.use { + case (blocker, state, enrichments, curDir) => + val testFunction: Run = Assets + .updateStream[IO](blocker, state, enrichments, curDir, _, _) + .withHttp + .haltAfter(time) + .compile + .drain + test(state, testFunction) + } + } + + /** List of local files that have to be deleted after every test */ + private val TestFiles = List( + Paths.get("asset"), + Paths.get("asset1"), + Paths.get("asset2"), + Paths.get("flaky-asset") + ) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/EnrichSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/EnrichSpec.scala new file mode 100644 index 000000000..da5b1fab8 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/EnrichSpec.scala @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.time.Instant +import java.util.UUID + +import scala.concurrent.duration._ + +import cats.Applicative +import cats.data.Validated +import cats.implicits._ + +import cats.effect.IO + +import fs2.Stream + +import _root_.io.circe.literal._ + +import org.apache.http.NameValuePair +import org.apache.http.message.BasicNameValuePair + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.Event +import com.snowplowanalytics.snowplow.badrows.{Processor, BadRow, Payload => BadRowPayload} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.IpLookupsEnrichment +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +import com.snowplowanalytics.snowplow.enrich.fs2.EnrichSpec.{Expected, minimalEvent, normalizeResult} +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification + +import cats.effect.testing.specs2.CatsIO + +import org.specs2.scalacheck.Parameters + +class EnrichSpec extends Specification with CatsIO with ScalaCheck { + + sequential + + "enrichWith" should { + "enrich a minimal page_view CollectorPayload event without any enrichments enabled" in { + val expected = minimalEvent + .copy( + etl_tstamp = Some(Instant.ofEpochMilli(SpecHelpers.StaticTime)), + user_ipaddress = Some("175.16.199.0"), + event = Some("page_view"), + event_vendor = Some("com.snowplowanalytics.snowplow"), + event_name = Some("page_view"), + event_format = Some("jsonschema"), + event_version = Some("1-0-0"), + derived_tstamp = Some(Instant.ofEpochMilli(0L)) + ) + + TestEnvironment.ioBlocker.use { blocker => + Enrich + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], blocker, TestEnvironment.igluClient, None, _ => IO.unit)( + EnrichSpec.payload[IO] + ) + .map(normalizeResult) + .map { + case List(Validated.Valid(event)) => event must beEqualTo(expected) + case other => ko(s"Expected one valid event, got $other") + } + } + } + + "enrich a randomly generated page view event" in { + implicit val cpGen = PayloadGen.getPageViewArbitrary + prop { (collectorPayload: CollectorPayload) => + val payload = Payload(collectorPayload.toRaw, IO.unit) + TestEnvironment.ioBlocker.use { blocker => + Enrich + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], blocker, TestEnvironment.igluClient, None, _ => IO.unit)(payload) + .map(normalizeResult) + .map { + case List(Validated.Valid(e)) => e.event must beSome("page_view") + case other => ko(s"Expected one valid event, got $other") + } + } + }.setParameters(Parameters(maxSize = 20, minTestsOk = 25)) + } + } + + "enrich" should { + "update metrics with raw, good and bad counters" in { + val input = Stream(Payload(Array.empty[Byte], IO.unit), EnrichSpec.payload[IO]) + TestEnvironment.make(input).use { test => + val enrichStream = Enrich.run[IO](test.env) + val rows = test.bad.dequeue + .either(test.good.dequeue) + .concurrently(enrichStream) + .haltAfter(1.second) + for { + _ <- test.env.pauseEnrich.set(false) + payloads <- rows.compile.toList + _ <- IO.sleep(100.millis) + counter <- test.counter.get + } yield { + counter mustEqual Counter(2L, 1L, 1L, None) + payloads must be like { + case List(Left(_), Right(_)) => ok + case List(Right(_), Left(_)) => ok + case other => ko(s"Expected one bad and one good row, got $other") + } + } + } + } + + "enrich event 
using refreshing MaxMind DB" in { + // 4 enrichments can update assets: MaxMind, IAB, referer-parser, ua-parser + val input = Stream(EnrichSpec.payload[IO]) ++ Stream.sleep_(2.seconds) ++ Stream(EnrichSpec.payload[IO]) + val ipLookupsConf = IpLookupsEnrichment + .parse( + json"""{ + "name": "ip_lookups", + "vendor": "com.snowplowanalytics.snowplow", + "enabled": true, + "parameters": { + "geo": { + "database": "GeoIP2-City.mmdb", + "uri": "http://localhost:8080/maxmind" + } + } + }""", + SchemaKey( + "com.snowplowanalytics.snowplow", + "ip_lookups", + "jsonschema", + SchemaVer.Full(2, 0, 0) + ), + false // Unlike in other tests we actually download it + ) + .getOrElse(throw new RuntimeException("Invalid test configuration")) + + val one = Expected + .copy( + geo_country = Some("CN"), + geo_region = Some("22"), + geo_city = Some("Changchun"), + geo_latitude = Some(43.88), + geo_longitude = Some(125.3228), + geo_region_name = Some("Jilin Sheng"), + geo_timezone = Some("Asia/Harbin") + ) + val two = one.copy(geo_city = Some("Baishan")) + // Third one is Fuyu + + val assetsServer = HttpServer.resource(6.seconds) + (assetsServer *> TestEnvironment.make(input, List(ipLookupsConf))).use { test => + test + .run(_.copy(assetsUpdatePeriod = Some(1800.millis))) + .map { events => + events must containTheSameElementsAs(List(Right(one), Right(two))) + } + } + } + } +} + +object EnrichSpec { + val eventId: UUID = UUID.fromString("deadbeef-dead-beef-dead-beefdead") + + val api: CollectorPayload.Api = + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp2") + val source: CollectorPayload.Source = + CollectorPayload.Source("ssc-0.0.0-test", "UTF-8", Some("collector.snplow.net")) + val context: CollectorPayload.Context = CollectorPayload.Context(None, Some("175.16.199.0"), None, None, List(), None) + val querystring: List[NameValuePair] = List( + new BasicNameValuePair("e", "pv"), + new BasicNameValuePair("eid", eventId.toString) + ) + val colllectorPayload: CollectorPayload = CollectorPayload(api, querystring, None, None, source, context) + def payload[F[_]: Applicative]: Payload[F, Array[Byte]] = + Payload(colllectorPayload.toRaw, Applicative[F].unit) + + def normalize(payload: Payload[IO, EnrichedEvent]) = + Event + .parse(Enrich.encodeEvent(payload.data)) + .map(_.copy(etl_tstamp = Some(Instant.ofEpochMilli(SpecHelpers.StaticTime)))) match { + case Validated.Valid(event) => + Validated.Valid(event) + case Validated.Invalid(error) => + val rawPayload = BadRowPayload.RawPayload(Enrich.encodeEvent(payload.data)) + val badRow = BadRow.LoaderParsingError(Processor("fs2-enrich-test-suite", "x"), error, rawPayload) + Validated.Invalid(badRow) + } + + def normalizeResult(payload: Result[IO]) = + payload.data.map { + case Validated.Valid(a) => normalize(Payload(a, IO.unit)) + case Validated.Invalid(e) => e.invalid + } + + val minimalEvent = Event + .minimal( + EnrichSpec.eventId, + Instant.ofEpochMilli(0L), + "ssc-0.0.0-test", + s"fs2-enrich-${generated.BuildInfo.version}-common-${generated.BuildInfo.version}" + ) + + val Expected = minimalEvent + .copy( + etl_tstamp = Some(Instant.ofEpochMilli(SpecHelpers.StaticTime)), + user_ipaddress = Some("175.16.199.0"), + event = Some("page_view"), + event_vendor = Some("com.snowplowanalytics.snowplow"), + event_name = Some("page_view"), + event_format = Some("jsonschema"), + event_version = Some("1-0-0"), + derived_tstamp = Some(Instant.ofEpochMilli(0L)) + ) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadGen.scala 
b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadGen.scala new file mode 100644 index 000000000..3e1bba9c9 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadGen.scala @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.nio.file.{Path, Paths} +import java.util.Base64 + +import cats.effect.{Blocker, IO} +import cats.effect.concurrent.Ref + +import _root_.io.circe.literal._ +import fs2.{Chunk, Stream} +import fs2.io.file.{createDirectory, writeAll} + +import org.apache.http.message.BasicNameValuePair + +import org.joda.time.{DateTimeZone, LocalDate} + +import org.scalacheck.{Arbitrary, Gen} +import cats.effect.testing.specs2.CatsIO + +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload + +object PayloadGen extends CatsIO { + + val api: CollectorPayload.Api = + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp2") + val source: CollectorPayload.Source = + CollectorPayload.Source("ssc-0.0.0-test", "UTF-8", Some("collector.snplow.net")) + + val userAgentGen: Gen[String] = for { + os <- Gen.oneOf("Windows NT 10.0; Win64; x64", + "Windows NT 5.1; rv:7.0.1", + "Macintosh; Intel Mac OS X 10_14_5", + "Macintosh; Intel Mac OS X 10_15_4" + ) + engine <- Gen.oneOf("AppleWebKit/603.3.8 (KHTML, like Gecko)", + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169", + "AppleWebKit/605.1.15 (KHTML, like Gecko)" + ) + version <- Gen.oneOf("Version/11.1.2 Safari/605.1.15", "Chrome/60.0.3112.113 Safari/537.36", "Gecko/20100101 Firefox/40.1") + } yield s"Mozilla/5.0 ($os) $engine $version" + + val geolocationGen = for { + latitude <- Gen.choose(-90.0, 90.0) + longitude <- Gen.choose(-180.0, 180.0) + payload = json"""{"latitude":$latitude,"longitude":$longitude}""" + schemaKey = "iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0" + } yield json"""{"schema":$schemaKey, "data": $payload}""" + val contextsGen = for { + geo <- Gen.option(geolocationGen).map(_.toList) + schemaKey = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" + } yield json"""{"schema":$schemaKey, "data": $geo}""" + + val localDateGen: Gen[LocalDate] = Gen.calendar.map(LocalDate.fromCalendarFields).suchThat(_.year().get() < 3000) + val ipGen: Gen[String] = for { + part1 <- Gen.choose(2, 255) + part2 <- Gen.choose(0, 255) + part3 <- Gen.choose(0, 255) + part4 <- Gen.choose(0, 255) + } yield s"$part1.$part2.$part3.$part4" + val contextGen: Gen[CollectorPayload.Context] = for { + timestamp <- localDateGen.map(_.toDateTimeAtStartOfDay(DateTimeZone.UTC)).map(Option.apply) + ip <- Gen.option(ipGen) + userAgent <- userAgentGen.map(x => Some(x)) + userId <- Gen.option(Gen.uuid) + } yield CollectorPayload.Context(timestamp, ip, userAgent, None, List(), userId) + + val 
getPageView = for { + eventId <- Gen.uuid + aid <- Gen.oneOf("test-app", "scalacheck") + cx <- contextsGen.map(json => Base64.getEncoder.encodeToString(json.noSpaces.getBytes)) + querystring = List( + new BasicNameValuePair("aid", aid), + new BasicNameValuePair("e", "pv"), + new BasicNameValuePair("eid", eventId.toString), + new BasicNameValuePair("cx", cx) + ) + context <- contextGen + } yield CollectorPayload(api, querystring, None, None, source, context) + + val getPageViewArbitrary: Arbitrary[CollectorPayload] = Arbitrary.apply(getPageView) + + val payloadStream = Stream.repeatEval(IO(getPageView.sample)).collect { + case Some(x) => x + } + + def write(dir: Path, cardinality: Long): IO[Unit] = + for { + counter <- Ref.of[IO, Int](0) + dir <- Blocker[IO].use(b => createDirectory[IO](b, dir)) + filename = counter.updateAndGet(_ + 1).map(i => Paths.get(s"${dir.toAbsolutePath}/payload.$i.thrift")) + _ <- Blocker[IO].use { b => + val result = + for { + payload <- payloadStream.take(cardinality) + fileName <- Stream.eval(filename) + _ <- Stream.chunk(Chunk.bytes(payload.toRaw)).through(writeAll[IO](fileName, b)) + } yield () + result.compile.drain + } + } yield () +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadSpec.scala new file mode 100644 index 000000000..d154db6c5 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadSpec.scala @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
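PayloadGen.write above drains the generator into numbered thrift files. A minimal usage sketch, with an illustrative target directory and count (inside a CatsIO spec one would return the IO instead of running it):

```scala
import java.nio.file.Paths
import com.snowplowanalytics.snowplow.enrich.fs2.PayloadGen

// Generate 100 random thrift-serialized CollectorPayloads and write
// them as payload.1.thrift ... payload.100.thrift under ./payloads
// (path and count are illustrative)
PayloadGen.write(Paths.get("./payloads"), 100L).unsafeRunSync()
```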
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import cats.implicits._ + +import cats.effect.IO +import cats.effect.concurrent.Ref +import cats.effect.testing.specs2.CatsIO + +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification + +class PayloadSpec extends Specification with CatsIO with ScalaCheck { + "mapWithLast" should { + "always apply a lastF function to the last element" in { + prop { (list: List[String]) => + val lastF: String => String = _ => "unique" + val result = Payload.mapWithLast(identity[String], lastF)(list).toList + result.lastOption must (beSome("unique") or beNone) + } + } + + "always apply an f function to all elements except last" in { + prop { (list: List[String]) => + val f: String => String = _ => "unique" + val result = Payload.mapWithLast(f, identity[String])(list).toList + list match { + case Nil => ok + case _ => + val init = List.fill(list.length - 1)("unique") + result.mkString("-") must startWith(init.mkString("-")) + } + } + } + } + + "decompose" should { + "preserve the order" in { + val input = List("error-1".invalid, 42.valid, "error-2".invalid) + val payload = Payload(input, IO.unit) + payload.decompose[String, Int].compile.toList.map { + case List(error1, valid, error2) => + error1 must beLeft.like { + case Payload(data, _) => data must be("error-1") + } + valid must beRight.like { + case Payload(data, _) => data must beEqualTo(42) + } + error2 must beLeft.like { + case Payload(data, _) => data must be("error-2") + } + case other => + ko(s"Expected list of 3, got $other") + } + } + + "execute finalize action only once" in { + val input = List("error-1".invalid, 42.valid, "error-2".invalid) + for { + ref <- Ref.of[IO, Int](0) + payload = Payload(input, ref.update(_ + 1)) + parsed <- payload.decompose[String, Int].compile.toList + _ <- parsed.traverse_(_.fold(_.finalise, _.finalise)) + result <- ref.get + } yield result must beEqualTo(1) + } + + "not execute finalize action until last element" in { + val input = List("error-1".invalid, 42.valid, "error-2".invalid) + for { + ref <- Ref.of[IO, Int](0) + payload = Payload(input, ref.update(_ + 1)) + parsed <- payload.decompose[String, Int].compile.toList + _ <- parsed.init.traverse_(_.fold(_.finalise, _.finalise)) + result <- ref.get + } yield result must beEqualTo(0) + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/SpecHelpers.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/SpecHelpers.scala new file mode 100644 index 000000000..2c6db6f88 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/SpecHelpers.scala @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
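The two properties above pin down Payload.mapWithLast(f, lastF): f is applied to every element except the last, and lastF only to the last one. A concrete sketch of the behaviour the tests imply (the return type is elided, hence the .toList):

```scala
// "a" and "b" go through the first function, only "c" gets the suffix:
// the mechanism used to attach a finalizer to the final record only
val out = Payload.mapWithLast(identity[String], (s: String) => s + "!")(List("a", "b", "c"))
// out.toList == List("a", "b", "c!")
```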
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.nio.file.{NoSuchFileException, Path} + +import scala.concurrent.duration.TimeUnit + +import cats.effect.{Blocker, Clock, IO, Resource} +import cats.effect.concurrent.Ref + +import cats.implicits._ + +import fs2.io.file.deleteIfExists + +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import cats.effect.testing.specs2.CatsIO + +object SpecHelpers extends CatsIO { + implicit val ioClock: Clock[IO] = + Clock.create[IO] + + val StaticTime = 1599750938180L + + val staticIoClock: Clock[IO] = + new Clock[IO] { + def realTime(unit: TimeUnit): IO[Long] = IO.pure(StaticTime) + def monotonic(unit: TimeUnit): IO[Long] = IO.pure(StaticTime) + } + + def refreshState(uris: List[Assets.Asset]): Resource[IO, Assets.State[IO]] = + for { + b <- TestEnvironment.ioBlocker + stop <- Resource.liftF(Ref.of[IO, Boolean](false)) + state <- Assets.State.make[IO](b, stop, uris) + } yield state + + /** Clean-up predefined list of files */ + def filesCleanup(blocker: Blocker, files: List[Path]): IO[Unit] = + files.traverse_ { path => + deleteIfExists[IO](blocker, path).recover { + case _: NoSuchFileException => false + } + } + + /** Make sure files don't exist before and after test starts */ + def filesResource(blocker: Blocker, files: List[Path]): Resource[IO, Unit] = + Resource.make(filesCleanup(blocker, files))(_ => filesCleanup(blocker, files)) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64HoconSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64HoconSpec.scala new file mode 100644 index 000000000..10f065064 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64HoconSpec.scala @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
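filesResource above runs the same cleanup on acquisition and release, so a test never sees leftovers from a previous, possibly crashed, run. A sketch of wiring it around a test body; the path and runTest are illustrative:

```scala
import java.nio.file.Paths
import cats.effect.{Blocker, IO}

val files = List(Paths.get("/tmp/GeoIP2-City.mmdb")) // illustrative path
def runTest: IO[Unit] = IO.unit                      // stand-in for a test body

// assumes an implicit ContextShift[IO], e.g. the one CatsIO provides
Blocker[IO].use(b => SpecHelpers.filesResource(b, files).use(_ => runTest))
```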
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.util.Base64.getEncoder + +import com.monovore.decline.Argument + +import org.specs2.mutable.Specification + +class Base64HoconSpec extends Specification { + "Argument[Base64Hocon]" should { + "parse a base64-encoded HOCON" in { + val inputStr = """input = {}""" + val input = getEncoder.encodeToString(inputStr.getBytes()) + Argument[Base64Hocon].read(input).toEither must beRight + } + + "fail to parse plain string as HOCON" in { + val inputStr = "+" + val input = getEncoder.encodeToString(inputStr.getBytes()) + Argument[Base64Hocon].read(input).toEither must beLeft + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfigSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfigSpec.scala new file mode 100644 index 000000000..2083a7e37 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfigSpec.scala @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import cats.syntax.either._ + +import cats.effect.IO + +import org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class CliConfigSpec extends Specification with CatsIO { + "parseHocon" should { + "parse valid HOCON" in { + val string = """ + input = { + type = "PubSub" + subscription = "inputSub" + } + """.stripMargin + Base64Hocon.parseHocon(string) must beRight + } + } + + "ConfigFile.parse" should { + "parse valid HOCON" in { + val hocon = + Base64Hocon + .parseHocon(""" + auth = { + type = "Gcp" + } + input = { + type = "PubSub" + subscription = "projects/test-project/subscriptions/inputSub" + } + good = { + type = "PubSub" + topic = "projects/test-project/topics/good-topic" + } + bad = { + type = "PubSub" + topic = "projects/test-project/topics/bad-topic" + } + """) + .getOrElse(throw new RuntimeException("Cannot parse HOCON file")) + + val expected = ConfigFile( + io.Authentication.Gcp, + io.Input.PubSub("projects/test-project/subscriptions/inputSub"), + io.Output.PubSub("projects/test-project/topics/good-topic"), + io.Output.PubSub("projects/test-project/topics/bad-topic"), + None, + None, + None + ) + + ConfigFile.parse[IO](hocon.asLeft).value.map(result => result must beRight(expected)) + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFileSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFileSpec.scala new file mode 100644 index 000000000..9ebb50f82 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFileSpec.scala @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. 
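The Argument[Base64Hocon] instance tested above reads exactly what a caller produces by base64-encoding a HOCON string; a sketch with an illustrative config body:

```scala
import java.util.Base64
import com.monovore.decline.Argument
import com.snowplowanalytics.snowplow.enrich.fs2.config.Base64Hocon

val hocon =
  """input = {
    |  type = "PubSub"
    |  subscription = "projects/test-project/subscriptions/inputSub"
    |}""".stripMargin

val arg = Base64.getEncoder.encodeToString(hocon.getBytes("UTF-8"))
Argument[Base64Hocon].read(arg) // Valid(...) for well-formed HOCON like this
```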
+ * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.net.URI +import java.nio.file.Paths + +import scala.concurrent.duration._ + +import cats.syntax.either._ + +import cats.effect.IO + +import _root_.io.circe.literal._ + +import org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class ConfigFileSpec extends Specification with CatsIO { + "parse" should { + "parse valid HOCON file with path provided" in { + val configPath = Paths.get(getClass.getResource("/config.fs2.hocon.sample").toURI) + val expected = ConfigFile( + io.Authentication.Gcp, + io.Input.PubSub("projects/test-project/subscriptions/inputSub"), + io.Output.PubSub("projects/test-project/topics/good-topic"), + io.Output.PubSub("projects/test-project/topics/bad-topic"), + Some(7.days), + Some(Sentry(URI.create("http://sentry.acme.com"))), + Some(1.second) + ) + ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beRight(expected)) + } + + "reject assetsUpdatePeriod of 0 minutes" in { + val input = + json"""{ + "auth": { + "type": "Gcp" + }, + "input": { + "type": "PubSub", + "subscription": "projects/test-project/subscriptions/inputSub" + }, + "good": { + "type": "PubSub", + "topic": "projects/test-project/topics/good-topic" + }, + "bad": { + "type": "PubSub", + "topic": "projects/test-project/topics/bad-topic" + }, + + "assetsUpdatePeriod": "0 minutes", + "metricsReportPeriod": "10 second" + }""" + + ConfigFile.parse[IO](Base64Hocon(input).asLeft).value.map { + case Left(message) => message must contain("assetsUpdatePeriod in config file cannot be less than 0") + case _ => ko("Decoding should have failed") + } + } + + "not throw an exception if file not found" in { + val configPath = Paths.get("does-not-exist") + ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beLeft) + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/ApiRequestEnrichmentSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/ApiRequestEnrichmentSpec.scala new file mode 100644 index 000000000..8bc991c01 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/ApiRequestEnrichmentSpec.scala @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
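For reference, the expected ConfigFile in the path-based test implies a sample resource along these lines. This is a reconstruction from the assertions, not the literal contents of config.fs2.hocon.sample; in particular the sentry key layout is an assumption:

```scala
// shape implied by the expected ConfigFile above (a reconstruction)
val sample =
  """auth = { type = "Gcp" }
    |input = {
    |  type = "PubSub"
    |  subscription = "projects/test-project/subscriptions/inputSub"
    |}
    |good = { type = "PubSub", topic = "projects/test-project/topics/good-topic" }
    |bad = { type = "PubSub", topic = "projects/test-project/topics/bad-topic" }
    |assetsUpdatePeriod = "7 days"
    |sentry = { dsn = "http://sentry.acme.com" }
    |metricsReportPeriod = "1 second"
    |""".stripMargin
```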
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.enrichments + +import java.util.Base64 + +import scala.concurrent.duration._ + +import org.apache.http.message.BasicNameValuePair + +import cats.implicits._ + +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO + +import fs2.Stream + +import io.circe.Json +import io.circe.literal._ + +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.Contexts + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.{ + Authentication, + Cache, + HttpApi, + Input, + JsonOutput, + Output +} + +import com.snowplowanalytics.snowplow.enrich.fs2.enrichments.ApiRequestEnrichmentSpec.unstructEvent +import com.snowplowanalytics.snowplow.enrich.fs2.{EnrichSpec, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import org.specs2.mutable.Specification + +class ApiRequestEnrichmentSpec extends Specification with CatsIO { + + sequential + + "ApiRequestEnrichment" should { + "add a derived context" in { + val event = + json"""{ + "schema": "iglu:com.acme/test/jsonschema/1-0-1", + "data": {"path": {"id": 3}} + }""" + val payload = EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ue_px", unstructEvent(event)) :: EnrichSpec.colllectorPayload.querystring + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) + + /** Schemas defined at [[SchemaRegistry]] */ + val enrichment = ApiRequestConf( + SchemaKey("com.acme", "enrichment", "jsonschema", SchemaVer.Full(1, 0, 0)), + List(Input.Json("key1", "unstruct_event", SchemaCriterion("com.acme", "test", "jsonschema", 1), "$.path.id")), + HttpApi("GET", "http://localhost:8080/enrichment/api/{{key1}}", 2000, Authentication(None)), + List(Output("iglu:com.acme/output/jsonschema/1-0-0", Some(JsonOutput("$")))), + Cache(1, 1000) + ) + + val expected = Contexts( + List( + SelfDescribingData( + SchemaKey("com.acme", "output", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"output": "3"}""" + ) + ) + ) + + val testWithHttp = HttpServer.resource(4.seconds) *> TestEnvironment.make(input, List(enrichment)) + testWithHttp.use { test => + test.run().map { events => + events must beLike { + case List(Right(event)) => + event.derived_contexts must beEqualTo(expected) + case other => ko(s"Expected one enriched event, got $other") + } + } + } + } + } +} + +object ApiRequestEnrichmentSpec { + private val encoder = Base64.getEncoder + + def encode(json: Json): String = + new String(encoder.encode(json.noSpaces.getBytes)) + + def unstructEvent(json: Json): String = + encode(json"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":$json}""") +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/IabEnrichmentSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/IabEnrichmentSpec.scala new file mode 100644 index 000000000..a0f7e5fba --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/IabEnrichmentSpec.scala @@ -0,0 +1,102 @@ +package com.snowplowanalytics.snowplow.enrich.fs2.enrichments + +import java.net.URI + +import scala.concurrent.duration._ + +import 
cats.syntax.apply._ +import cats.syntax.option._ + +import cats.effect.IO + +import io.circe.literal._ + +import fs2.Stream + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.Contexts +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf +import com.snowplowanalytics.snowplow.enrich.fs2.{EnrichSpec, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.test.{HttpServer, TestEnvironment} + +import org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class IabEnrichmentSpec extends Specification with CatsIO { + + sequential + + "IabEnrichment" should { + "recognize a robot by IP address" in { + val payload = EnrichSpec.colllectorPayload.copy( + context = EnrichSpec.colllectorPayload.context.copy( + useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0".some + ) + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) + val expected = Contexts( + List( + SelfDescribingData( + SchemaKey("com.iab.snowplow", "spiders_and_robots", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"spiderOrRobot":true,"category":"SPIDER_OR_ROBOT","reason":"FAILED_IP_EXCLUDE","primaryImpact":"UNKNOWN"}""" + ) + ) + ) + val testWithHttp = HttpServer.resource(6.seconds) *> TestEnvironment.make(input, List(IabEnrichmentSpec.enrichmentConf)) + testWithHttp.use { test => + test.run().map { + case List(Right(event)) => + event.derived_contexts must beEqualTo(expected) + case other => + ko(s"Expected one valid event, got $other") + } + } + } + + "refresh assets" in { + val payload = EnrichSpec.colllectorPayload.copy( + context = EnrichSpec.colllectorPayload.context.copy( + useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0".some + ) + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) ++ Stream.sleep_(2.seconds) ++ Stream(Payload(payload.toRaw, IO.unit)) + + val expectedOne = Contexts( + List( + SelfDescribingData( + SchemaKey("com.iab.snowplow", "spiders_and_robots", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"spiderOrRobot":true,"category":"SPIDER_OR_ROBOT","reason":"FAILED_IP_EXCLUDE","primaryImpact":"UNKNOWN"}""" + ) + ) + ) + val expectedTwo = Contexts( + List( + SelfDescribingData( + SchemaKey("com.iab.snowplow", "spiders_and_robots", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"spiderOrRobot":false,"category":"BROWSER","reason":"PASSED_ALL","primaryImpact":"NONE"}""" + ) + ) + ) + + val testWithHttp = HttpServer.resource(6.seconds) *> TestEnvironment.make(input, List(IabEnrichmentSpec.enrichmentConf)) + testWithHttp.use { test => + test.run(_.copy(assetsUpdatePeriod = Some(1800.millis))).map { + case List(Right(eventOne), Right(eventTwo)) => + List(eventOne.derived_contexts, eventTwo.derived_contexts) must containTheSameElementsAs(List(expectedOne, expectedTwo)) + case other => + ko(s"Expected two valid events, got $other") + } + } + } + } +} + +object IabEnrichmentSpec { + val enrichmentConf = EnrichmentConf.IabConf( + SchemaKey("com.acme", "enrichment", "jsonschema", SchemaVer.Full(1, 0, 0)), + (URI.create("http://localhost:8080/iab/ip"), "ip"), + (URI.create("http://localhost:8080/iab/exclude"), "exclude"), + (URI.create("http://localhost:8080/iab/include"), "include") + ) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/YauaaEnrichmentSpec.scala 
b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/YauaaEnrichmentSpec.scala new file mode 100644 index 000000000..9acf00fa5 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/YauaaEnrichmentSpec.scala @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.enrichments + +import cats.implicits._ + +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO + +import fs2.Stream + +import io.circe.literal._ + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.Contexts + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.YauaaConf +import com.snowplowanalytics.snowplow.enrich.fs2.{EnrichSpec, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import org.specs2.mutable.Specification + +class YauaaEnrichmentSpec extends Specification with CatsIO { + + sequential + + "YauaaEnrichment" should { + "add a derived context" in { + val payload = EnrichSpec.colllectorPayload.copy( + context = EnrichSpec.colllectorPayload.context.copy( + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:81.0) Gecko/20100101 Firefox/81.0".some + ) + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) + + /** Schemas defined at [[SchemaRegistry]] */ + val enrichment = YauaaConf( + SchemaKey("com.acme", "enrichment", "jsonschema", SchemaVer.Full(1, 0, 0)), + Some(1) + ) + + val expected = Contexts( + List( + SelfDescribingData( + SchemaKey("nl.basjes", "yauaa_context", "jsonschema", SchemaVer.Full(1, 0, 1)), + json"""{ + "deviceBrand" : "Apple", + "deviceName" : "Apple Macintosh", + "operatingSystemVersionMajor" : "10", + "layoutEngineNameVersion" : "Gecko 81.0", + "operatingSystemNameVersion" : "Mac OS X 10.14", + "layoutEngineBuild" : "20100101", + "layoutEngineNameVersionMajor" : "Gecko 81", + "operatingSystemName" : "Mac OS X", + "agentVersionMajor" : "81", + "layoutEngineVersionMajor" : "81", + "deviceClass" : "Desktop", + "agentNameVersionMajor" : "Firefox 81", + "operatingSystemNameVersionMajor" : "Mac OS X 10", + "deviceCpuBits" : "32", + "operatingSystemClass" : "Desktop", + "layoutEngineName" : "Gecko", + "agentName" : "Firefox", + "agentVersion" : "81.0", + "layoutEngineClass" : "Browser", + "agentNameVersion" : "Firefox 81.0", + "operatingSystemVersion" : "10.14", + "deviceCpu" : "Intel", + "agentClass" : "Browser", + "layoutEngineVersion" : "81.0" + }""" + ) + ) + ) + + TestEnvironment.make(input, List(enrichment)).use { test => + test.run().map { events => + events must beLike { + case List(Right(event)) => + event.derived_contexts must beEqualTo(expected) + case other => ko(s"Expected one enriched event, got $other") + } + } + } + } + } 
+} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/Counter.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/Counter.scala new file mode 100644 index 000000000..62522f9db --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/Counter.scala @@ -0,0 +1,47 @@ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import java.util.concurrent.TimeUnit + +import cats.Monad +import cats.syntax.flatMap._ + +import cats.effect.concurrent.Ref +import cats.effect.{Clock, Sync} + +import com.snowplowanalytics.snowplow.enrich.fs2.io.Metrics + +/** Metrics container for testing */ +case class Counter( + raw: Long, + good: Long, + bad: Long, + latency: Option[Long] +) + +object Counter { + val empty: Counter = Counter(0L, 0L, 0L, None) + + def make[F[_]: Sync]: F[Ref[F, Counter]] = + Ref.of[F, Counter](empty) + + /** Create a pure metrics with mutable state */ + def mkCounterMetrics[F[_]: Monad: Clock](ref: Ref[F, Counter]): Metrics[F] = + new Metrics[F] { + def report: F[Unit] = + Monad[F].unit + + def enrichLatency(collectorTstamp: Option[Long]): F[Unit] = + Clock[F].realTime(TimeUnit.MILLISECONDS).flatMap { now => + ref.update(_.copy(latency = collectorTstamp.map(ct => now - ct))) + } + + def rawCount: F[Unit] = + ref.update(cnt => cnt.copy(raw = cnt.raw + 1)) + + def goodCount: F[Unit] = + ref.update(cnt => cnt.copy(good = cnt.good + 1)) + + def badCount: F[Unit] = + ref.update(cnt => cnt.copy(bad = cnt.bad + 1)) + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/HttpServer.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/HttpServer.scala new file mode 100644 index 000000000..80bdfa38e --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/HttpServer.scala @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2012-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
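Counter above backs the specs with pure, inspectable metrics. A minimal sketch of exercising it directly, assuming an implicit Clock[IO] such as SpecHelpers.ioClock:

```scala
import cats.implicits._
import cats.effect.IO
import com.snowplowanalytics.snowplow.enrich.fs2.test.Counter

val counted: IO[Counter] = for {
  ref    <- Counter.make[IO]
  metrics = Counter.mkCounterMetrics[IO](ref)
  _      <- metrics.rawCount *> metrics.goodCount
  state  <- ref.get
} yield state // Counter(1L, 1L, 0L, None)
```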
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import scala.concurrent.duration._ + +import cats.implicits._ + +import cats.effect.{Blocker, Fiber, IO, Resource} +import cats.effect.concurrent.Ref + +import io.circe.literal._ + +import fs2.Stream +import fs2.io.readInputStream + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +import org.http4s.HttpRoutes +import org.http4s.Method.GET +import org.http4s.server.blaze.BlazeServerBuilder +import org.http4s.dsl.io._ +import org.http4s.syntax.all._ + +import cats.effect.testing.specs2.CatsIO + +/** + * Embedded HTTP server for testing, used mostly for assets refresh, + * but it also serves the IAB, MaxMind and API-enrichment endpoints that the specs rely on + */ +object HttpServer extends CatsIO { + + private val logger: Logger[IO] = + Slf4jLogger.getLogger[IO] + + /** + * Set of testing routes: + * * Plain data + * * Imitating slow connection + * * Frequently updating resource + * * Sometimes non-working resource + * + * @param counter mutable variable with counter updated on every request + */ + def routes(counter: Ref[IO, Int]): HttpRoutes[IO] = + HttpRoutes + .of[IO] { + case r @ GET -> Root / "asset" => + logger.debug(r.pathInfo) *> Ok("data") + case r @ GET -> Root / "slow" => + val action = for { + i <- counter.updateAndGet(_ + 1) + _ <- if (i == 1) IO.sleep(100.milliseconds) else IO.sleep(10.seconds) + res <- Ok(s"slow data $i") + } yield res + logger.debug(r.pathInfo) *> action + case r @ GET -> Root / "counter" => + logger.debug(r.pathInfo) *> counter.updateAndGet(_ + 1).flatMap { i => + Ok(s"counter $i") + } + case r @ GET -> Root / "flaky" => + logger.debug(r.pathInfo) *> counter.update(_ + 1) *> + counter.get.flatMap { i => + val s = i.toString + if (i == 1 || i == 2) NotFound(s) + else if (i == 3) Ok(s) + else NotFound(s) + } + case GET -> Root / "maxmind" / "GeoIP2-City.mmdb" => + counter.updateAndGet(_ + 1).flatMap { i => + val is = readMaxMindDb(i) + Ok(Blocker[IO].use(b => readInputStream[IO](is, 256, b).compile.to(Array))) + } + case GET -> Root / "iab" / file => + counter.updateAndGet(_ + 1).flatMap { i => + file match { + case "include" => Ok("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0|1|1") + case "exclude" => Ok("") + case "ip" if i == 1 => Ok("175.16.199.0/32") + case "ip" => Ok("175.1.1.0/32") + case other => + println(s"Not Found ${other}") + NotFound(other) + } + } + case GET -> Root / "enrichment" / "api" / output => + counter.updateAndGet(_ + 1).flatMap { _ => + Ok(json"""{"output": $output}""".noSpaces) + } + } + + def run: Stream[IO, Unit] = + for { + counter <- Stream.eval(Ref.of[IO, Int](0)) + stream <- BlazeServerBuilder[IO](concurrent.ExecutionContext.global) + .bindHttp(8080) + .withHttpApp(routes(counter).orNotFound) + .withoutBanner + .withoutSsl + .serve + .void + } yield stream + + /** + * Run the HTTP server for some time and destroy it afterwards + * @param duration how long the server should be running; + * recommended: test stream duration + 1 second, + * especially if the assets stream is used + */ + def resource(duration: FiniteDuration): Resource[IO, Fiber[IO, Unit]] = + Resource.make { + run + .haltAfter(duration) + .compile + .drain + .start + .flatTap(_ => IO.sleep(500.millis) *> logger.info("Running test HttpServer")) + }(_.cancel *> logger.info("Destroyed test HttpServer")) + + private def readMaxMindDb(req: Int) = { + val path = + if (req < 4) s"/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-$req.mmdb" + else
s"/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb" + IO(getClass.getResourceAsStream(path)) + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/SchemaRegistry.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/SchemaRegistry.scala new file mode 100644 index 000000000..6604575a9 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/SchemaRegistry.scala @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import io.circe.Json +import io.circe.literal._ + +import com.snowplowanalytics.iglu.core.SelfDescribingSchema +import com.snowplowanalytics.iglu.core.circe.implicits._ + +/** + * In-memory test registry to avoid unnecessary HTTP and FS IO. All schemas used in [[TestEnvironment]] + * Iglu Client + */ +object SchemaRegistry { + val acmeTest: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "test", + "format": "jsonschema", + "version": "1-0-1" + }, + "properties": { + "path": { + "properties": { + "id": { + "type": "integer" + } + } + } + } + }""" + + val acmeOutput: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "output", + "format": "jsonschema", + "version": "1-0-0" + }, + "properties": { + "output": { + "type": "string" + } + } + }""" + + // Defined on Iglu Central + val unstructEvent: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "unstruct_event", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" + }, + "data": {} + }, + "required": ["schema", "data"], + "additionalProperties": false + }""" + + // Defined on Iglu Central + val contexts: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "contexts", + "format": "jsonschema", + "version": "1-0-1" + }, + "type": "array", + "items": { + "type": "object", + "properties": { + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" + }, + "data": {} + }, + "required": ["schema", "data"], + "additionalProperties": false + } + }""" + + // Defined on Iglu 
Central + val geolocationContext: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "geolocation_context", + "format": "jsonschema", + "version": "1-1-0" + }, + "type": "object", + "properties": { + "latitude": { "type": "number", "minimum": -90, "maximum": 90 }, + "longitude": { "type": "number", "minimum": -180, "maximum": 180 }, + "latitudeLongitudeAccuracy": { "type": ["number", "null"] }, + "altitude": { "type": ["number", "null"] }, + "altitudeAccuracy": { "type": ["number", "null"] }, + "bearing": { "type": ["number", "null"] }, + "speed": { "type": ["number", "null"] }, + "timestamp": { "type": ["integer", "null"] } + }, + "required": ["latitude", "longitude"], + "additionalProperties": false + }""" + + // Defined on Iglu Central + val iabAbdRobots: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.iab.snowplow", + "name": "spiders_and_robots", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "spiderOrRobot": {"type": "boolean" }, + "category": {"enum": ["SPIDER_OR_ROBOT", "ACTIVE_SPIDER_OR_ROBOT", "INACTIVE_SPIDER_OR_ROBOT", "BROWSER"]}, + "reason": {"enum": ["FAILED_IP_EXCLUDE", "FAILED_UA_INCLUDE", "FAILED_UA_EXCLUDE", "PASSED_ALL"]}, + "primaryImpact": {"enum": ["PAGE_IMPRESSIONS", "AD_IMPRESSIONS", "PAGE_AND_AD_IMPRESSIONS", "UNKNOWN", "NONE"]} + }, + "required": ["spiderOrRobot", "category", "reason", "primaryImpact"], + "additionalProperties": false + }""" + + val yauaaContext: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "nl.basjes", + "name": "yauaa_context", + "format": "jsonschema", + "version": "1-0-1" + }, + "type": "object", + "properties": { + "deviceClass": {"enum":["Desktop","Anonymized","Unknown","UNKNOWN","Mobile","Tablet","Phone","Watch","Virtual Reality","eReader","Set-top box","TV","Game Console","Handheld Game Console","Voice","Robot","Robot Mobile","Spy","Hacker"]}, + "deviceName": {"type":"string","maxLength": 100 }, + "deviceBrand": {"type":"string","maxLength": 50 }, + "deviceCpu": {"type":"string","maxLength": 50 }, + "deviceCpuBits": {"type":"string","maxLength": 20 }, + "deviceFirmwareVersion": {"type":"string","maxLength": 100 }, + "deviceVersion": {"type":"string","maxLength": 100 }, + "operatingSystemClass": {"enum":["Desktop","Mobile","Cloud","Embedded","Game Console","Hacker","Anonymized","Unknown"] }, + "operatingSystemName": {"type":"string","maxLength": 100 }, + "operatingSystemVersion": {"type":"string","maxLength": 50 }, + "operatingSystemNameVersion": {"type":"string","maxLength": 150 }, + "operatingSystemVersionBuild": {"type":"string","maxLength": 100 }, + "layoutEngineClass": {"enum":["Browser", "Mobile App", "Hacker", "Robot", "Unknown"] }, + "layoutEngineName": {"type":"string","maxLength": 100 }, + "layoutEngineVersion": {"type":"string","maxLength": 50 }, + "layoutEngineVersionMajor": {"type":"string","maxLength": 20 }, + "layoutEngineNameVersion": {"type":"string","maxLength": 150 }, + "layoutEngineNameVersionMajor": {"type":"string","maxLength": 120 }, + "layoutEngineBuild": {"type":"string","maxLength": 100 }, + "agentClass": {"enum":["Browser", "Browser Webview", "Mobile App", "Robot", 
"Robot Mobile", "Cloud Application", "Email Client", "Voice", "Special", "Testclient", "Hacker", "Unknown"] }, + "agentName": {"type":"string","maxLength": 100 }, + "agentVersion": {"type":"string","maxLength": 100 }, + "agentVersionMajor": {"type":"string","maxLength": 20 }, + "agentNameVersion": {"type":"string","maxLength": 200 }, + "agentNameVersionMajor": {"type":"string","maxLength": 120 }, + "agentBuild": {"type":"string","maxLength": 100 }, + "agentLanguage": {"type":"string","maxLength": 50 }, + "agentLanguageCode": {"type":"string","maxLength": 20 }, + "agentInformationEmail": {"type":"string","format": "email" }, + "agentInformationUrl": {"type":"string"}, + "agentSecurity": {"type":"string","enum":["Weak security", "Strong security", "Unknown", "Hacker"] }, + "agentUuid": {"type":"string"}, + "webviewAppName": {"type":"string"}, + "webviewAppVersion": {"type":"string"}, + "webviewAppVersionMajor": {"type":"string","maxLength":50}, + "webviewAppNameVersionMajor": {"type":"string","maxLength":50}, + "facebookCarrier": {"type":"string"}, + "facebookDeviceClass": {"type":"string","maxLength":1024}, + "facebookDeviceName": {"type":"string","maxLength":1024}, + "facebookDeviceVersion": {"type":"string"}, + "facebookFBOP": {"type":"string"}, + "facebookFBSS": {"type":"string"}, + "facebookOperatingSystemName": {"type":"string"}, + "facebookOperatingSystemVersion": {"type":"string"}, + "anonymized": {"type":"string"}, + "hackerAttackVector": {"type":"string"}, + "hackerToolkit": {"type":"string"}, + "koboAffiliate": {"type":"string"}, + "koboPlatformId": {"type":"string"}, + "iECompatibilityVersion": {"type":"string","maxLength":100}, + "iECompatibilityVersionMajor": {"type":"string","maxLength":50}, + "iECompatibilityNameVersion": {"type":"string","maxLength":50}, + "iECompatibilityNameVersionMajor": {"type":"string","maxLength":70}, + "carrier": {"type":"string"}, + "gSAInstallationID": {"type":"string"}, + "networkType": {"type":"string"}, + "operatingSystemNameVersionMajor": {"type":"string"}, + "operatingSystemVersionMajor": {"type":"string"} + }, + "required": ["deviceClass"], + "additionalProperties": false + }""" + + private[test] implicit def jsonToSchema(json: Json): SelfDescribingSchema[Json] = + SelfDescribingSchema.parse(json).getOrElse(throw new IllegalStateException("InMemory SchemaRegistry JSON cannot be parsed as schema")) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/TestEnvironment.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/TestEnvironment.scala new file mode 100644 index 000000000..1430a1f86 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/TestEnvironment.scala @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import java.nio.file.Paths + +import scala.concurrent.duration._ + +import cats.Monad +import cats.syntax.either._ + +import cats.effect.{Blocker, Concurrent, ContextShift, IO, Resource, Timer} +import cats.effect.concurrent.Ref + +import io.circe.Json + +import fs2.concurrent.Queue + +import com.snowplowanalytics.iglu.client.{CirceValidator, Client, Resolver} +import com.snowplowanalytics.iglu.client.resolver.registries.Registry + +import com.snowplowanalytics.snowplow.badrows.BadRow +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.fs2.{Assets, Enrich, EnrichSpec, Environment, Payload, RawSource} +import com.snowplowanalytics.snowplow.enrich.fs2.Environment.Enrichments +import com.snowplowanalytics.snowplow.enrich.fs2.SpecHelpers.{filesResource, ioClock} +import cats.effect.testing.specs2.CatsIO + +import com.snowplowanalytics.snowplow.analytics.scalasdk.Event + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +case class TestEnvironment( + env: Environment[IO], + counter: Ref[IO, Counter], + good: Queue[IO, Payload[IO, EnrichedEvent]], + bad: Queue[IO, Payload[IO, BadRow]] +) { + + /** + * Run all streams for 5 seconds and get the produced events as a list. + * Assets and metrics streams are empty by default and can be enabled + * by updating the default [[Environment]]. + * If the assets stream is enabled, the first events get enriched with assets + * downloaded by [[Assets.State.make]], not by [[Assets.run]] + * @param updateEnv function to update an environment created by + * [[TestEnvironment.make]] + */ + def run( + updateEnv: Environment[IO] => Environment[IO] = identity + )( + implicit C: Concurrent[IO], + CS: ContextShift[IO], + T: Timer[IO] + ): IO[List[Either[BadRow, Event]]] = { + val updatedEnv = updateEnv(env) + + val pauses = updatedEnv.pauseEnrich.discrete.evalMap(p => TestEnvironment.logger.info(s"Pause signal is $p")) + val stream = Enrich.run[IO](updatedEnv).merge(Assets.run[IO](updatedEnv)).merge(pauses) + bad.dequeue + .either(good.dequeue) + .concurrently(stream) + .haltAfter(5.seconds) + .compile + .toList + .map { rows => + rows.map(_.fold(_.data.asLeft, event => EnrichSpec.normalize(event).toEither)) + } + } +} + +object TestEnvironment extends CatsIO { + + val logger: Logger[IO] = + Slf4jLogger.getLogger[IO] + + val enrichmentReg: EnrichmentRegistry[IO] = + EnrichmentRegistry[IO]() + val enrichments: Environment.Enrichments[IO] = + Environment.Enrichments(enrichmentReg, Nil) + + val ioBlocker: Resource[IO, Blocker] = Blocker[IO] + + val embeddedRegistry = + Registry.InMemory( + Registry.Config("fs2-enrich embedded test registry", 1, List("com.acme")), + List( + SchemaRegistry.unstructEvent, + SchemaRegistry.contexts, + SchemaRegistry.geolocationContext, + SchemaRegistry.iabAbdRobots, + SchemaRegistry.yauaaContext, + SchemaRegistry.acmeTest, + SchemaRegistry.acmeOutput + ) + ) + val igluClient: Client[IO, Json] = + Client[IO, Json](Resolver(List(embeddedRegistry), None), CirceValidator) + + /** + * A dummy test environment without enrichments and with noop sinks and sources. + * One can replace the stream and sinks via `.copy` + */ + def make(source: RawSource[IO], enrichments: List[EnrichmentConf] = Nil): Resource[IO, TestEnvironment] =
for { + blocker <- ioBlocker + _ <- filesResource(blocker, enrichments.flatMap(_.filesToCache).map(p => Paths.get(p._2))) + counter <- Resource.liftF(Counter.make[IO]) + goodQueue <- Resource.liftF(Queue.unbounded[IO, Payload[IO, EnrichedEvent]]) + badQueue <- Resource.liftF(Queue.unbounded[IO, Payload[IO, BadRow]]) + metrics = Counter.mkCounterMetrics[IO](counter)(Monad[IO], ioClock) + pauseEnrich <- Environment.makePause[IO] + assets <- Assets.State.make(blocker, pauseEnrich, enrichments.flatMap(_.filesToCache)) + _ <- Resource.liftF(logger.info("AssetsState initialized")) + enrichmentsRef <- Enrichments.make[IO](enrichments) + environment = Environment[IO](igluClient, + enrichmentsRef, + pauseEnrich, + assets, + blocker, + source, + goodQueue.enqueue, + badQueue.enqueue, + None, + metrics, + None, + None + ) + _ <- Resource.liftF(pauseEnrich.set(false) *> logger.info("TestEnvironment initialized")) + } yield TestEnvironment(environment, counter, goodQueue, badQueue) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/package.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/package.scala new file mode 100644 index 000000000..4ae834cdc --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/package.scala @@ -0,0 +1,24 @@ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import scala.concurrent.duration.FiniteDuration + +import cats.effect.{Concurrent, IO, Timer} + +import _root_.fs2.Stream + +package object test { + + implicit class StreamOps[F[_], A](s: Stream[F, A]) { + + /** Halting a stream after specified period of time */ + def haltAfter(after: FiniteDuration)(implicit T: Timer[F], C: Concurrent[F]): Stream[F, A] = + Stream.eval_(Timer[F].sleep(after)).mergeHaltL(s) + } + + implicit class StreamIoOps[A](s: Stream[IO, A]) { + + /** Run test [[HttpServer]] in parallel with the stream */ + def withHttp(implicit C: Concurrent[IO]): Stream[IO, A] = + s.concurrently(HttpServer.run) + } +} diff --git a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaIntegrationSpec.scala b/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaIntegrationSpec.scala deleted file mode 100644 index 08eaaf015..000000000 --- a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaIntegrationSpec.scala +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2013-2020 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. 
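haltAfter in the package object above is what bounds every test stream: it merges the source with a sleep and halts when the sleep, the left side of mergeHaltL, completes. A small sketch, assuming the implicit Timer and Concurrent for IO that CatsIO provides:

```scala
import scala.concurrent.duration._
import cats.effect.IO
import fs2.Stream
import com.snowplowanalytics.snowplow.enrich.fs2.test._

// an otherwise infinite tick stream, cut off after one second
val bounded = Stream.awakeEvery[IO](100.millis).haltAfter(1.second)
```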
- */ - -package com.snowplowanalytics.snowplow.enrich.stream - -import java.time.{Duration => JDuration} -import java.util.Properties -import java.util.concurrent.ForkJoinPool - -import scala.concurrent._ -import scala.concurrent.duration.Duration -import scala.util.Try -import scala.collection.JavaConverters._ - -import cats.Id -import com.snowplowanalytics.iglu.client.Client -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer} -import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} -import org.specs2.matcher.{TraversableMatchers, TryMatchers} -import io.circe.Json - -import model.StreamsConfig - -/* - * Extending this trait creates a new integration test with a new instance of kafka - * See PiiEmitSpec for an example of how to use it - */ -trait KafkaIntegrationSpec extends TryMatchers with TraversableMatchers { - - import KafkaIntegrationSpecValues._ - implicit val ec = ExecutionContext.fromExecutor(new ForkJoinPool(16)) - val kafkaTopics = Set(testGoodIn, testGood, testBad, testPii) - - def expectedGood: Int - def expectedBad: Int - def expectedPii: Int - - def inputGood: List[Array[Byte]] - - def getMainApplicationFuture( - configuration: StreamsConfig, - client: Client[Id, Json], - adapterRegistry: AdapterRegistry, - registry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]] - ): Future[Unit] = - Future { - val p = Processor("test", "1.0.0") - KafkaEnrich - .getSource(configuration, None, client, adapterRegistry, registry, tracker, p) - .toOption - .get - .run() - } - - def producerTimeoutSec: Int - def inputProduced(address: String): Try[Unit] = - Try(Await.result(produce(address: String), Duration(s"$producerTimeoutSec sec"))) - def testKafkaPropertiesProducer(address: String) = { - val props = new Properties() - props.put("bootstrap.servers", address) - props.put("client.id", "producer-george") - props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") - props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") - props - } - def produce(address: String): Future[Unit] = - Future { - val testProducer = new KafkaProducer[String, Array[Byte]](testKafkaPropertiesProducer(address)) - val events = inputGood - events.foreach { r => - testProducer.send(new ProducerRecord(testGoodIn, "key", r)) - } - testProducer.flush - testProducer.close - } - private def getListOfRecords(cr: ConsumerRecords[String, String]): List[String] = - cr.asScala.map(_.value).toList - - val POLL_TIME_MSEC = 100L - - def getRecords( - topic: String, - expectedRecords: Int, - timeoutSec: Int, - address: String - ): Future[List[String]] = - Future { - val started = System.currentTimeMillis - val testKafkaPropertiesConsumer = { - val props = new Properties() - props.put("bootstrap.servers", address) - props.put("auto.offset.reset", "earliest") - props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") - props - .put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") - props.put("group.id", s"consumer-$topic") - props - } - val testConsumerPii = new KafkaConsumer[String, String](testKafkaPropertiesConsumer) - testConsumerPii.subscribe(List(topic).asJava) - var records = 
getListOfRecords(testConsumerPii.poll(JDuration.ofMillis(POLL_TIME_MSEC))) - while (((System.currentTimeMillis - started) / 1000 < timeoutSec - 1) && records.size < expectedRecords) - records = records ++ getListOfRecords( - testConsumerPii.poll(JDuration.ofMillis(POLL_TIME_MSEC)) - ) - testConsumerPii.close() - records - } - - def consumerExecutionTimeoutSec: Int - def producedBadRecords(address: String): Future[List[String]] = - getRecords(testBad, expectedBad, consumerExecutionTimeoutSec, address) - def producedGoodRecords(address: String): Future[List[String]] = - getRecords(testGood, expectedGood, consumerExecutionTimeoutSec, address) - def producedPiiRecords(address: String): Future[List[String]] = - getRecords(testPii, expectedPii, consumerExecutionTimeoutSec, address) - def allResults(address: String): Future[(List[String], List[String], List[String])] = - for { - good <- producedGoodRecords(address) - bad <- producedBadRecords(address) - pii <- producedPiiRecords(address) - } yield (good, bad, pii) - -} - -object KafkaIntegrationSpecValues { - val (testGoodIn, testGood, testBad, testPii) = - ("testGoodIn", "testEnrichedGood", "testEnrichedBad", "testEnrichedUglyPii") -} diff --git a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala b/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala deleted file mode 100644 index 6ae18b881..000000000 --- a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2013-2020 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. 
diff --git a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala b/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala
deleted file mode 100644
index 6ae18b881..000000000
--- a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2013-2020 Snowplow Analytics Ltd. All rights reserved.
- *
- * This program is licensed to you under the Apache License Version 2.0, and
- * you may not use this file except in compliance with the Apache License
- * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
- * http://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Apache License Version 2.0 is distributed on an "AS
- * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the Apache License Version 2.0 for the specific language
- * governing permissions and limitations there under.
- */
-
-package com.snowplowanalytics.snowplow.enrich.stream
-
-import java.io.File
-import java.net.InetSocketAddress
-import java.util.Properties
-
-import scala.collection.JavaConverters._
-import scala.util.Random
-
-import kafka.server.{KafkaConfig, KafkaServerStartable}
-import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer}
-import org.apache.kafka.clients.admin.AdminClient
-import org.apache.kafka.clients.admin.NewTopic
-
-class KafkaTestUtils {
-  // zk
-  private val zkHost = "localhost"
-  private val zkPort = 2181
-  private var zk: EmbeddedZookeeper = _
-  private var zkReady = false
-
-  // kafka
-  private val brokerHost = "localhost"
-  private val brokerPort = 9092
-  private var kafkaServer: KafkaServerStartable = _
-  private var adminClient: AdminClient = _
-  private var topicCountMap = Map.empty[String, Int]
-  private var brokerReady = false
-
-  /** Zookeeper address */
-  def zkAddress: String = {
-    assert(zkReady, "Zk not ready, cannot get address")
-    s"$zkHost:$zkPort"
-  }
-
-  /** Kafka broker address */
-  def brokerAddress: String = {
-    assert(brokerReady, "Broker not ready, cannot get address")
-    s"$brokerHost:$brokerPort"
-  }
-
-  /** Start the Zookeeper and Kafka servers */
-  def setup(): Unit = {
-    setupEmbeddedZookeeper()
-    setupEmbeddedKafkaServer()
-  }
-
-  private def setupEmbeddedZookeeper(): Unit = {
-    zk = new EmbeddedZookeeper(zkHost, zkPort)
-    zkReady = true
-  }
-
-  private def setupEmbeddedKafkaServer(): Unit = {
-    assert(zkReady, "Zk should be setup beforehand")
-    val kafkaConfig = new KafkaConfig(brokerProps)
-    kafkaServer = new KafkaServerStartable(kafkaConfig)
-    kafkaServer.startup()
-    brokerReady = true
-    val adminProps = {
-      val props = new Properties()
-      props.put("bootstrap.servers", brokerAddress)
-      props
-    }
-    adminClient = AdminClient.create(adminProps)
-  }
-
-  /** Close the Kafka as well as the Zookeeper client and server */
-  def tearDown(): Unit = {
-    brokerReady = false
-    zkReady = false
-
-    if (adminClient != null) {
-      adminClient.close()
-      adminClient = null
-    }
-
-    if (kafkaServer != null) {
-      kafkaServer.shutdown()
-      kafkaServer = null
-    }
-
-    if (zk != null) {
-      zk.shutdown()
-      zk = null
-    }
-
-    topicCountMap = Map.empty
-  }
-
-  /** Create one or more topics */
-  @scala.annotation.varargs
-  def createTopics(topics: String*): Unit =
-    for (topic <- topics) {
-      adminClient.createTopics(List(new NewTopic(topic, 1, 1)).asJava)
-      Thread.sleep(1000)
-      topicCountMap = topicCountMap + (topic -> 1)
-    }
-
-  private def brokerProps: Properties = {
-    val props = new Properties
-    props.put("broker.id", "0")
-    props.put("host.name", brokerHost)
-    props.put("offsets.topic.replication.factor", "1")
-    props.put(
-      "log.dir", {
-        val dir = System.getProperty("java.io.tmpdir") +
-          "/logDir-" + new Random().nextInt(Int.MaxValue)
-        val f = new File(dir)
-        f.mkdirs()
-        dir
-      }
-    )
-    props.put("port", brokerPort.toString)
-    props.put("zookeeper.connect", zkAddress)
-    props.put("zookeeper.connection.timeout.ms", "10000")
-    props
-  }
-
-  private class EmbeddedZookeeper(hostname: String, port: Int) {
-    private val snapshotDir = {
-      val f = new File(
-        System.getProperty("java.io.tmpdir"),
-        "snapshotDir-" + Random.nextInt(Int.MaxValue)
-      )
-      f.mkdirs()
-      f
-    }
-    private val logDir = {
-      val f =
-        new File(System.getProperty("java.io.tmpdir"), "logDir-" + Random.nextInt(Int.MaxValue))
-      f.mkdirs()
-      f
-    }
-
-    private val factory = {
-      val zkTickTime = 500
-      val zk = new ZooKeeperServer(snapshotDir, logDir, zkTickTime)
-      val f = new NIOServerCnxnFactory
-      val maxCnxn = 16
-      f.configure(new InetSocketAddress(hostname, port), maxCnxn)
-      f.startup(zk)
-      f
-    }
-
-    def shutdown(): Unit = {
-      factory.shutdown()
-      snapshotDir.delete()
-      logDir.delete()
-      ()
-    }
-  }
-}
diff --git a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/PiiEmitSpec.scala b/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/PiiEmitSpec.scala
deleted file mode 100644
index d058015a7..000000000
--- a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/PiiEmitSpec.scala
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2013-2020 Snowplow Analytics Ltd. All rights reserved.
- *
- * This program is licensed to you under the Apache License Version 2.0, and
- * you may not use this file except in compliance with the Apache License
- * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
- * http://www.apache.org/licenses/LICENSE-2.0.
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the Apache License Version 2.0 is distributed on an "AS
- * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the Apache License Version 2.0 for the specific language
- * governing permissions and limitations there under.
- */
-package com.snowplowanalytics.snowplow.enrich.stream
-
-import java.util.regex.Pattern
-import java.util.concurrent.TimeUnit
-
-import scala.util.{Failure, Success, Try}
-import scala.collection.JavaConverters._
-import scala.concurrent.duration.FiniteDuration
-import scala.io.Source
-import com.hubspot.jinjava.Jinjava
-import com.typesafe.config.ConfigFactory
-import org.apache.commons.codec.binary.Base64
-import org.specs2.concurrent.ExecutionEnv
-import org.specs2.mutable.Specification
-import org.specs2.matcher.{FutureMatchers, Matcher}
-import org.specs2.specification.BeforeAfterAll
-import pureconfig._
-import pureconfig.generic.auto._
-import pureconfig.generic.{FieldCoproductHint, ProductHint}
-import good._
-import model.{StreamsConfig, TargetPlatformConfig}
-
-class PiiEmitSpec(implicit ee: ExecutionEnv) extends Specification with FutureMatchers with KafkaIntegrationSpec with BeforeAfterAll {
-
-  var ktu: KafkaTestUtils = _
-  override def beforeAll(): Unit = {
-    ktu = new KafkaTestUtils
-    ktu.setup()
-    ktu.createTopics(kafkaTopics.toList: _*)
-  }
-  override def afterAll(): Unit =
-    if (ktu != null)
-      ktu = null
-
-  import KafkaIntegrationSpecValues._
-
-  def configValues =
-    Map(
-      "sinkType" -> "kafka",
-      "streamsInRaw" -> s"$testGoodIn",
-      "outEnriched" -> s"$testGood",
-      "outPii" -> s"$testPii",
-      "outBad" -> s"$testBad",
-      "partitionKeyName" -> "\"\"",
-      "kafkaBrokers" -> ktu.brokerAddress,
-      "bufferTimeThreshold" -> "1",
-      "bufferRecordThreshold" -> "1",
-      "bufferByteThreshold" -> "100000",
-      "enrichAppName" -> "Jim",
-      "enrichStreamsOutMaxBackoff" -> "1000",
-      "enrichStreamsOutMinBackoff" -> "1000",
-      "appName" -> "jim"
-    )
-
-  def config: String =
-    Try {
-      val configRes = getClass.getResourceAsStream("/config.hocon.sample")
-      Source.fromInputStream(configRes).getLines.mkString("\n")
-    } match {
-      case Failure(t) =>
-        println(s"Unable to get config.hocon.sample: $t"); throw new Exception(t)
-      case Success(s) => s
-    }
-
-  def configInstance: String = {
-    val jinJava = new Jinjava()
-    jinJava.render(config, configValues.asJava)
-  }
-
-  private def decode(s: String): Array[Byte] = Base64.decodeBase64(s)
-
-  // Input
-  override val inputGood = List(
-    decode(PagePingWithContextSpec.raw),
-    decode(PageViewWithContextSpec.raw),
-    decode(StructEventSpec.raw),
-    decode(StructEventWithContextSpec.raw),
-    decode(TransactionItemSpec.raw),
-    decode(TransactionSpec.raw)
-  )
-  // Expected output counts
-  override val (expectedGood, expectedBad, expectedPii) = (inputGood.size, 0, inputGood.size)
-
-  // Timeout for the producer
-  override val producerTimeoutSec = 5
-
-  // Timeout for all the consumers (good, bad, and pii) (running in parallel)
-  // You may want to adjust this if you are doing lots of slow work in the app
-  // Ordinarily the consumers return in less than 1 sec
-  override val consumerExecutionTimeoutSec = 15
-
-  implicit def hint[T]: ProductHint[T] =
-    ProductHint[T](ConfigFieldMapping(CamelCase, CamelCase))
-  implicit val _: FieldCoproductHint[TargetPlatformConfig] =
-    new FieldCoproductHint[TargetPlatformConfig]("enabled")
-
-  "Pii" should {
-    "emit all events" in {
-
-      val parsedConfig = ConfigFactory.parseString(configInstance).resolve()
-      val configObject = Try {
-        loadConfigOrThrow[StreamsConfig](parsedConfig.getConfig("enrich.streams"))
-      }
-      configObject aka "enrichment config loading" must not beAFailedTry
-
-      getMainApplicationFuture(
-        configObject.get,
-        SpecHelpers.client,
-        SpecHelpers.adapterRegistry,
-        SpecHelpers.enrichmentRegistry,
-        None
-      )
-      inputProduced(ktu.brokerAddress) aka "sending input" must beSuccessfulTry
-
-      def spaceJoinResult(expected: List[StringOrRegex]) =
-        expected
-          .flatMap({
-            case JustRegex(r) => Some(r.toString)
-            case JustString(s) if s.nonEmpty => Some(Pattern.quote(s))
-            case _ => None
-          })
-          .mkString("\\s*")
-
-      val expectedMatcher: Matcher[(List[String], List[String], List[String])] = beLike {
-        case (good: List[String], bad: List[String], pii: List[String]) =>
-          bad aka "bad result list" must have size expectedBad
-          pii aka "pii result list" must have size expectedPii
-          good aka "good result list" must have size expectedGood
-          good aka "good result list" must containMatch(
-            spaceJoinResult(PagePingWithContextSpec.expected)
-          )
-          pii aka "pii result list" must containMatch(spaceJoinResult(PagePingWithContextSpec.pii))
-          good aka "good result list" must containMatch(
-            spaceJoinResult(PageViewWithContextSpec.expected)
-          )
-          pii aka "pii result list" must containMatch(spaceJoinResult(PageViewWithContextSpec.pii))
-          good aka "good result list" must containMatch(spaceJoinResult(StructEventSpec.expected))
-          pii aka "pii result list" must containMatch(spaceJoinResult(StructEventSpec.pii))
-          good aka "good result list" must containMatch(
-            spaceJoinResult(StructEventWithContextSpec.expected)
-          )
-          pii aka "pii result list" must containMatch(
-            spaceJoinResult(StructEventWithContextSpec.pii)
-          )
-          good aka "good result list" must containMatch(
-            spaceJoinResult(TransactionItemSpec.expected)
-          )
-          pii aka "pii result list" must containMatch(spaceJoinResult(TransactionItemSpec.pii))
-          good aka "good result list" must containMatch(spaceJoinResult(TransactionSpec.expected))
-          pii aka "pii result list" must containMatch(spaceJoinResult(TransactionSpec.pii))
-      }
-      allResults(ktu.brokerAddress) must expectedMatcher.await(
-        retries = 0,
-        timeout = FiniteDuration(consumerExecutionTimeoutSec.toLong, TimeUnit.SECONDS)
-      )
-    }
-  }
-}
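The deleted PiiEmitSpec renders config.hocon.sample through Jinjava, substituting the configValues map into {{ placeholder }} slots before handing the result to pureconfig. A self-contained sketch of that templating step, where the template string and key are illustrative rather than the real config.hocon.sample:

import scala.collection.JavaConverters._
import com.hubspot.jinjava.Jinjava

object RenderConfigSketch {
  def main(args: Array[String]): Unit = {
    // Same technique as PiiEmitSpec.configInstance: fill a HOCON template
    // with per-test values, then parse the rendered string downstream.
    val template = """enrich { streams { in { raw = "{{ streamsInRaw }}" } } }"""
    val rendered = new Jinjava().render(template, Map("streamsInRaw" -> "testGoodIn").asJava)
    println(rendered) // enrich { streams { in { raw = "testGoodIn" } } }
  }
}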
diff --git a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala
index 889911594..b3175b060 100644
--- a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala
+++ b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala
@@ -22,8 +22,6 @@ package sinks
 
 import java.util.Properties
 
-import scala.collection.JavaConverters._
-
 import cats.syntax.either._
 import org.apache.kafka.clients.producer._
 
@@ -41,7 +39,7 @@ object KafkaSink {
    */
   private def createProducer(kafkaConfig: Kafka, bufferConfig: BufferConfig): KafkaProducer[String, String] = {
     val properties = createProperties(kafkaConfig, bufferConfig)
-    properties.putAll(kafkaConfig.producerConf.getOrElse(Map()).asJava)
+    kafkaConfig.producerConf.getOrElse(Map()).foreach { case (k, v) => properties.setProperty(k, v) }
     new KafkaProducer[String, String](properties)
   }
 
diff --git a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala
index 1f6fe48d7..214754dfe 100644
--- a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala
+++ b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala
@@ -133,7 +133,7 @@ class KafkaSource private (
 
   private def createConsumer(brokers: String, groupId: String): KafkaConsumer[String, Array[Byte]] = {
     val properties = createProperties(brokers, groupId)
-    properties.putAll(kafkaConfig.consumerConf.getOrElse(Map()).asJava)
+    kafkaConfig.consumerConf.getOrElse(Map()).foreach { case (k, v) => properties.setProperty(k, v) }
     new KafkaConsumer[String, Array[Byte]](properties)
   }
 
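Both Kafka hunks above swap properties.putAll(map.asJava) for an explicit setProperty loop. The diff does not say why, but a likely motivation is the JDK 11 move elsewhere in this change set: on JDK 9+ java.util.Properties overrides putAll, which Scala 2.12 can report as an ambiguous overload, while setProperty(String, String) also avoids the JavaConverters import. A standalone sketch of the pattern (the helper name is mine, not from the codebase):

import java.util.Properties

object PropertiesCompat {
  /** Copy a Scala Map into java.util.Properties without JavaConverters. */
  def copyInto(props: Properties, conf: Map[String, String]): Properties = {
    // setProperty is typed (String, String), so no Object-typed Hashtable
    // overload is involved and every value lands in the Properties table.
    conf.foreach { case (k, v) => props.setProperty(k, v) }
    props
  }

  def main(args: Array[String]): Unit = {
    val props = copyInto(new Properties(), Map("acks" -> "all", "retries" -> "3"))
    println(props.getProperty("acks")) // prints "all"
  }
}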
diff --git a/modules/stream/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb b/modules/stream/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb
deleted file mode 100644
index ab8b82d2d..000000000
Binary files a/modules/stream/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb and /dev/null differ
diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala
index 21f92c32b..78a3ee7b9 100644
--- a/project/BuildSettings.scala
+++ b/project/BuildSettings.scala
@@ -36,9 +36,10 @@ object BuildSettings {
   lazy val basicSettings = Seq(
     organization := "com.snowplowanalytics",
     scalaVersion := "2.12.11",
-    version := "1.3.2",
-    javacOptions := Seq("-source", "1.8", "-target", "1.8"),
-    resolvers ++= Dependencies.resolutionRepos
+    version := "1.4.0",
+    javacOptions := Seq("-source", "11", "-target", "11"),
+    resolvers ++= Dependencies.resolutionRepos,
+    licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")),
   )
 
   /** Custom sbt-buildinfo replacement, used by SCE only */
@@ -62,7 +63,6 @@ object BuildSettings {
     publishMavenStyle := true,
     publishArtifact := true,
    publishArtifact in Test := false,
-    licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")),
     bintrayOrganization := Some("snowplow"),
     bintrayRepository := "snowplow-maven",
     pomIncludeRepository := { _ => false },
@@ -82,7 +82,7 @@ object BuildSettings {
 
   lazy val formatting = Seq(
     scalafmtConfig := file(".scalafmt.conf"),
-    scalafmtOnCompile := true
+    scalafmtOnCompile := false
   )
 
   lazy val scoverageSettings = Seq(
@@ -102,6 +102,9 @@ object BuildSettings {
   lazy val sbtAssemblySettings = Seq(
     assemblyJarName in assembly := { s"${moduleName.value}-${version.value}.jar" },
     assemblyMergeStrategy in assembly := {
+      case x if x.endsWith("native-image.properties") => MergeStrategy.first
+      case x if x.endsWith("io.netty.versions.properties") => MergeStrategy.first
+      case x if x.endsWith("public-suffix-list.txt") => MergeStrategy.first
       case x if x.endsWith("ProjectSettings$.class") => MergeStrategy.first
       case x if x.endsWith("module-info.class") => MergeStrategy.first
       case x =>
@@ -120,7 +123,7 @@ object BuildSettings {
   /** Docker settings, used by SE */
   lazy val dockerSettings = Seq(
     maintainer in Docker := "Snowplow Analytics Ltd. ",
-    dockerBaseImage := "snowplow-docker-registry.bintray.io/snowplow/base-debian:0.1.0",
+    dockerBaseImage := "snowplow-docker-registry.bintray.io/snowplow/base-debian:0.2.1",
     daemonUser in Docker := "snowplow",
     dockerUpdateLatest := true,
     dockerVersion := Some(DockerVersion(18, 9, 0, Some("ce"))),
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 3ce07ddc5..782ca655b 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -34,25 +34,27 @@ object Dependencies {
     val jodaTime = "2.10.1"
     val useragent = "1.21"
     val uaParser = "1.4.3"
-    val postgresDriver = "42.2.5"
+    val postgresDriver = "42.2.16"
     val mysqlConnector = "8.0.16"
     val jaywayJsonpath = "2.4.0"
     val iabClient = "0.2.0"
-    val yauaa = "5.8"
+    val yauaa = "5.19"
     val guava = "28.1-jre"
     val slf4j = "1.7.26"
+    val log4j = "2.13.3"
 
-    val refererParser = "1.0.0"
-    val maxmindIplookups = "0.6.1"
-    val circe = "0.11.1"
-    val circeOptics = "0.11.0"
-    val circeJackson = "0.11.1"
-    val scalaForex = "0.7.0"
-    val scalaWeather = "0.5.0"
+    val refererParser = "1.1.0"
+    val maxmindIplookups = "0.7.1"
+    val circe = "0.13.0"
+    val circeOptics = "0.13.0"
+    val circeConfig = "0.7.0"
+    val circeJackson = "0.13.0"
+    val scalaForex = "1.0.0"
+    val scalaWeather = "1.0.0"
     val gatlingJsonpath = "0.6.14"
     val scalaUri = "1.4.5"
-    val scalaLruMap = "0.3.1"
-    val badRows = "1.0.0"
+    val badRows = "2.1.0"
+    val igluClient = "1.0.2"
 
     val snowplowRawEvent = "0.1.0"
     val collectorPayload = "0.0.0"
@@ -63,20 +65,32 @@ object Dependencies {
     val kinesisClient = "1.13.3"
     val kafka = "2.2.1"
     val nsqClient = "1.2.0"
-    val jackson = "2.9.9"
+    val jackson = "2.10.5"
     val config = "1.3.4"
+    val decline = "1.0.0"
+    val fs2 = "2.4.4"
+    val catsEffect = "2.2.0"
+    val fs2PubSub = "0.16.1"
+    val fs2BlobStorage = "0.7.3"
+    val http4s = "0.21.7"
+    val log4cats = "1.1.1"
+    val catsRetry = "1.1.1"
+    val metrics = "4.1.12.1"
+
     val scopt = "3.7.1"
     val pureconfig = "0.11.0"
+    val pureconfig013 = "0.13.0"
     val snowplowTracker = "0.6.1"
     val specs2 = "4.5.1"
+    val specs2CE = "0.4.1"
     val scalacheck = "1.14.0"
     val jinJava = "2.5.0"
     val sentry = "1.7.30"
-    val scio = "0.9.2"
-    val beam = "2.22.0"
+    val scio = "0.9.3"
+    val beam = "2.23.0"
     val macros = "2.1.1"
     val scalaTest = "3.0.8"
   }
@@ -92,22 +106,27 @@ object Dependencies {
     val jaywayJsonpath = "com.jayway.jsonpath" % "json-path" % V.jaywayJsonpath
     val yauaa = "nl.basjes.parse.useragent" % "yauaa" % V.yauaa
     val guava = "com.google.guava" % "guava" % V.guava
+    val log4j = "org.apache.logging.log4j" % "log4j-core" % V.log4j
+    val log4jApi = "org.apache.logging.log4j" % "log4j-api" % V.log4j
     val circeCore = "io.circe" %% "circe-core" % V.circe
     val circeGeneric = "io.circe" %% "circe-generic" % V.circe
+    val circeExtras = "io.circe" %% "circe-generic-extras" % V.circe
     val circeParser = "io.circe" %% "circe-parser" % V.circe
     val circeLiteral = "io.circe" %% "circe-literal" % V.circe
     val circeJava8 = "io.circe" %% "circe-java8" % V.circe
+    val circeJawn = "io.circe" %% "circe-jawn" % V.circe
+    val circeConfig = "io.circe" %% "circe-config" % V.circeConfig
     val circeOptics = "io.circe" %% "circe-optics" % V.circeOptics
-    val circeJackson = "io.circe" %% "circe-jackson29" % V.circeJackson
+    val circeJackson = "io.circe" %% "circe-jackson210" % V.circeJackson
     val scalaUri = "io.lemonlabs" %% "scala-uri" % V.scalaUri
     val gatlingJsonpath = "io.gatling" %% "jsonpath" % V.gatlingJsonpath
     val scalaForex = "com.snowplowanalytics" %% "scala-forex" % V.scalaForex
     val refererParser = "com.snowplowanalytics" %% "scala-referer-parser" % V.refererParser
     val maxmindIplookups = "com.snowplowanalytics" %% "scala-maxmind-iplookups" % V.maxmindIplookups
     val scalaWeather = "com.snowplowanalytics" %% "scala-weather" % V.scalaWeather
-    val scalaLruMap = "com.snowplowanalytics" %% "scala-lru-map" % V.scalaLruMap
     val badRows = "com.snowplowanalytics" %% "snowplow-badrows" % V.badRows
+    val igluClient = "com.snowplowanalytics" %% "iglu-scala-client" % V.igluClient
     val snowplowRawEvent = "com.snowplowanalytics" % "snowplow-thrift-raw-event" % V.snowplowRawEvent
     val collectorPayload = "com.snowplowanalytics" % "collector-payload-1" % V.collectorPayload
     val schemaSniffer = "com.snowplowanalytics" % "schema-sniffer-1" % V.schemaSniffer
@@ -117,6 +136,7 @@
     val specs2Cats = "org.specs2" %% "specs2-cats" % V.specs2 % Test
     val specs2Scalacheck = "org.specs2" %% "specs2-scalacheck" % V.specs2 % Test
     val specs2Mock = "org.specs2" %% "specs2-mock" % V.specs2 % Test
+    val specs2CE = "com.codecommit" %% "cats-effect-testing-specs2" % V.specs2CE % Test
 
     // Beam
     val sentry = "io.sentry" % "sentry" % V.sentry
@@ -139,9 +159,26 @@
     val scopt = "com.github.scopt" %% "scopt" % V.scopt
     val pureconfig = "com.github.pureconfig" %% "pureconfig" % V.pureconfig
     val nsqClient = "com.snowplowanalytics" % "nsq-java-client" % V.nsqClient
+    val catsEffect = "org.typelevel" %% "cats-effect" % V.catsEffect
     val snowplowTracker = "com.snowplowanalytics" %% "snowplow-scala-tracker-emitter-id" % V.snowplowTracker
     val scalacheck = "org.scalacheck" %% "scalacheck" % V.scalacheck % Test
     val kafka = "org.apache.kafka" %% "kafka" % V.kafka % Test
     val jinJava = "com.hubspot.jinjava" % "jinjava" % V.jinJava % Test
+
+    // FS2
+    val decline = "com.monovore" %% "decline" % V.decline
+    val fs2PubSub = "com.permutive" %% "fs2-google-pubsub-grpc" % V.fs2PubSub
+    val fs2 = "co.fs2" %% "fs2-core" % V.fs2
+    val fs2Io = "co.fs2" %% "fs2-io" % V.fs2
+    val http4sClient = "org.http4s" %% "http4s-blaze-client" % V.http4s
+    val log4cats = "io.chrisdavenport" %% "log4cats-slf4j" % V.log4cats
+    val catsRetry = "com.github.cb372" %% "cats-retry" % V.catsRetry
+    val fs2BlobS3 = "com.github.fs2-blobstore" %% "s3" % V.fs2BlobStorage
+    val fs2BlobGcs = "com.github.fs2-blobstore" %% "gcs" % V.fs2BlobStorage
+    val pureconfigCats = "com.github.pureconfig" %% "pureconfig-cats-effect" % V.pureconfig
+    val pureconfigCirce = "com.github.pureconfig" %% "pureconfig-circe" % V.pureconfig
+    val metrics = "io.dropwizard.metrics" % "metrics-core" % V.metrics
+    val http4sDsl = "org.http4s" %% "http4s-dsl" % V.http4s % Test
+    val http4sServer = "org.http4s" %% "http4s-blaze-server" % V.http4s % Test
  }
 }
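Dependencies.scala now wires in a whole FS2 stack (decline, fs2, cats-effect, http4s, log4cats, cats-retry) for the new fs2 module. For orientation only, this is roughly how those pieces compose in a cats-effect-2-era app; the flag name, event values and printlns below are invented for the sketch and are not the module's real entry point:

import cats.effect.{ExitCode, IO, IOApp}
import com.monovore.decline.{Command, Opts}
import fs2.Stream

object EnrichFs2Sketch extends IOApp {
  // Hypothetical CLI: decline parses --config, fs2 drives the event loop.
  private val configOpt = Opts.option[String]("config", "Path to a HOCON config file").orNone

  private val command = Command("enrich-fs2", "Sketch of a decline + fs2 pipeline")(configOpt)

  def run(args: List[String]): IO[ExitCode] =
    command.parse(args) match {
      case Left(help) =>
        IO(System.err.println(help)).as(ExitCode.Error)
      case Right(config) =>
        Stream
          .emits(List("raw-event-1", "raw-event-2")) // stand-in for a real source
          .evalMap(e => IO(println(s"enriched $e (config = $config)")))
          .compile
          .drain
          .as(ExitCode.Success)
    }
}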
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.1") addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.10.0-RC1") +addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.0") \ No newline at end of file