From 501c9bd6f35d36cddba038430c457603d5df2de3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Indykiewicz?= Date: Wed, 12 Aug 2020 22:08:14 +0200 Subject: [PATCH 01/38] Common: add tests for Input for SQL enrichment (close #316) --- .../registry/sqlquery/InputSpec.scala | 71 +++++++++++++++++-- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala index fb299c5da..ccfa2f559 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/InputSpec.scala @@ -10,21 +10,21 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry.sqlquery +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery import scala.collection.immutable.IntMap +import io.circe.DecodingFailure import io.circe.literal._ import io.circe.parser._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers -import outputs.EnrichedEvent - class InputSpec extends Specification with ValidatedMatchers { def is = s2""" create template context from POJO inputs $e1 @@ -39,6 +39,11 @@ class InputSpec extends Specification with ValidatedMatchers { check all EnrichedEvent properties can be handled $e10 extract correct path-dependent values from EnrichedEvent $e11 getBySchemaCriterion should return a data payload $e12 + inputsToIntmap assigns inputs to proper positions $e13 + JSON decoding when missing placeholders $e14 + JSON decoding when placeholder number is lt 1 $e15 + JSON decoding when both pojo and json is provided $e16 + JSON decoding when neither pojo nor json is provided $e17 """ object ContextCase { @@ -302,13 +307,17 @@ class InputSpec extends Specification with ValidatedMatchers { val jsonBool = Input.extractFromJson(json"true") val jsonBigInt = Input.extractFromJson(parse((java.lang.Long.MAX_VALUE - 1).toString).toOption.get) + val jsonDouble = Input.extractFromJson(json"12.6") + val jsonArray = Input.extractFromJson(json"[4,8,16]") val o = jsonObject must beNone val n = jsonNull must beNone val b = jsonBool must beSome(Input.BooleanPlaceholder.Value(true)) val l = jsonBigInt must beSome(Input.LongPlaceholder.Value(java.lang.Long.MAX_VALUE - 1)) + val d = jsonDouble must beSome(Input.DoublePlaceholder.Value(12.6)) + val a = jsonArray must beNone - o.and(n).and(b).and(l) + o.and(n).and(b).and(l).and(d).and(a) } def 
e11 = { @@ -344,4 +353,56 @@ class InputSpec extends Specification with ValidatedMatchers { result must beSome(ContextCase.overriderContext.data) } + + def e13 = { + val result = Input.inputsToIntmap(List(ContextCase.ccInput, ContextCase.derInput)) + result ==== IntMap(1 -> ContextCase.ccInput, 2 -> ContextCase.derInput) + } + + def e14 = { + val in = + json"""{ + "placeholder_wrong": 1, + "pojo": { + "field": "user_id" + } + }""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Placeholder is missing", Nil)) + } + + def e15 = { + val in = + json"""{ + "placeholder": 0, + "pojo": { + "field": "user_id" + } + }""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Placeholder must be greater than 1", Nil)) + } + + def e16 = { + val in = + json"""{ + "placeholder": 1, + "pojo": { + "field": "user_id" + }, + "json": { + "field": "derived_contexts", + "schemaCriterion": "iglu:org.openweathermap/weather/jsonschema/*-*-*", + "jsonPath": "$$.dt" + } + }""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Either json or pojo input must be specified, both provided", Nil)) + } + + def e17 = { + val in = json"""{"placeholder": 1}""" + val result = in.as[Input] + result must beLeft(DecodingFailure("Either json or pojo input must be specified", Nil)) + } } From 024e2c8ca6f998ac4eff63846f89da8f49ca7f8e Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Tue, 28 Jul 2020 21:07:20 +0300 Subject: [PATCH 02/38] Common: get rid of Eval instances (close #300) --- .../enrichments/registry/IabEnrichment.scala | 14 +--- .../registry/UaParserEnrichment.scala | 8 +- .../sqlquery/CreateSqlQueryEnrichment.scala | 24 +----- .../registry/sqlquery/DbExecutor.scala | 73 +------------------ .../common/utils/HttpClient.scala | 8 +- .../registry/IabEnrichmentSpec.scala | 25 +++---- .../RefererParserEnrichmentSpec.scala | 23 +++--- .../registry/UaParserEnrichmentSpec.scala | 16 ++-- .../registry/WeatherEnrichmentSpec.scala | 37 +++++----- 
.../utils/Clock.scala | 9 +-- 10 files changed, 59 insertions(+), 178 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala index f695777c4..0ef2ceffa 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala @@ -16,7 +16,7 @@ package enrichments.registry import java.io.File import java.net.{InetAddress, URI} -import cats.{Eval, Id, Monad} +import cats.{Id, Monad} import cats.data.{NonEmptyList, ValidatedNel} import cats.effect.Sync @@ -188,18 +188,6 @@ object CreateIabClient { } } - implicit def evalCreateIabClient: CreateIabClient[Eval] = - new CreateIabClient[Eval] { - def create( - ipFile: String, - excludeUaFile: String, - includeUaFile: String - ): Eval[IabClient] = - Eval.later { - new IabClient(new File(ipFile), new File(excludeUaFile), new File(includeUaFile)) - } - } - implicit def idCreateIabClient: CreateIabClient[Id] = new CreateIabClient[Id] { def create( diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala index a61d0b063..cb9931a8b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala @@ -15,7 +15,7 @@ package enrichments.registry import java.io.{FileInputStream, InputStream} import java.net.URI -import cats.{Eval, Id, Monad} +import cats.{Id, Monad} 
import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.effect.Sync import cats.implicits._ @@ -168,12 +168,6 @@ object CreateUaParser { Sync[F].delay(parser(uaFile)) } - implicit def evalCreateUaParser: CreateUaParser[Eval] = - new CreateUaParser[Eval] { - def create(uaFile: Option[String]): Eval[Either[String, Parser]] = - Eval.later(parser(uaFile)) - } - implicit def idCreateUaParser: CreateUaParser[Id] = new CreateUaParser[Id] { def create(uaFile: Option[String]): Id[Either[String, Parser]] = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala index a35684aab..3ca02cf07 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala @@ -12,7 +12,7 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery -import cats.{Eval, Id} +import cats.Id import cats.effect.Sync import cats.syntax.functor._ import cats.syntax.flatMap._ @@ -49,28 +49,6 @@ object CreateSqlQueryEnrichment { ) } - implicit def evalCreateSqlQueryEnrichment( - implicit CLM: SqlCacheInit[Eval], - CN: ConnectionRefInit[Eval], - DB: DbExecutor[Eval] - ): CreateSqlQueryEnrichment[Eval] = - new CreateSqlQueryEnrichment[Eval] { - def create(conf: SqlQueryConf): Eval[SqlQueryEnrichment[Eval]] = - for { - cache <- CLM.create(conf.cache.size) - connection <- CN.create(1) - } yield SqlQueryEnrichment( - conf.schemaKey, - conf.inputs, - conf.db, - conf.query, - conf.output, - conf.cache.ttl, - cache, - connection - ) - } - implicit def idCreateSqlQueryEnrichment( implicit CLM: SqlCacheInit[Id], CN: 
ConnectionRefInit[Id], diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala index 8a909229b..cfd798489 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/DbExecutor.scala @@ -19,7 +19,7 @@ import scala.util.control.NonFatal import io.circe.Json -import cats.{Eval, Id, Monad} +import cats.{Id, Monad} import cats.data.EitherT import cats.effect.{Bracket, Sync} import cats.implicits._ @@ -142,77 +142,6 @@ object DbExecutor { } - implicit def evalDbExecutor: DbExecutor[Eval] = - new DbExecutor[Eval] { - self => - def getConnection(rdbms: Rdbms, connectionRef: ConnectionRef[Eval])(implicit M: Monad[Eval]): Eval[Either[Throwable, Connection]] = - for { - cachedConnection <- connectionRef.get(()).map(flattenCached) - connection <- cachedConnection match { - case Right(conn) => - for { - closed <- Eval.now(conn.isClosed) - result <- if (!closed) conn.asRight[Throwable].pure[Eval] - else - for { - newConn <- Eval.now { - Either.catchNonFatal(DriverManager.getConnection(rdbms.connectionString)) - } - _ <- connectionRef.put((), newConn) - } yield newConn - } yield result - case Left(error) => - Eval.now(error.asLeft[Connection]) - - } - } yield connection - - def execute(query: PreparedStatement): EitherT[Eval, Throwable, ResultSet] = - EitherT(Eval.now(Either.catchNonFatal(query.executeQuery()))) - - def convert(resultSet: ResultSet, names: JsonOutput.PropertyNameMode): EitherT[Eval, Throwable, List[Json]] = - EitherT { - Eval.always { - try { - val buffer = ListBuffer.empty[EitherT[Id, Throwable, Json]] - while (resultSet.next()) - buffer += transform[Id](resultSet, 
names)(idDbExecutor, Monad[Id]) - val parsedJsons = buffer.result().sequence - resultSet.close() - parsedJsons.value: Either[Throwable, List[Json]] - } catch { - case NonFatal(error) => error.asLeft - } - } - } - - def getMetaData(rs: ResultSet): EitherT[Eval, Throwable, ResultSetMetaData] = - Either.catchNonFatal(rs.getMetaData).toEitherT[Eval] - - def getColumnCount(rsMeta: ResultSetMetaData): EitherT[Eval, Throwable, Int] = - Either.catchNonFatal(rsMeta.getColumnCount).toEitherT[Eval] - - def getColumnLabel(column: Int, rsMeta: ResultSetMetaData): EitherT[Eval, Throwable, String] = - Either.catchNonFatal(rsMeta.getColumnLabel(column)).toEitherT[Eval] - - def getColumnType(column: Int, rsMeta: ResultSetMetaData): EitherT[Eval, Throwable, String] = - Either.catchNonFatal(rsMeta.getColumnClassName(column)).toEitherT[Eval] - - def getColumnValue( - datatype: String, - columnIdx: Int, - rs: ResultSet - ): EitherT[Eval, Throwable, Json] = - Either - .catchNonFatal(rs.getObject(columnIdx)) - .map(Option.apply) - .map { - case Some(any) => JsonOutput.getValue(any, datatype) - case None => Json.Null - } - .toEitherT - } - implicit def idDbExecutor: DbExecutor[Id] = new DbExecutor[Id] { def getConnection(rdbms: Rdbms, connectionRef: ConnectionRef[Id])(implicit M: Monad[Id]): Id[Either[Throwable, Connection]] = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala index 9bce3385b..773dff448 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/HttpClient.scala @@ -14,7 +14,7 @@ package com.snowplowanalytics.snowplow.enrich.common.utils import scala.util.control.NonFatal -import cats.{Eval, Id} +import cats.Id import cats.effect.Sync import cats.syntax.either._ import 
scalaj.http._ @@ -32,12 +32,6 @@ object HttpClient { Sync[F].delay(getBody(request)) } - implicit def evalHttpClient: HttpClient[Eval] = - new HttpClient[Eval] { - override def getResponse(request: HttpRequest): Eval[Either[Throwable, String]] = - Eval.later(getBody(request)) - } - implicit def idHttpClient: HttpClient[Id] = new HttpClient[Id] { override def getResponse(request: HttpRequest): Id[Either[Throwable, String]] = getBody(request) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala index 861869b11..cc570ba33 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/IabEnrichmentSpec.scala @@ -14,7 +14,8 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.InetAddress -import cats.Eval +import cats.Id +import cats.syntax.functor._ import io.circe.literal._ @@ -79,15 +80,14 @@ class IabEnrichmentSpec extends Specification with DataTables { expectedReason, expectedPrimaryImpact ) => - (for { - e <- validConfig.enrichment[Eval] - res = e.performCheck(userAgent, ipAddress, DateTime.now()) - } yield res).value must beRight.like { - case check => - check.spiderOrRobot must_== expectedSpiderOrRobot and - (check.category must_== expectedCategory) and - (check.reason must_== expectedReason) and - (check.primaryImpact must_== expectedPrimaryImpact) + validConfig.enrichment[Id].map { e => + e.performCheck(userAgent, ipAddress, DateTime.now()) must beRight.like { + case check => + check.spiderOrRobot must_== expectedSpiderOrRobot and + (check.category must_== expectedCategory) and + (check.reason must_== expectedReason) and + (check.primaryImpact must_== 
expectedPrimaryImpact) + } } } @@ -98,9 +98,8 @@ class IabEnrichmentSpec extends Specification with DataTables { json"""{"spiderOrRobot": false, "category": "BROWSER", "reason": "PASSED_ALL", "primaryImpact": "NONE"}""" ) validConfig - .enrichment[Eval] - .map(_.getIabContext("Xdroid", "192.168.0.1".ip, DateTime.now())) - .value must + .enrichment[Id] + .map(_.getIabContext("Xdroid", "192.168.0.1".ip, DateTime.now())) must beRight(responseJson) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala index 4cfdb2b33..7cf9f653f 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/RefererParserEnrichmentSpec.scala @@ -14,12 +14,15 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.Eval +import cats.Id import cats.data.EitherT import cats.syntax.either._ + +import io.circe.literal._ + import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} import com.snowplowanalytics.refererparser._ -import io.circe.literal._ + import org.specs2.Specification import org.specs2.matcher.DataTables @@ -57,7 +60,7 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { Medium.Unknown ) |> { (_, refererUri, referer) => (for { - c <- EitherT.fromEither[Eval]( + c <- EitherT.fromEither[Id]( RefererParserEnrichment .parse( json"""{ @@ -81,16 +84,16 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { .toEither .leftMap(_.head) ) - e <- c.enrichment[Eval] + e <- c.enrichment[Id] res = e.extractRefererDetails(new URI(refererUri), PageHost) - } yield res).value.value must 
beRight.like { - case o => o must_== Some(referer) + } yield res).value must beRight.like { + case o => o must beSome(referer) } } def e2 = (for { - c <- EitherT.fromEither[Eval]( + c <- EitherT.fromEither[Id]( RefererParserEnrichment .parse( json"""{ @@ -114,16 +117,16 @@ class RefererParserEnrichmentSpec extends Specification with DataTables { .toEither .leftMap(_.head) ) - e <- c.enrichment[Eval] + e <- c.enrichment[Id] res = e.extractRefererDetails( new URI( "http://www.google.com/search?q=%0Agateway%09oracle%09cards%09denise%09linn&hl=en&client=safari" ), PageHost ) - } yield res).value.value must beRight.like { + } yield res).value must beRight.like { case o => - o must_== Some( + o must beSome( SearchReferer( Medium.Search, "Google", diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala index 030fcf3d6..5fdf789d8 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala @@ -14,7 +14,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.Eval +import cats.Id import cats.data.EitherT import io.circe.literal._ @@ -75,10 +75,10 @@ class UaParserEnrichmentSpec extends Specification with DataTables { "Custom Rules" | "Input UserAgent" | "Parsed UserAgent" | Some(badRulefile) !! mobileSafariUserAgent !! 
"Failed to initialize ua parser" |> { (rules, input, errorPrefix) => (for { - c <- EitherT.rightT[Eval, String](UaParserConf(schemaKey, rules)) - e <- c.enrichment[Eval] + c <- EitherT.rightT[Id, String](UaParserConf(schemaKey, rules)) + e <- c.enrichment[Id] res = e.extractUserAgent(input) - } yield res).value.value must beLeft.like { + } yield res).value must beLeft.like { case a => a must startWith(errorPrefix) } } @@ -90,11 +90,11 @@ class UaParserEnrichmentSpec extends Specification with DataTables { None !! safariUserAgent !! safariJson | Some(customRules) !! mobileSafariUserAgent !! testAgentJson |> { (rules, input, expected) => val json = for { - c <- EitherT.rightT[Eval, String](UaParserConf(schemaKey, rules)) - e <- c.enrichment[Eval].leftMap(_.toString) - res <- EitherT.fromEither[Eval](e.extractUserAgent(input)).leftMap(_.toString) + c <- EitherT.rightT[Id, String](UaParserConf(schemaKey, rules)) + e <- c.enrichment[Id].leftMap(_.toString) + res <- EitherT.fromEither[Id](e.extractUserAgent(input)).leftMap(_.toString) } yield res - json.value.value must beRight(expected) + json.value must beRight(expected) } } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala index 4155b5c8f..149fc087b 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala @@ -14,12 +14,16 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.lang.{Float => JFloat} -import cats.Eval +import cats.Id import cats.data.EitherT -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + import io.circe.generic.auto._ import 
io.circe.literal._ + import org.joda.time.DateTime + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + import org.specs2.Specification object WeatherEnrichmentSpec { @@ -43,11 +47,10 @@ class WeatherEnrichmentSpec extends Specification { val schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) lazy val validAppKey = sys.env - .get(OwmApiKey) - .getOrElse( - throw new IllegalStateException( - s"No $OwmApiKey environment variable found, test should have been skipped" - ) + .getOrElse(OwmApiKey, + throw new IllegalStateException( + s"No $OwmApiKey environment variable found, test should have been skipped" + ) ) object invalidEvent { @@ -65,7 +68,7 @@ class WeatherEnrichmentSpec extends Specification { def e1 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", "KEY", 10, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(invalidEvent.lat), @@ -74,7 +77,7 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beLeft.like { + res.value must beLeft.like { case e => e must contain("InputData(derived_tstamp,None,missing)") } @@ -83,7 +86,7 @@ class WeatherEnrichmentSpec extends Specification { def e2 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 10, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -92,13 +95,13 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beRight + res.value must beRight } def e3 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", "KEY", 10, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -107,13 +110,13 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - 
res.value.value must beLeft.like { case e => e must contain("Check your API key") } + res.value must beLeft.like { case e => e must contain("Check your API key") } } def e4 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 15, 5200, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -122,7 +125,7 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beRight.like { + res.value must beRight.like { case weather => val temp = weather.data.hcursor.downField("main").get[Double]("humidity") temp must beRight(69.0d) @@ -164,7 +167,7 @@ class WeatherEnrichmentSpec extends Specification { def e6 = { val res = for { enr <- WeatherConf(schemaKey, "history.openweathermap.org", validAppKey, 15, 2, 1) - .enrichment[Eval] + .enrichment[Id] stamp <- EitherT( enr.getWeatherContext( Option(validEvent.lat), @@ -173,7 +176,7 @@ class WeatherEnrichmentSpec extends Specification { ) ).leftMap(_.head.toString) } yield stamp - res.value.value must beRight.like { // successful request + res.value must beRight.like { // successful request case weather => weather.data.hcursor.as[TransformedWeather] must beRight.like { case w => diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala index 4cdbc9cf0..c5c0574a4 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/Clock.scala @@ -15,17 +15,10 @@ package utils import java.util.concurrent.TimeUnit -import cats.{Eval, Id} +import cats.Id import cats.effect.{Clock => CEClock} object Clock { - implicit val evalClock: CEClock[Eval] = new CEClock[Eval] { - final def realTime(unit: TimeUnit): Eval[Long] = - 
Eval.later(unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS)) - final def monotonic(unit: TimeUnit): Eval[Long] = - Eval.later(unit.convert(System.nanoTime(), TimeUnit.NANOSECONDS)) - } - implicit val idClock: CEClock[Id] = new CEClock[Id] { final def realTime(unit: TimeUnit): Id[Long] = unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS) From bfc443fb0a7cc1c9e319f9e234f6be73da97e9ec Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Tue, 28 Jul 2020 22:03:54 +0300 Subject: [PATCH 03/38] Common: move EnrichmentConf into its own module (close #303) --- .../utils.scala | 16 +- .../SingletonSpec.scala | 10 +- .../SpecHelpers.scala | 2 +- .../ApiRequestEnrichmentSpec.scala | 13 +- .../enrichments/SqlQueryEnrichmentSpec.scala | 13 +- .../common/adapters/AdapterRegistry.scala | 27 +-- .../registry/snowplow/RedirectAdapter.scala | 16 +- .../registry/snowplow/Tp1Adapter.scala | 19 +- .../registry/snowplow/Tp2Adapter.scala | 17 +- .../enrichments/EnrichmentRegistry.scala | 21 +- .../registry/AnonIpEnrichment.scala | 22 +- .../CampaignAttributionEnrichment.scala | 17 +- .../registry/CookieExtractorEnrichment.scala | 9 +- .../CurrencyConversionEnrichment.scala | 21 +- .../enrichments/registry/EnrichmentConf.scala | 192 ++++++++++++++++++ .../registry/EventFingerprintEnrichment.scala | 11 +- .../HttpHeaderExtractorEnrichment.scala | 10 +- .../enrichments/registry/IabEnrichment.scala | 23 +-- .../registry/IpLookupsEnrichment.scala | 17 +- .../registry/JavascriptScriptEnrichment.scala | 18 +- .../registry/RefererParserEnrichment.scala | 12 +- .../registry/UaParserEnrichment.scala | 19 +- .../registry/UserAgentUtilsEnrichment.scala | 16 +- .../registry/WeatherEnrichment.scala | 19 +- .../registry/YauaaEnrichment.scala | 10 +- .../apirequest/ApiRequestEnrichment.scala | 19 +- .../enrichments/registry/enrichments.scala | 152 +------------- .../pii/PiiPseudonymizerEnrichment.scala | 26 +-- .../sqlquery/CreateSqlQueryEnrichment.scala | 3 +- 
.../sqlquery/SqlQueryEnrichment.scala | 17 +- .../CurrencyConversionEnrichmentSpec.scala | 18 +- .../registry/EnrichmentConfigsSpec.scala | 9 +- .../registry/UaParserEnrichmentSpec.scala | 2 + .../registry/WeatherEnrichmentSpec.scala | 2 + .../registry/YauaaEnrichmentSpec.scala | 10 +- .../apirequest/ApiRequestEnrichmentSpec.scala | 17 +- .../registry/apirequest/HttpApiSpec.scala | 12 +- .../registry/apirequest/InputSpec.scala | 12 +- .../sqlquery/SqlQueryEnrichmentSpec.scala | 4 +- 39 files changed, 488 insertions(+), 385 deletions(-) create mode 100644 modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala diff --git a/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala b/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala index 6b07de9e9..1c4ff1d02 100644 --- a/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala +++ b/modules/beam/src/main/scala/com.snowplowanalytics.snowplow.enrich.beam/utils.scala @@ -25,14 +25,18 @@ import scala.util.Try import cats.Id import cats.effect.Clock + import io.circe.Json import io.circe.syntax._ -import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.joda.time.{DateTime, DateTimeZone} import org.joda.time.format.DateTimeFormat + +import com.snowplowanalytics.snowplow.badrows._ + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.PiiPseudonymizerConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.PiiPseudonymizerConf object utils { @@ -88,10 +92,8 @@ object utils { /** Determine if we have to emit pii transformation events. 
*/ def emitPii(confs: List[EnrichmentConf]): Boolean = confs - .collect { case c: PiiPseudonymizerConf => c } - .headOption - .map(_.emitIdentificationEvent) - .getOrElse(false) + .collectFirst { case c: PiiPseudonymizerConf => c } + .exists(_.emitIdentificationEvent) // We want to take one-tenth of the payload characters (not taking into account multi-bytes char) private val ReductionFactor = 10 diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala index 4f72161ce..2a250baa3 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala @@ -14,14 +14,16 @@ */ package com.snowplowanalytics.snowplow.enrich.beam -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ import io.circe.literal._ -import org.scalatest._ -import matchers.should.Matchers._ -import singleton._ +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.AnonIpConf + +import org.scalatest.matchers.should.Matchers._ import org.scalatest.freespec.AnyFreeSpec +import com.snowplowanalytics.snowplow.enrich.beam.singleton._ + class SingletonSpec extends AnyFreeSpec { "the singleton object should" - { "make a ClientSingleton.get function available" - { diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala index 5e8f249cc..a0a88a2ef 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala @@ -114,7 +114,7 @@ object SpecHelpers { 
contentType: Option[String] = None, headers: List[String] = Nil, ipAddress: String = "", - networkUserId: String = java.util.UUID.randomUUID().toString(), + networkUserId: String = java.util.UUID.randomUUID().toString, path: String = "", querystring: Option[String] = None, refererUri: Option[String] = None, diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala index 9b1efb54f..39cd3eb4e 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/ApiRequestEnrichmentSpec.scala @@ -12,15 +12,18 @@ * See the Apache License Version 2.0 for the specific language governing permissions and * limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.beam -package enrichments +package com.snowplowanalytics.snowplow.enrich.beam.enrichments import java.nio.file.Paths import cats.syntax.option._ + +import io.circe.literal._ + +import com.snowplowanalytics.snowplow.enrich.beam.{CI, Enrich, SpecHelpers} + import com.spotify.scio.io.PubsubIO import com.spotify.scio.testing._ -import io.circe.literal._ object ApiRequestEnrichmentSpec { val contexts = @@ -51,8 +54,8 @@ class ApiRequestEnrichmentSpec extends PipelineSpec { "--raw=in", "--enriched=out", "--bad=bad", - "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI()), - "--enrichments=" + Paths.get(getClass.getResource("/api_request").toURI()) + "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI), + "--enrichments=" + Paths.get(getClass.getResource("/api_request").toURI) ) .input(PubsubIO.readCoder[Array[Byte]]("in"), raw) .distCache(DistCacheIO(""), List.empty[Either[String, String]]) diff --git 
a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala index 6481ea00a..41d8d8adc 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/SqlQueryEnrichmentSpec.scala @@ -12,15 +12,18 @@ * See the Apache License Version 2.0 for the specific language governing permissions and * limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.beam -package enrichments +package com.snowplowanalytics.snowplow.enrich.beam.enrichments import java.nio.file.Paths +import io.circe.literal._ + import cats.syntax.option._ + import com.spotify.scio.io.PubsubIO import com.spotify.scio.testing._ -import io.circe.literal._ + +import com.snowplowanalytics.snowplow.enrich.beam.{CI, Enrich, SpecHelpers} object SqlQueryEnrichmentSpec { val contexts = @@ -48,8 +51,8 @@ class SqlQueryEnrichmentSpec extends PipelineSpec { "--raw=in", "--enriched=out", "--bad=bad", - "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI()), - "--enrichments=" + Paths.get(getClass.getResource("/sql_query").toURI()) + "--resolver=" + Paths.get(getClass.getResource("/iglu_resolver.json").toURI), + "--enrichments=" + Paths.get(getClass.getResource("/sql_query").toURI) ) .input(PubsubIO.readCoder[Array[Byte]]("in"), raw) .distCache(DistCacheIO(""), List.empty[Either[String, String]]) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala index 18243ba69..6133d4095 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala +++ 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala @@ -10,25 +10,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package adapters +package com.snowplowanalytics.snowplow.enrich.common.adapters import java.time.Instant import cats.Monad import cats.data.{NonEmptyList, Validated} + import cats.effect.Clock -import cats.syntax.functor._ -import cats.syntax.validated._ +import cats.implicits._ + +import io.circe.Json + import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.client.Client + import com.snowplowanalytics.snowplow.badrows._ -import io.circe.Json -import loaders.CollectorPayload -import registry._ -import registry.snowplow._ -import utils.HttpClient +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry._ +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow.{RedirectAdapter, Tp1Adapter, Tp2Adapter} +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient /** * The AdapterRegistry lets us convert a CollectorPayload into one or more RawEvents, using a given @@ -95,14 +97,15 @@ class AdapterRegistry(remoteAdapters: Map[(String, String), RemoteAdapter] = Map processor: Processor ): F[Validated[BadRow, NonEmptyList[RawEvent]]] = (adapters.get((payload.api.vendor, payload.api.version)) match { - case Some(adapter) => adapter.toRawEvents(payload, client) + case Some(adapter) => + adapter.toRawEvents(payload, client) case _ => - val f = FailureDetails.AdapterFailure.InputData( + val f: FailureDetails.AdapterFailureOrTrackerProtocolViolation = FailureDetails.AdapterFailure.InputData( "vendor/version", 
Some(s"${payload.api.vendor}/${payload.api.version}"), "vendor/version combination is not supported" ) - Monad[F].pure(f.invalidNel) + Monad[F].pure(f.invalidNel[NonEmptyList[RawEvent]]) }).map(_.leftMap(enrichFailure(_, payload, payload.api.vendor, payload.api.version, processor))) private def enrichFailure( diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala index 83b0c3728..83af6f59c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala @@ -10,10 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry -package snowplow +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} @@ -26,15 +23,18 @@ import cats.effect.Clock import io.circe._ import io.circe.syntax._ -import com.snowplowanalytics.iglu.client.Client -import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ +import com.snowplowanalytics.iglu.client.Client +import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup + import com.snowplowanalytics.snowplow.badrows.FailureDetails -import loaders.CollectorPayload -import utils.{HttpClient, ConversionUtils => CU, JsonUtils => JU} +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils => JU, ConversionUtils => CU} /** * The Redirect Adapter is essentially a pre-processor for diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala index 6b1bdcd6c..0ee83458c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp1Adapter.scala @@ -10,22 +10,25 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry -package snowplow +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} + import cats.effect.Clock import cats.syntax.validated._ + +import io.circe.Json + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.snowplow.badrows._ -import io.circe.Json -import loaders.CollectorPayload -import utils.HttpClient +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient /** Version 1 of the Tracker Protocol is GET only. All data comes in on the querystring. */ object Tp1Adapter extends Adapter { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala index 14fe4dad4..10051a596 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala @@ -10,17 +10,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry -package snowplow +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry.snowplow import cats.Monad import cats.data.{EitherT, NonEmptyList, Validated, ValidatedNel} import cats.data.Validated._ import cats.implicits._ + import cats.effect.Clock +import io.circe.Json + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaCriterion, SelfDescribingData} @@ -28,10 +28,11 @@ import com.snowplowanalytics.iglu.core.circe.instances._ import com.snowplowanalytics.snowplow.badrows.FailureDetails -import io.circe.Json - -import loaders.CollectorPayload -import utils.{HttpClient, JsonUtils => JU} +import com.snowplowanalytics.snowplow.enrich.common.RawEventParameters +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils => JU} /** * Version 2 of the Tracker Protocol supports GET and POST. Note that with POST, data can still be diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala index 699d21691..015e3f5c8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala @@ -10,32 +10,35 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments +package com.snowplowanalytics.snowplow.enrich.common.enrichments import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} + import cats.effect.Clock import cats.implicits._ import io.circe._ import io.circe.syntax._ -import com.snowplowanalytics.iglu.client.Client -import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.instances._ +import com.snowplowanalytics.iglu.client.Client +import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup + import com.snowplowanalytics.forex.CreateForex import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups import com.snowplowanalytics.refererparser.CreateParser import com.snowplowanalytics.weather.providers.openweather.CreateOWM -import registry._ -import registry.apirequest.ApiRequestEnrichment -import registry.pii.PiiPseudonymizerEnrichment -import registry.sqlquery.SqlQueryEnrichment -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf._ + +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.ApiRequestEnrichment +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.PiiPseudonymizerEnrichment +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.SqlQueryEnrichment /** Companion which holds a constructor for the EnrichmentRegistry. 
*/ object EnrichmentRegistry { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala index 49c1087ff..5dfe3dc95 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala @@ -10,20 +10,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -import cats.data.ValidatedNel -import cats.data.Validated +import java.net.{Inet4Address, Inet6Address} + +import scala.util.Try + +import cats.data.{Validated, ValidatedNel} import cats.syntax.either._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import io.circe._ -import utils.CirceUtils +import io.circe.Json -import java.net.{Inet4Address, Inet6Address} import com.google.common.net.{InetAddresses => GuavaInetAddress} -import scala.util.Try +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.AnonIpConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create a AnonIpConf from a Json. */ object AnonIpEnrichment extends ParseableEnrichment { @@ -32,7 +34,7 @@ object AnonIpEnrichment extends ParseableEnrichment { /** * Creates an AnonIpEnrichment instance from a Json. 
- * @param c The anon_ip enrichment JSON + * @param config The anon_ip enrichment JSON * @param schemaKey provided for the enrichment, must be supported by this enrichment * @return an AnonIpEnrichment configuration */ diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala index 0c1653633..f4f1cd874 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala @@ -10,16 +10,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + import io.circe._ -import utils.MapTransformer.SourceMap -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.QueryStringParameters +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CampaignAttributionConf +import com.snowplowanalytics.snowplow.enrich.common.utils.MapTransformer.SourceMap +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. 
Lets us create a CampaignAttributionEnrichment from a Json */ object CampaignAttributionEnrichment extends ParseableEnrichment { @@ -97,7 +100,7 @@ final case class MarketingCampaign( * @param termParameters List of marketing term parameters * @param contentParameters List of marketing content parameters * @param campaignParameters List of marketing campaign parameters - * @param mktClick Map of click ID parameters to networks + * @param clickIdParameters Map of click ID parameters to networks */ final case class CampaignAttributionEnrichment( mediumParameters: List[String], @@ -116,7 +119,7 @@ final case class CampaignAttributionEnrichment( * @return Option boxing the value of the campaign parameter */ private def getFirstParameter(parameterList: List[String], sourceMap: SourceMap): Option[String] = - parameterList.find(sourceMap.contains(_)).map(sourceMap(_)) + parameterList.find(sourceMap.contains).map(sourceMap(_)) /** * Extract the marketing fields from a URL. diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala index 37697ff5f..13721cebb 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala @@ -10,20 +10,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.ValidatedNel import cats.syntax.either._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - import io.circe._ import io.circe.syntax._ import org.apache.http.message.BasicHeaderValueParser +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CookieExtractorConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils object CookieExtractorEnrichment extends ParseableEnrichment { override val supportedSchema = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala index ddd65818d..ad0d81492 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CurrencyConversionEnrichment.scala @@ -10,23 +10,28 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.time.ZonedDateTime import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ + +import io.circe._ + import com.snowplowanalytics.forex.{CreateForex, Forex} import com.snowplowanalytics.forex.model._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import com.snowplowanalytics.snowplow.badrows._ -import io.circe._ + import org.joda.money.CurrencyUnit import org.joda.time.DateTime -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CurrencyConversionConf /** Companion object. Lets us create an CurrencyConversionEnrichment instance from a Json. 
*/ object CurrencyConversionEnrichment extends ParseableEnrichment { @@ -101,10 +106,10 @@ final case class CurrencyConversionEnrichment[F[_]: Monad]( /** * Attempt to convert if the initial currency and value are both defined - * @param inputCurrency Option boxing the initial currency if it is present + * @param initialCurrency Option boxing the initial currency if it is present * @param value Option boxing the amount to convert * @return None.success if the inputs were not both defined, - * otherwise Validation[Option[_]] boxing the result of the conversion + * otherwise `Validation[Option[_]]` boxing the result of the conversion */ private def performConversion( initialCurrency: Option[Either[FailureDetails.EnrichmentFailure, CurrencyUnit]], diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala new file mode 100644 index 000000000..e4f6dab0d --- /dev/null +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2012-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry + +import java.net.URI + +import cats.{Functor, Monad} +import cats.data.EitherT + +import org.joda.money.CurrencyUnit + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.forex.CreateForex +import com.snowplowanalytics.forex.model.AccountType +import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups +import com.snowplowanalytics.refererparser.CreateParser +import com.snowplowanalytics.weather.providers.openweather.CreateOWM + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.{ + ApiRequestEnrichment, + CreateApiRequestEnrichment, + HttpApi +} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.{CreateSqlQueryEnrichment, Rdbms, SqlQueryEnrichment} + +sealed trait EnrichmentConf { + def schemaKey: SchemaKey = + SchemaKey( + "com.acme", + "placeholder", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ) + + /** + * List of files, such as local DBs that need to be downloaded and distributed across workers + * First element of pair is URI to download file from, second is a local path to store it in + */ + def filesToCache: List[(URI, String)] = Nil +} + +object EnrichmentConf { + + final case class ApiRequestConf( + override val schemaKey: SchemaKey, + inputs: List[apirequest.Input], + api: HttpApi, + outputs: List[apirequest.Output], + cache: apirequest.Cache + ) extends EnrichmentConf { + def enrichment[F[_]: CreateApiRequestEnrichment]: F[ApiRequestEnrichment[F]] = + ApiRequestEnrichment[F](this) + } + + final case class PiiPseudonymizerConf( + fieldList: List[pii.PiiField], + emitIdentificationEvent: Boolean, + strategy: pii.PiiStrategy + ) extends EnrichmentConf { + def enrichment: pii.PiiPseudonymizerEnrichment = + pii.PiiPseudonymizerEnrichment(fieldList, emitIdentificationEvent, strategy) + } + + final case class SqlQueryConf( + override val schemaKey: SchemaKey, + inputs: 
List[sqlquery.Input], + db: Rdbms, + query: SqlQueryEnrichment.Query, + output: sqlquery.Output, + cache: SqlQueryEnrichment.Cache + ) extends EnrichmentConf { + def enrichment[F[_]: Monad: CreateSqlQueryEnrichment]: F[SqlQueryEnrichment[F]] = + SqlQueryEnrichment[F](this) + } + + final case class AnonIpConf(octets: AnonIPv4Octets.AnonIPv4Octets, segments: AnonIPv6Segments.AnonIPv6Segments) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = Nil + def enrichment: AnonIpEnrichment = AnonIpEnrichment(octets, segments) + } + + final case class CampaignAttributionConf( + mediumParameters: List[String], + sourceParameters: List[String], + termParameters: List[String], + contentParameters: List[String], + campaignParameters: List[String], + clickIdParameters: List[(String, String)] + ) extends EnrichmentConf { + def enrichment: CampaignAttributionEnrichment = + CampaignAttributionEnrichment( + mediumParameters, + sourceParameters, + termParameters, + contentParameters, + campaignParameters, + clickIdParameters + ) + } + + final case class CookieExtractorConf(cookieNames: List[String]) extends EnrichmentConf { + def enrichment: CookieExtractorEnrichment = CookieExtractorEnrichment(cookieNames) + } + + final case class CurrencyConversionConf( + override val schemaKey: SchemaKey, + accountType: AccountType, + apiKey: String, + baseCurrency: CurrencyUnit + ) extends EnrichmentConf { + def enrichment[F[_]: Monad: CreateForex]: F[CurrencyConversionEnrichment[F]] = + CurrencyConversionEnrichment[F](this) + } + + final case class EventFingerprintConf(algorithm: String => String, excludedParameters: List[String]) extends EnrichmentConf { + def enrichment: EventFingerprintEnrichment = + EventFingerprintEnrichment(algorithm, excludedParameters) + } + + final case class HttpHeaderExtractorConf(headersPattern: String) extends EnrichmentConf { + def enrichment: HttpHeaderExtractorEnrichment = HttpHeaderExtractorEnrichment(headersPattern) + } + + final case 
class IabConf( + override val schemaKey: SchemaKey, + ipFile: (URI, String), + excludeUaFile: (URI, String), + includeUaFile: (URI, String) + ) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = List(ipFile, excludeUaFile, includeUaFile) + def enrichment[F[_]: Monad: CreateIabClient]: F[IabEnrichment] = + IabEnrichment[F](this) + } + + final case class IpLookupsConf( + geoFile: Option[(URI, String)], + ispFile: Option[(URI, String)], + domainFile: Option[(URI, String)], + connectionTypeFile: Option[(URI, String)] + ) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = + List(geoFile, ispFile, domainFile, connectionTypeFile).flatten + def enrichment[F[_]: Functor: CreateIpLookups]: F[IpLookupsEnrichment[F]] = + IpLookupsEnrichment[F](this) + } + + final case class JavascriptScriptConf(override val schemaKey: SchemaKey, rawFunction: String) extends EnrichmentConf { + def enrichment: JavascriptScriptEnrichment = JavascriptScriptEnrichment(schemaKey, rawFunction) + } + + final case class RefererParserConf(refererDatabase: (URI, String), internalDomains: List[String]) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = List(refererDatabase) + def enrichment[F[_]: Monad: CreateParser]: EitherT[F, String, RefererParserEnrichment] = + RefererParserEnrichment[F](this) + } + + final case class UaParserConf(override val schemaKey: SchemaKey, uaDatabase: Option[(URI, String)]) extends EnrichmentConf { + override val filesToCache: List[(URI, String)] = List(uaDatabase).flatten + def enrichment[F[_]: Monad: CreateUaParser]: EitherT[F, String, UaParserEnrichment] = + UaParserEnrichment[F](this) + } + + final case class UserAgentUtilsConf(override val schemaKey: SchemaKey) extends EnrichmentConf { + def enrichment: UserAgentUtilsEnrichment = UserAgentUtilsEnrichment(schemaKey) + } + + final case class WeatherConf( + override val schemaKey: SchemaKey, + apiHost: String, + apiKey: String, + timeout: Int, + 
cacheSize: Int, + geoPrecision: Int + ) extends EnrichmentConf { + def enrichment[F[_]: Monad: CreateOWM]: EitherT[F, String, WeatherEnrichment[F]] = + WeatherEnrichment[F](this) + } + + final case class YauaaConf(cacheSize: Option[Int]) extends EnrichmentConf { + def enrichment: YauaaEnrichment = YauaaEnrichment(cacheSize) + } +} diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala index 4e9c9bd7e..607012426 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala @@ -10,16 +10,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + import io.circe._ + import org.apache.commons.codec.digest.DigestUtils -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.EventFingerprintConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Lets us create an EventFingerprintEnrichment from a Json. 
*/ object EventFingerprintEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala index cd7f949c0..64d934ebd 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala @@ -10,18 +10,18 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.ValidatedNel import cats.syntax.either._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - import io.circe._ import io.circe.syntax._ -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.HttpHeaderExtractorConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils object HttpHeaderExtractorEnrichment extends ParseableEnrichment { override val supportedSchema = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala index 0ef2ceffa..e12d1c5ac 100644 --- 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IabEnrichment.scala @@ -10,8 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.io.File import java.net.{InetAddress, URI} @@ -22,17 +21,19 @@ import cats.data.{NonEmptyList, ValidatedNel} import cats.effect.Sync import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - -import com.snowplowanalytics.iab.spidersandrobotsclient.IabClient -import com.snowplowanalytics.snowplow.badrows.FailureDetails +import org.joda.time.DateTime import io.circe._ import io.circe.generic.auto._ import io.circe.syntax._ -import org.joda.time.DateTime -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.iab.spidersandrobotsclient.IabClient +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.IabConf /** Companion object. Lets us create an IabEnrichment instance from a Json. */ object IabEnrichment extends ParseableEnrichment { @@ -109,10 +110,8 @@ object IabEnrichment extends ParseableEnrichment { /** * Contains enrichments based on IAB Spiders&Robots lookup. 
- * @param ipFile (Full URI to the IAB excluded IP list, database name) - * @param excludeUaFile (Full URI to the IAB excluded user agent list, database name) - * @param includeUaFile (Full URI to the IAB included user agent list, database name) - * @param localMode Whether to use the local database file. Enabled for tests. + * @param schemaKey enrichment's static Iglu Schema Key + * @param iabClient worker object */ final case class IabEnrichment(schemaKey: SchemaKey, iabClient: IabClient) extends Enrichment { val outputSchema = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala index 3fb81b183..956221f55 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala @@ -10,26 +10,25 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import inet.ipaddr.HostName - import cats.Functor import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.maxmind.iplookups._ -import com.snowplowanalytics.maxmind.iplookups.model._ +import io.circe._ + +import inet.ipaddr.HostName import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import io.circe._ +import com.snowplowanalytics.maxmind.iplookups._ +import com.snowplowanalytics.maxmind.iplookups.model._ -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.IpLookupsConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create an IpLookupsEnrichment instance from a Json. */ object IpLookupsEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala index 7ee71405c..630a6e901 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/JavascriptScriptEnrichment.scala @@ -10,24 +10,24 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.data.{NonEmptyList, ValidatedNel} import cats.implicits._ +import io.circe._ +import io.circe.parser._ + +import javax.script._ + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows.FailureDetails -import javax.script._ - -import io.circe._ -import io.circe.parser._ - -import outputs.EnrichedEvent -import utils.{CirceUtils, ConversionUtils} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.JavascriptScriptConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.utils.{CirceUtils, ConversionUtils} object JavascriptScriptEnrichment extends ParseableEnrichment { override val supportedSchema = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala index c47603ad6..5bc29aedd 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala @@ -10,20 +10,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ + +import io.circe.Json + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + import com.snowplowanalytics.refererparser._ -import io.circe.Json -import utils.{ConversionUtils => CU} -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.RefererParserConf +import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils => CU, CirceUtils} /** Companion object. Lets us create a RefererParserEnrichment from a Json */ object RefererParserEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala index cb9931a8b..03dd5dabc 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UaParserEnrichment.scala @@ -9,28 +9,29 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.io.{FileInputStream, InputStream} import java.net.URI import cats.{Id, Monad} import cats.data.{EitherT, NonEmptyList, ValidatedNel} + import cats.effect.Sync import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - -import com.snowplowanalytics.snowplow.badrows.FailureDetails - -import io.circe._ +import io.circe.Json import io.circe.syntax._ import ua_parser.Parser import ua_parser.Client -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.UaParserConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create a UaParserEnrichment from a Json. */ object UaParserEnrichment extends ParseableEnrichment { @@ -105,7 +106,7 @@ final case class UaParserEnrichment(schemaKey: SchemaKey, parser: Parser) extend /** * Extracts the client attributes from a useragent string, using UserAgentEnrichment. * @param useragent to extract from. Should be encoded, i.e. not previously decoded. 
- * @return the json or the message of the exception, boxed in a Scalaz Validation + * @return the json or the message of the bad row details */ def extractUserAgent(useragent: String): Either[FailureDetails.EnrichmentFailure, SelfDescribingData[Json]] = Either diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala index ef11dbf33..78725ea8e 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/UserAgentUtilsEnrichment.scala @@ -9,18 +9,24 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import scala.util.control.NonFatal import cats.data.ValidatedNel import cats.syntax.either._ import cats.syntax.option._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} -import com.snowplowanalytics.snowplow.badrows._ -import eu.bitwalker.useragentutils._ + import io.circe._ + +import eu.bitwalker.useragentutils._ + +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.UserAgentUtilsConf + import org.slf4j.LoggerFactory object UserAgentUtilsEnrichmentConfig extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala index 804156d75..2fa748057 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/WeatherEnrichment.scala @@ -10,8 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.lang.{Float => JFloat} import java.time.{Instant, ZoneOffset, ZonedDateTime} @@ -23,19 +22,21 @@ import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} -import com.snowplowanalytics.snowplow.badrows.FailureDetails - -import com.snowplowanalytics.weather.providers.openweather._ -import com.snowplowanalytics.weather.providers.openweather.responses._ +import org.joda.time.{DateTime, DateTimeZone} import io.circe._ import io.circe.generic.auto._ import io.circe.syntax._ -import org.joda.time.{DateTime, DateTimeZone} +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.weather.providers.openweather._ +import com.snowplowanalytics.weather.providers.openweather.responses._ -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.WeatherConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. 
Lets us create an WeatherEnrichment instance from a Json */ object WeatherEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala index 0cceb153a..9f0b9de36 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala @@ -10,8 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import scala.collection.JavaConverters._ @@ -21,11 +20,12 @@ import cats.syntax.either._ import io.circe.Json import io.circe.syntax._ -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} - import nl.basjes.parse.useragent.{UserAgent, UserAgentAnalyzer} -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.YauaaConf +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object to create an instance of YauaaEnrichment from the configuration. 
*/ object YauaaEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala index 65f24b2cd..fd589350a 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala @@ -10,10 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import java.util.UUID @@ -21,17 +18,19 @@ import cats.{Id, Monad} import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ +import io.circe._ +import io.circe.generic.auto._ + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ -import com.snowplowanalytics.lrumap._ +import com.snowplowanalytics.lrumap._ import com.snowplowanalytics.snowplow.badrows.FailureDetails -import io.circe._ -import io.circe.generic.auto._ - -import outputs.EnrichedEvent -import utils.{CirceUtils, HttpClient} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import 
com.snowplowanalytics.snowplow.enrich.common.utils.{CirceUtils, HttpClient} object ApiRequestEnrichment extends ParseableEnrichment { override val supportedSchema = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala index 6ed023b1f..70e996223 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/enrichments.scala @@ -10,162 +10,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import cats.{Functor, Monad} -import cats.data.{EitherT, ValidatedNel} +import cats.data.ValidatedNel import cats.syntax.either._ -import com.snowplowanalytics.forex.CreateForex -import com.snowplowanalytics.forex.model.AccountType -import com.snowplowanalytics.iglu.core._ -import com.snowplowanalytics.maxmind.iplookups.CreateIpLookups -import com.snowplowanalytics.refererparser.CreateParser -import com.snowplowanalytics.weather.providers.openweather.CreateOWM + import io.circe._ -import org.joda.money.CurrencyUnit -import apirequest._ -import sqlquery._ -import utils.ConversionUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} + +import com.snowplowanalytics.snowplow.enrich.common.utils.ConversionUtils /** Trait inherited by every enrichment config case class */ trait Enrichment -sealed trait EnrichmentConf { - def schemaKey: SchemaKey = - SchemaKey( - "com.acme", - 
"placeholder", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ) - def filesToCache: List[(URI, String)] = Nil -} -final case class ApiRequestConf( - override val schemaKey: SchemaKey, - inputs: List[apirequest.Input], - api: HttpApi, - outputs: List[apirequest.Output], - cache: apirequest.Cache -) extends EnrichmentConf { - def enrichment[F[_]: CreateApiRequestEnrichment]: F[ApiRequestEnrichment[F]] = - ApiRequestEnrichment[F](this) -} -final case class PiiPseudonymizerConf( - fieldList: List[pii.PiiField], - emitIdentificationEvent: Boolean, - strategy: pii.PiiStrategy -) extends EnrichmentConf { - def enrichment: pii.PiiPseudonymizerEnrichment = - pii.PiiPseudonymizerEnrichment(fieldList, emitIdentificationEvent, strategy) -} -final case class SqlQueryConf( - override val schemaKey: SchemaKey, - inputs: List[sqlquery.Input], - db: Rdbms, - query: SqlQueryEnrichment.Query, - output: sqlquery.Output, - cache: SqlQueryEnrichment.Cache -) extends EnrichmentConf { - def enrichment[F[_]: Monad: CreateSqlQueryEnrichment]: F[SqlQueryEnrichment[F]] = - SqlQueryEnrichment[F](this) -} -final case class AnonIpConf(octets: AnonIPv4Octets.AnonIPv4Octets, segments: AnonIPv6Segments.AnonIPv6Segments) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = Nil - def enrichment: AnonIpEnrichment = AnonIpEnrichment(octets, segments) -} -final case class CampaignAttributionConf( - mediumParameters: List[String], - sourceParameters: List[String], - termParameters: List[String], - contentParameters: List[String], - campaignParameters: List[String], - clickIdParameters: List[(String, String)] -) extends EnrichmentConf { - def enrichment: CampaignAttributionEnrichment = - CampaignAttributionEnrichment( - mediumParameters, - sourceParameters, - termParameters, - contentParameters, - campaignParameters, - clickIdParameters - ) -} -final case class CookieExtractorConf(cookieNames: List[String]) extends EnrichmentConf { - def enrichment: CookieExtractorEnrichment = 
CookieExtractorEnrichment(cookieNames) -} -final case class CurrencyConversionConf( - override val schemaKey: SchemaKey, - accountType: AccountType, - apiKey: String, - baseCurrency: CurrencyUnit -) extends EnrichmentConf { - def enrichment[F[_]: Monad: CreateForex]: F[CurrencyConversionEnrichment[F]] = - CurrencyConversionEnrichment[F](this) -} -final case class EventFingerprintConf(algorithm: String => String, excludedParameters: List[String]) extends EnrichmentConf { - def enrichment: EventFingerprintEnrichment = - EventFingerprintEnrichment(algorithm, excludedParameters) -} -final case class HttpHeaderExtractorConf(headersPattern: String) extends EnrichmentConf { - def enrichment: HttpHeaderExtractorEnrichment = HttpHeaderExtractorEnrichment(headersPattern) -} -final case class IabConf( - override val schemaKey: SchemaKey, - ipFile: (URI, String), - excludeUaFile: (URI, String), - includeUaFile: (URI, String) -) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = List(ipFile, excludeUaFile, includeUaFile) - def enrichment[F[_]: Monad: CreateIabClient]: F[IabEnrichment] = - IabEnrichment[F](this) -} -final case class IpLookupsConf( - geoFile: Option[(URI, String)], - ispFile: Option[(URI, String)], - domainFile: Option[(URI, String)], - connectionTypeFile: Option[(URI, String)] -) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = - List(geoFile, ispFile, domainFile, connectionTypeFile).flatten - def enrichment[F[_]: Functor: CreateIpLookups]: F[IpLookupsEnrichment[F]] = - IpLookupsEnrichment[F](this) -} -final case class JavascriptScriptConf(override val schemaKey: SchemaKey, rawFunction: String) extends EnrichmentConf { - def enrichment: JavascriptScriptEnrichment = JavascriptScriptEnrichment(schemaKey, rawFunction) -} -final case class RefererParserConf(refererDatabase: (URI, String), internalDomains: List[String]) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = List(refererDatabase) 
- def enrichment[F[_]: Monad: CreateParser]: EitherT[F, String, RefererParserEnrichment] = - RefererParserEnrichment[F](this) -} -final case class UaParserConf(override val schemaKey: SchemaKey, uaDatabase: Option[(URI, String)]) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = List(uaDatabase).flatten - def enrichment[F[_]: Monad: CreateUaParser]: EitherT[F, String, UaParserEnrichment] = - UaParserEnrichment[F](this) -} -final case class UserAgentUtilsConf(override val schemaKey: SchemaKey) extends EnrichmentConf { - def enrichment: UserAgentUtilsEnrichment = UserAgentUtilsEnrichment(schemaKey) -} -final case class WeatherConf( - override val schemaKey: SchemaKey, - apiHost: String, - apiKey: String, - timeout: Int, - cacheSize: Int, - geoPrecision: Int -) extends EnrichmentConf { - def enrichment[F[_]: Monad: CreateOWM]: EitherT[F, String, WeatherEnrichment[F]] = - WeatherEnrichment[F](this) -} -final case class YauaaConf(cacheSize: Option[Int]) extends EnrichmentConf { - def enrichment: YauaaEnrichment = YauaaEnrichment(cacheSize) -} - /** Trait to hold helpers relating to enrichment config */ trait ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index d13733105..474922111 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package pii +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii import scala.collection.JavaConverters._ import scala.collection.mutable.MutableList @@ -20,24 +18,26 @@ import scala.collection.mutable.MutableList import cats.data.ValidatedNel import cats.implicits._ +import io.circe._ +import io.circe.jackson._ +import io.circe.syntax._ + import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.node.{ArrayNode, ObjectNode, TextNode} import com.jayway.jsonpath.{Configuration, JsonPath => JJsonPath} import com.jayway.jsonpath.MapFunction -import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} - -import io.circe._ -import io.circe.jackson._ -import io.circe.syntax._ - import org.apache.commons.codec.digest.DigestUtils -import adapters.registry.Adapter -import outputs.EnrichedEvent -import serializers._ -import utils.CirceUtils +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} + +import com.snowplowanalytics.snowplow.enrich.common.adapters.registry.Adapter +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.PiiPseudonymizerConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.serializers._ +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object. Lets us create a PiiPseudonymizerEnrichment from a Json. 
*/ object PiiPseudonymizerEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala index 3ca02cf07..92112d7e8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/CreateSqlQueryEnrichment.scala @@ -13,11 +13,12 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery import cats.Id + import cats.effect.Sync import cats.syntax.functor._ import cats.syntax.flatMap._ -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.SqlQueryConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf /** Initialize resources, necessary for SQL Query enrichment: cache and connection */ sealed trait CreateSqlQueryEnrichment[F[_]] { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala index 73fc5559b..4aa3a63f0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala @@ -10,10 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry -package sqlquery +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery import scala.collection.immutable.IntMap @@ -21,15 +18,17 @@ import cats.Monad import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ +import io.circe._ +import io.circe.generic.semiauto._ + import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} import com.snowplowanalytics.snowplow.badrows.FailureDetails -import io.circe._ -import io.circe.generic.semiauto._ - -import outputs.EnrichedEvent -import utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} /** Lets us create an SqlQueryConf from a Json */ object SqlQueryEnrichment extends ParseableEnrichment { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala index f3cafa750..ebc89d353 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CurrencyConversionEnrichmentSpec.scala @@ -10,8 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import cats.Id import cats.data.{NonEmptyList, Validated, ValidatedNel} @@ -19,11 +18,15 @@ import cats.implicits._ import com.snowplowanalytics.forex.CreateForex._ import com.snowplowanalytics.forex.model._ + import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} -import com.snowplowanalytics.snowplow.badrows._ + +import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.CurrencyConversionConf import org.joda.money.CurrencyUnit import org.joda.time.DateTime + import org.specs2.Specification import org.specs2.matcher.DataTables @@ -42,11 +45,10 @@ class CurrencyConversionEnrichmentSpec extends Specification with DataTables { """ lazy val validAppKey = sys.env - .get(OerApiKey) - .getOrElse( - throw new IllegalStateException( - s"No ${OerApiKey} environment variable found, test should have been skipped" - ) + .getOrElse(OerApiKey, + throw new IllegalStateException( + s"No $OerApiKey environment variable found, test should have been skipped" + ) ) type Result = ValidatedNel[ FailureDetails.EnrichmentFailure, diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala index 0282a4063..49460c360 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala @@ -14,12 +14,17 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import java.net.URI -import com.snowplowanalytics.forex.model._ 
-import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} import io.circe.literal._ import io.circe.parser._ + import org.apache.commons.codec.binary.Base64 import org.joda.money.CurrencyUnit + +import com.snowplowanalytics.forex.model._ +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf._ + import org.specs2.matcher.{DataTables, ValidatedMatchers} import org.specs2.mutable.Specification diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala index 5fdf789d8..d51dc85ca 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/UaParserEnrichmentSpec.scala @@ -21,6 +21,8 @@ import io.circe.literal._ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.UaParserConf + import org.specs2.matcher.DataTables import org.specs2.mutable.Specification diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala index 149fc087b..a67398c56 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/WeatherEnrichmentSpec.scala @@ -24,6 +24,8 @@ import 
org.joda.time.DateTime import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.WeatherConf + import org.specs2.Specification object WeatherEnrichmentSpec { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala index 11275d358..50089a985 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala @@ -10,17 +10,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments -package registry - -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import io.circe.parser._ import io.circe.literal._ import nl.basjes.parse.useragent.UserAgent +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.YauaaConf + import org.specs2.matcher.ValidatedMatchers import org.specs2.mutable.Specification diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala index 703f8db17..eaf36ba21 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ApiRequestEnrichmentSpec.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import cats.Id import cats.syntax.either._ @@ -21,16 +19,17 @@ import io.circe.Json import io.circe.literal._ import io.circe.parser._ -import org.specs2.Specification -import org.specs2.matcher.ValidatedMatchers -import org.specs2.mock.Mockito - import scalaj.http.HttpRequest import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} -import outputs.EnrichedEvent -import utils.HttpClient +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.utils.HttpClient + +import org.specs2.Specification +import org.specs2.matcher.ValidatedMatchers +import org.specs2.mock.Mockito class ApiRequestEnrichmentSpec extends Specification with ValidatedMatchers with Mockito { def is = s2""" diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala index 93568490e..9f233f676 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/HttpApiSpec.scala @@ -10,12 +10,15 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import cats.Id + import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers import org.specs2.mock.Mockito @@ -49,7 +52,6 @@ class HttpApiSpec extends Specification with ValidatedMatchers with Mockito { request must beSome("http://thishostdoesntexist31337:8123/admin/foo/November+2015/admin") } - // This one uses real actor system def e3 = { val schemaKey = SchemaKey("vendor", "name", "format", SchemaVer.Full(1, 0, 0)) val enrichment = ApiRequestConf( @@ -60,7 +62,7 @@ class HttpApiSpec extends Specification with ValidatedMatchers with Mockito { Cache(1, 1) ).enrichment[Id] - val event = new outputs.EnrichedEvent + val event = new EnrichedEvent val request = enrichment.lookup(event, Nil, Nil, None) request must beInvalid } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala index 887da6e64..96e8758a4 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/InputSpec.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package enrichments.registry -package apirequest +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest import cats.Id import cats.data.ValidatedNel @@ -22,9 +20,11 @@ import io.circe.literal._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import org.specs2.Specification import org.specs2.matcher.ValidatedMatchers -import outputs.EnrichedEvent class InputSpec extends Specification with ValidatedMatchers { def is = s2""" @@ -253,7 +253,7 @@ class InputSpec extends Specification with ValidatedMatchers { List(Output("iglu:someschema", JsonOutput("$").some)), Cache(10, 5) ).enrichment[Id] - val event = new outputs.EnrichedEvent + val event = new EnrichedEvent event.setUser_id("chuwy") // time in true_tstamp won't be found val request = enrichment.lookup(event, Nil, Nil, None) @@ -276,7 +276,7 @@ class InputSpec extends Specification with ValidatedMatchers { json"""{ "somekey": "somevalue" }""" ) - input.pull(new outputs.EnrichedEvent, Nil, List(obj), None) must beValid.like { + input.pull(new EnrichedEvent, Nil, List(obj), None) must beValid.like { case Some(context) => context must beEqualTo(Map("permissive" -> "somevalue")) case None => diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala index ebaa0c714..dc3365a1a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala +++ 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/sqlquery/SqlQueryEnrichmentSpec.scala @@ -10,14 +10,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -package sqlquery +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery import io.circe.parser._ import io.circe.literal._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.SqlQueryConf import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils import org.specs2.Specification From 718084e163077b920014c8ea29bc456a843ec14f Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 19 Aug 2020 15:54:49 +0300 Subject: [PATCH 04/38] Beam: bump Scio to 0.9.3 (close #308) --- project/Dependencies.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 3ce07ddc5..d989ca263 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -75,8 +75,8 @@ object Dependencies { val jinJava = "2.5.0" val sentry = "1.7.30" - val scio = "0.9.2" - val beam = "2.22.0" + val scio = "0.9.3" + val beam = "2.23.0" val macros = "2.1.1" val scalaTest = "3.0.8" } From 4560f78da374f99a9fbedb0a3ddbd978bba47a2a Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 29 Jul 2020 21:59:26 +0300 Subject: [PATCH 05/38] Common: get rid of placeholder schema in enrichment configurations (close #302) --- .../SingletonSpec.scala | 7 ++- .../registry/AnonIpEnrichment.scala | 2 +- .../CampaignAttributionEnrichment.scala | 1 + .../registry/CookieExtractorEnrichment.scala | 2 +- .../enrichments/registry/EnrichmentConf.scala 
| 62 ++++++++++++------- .../registry/EventFingerprintEnrichment.scala | 2 +- .../HttpHeaderExtractorEnrichment.scala | 2 +- .../registry/IpLookupsEnrichment.scala | 2 + .../registry/RefererParserEnrichment.scala | 2 +- .../registry/YauaaEnrichment.scala | 2 +- .../pii/PiiPseudonymizerEnrichment.scala | 10 +-- .../registry/EnrichmentConfigsSpec.scala | 10 +-- .../registry/YauaaEnrichmentSpec.scala | 4 +- 13 files changed, 64 insertions(+), 44 deletions(-) diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala index 2a250baa3..618369dad 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SingletonSpec.scala @@ -16,6 +16,8 @@ package com.snowplowanalytics.snowplow.enrich.beam import io.circe.literal._ +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry._ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.AnonIpConf @@ -25,6 +27,9 @@ import org.scalatest.freespec.AnyFreeSpec import com.snowplowanalytics.snowplow.enrich.beam.singleton._ class SingletonSpec extends AnyFreeSpec { + + val placeholder = SchemaKey("com.acme", "placeholder", "jsonschema", SchemaVer.Full(1, 0, 0)) + "the singleton object should" - { "make a ClientSingleton.get function available" - { "which throws if the resolver can't be parsed" in { @@ -44,7 +49,7 @@ class SingletonSpec extends AnyFreeSpec { "which builds and stores the registry" in { val reg = EnrichmentRegistrySingleton.get( - List(AnonIpConf(AnonIPv4Octets.Two, AnonIPv6Segments.Two)) + List(AnonIpConf(placeholder, AnonIPv4Octets.Two, AnonIPv6Segments.Two)) ) reg.anonIp shouldBe defined } diff --git 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala index 5dfe3dc95..0c22fd0b1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/AnonIpEnrichment.scala @@ -54,7 +54,7 @@ object AnonIpEnrichment extends ParseableEnrichment { .toEither ipv4Octets <- AnonIPv4Octets.fromInt(paramIPv4Octet) ipv6Segment <- AnonIPv6Segments.fromInt(paramIPv6Segment) - } yield AnonIpConf(ipv4Octets, ipv6Segment)).toValidatedNel + } yield AnonIpConf(schemaKey, ipv4Octets, ipv6Segment)).toValidatedNel } /** How many octets (ipv4) to anonymize */ diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala index f4f1cd874..e6d53d7d6 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CampaignAttributionEnrichment.scala @@ -61,6 +61,7 @@ object CampaignAttributionEnrichment extends ParseableEnrichment { .extract[Map[String, String]](c, "parameters", "fields", "mktClickId") .fold(_ => Map(), s => s) CampaignAttributionConf( + schemaKey, medium, source, term, diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala index 13721cebb..d6b9bb244 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CookieExtractorEnrichment.scala @@ -43,7 +43,7 @@ object CookieExtractorEnrichment extends ParseableEnrichment { (for { _ <- isParseable(config, schemaKey) cookieNames <- CirceUtils.extract[List[String]](config, "parameters", "cookies").toEither - } yield CookieExtractorConf(cookieNames)).toValidatedNel + } yield CookieExtractorConf(schemaKey, cookieNames)).toValidatedNel } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala index e4f6dab0d..9e8743ce9 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala @@ -19,7 +19,7 @@ import cats.data.EitherT import org.joda.money.CurrencyUnit -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} +import com.snowplowanalytics.iglu.core.SchemaKey import com.snowplowanalytics.forex.CreateForex import com.snowplowanalytics.forex.model.AccountType @@ -35,13 +35,9 @@ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquery.{CreateSqlQueryEnrichment, Rdbms, SqlQueryEnrichment} sealed trait EnrichmentConf { - def schemaKey: SchemaKey = - SchemaKey( - "com.acme", - "placeholder", - "jsonschema", - SchemaVer.Full(1, 0, 0) - ) + + /** Iglu 
schema key to identify the enrichment in bad row, some enrichments don't use it */ + def schemaKey: SchemaKey /** * List of files, such as local DBs that need to be downloaded and distributed across workers @@ -53,7 +49,7 @@ sealed trait EnrichmentConf { object EnrichmentConf { final case class ApiRequestConf( - override val schemaKey: SchemaKey, + schemaKey: SchemaKey, inputs: List[apirequest.Input], api: HttpApi, outputs: List[apirequest.Output], @@ -64,6 +60,7 @@ object EnrichmentConf { } final case class PiiPseudonymizerConf( + schemaKey: SchemaKey, fieldList: List[pii.PiiField], emitIdentificationEvent: Boolean, strategy: pii.PiiStrategy @@ -73,7 +70,7 @@ object EnrichmentConf { } final case class SqlQueryConf( - override val schemaKey: SchemaKey, + schemaKey: SchemaKey, inputs: List[sqlquery.Input], db: Rdbms, query: SqlQueryEnrichment.Query, @@ -84,12 +81,16 @@ object EnrichmentConf { SqlQueryEnrichment[F](this) } - final case class AnonIpConf(octets: AnonIPv4Octets.AnonIPv4Octets, segments: AnonIPv6Segments.AnonIPv6Segments) extends EnrichmentConf { - override val filesToCache: List[(URI, String)] = Nil + final case class AnonIpConf( + schemaKey: SchemaKey, + octets: AnonIPv4Octets.AnonIPv4Octets, + segments: AnonIPv6Segments.AnonIPv6Segments + ) extends EnrichmentConf { def enrichment: AnonIpEnrichment = AnonIpEnrichment(octets, segments) } final case class CampaignAttributionConf( + schemaKey: SchemaKey, mediumParameters: List[String], sourceParameters: List[String], termParameters: List[String], @@ -108,12 +109,15 @@ object EnrichmentConf { ) } - final case class CookieExtractorConf(cookieNames: List[String]) extends EnrichmentConf { + final case class CookieExtractorConf( + schemaKey: SchemaKey, + cookieNames: List[String] + ) extends EnrichmentConf { def enrichment: CookieExtractorEnrichment = CookieExtractorEnrichment(cookieNames) } final case class CurrencyConversionConf( - override val schemaKey: SchemaKey, + schemaKey: SchemaKey, accountType: 
AccountType, apiKey: String, baseCurrency: CurrencyUnit @@ -122,17 +126,24 @@ object EnrichmentConf { CurrencyConversionEnrichment[F](this) } - final case class EventFingerprintConf(algorithm: String => String, excludedParameters: List[String]) extends EnrichmentConf { + final case class EventFingerprintConf( + schemaKey: SchemaKey, + algorithm: String => String, + excludedParameters: List[String] + ) extends EnrichmentConf { def enrichment: EventFingerprintEnrichment = EventFingerprintEnrichment(algorithm, excludedParameters) } - final case class HttpHeaderExtractorConf(headersPattern: String) extends EnrichmentConf { + final case class HttpHeaderExtractorConf( + schemaKey: SchemaKey, + headersPattern: String + ) extends EnrichmentConf { def enrichment: HttpHeaderExtractorEnrichment = HttpHeaderExtractorEnrichment(headersPattern) } final case class IabConf( - override val schemaKey: SchemaKey, + schemaKey: SchemaKey, ipFile: (URI, String), excludeUaFile: (URI, String), includeUaFile: (URI, String) @@ -143,6 +154,7 @@ object EnrichmentConf { } final case class IpLookupsConf( + schemaKey: SchemaKey, geoFile: Option[(URI, String)], ispFile: Option[(URI, String)], domainFile: Option[(URI, String)], @@ -154,28 +166,32 @@ object EnrichmentConf { IpLookupsEnrichment[F](this) } - final case class JavascriptScriptConf(override val schemaKey: SchemaKey, rawFunction: String) extends EnrichmentConf { + final case class JavascriptScriptConf(schemaKey: SchemaKey, rawFunction: String) extends EnrichmentConf { def enrichment: JavascriptScriptEnrichment = JavascriptScriptEnrichment(schemaKey, rawFunction) } - final case class RefererParserConf(refererDatabase: (URI, String), internalDomains: List[String]) extends EnrichmentConf { + final case class RefererParserConf( + schemaKey: SchemaKey, + refererDatabase: (URI, String), + internalDomains: List[String] + ) extends EnrichmentConf { override val filesToCache: List[(URI, String)] = List(refererDatabase) def enrichment[F[_]: Monad: 
CreateParser]: EitherT[F, String, RefererParserEnrichment] = RefererParserEnrichment[F](this) } - final case class UaParserConf(override val schemaKey: SchemaKey, uaDatabase: Option[(URI, String)]) extends EnrichmentConf { + final case class UaParserConf(schemaKey: SchemaKey, uaDatabase: Option[(URI, String)]) extends EnrichmentConf { override val filesToCache: List[(URI, String)] = List(uaDatabase).flatten def enrichment[F[_]: Monad: CreateUaParser]: EitherT[F, String, UaParserEnrichment] = UaParserEnrichment[F](this) } - final case class UserAgentUtilsConf(override val schemaKey: SchemaKey) extends EnrichmentConf { + final case class UserAgentUtilsConf(schemaKey: SchemaKey) extends EnrichmentConf { def enrichment: UserAgentUtilsEnrichment = UserAgentUtilsEnrichment(schemaKey) } final case class WeatherConf( - override val schemaKey: SchemaKey, + schemaKey: SchemaKey, apiHost: String, apiKey: String, timeout: Int, @@ -186,7 +202,7 @@ object EnrichmentConf { WeatherEnrichment[F](this) } - final case class YauaaConf(cacheSize: Option[Int]) extends EnrichmentConf { + final case class YauaaConf(schemaKey: SchemaKey, cacheSize: Option[Int]) extends EnrichmentConf { def enrichment: YauaaEnrichment = YauaaEnrichment(cacheSize) } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala index 607012426..91696c001 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala @@ -57,7 +57,7 @@ object EventFingerprintEnrichment extends ParseableEnrichment { ).mapN((_, _)).toEither algorithm <- getAlgorithm(paramsAndAlgo._2) .leftMap(e => 
NonEmptyList.one(e)) - } yield EventFingerprintConf(algorithm, paramsAndAlgo._1)).toValidated + } yield EventFingerprintConf(schemaKey, algorithm, paramsAndAlgo._1)).toValidated /** * Look up the fingerprinting algorithm by name diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala index 64d934ebd..c7e6060e0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/HttpHeaderExtractorEnrichment.scala @@ -48,7 +48,7 @@ object HttpHeaderExtractorEnrichment extends ParseableEnrichment { (for { _ <- isParseable(config, schemaKey) headersPattern <- CirceUtils.extract[String](config, "parameters", "headersPattern").toEither - } yield HttpHeaderExtractorConf(headersPattern)).toValidatedNel + } yield HttpHeaderExtractorConf(schemaKey, headersPattern)).toValidatedNel } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala index 956221f55..bfab38a0c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/IpLookupsEnrichment.scala @@ -57,6 +57,7 @@ object IpLookupsEnrichment extends ParseableEnrichment { getArgumentFromName(c, "connectionType").sequence ).mapN { (geo, isp, domain, connection) => IpLookupsConf( + schemaKey, file(geo, localMode), file(isp, localMode), file(domain, 
localMode), @@ -107,6 +108,7 @@ object IpLookupsEnrichment extends ParseableEnrichment { lruCacheSize = 20000 ) .map(i => IpLookupsEnrichment(i)) + } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala index 5bc29aedd..4be2235b2 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/RefererParserEnrichment.scala @@ -56,7 +56,7 @@ object RefererParserEnrichment extends ParseableEnrichment { (uri, db, domains) }.toEither source <- getDatabaseUri(conf._1, conf._2).leftMap(NonEmptyList.one) - } yield RefererParserConf(file(source, conf._2, localFile, localMode), conf._3)).toValidated + } yield RefererParserConf(schemaKey, file(source, conf._2, localFile, localMode), conf._3)).toValidated private def file( uri: URI, diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala index 9f0b9de36..1a3efc8c0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala @@ -54,7 +54,7 @@ object YauaaEnrichment extends ParseableEnrichment { (for { _ <- isParseable(c, schemaKey) cacheSize <- CirceUtils.extract[Option[Int]](c, "parameters", "cacheSize").toEither - } yield YauaaConf(cacheSize)).toValidatedNel + } yield YauaaConf(schemaKey, cacheSize)).toValidatedNel /** Helper to decapitalize a string. 
Used for the names of the fields returned in the context. */ def decapitalize(s: String): String = diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index 474922111..9c5aaa2e6 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -58,7 +58,7 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { localMode: Boolean = false ): ValidatedNel[String, PiiPseudonymizerConf] = { for { - conf <- matchesSchema(config, schemaKey) + conf <- isParseable(config, schemaKey) emitIdentificationEvent = CirceUtils .extract[Boolean](conf, "emitEvent") .toOption @@ -70,7 +70,7 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { .extract[PiiStrategyPseudonymize](config, "parameters", "strategy") .toEither piiFieldList <- extractFields(piiFields) - } yield PiiPseudonymizerConf(piiFieldList, emitIdentificationEvent, piiStrategy) + } yield PiiPseudonymizerConf(schemaKey, piiFieldList, emitIdentificationEvent, piiStrategy) }.toValidatedNel private[pii] def getHashFunction(strategyFunction: String): Either[String, DigestFunction] = @@ -132,12 +132,6 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { .get(fieldName) .map(_.asRight) .getOrElse(s"The specified json field $fieldName is not supported".asLeft) - - private def matchesSchema(config: Json, schemaKey: SchemaKey): Either[String, Json] = - if (supportedSchema.matches(schemaKey)) - config.asRight - else - s"Schema key $schemaKey is not supported. 
A '${supportedSchema.name}' enrichment must have schema '$supportedSchema'.".asLeft } /** diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala index 49460c360..7dac5ca0f 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EnrichmentConfigsSpec.scala @@ -47,7 +47,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 1) ) val result = AnonIpEnrichment.parse(ipAnonJson, schemaKey) - result must beValid(AnonIpConf(AnonIPv4Octets(2), AnonIPv6Segments(3))) + result must beValid(AnonIpConf(schemaKey, AnonIPv4Octets(2), AnonIPv6Segments(3))) } "successfully construct an AnonIpEnrichment case class with default value for IPv6" in { @@ -64,7 +64,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 0) ) val result = AnonIpEnrichment.parse(ipAnonJson, schemaKey) - result must beValid(AnonIpConf(AnonIPv4Octets(2), AnonIPv6Segments(2))) + result must beValid(AnonIpConf(schemaKey, AnonIPv4Octets(2), AnonIPv6Segments(2))) } } @@ -90,6 +90,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(2, 0, 0) ) val expected = IpLookupsConf( + schemaKey, Some( ( new URI( @@ -136,6 +137,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(2, 0, 0) ) val expected = RefererParserConf( + schemaKey, ( new URI( "http://snowplow-hosted-assets.s3.amazonaws.com/third-party/referer/referer.json" @@ -146,7 +148,6 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da ) val result = 
RefererParserEnrichment.parse(refererParserJson, schemaKey, false) result must beValid(expected) - } } @@ -179,6 +180,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 0) ) val expected = CampaignAttributionConf( + schemaKey, List("utm_medium", "medium"), List("utm_source", "source"), List("utm_term"), @@ -336,7 +338,7 @@ class EnrichmentConfigsSpec extends Specification with ValidatedMatchers with Da SchemaVer.Full(1, 0, 0) ) val result = CookieExtractorEnrichment.parse(cookieExtractorEnrichmentJson, schemaKey) - result must beValid(CookieExtractorConf(List("foo", "bar"))) + result must beValid(CookieExtractorConf(schemaKey, List("foo", "bar"))) } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala index 50089a985..0e6f0e4af 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala @@ -246,7 +246,7 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { } }""").toOption.get - val expected = YauaaConf(Some(cacheSize)) + val expected = YauaaConf(schemaKey, Some(cacheSize)) val actual = YauaaEnrichment.parse(yauaaConfigJson, schemaKey) actual must beValid(expected) } @@ -256,7 +256,7 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { "enabled": true }""").toOption.get - val expected = YauaaConf(None) + val expected = YauaaConf(schemaKey, None) val actual = YauaaEnrichment.parse(yauaaConfigJson, schemaKey) actual must beValid(expected) } From bdde15cb3f23af19c404c61ec0496d4f311b96fe Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Sat, 22 Aug 2020 22:53:41 
+0300 Subject: [PATCH 06/38] Common: fix flaky ThriftLoader test (close #306) --- .../loaders/ThriftLoaderSpec.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala index cc67ccd9c..4a8c0e81c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala @@ -175,8 +175,14 @@ class ThriftLoaderSpec extends Specification with DataTables with ValidatedMatch canonicalEvent must beValid(expected.some) } - val msg = - "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" + val violation1byte: FailureDetails.CPFormatViolationMessage = + FailureDetails.CPFormatViolationMessage.Fallback( + "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" + ) + val violation2bytes: FailureDetails.CPFormatViolationMessage = + FailureDetails.CPFormatViolationMessage.Fallback( + "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. 
Please check your server logs.)" + ) // A bit of fun: the chances of generating a valid Thrift CollectorPayload at random are // so low that we can just use ScalaCheck here @@ -191,7 +197,7 @@ class ThriftLoaderSpec extends Specification with DataTables with ValidatedMatch ), List() ) => - f must_== FailureDetails.CPFormatViolationMessage.Fallback(msg) + (f must beEqualTo(violation1byte)) or (f must beEqualTo(violation2bytes)) } } From 453ead414e118583cfaa117b98d8c3c7cf65811e Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Thu, 17 Sep 2020 14:57:01 +0300 Subject: [PATCH 07/38] Common: switch to HostName.asInetAddress to validate IP addresses (close #355) --- .../common/utils/ConversionUtils.scala | 2 +- .../enrichments/EnrichmentManagerSpec.scala | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala index c114fdf89..454807efe 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala @@ -476,5 +476,5 @@ object ConversionUtils { /** Extract valid IP (v4 or v6) address from a string */ def extractInetAddress(arg: String): Option[InetAddress] = - Either.catchNonFatal(new HostName(arg).toInetAddress).toOption + Option(new HostName(arg).asInetAddress) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index eb152f316..bbf15578c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -329,7 +329,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getIabContext(input, iabEnrichment) must beRight(None) } - "return None if user_ipaddress in invalid" >> { + "return None if user_ipaddress is invalid" >> { val input = new EnrichedEvent() input.setUser_ipaddress("invalid") input.setUseragent("Firefox") @@ -337,6 +337,14 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getIabContext(input, iabEnrichment) must beRight(None) } + "return None if user_ipaddress is hostname (don't try to resovle it)" >> { + val input = new EnrichedEvent() + input.setUser_ipaddress("localhost") + input.setUseragent("Firefox") + input.setDerived_tstamp("2010-06-30 01:20:01.000") + EnrichmentManager.getIabContext(input, iabEnrichment) must beRight(None) + } + "return Some if all arguments are valid" >> { val input = new EnrichedEvent() input.setUser_ipaddress("127.0.0.1") From cb98ad15957d2e2153a9eb4a7ea2158841171d5b Mon Sep 17 00:00:00 2001 From: Oguzhan Unlu Date: Tue, 8 Sep 2020 21:31:22 +0300 Subject: [PATCH 08/38] Beam: use test Maxmind databases (close #269) --- .../resources/beam-enrich-geolite2-city.mmdb | Bin 0 -> 1467 bytes .../enrichments/IpLookupsEnrichmentSpec.scala | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 modules/beam/src/test/resources/beam-enrich-geolite2-city.mmdb diff --git a/modules/beam/src/test/resources/beam-enrich-geolite2-city.mmdb b/modules/beam/src/test/resources/beam-enrich-geolite2-city.mmdb new file mode 100644 index 0000000000000000000000000000000000000000..9dac0f6a2a918385b2969dd938ee4ec4840a237d GIT binary patch literal 1467 zcmZ9JS#wlH7>3`3U=TMpF|G`Y5EKxjxGRuMmYK{zkSIIHo^v{r#+mN%ERn&5T~PLY zk!>IsmfrFMpeQaV%imxr=E5ugfkn?d)uD9i)KkxSzwYmR-2*fMZvxFgJ1_xw3}^vH zfr-E*;1PN&X+!%w>U^@3$0<*^*C&CeoKK-lC8v?off|~AK4fw3)}$u 
zndE@uKpxlutYWg78~`F<4IqJ!nXDxT$xp~n$#vvOQc<2rf9%WD&!Zg8aj!cB*2LCFM;j!s!JLJdXKBYBCmtu9lpkXM0glp1-R9G~CHe}7|XU&Yn?S>Na(6-e#N+Ca7_jTU9; zso+lg-tO+YM}Krm-IGoVC8c|Plju0Cj-^^_UTVgj_GMQGRpcq{$s{xtDYTbpWpq<( zpNfpIxOEqXJdED0U-m5eW9yXgLE;rmG_{HhmYWd4%J$69v zr(aWBSLPZM^yQ?|Q7pq>e)}gcw&#@&miVUp_ojl*=q;|+! F+XI;bS#|&b literal 0 HcmV?d00001 diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala index f0144a69a..61dd11113 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/IpLookupsEnrichmentSpec.scala @@ -57,7 +57,7 @@ class IpLookupsEnrichmentSpec extends PipelineSpec { val url = "http://snowplow-hosted-assets.s3.amazonaws.com/third-party/maxmind/GeoLite2-City.mmdb" val localFile = "./ip_geo" - SpecHelpers.downloadLocalEnrichmentFile(url, localFile) + SpecHelpers.copyLocalEnrichmentFile("/beam-enrich-geolite2-city.mmdb", localFile) JobTest[Enrich.type] .args( From 72b4138722d578c315b356a15a95c6867b4e05e9 Mon Sep 17 00:00:00 2001 From: Oguzhan Unlu Date: Wed, 16 Sep 2020 15:17:09 +0300 Subject: [PATCH 09/38] Common: use test Maxmind databases (close #350) --- .../enrichments/registry/GeoIP2-City.mmdb | Bin 19403 -> 1462 bytes .../enrichments/registry/GeoIP2-ISP.mmdb | Bin 75113 -> 2507 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/modules/common/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb b/modules/common/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb index ab8b82d2df33b110a891452132887384cfce43dc..73f81f2fb2646298719ea658d8809212c40461e4 100644 GIT binary patch literal 1462 
zcmZ9J$#axN7{(t$Ajl#hh#+W276lQI;ErgLNz5z+0vV7^KYG5eGt)TJJ$y@G;)3jp zE1;mbC8Y*wu$d-;F~z!N|VFb$Xpi~_CX zBwz|Kncha)lKns8e5w;0P6uWHbAU%(I1`vfpG`jI@@A8{96nCYbEWw}FMR>yCFLvNYsy*j9C@DnhWwViKwc!j zBW=j0gmifkM)?`FjRC;5S6azJ^BHKBd(0bRNt$ohi zRNURs(cC?sQx&CCgJr2pSAPGiUmWaLI#}(S>iykzy$I{xkO`|I_Nro^3B6$%HhPV< zJX}@!|LSPIn5atN$(r%Ytu3DTXL5faVo?;4^x}F==KG|{4s@(a+xO20GV()JYh1qZ zm^%tGic@ZS$_%Bl103d5ZG8LIY{h<>M3khxm0gt6_N&-;wO`=+Xg-v_2?H-uBXVs1 Ezxgy*R{#J2 literal 19403 zcmZ`<349b)(tq7OprWVP9_N<<^bd{4oQ;?Br{_G9SJa0|n&47V{1U>L}tGsH0rVz`}QFvAdrp$x+qoD9PmMljsLFp^;u z!)S&v40kfz#W0p(9K+oV_b}Yc@Dqmn8183yfZ;)ghZr7ac!c3mhVcyX3<(U03`q>h z3@HpQhExVOLmERmLk7bHhD?Tu3|S11!9y_j+1$!u$Ysc5n8YxdVG6_J3{x4VF-&Ke z!SDn_KEsm?9wEdVLd*m_B}4%e3mFWCSp?EaM3E51O!P*Hv&n(P9EK8xQU;$8{^-au zCe9Ti5G4kMDCc{I80Im|XIMZ$Pp@G1g+lx^O8glS9hZf8ni+l$IoAqN$*rpB$ZBS< z5#kx{dp6p)NQmd6t;IqtiMEz9-!g{fLabo+mC+%qxNkMX8X=yizAIkhk?SB*gAnVv zwIRy4QHV`U+|00rVJpKnhV2Aof*nHaWa6$Uu~vxPOx(k;H`=#Pi2c!49Se9shOQJ`m!UO#BtYhYY`t4*7`h0=s<7eZOV+U3AFrdB`UO*E}Y~A9=_n z$oVDreJaFfQHB;FTBEHt9?~vE2lHKK=!_2eT!=q0@y}7>UzqQ&3}1+z+0nkg3GsJk z_=@4{Xx}$N{3F`>rx5?*A^&Fh55u<%-!XhoKoj?a5dZCJDTrnGj}AqR5;YP@aR{Xw z_jQlF?9`A^9b3guo-m2t^UVj1P5J<)H(3)hmWS=BXVR$^|E_kdmQz+8``9hh&L!O8kVj9a6 zLo5KpETI%KgF#>^joc@nR~*AI8w^W?G6%3wC?$ZPP)Y%1Lh&(!KjtoTnHV6j%u^1A z`9cYC-@F)x1x&0Uu&ne`FjNcWXMmpzS?pse_JrB80*&&oofUQE= z%tN*i+yKR-t+z33C$Jv06AZhBvWxp_WBT?maj%^y%$cquB8hZBDD^z@Aj2Vs7Z?sR zyvT5bfcP4Oax~gH#_Q;~P)?x0e@7@MAsOc*G;r$=41XlBwEYwetwQ1ar?gPtO{QXP%+Sunt=zf{hJOmB6Y!-_J_r0oD1YL< zKNH-X%B{aLd_mB!C%66vhOdQU#yZY_{Y=Te;US#=`oo^m2mb|z{|e>bfbWFzA0~cF za0`p8e9y!m2+UbQ!V{_{RE3Btnz)&HPu0Q1ZUnbhbL$E)j1cOTfLnxm72tZI_5k!0 z>eW2t8fIX7s@F2{Is$Xz)LvldBh(wXuQ!2dPxVIh-7M6;+;K$NsM5rSHO_D~ zs9At~p*{wfBGha^o=|i6*0}@&*lE>COq@($u0Qp0FiaQfRPLJ=(>H^OPY{?3Pkj;$ zMM8xvYN1eP@{j@o%a;r$&LSAd^Pm=kVUAF}+{a!p@DjI57)qJJB>KVdv{1_c^MyJW zP%hK}j|>u6W(_fM9)UG+3&8MGp;mC;LIVAHZvBkmDFVw9KL^7ip;iKFgj&Tzc#T;P zdWMP55?E$^4h&0$x|sWx5LowI#>C|@#Fbz;EYwwi9YS3V*eKLBfOSGO1za1$u%7R- 
zfxxoaCNOLj>Spd^U$P#%jfvX{%xI(T1jBxz?gH!)YAp}hO%Ml{k}kZLVIP5I$T~0_ z6zT!)t0%B#`VbReAh4ePA{fpI^$6f4p(5z0$Ax;7M;?nAiTo!Ydy=5{d2XEo!)c*5 za;u5Jy7d_*o+TJ`6}P~rz9!TQfLDZik%=!8SnDXv#8(L{3%(A9H--8J_x*yvQu-|> zzD;1=1^G{F7V3Kdtg-h2HwZO?yL=(kX22z(egODbsJ{gKTBw}=)DP({R?zu~Z~YsB zL8e`P3x-dG`aACXJ;Cj!@BV>_emcZhd62g5 z1jC<&`Z>4$L|`%eg^7P9uq68uYHC9L8~DBx>fZtX7V1}ke+cz!X84A{T3r8R;=c$6 z_uiamIDk|3#}XAN}+XU;uQpDp4QkWv>rq> zW0rOe7_JjqPiDB5;3o5)*E6vffjLK7Z!nA%8idzw7ut=0TZPsa&`)SLF~iLS=0ec= zGw~JzQ$OuCFz7-XzjaoDYU788A6-JeA5Z62R*^Wd;&{q z4-;oHaQ?IU4BV_lXtMxbp%w9vVuE4p<=Si}&LJ=}iB<}RGNJjn&re`RMQtt<0|b`W zm4l&1Xd%GQgf2h0_&cf|Fkz_h{%81 zdqR7M8QvwZlt%tTLeU~I49I_u$A$JwKwqK#3he(B+J}J4Li;t~6QO+s_^r@>!|Wdu zm^&%$cTD8`XYL!dKY*c4XnzEJCbUaD*&eEYYEJ!JFZ6yN1eU6?*;TgcxEHS}x&8=DrjHOO{k7x?_mxXk`mW24JFaOyD7z1ZMT?$YSDS z1lAnopfyQ2a=9;$z>Ecs$xNI=U@=TZYqM}n11uDd=>WWzn*k^kjwb*f;m8L-%_j*g zsb?~=fWXq-K&x0dW^rE;f#q&qCe9|Xo?e1hP&i5fWy0a(A)NoL^)Q!-0fJjOs5;8g znlBt7?wiMb)!bUZP!U7?DOz|f_cK6^a6AR56pp6>Q0(UfW`uTBF_CXJkb{on84#Zn zj%T@V5rL)HVkRyjuoPQ{*7L%#9DrASD|pCC0*h}o6W0(}-ntg84Z^XG`_>bf1*>Bt z6E_i9588rOlW=SW92AaifIY&o9k5F{b^xH?P6EpdYMHp3;0}I*V=r2D!m*G0_7n7$ z0Rrp?80rZuIS-+QN^&URFY|zbuZ#pEsJjwo1@y-fW_u>K)EsE3IoncmxTU71rDjcXS^21h zZ1e|08IkpoW6hP3^N|aYhUA6CqY^Vy+Fz(^KXJ@e;1BuA0~IZJj`NS3iWiauc**p3SrW-G1RYvUg(Ed>Dm4Y zPpH%w1p)-jXsO=YQvG5}&B2!HCOxEIhGf$t&qq#1&O{m`ry<=@$k^Ci9chT11Fq6n zq+DKhv~BCT)DnMzr`+rJwNCUD6odkvf{Ofrr_dV;))uCgpr<@kXcUikV?E=~UE=o@ zG3mnBL=oD5 z(LMI-^$982Y>>2+fX6q-vO$V$708Kj(UsQ&z2klhZSz`c*0 zG|@veE?nn<@|lsnP#DTKFEZ6_KfWG=GJ>I*h2DAIAT$oD4mmgZ2j14}6O%Jtdf!&* zFv-565|6JC&rNg7O9&m1k4}q6%aAQY4-J|wS;zj~1J=n2>3ZK5Ict-RV7Z=P1m*;b zy#WJa%upj6Aq89`assj}i!|wxHIb7L?R4{E2v(`D$hf@kLhIQhZO<<2tf_6&oh|yf zg!mlt%{$mE$?-w2;lurtZ~+$$4*E01kqraJaJN+NZmDj>uO3PcSIH59P-*+16W%?v3VbPj*Yq2I=(G7hvd) zmCMM)G-i(PinDp=xbOvQq2+~4E8nN;%i zz&PpLV;;QN|K8936jzmHhQFM0f5Dt$e@Q7%<;drAA!(8+X?l_oDi0PEbA<7^Tt)!C zT!G08##w3wS0^v6MM0vX%nNSzCy2JAcZY zShCr-hxGb*2=W;$pBLUB!*NmgrE)2Yo+yKI1#C4r(O+5~uGb5P#f~sn#K3-g^}g+x 
z$n?Zw1dF1A;@1wz2;xpG_Q+O9#*bl(VQP2h`cv(j_O&%uL%PgJ9Yy2kuprMhFNf20 zuD^g@-8rb4L(?rs--n$NGkHdn6N|k*nU05j1l!x$&EplacBzyolI;^ z`ify+*%nyxmwP_y@$g(ZN%^@4Ue0}UQCw9zMW9qqV5Zj>S2yf6>GaYz1AF(u)5nfw z=+no%i3N%&pK8}N5rm9 z{z)C5sN_pi#|a3LC7!s=NWjVC?!I&EU2%0zF*he*lp$`R42mY=R_GrMID;=Gl~sox!*@~x=l*iyd2i25GPYtX1u&xV3cW^Oq5@{Ge$Cx@dWaxzh#@~M@>1C z4TwJi;~LbKe-Z0M9+k9R&>+OZs#cF8tZRJaTuk$o_B z+p324rt^Bpi9s?l?v}9&9xJO=L`J9Mm`$MjNC;lw<4u7$h~egCh8Ge!7TP3=4Wlx8 z`leRN@JkeTd_qL*8Lu=^=`=PN8swZNY+} z5+=TqC4wJJ9nY6_&TiM(#d$Rw21-SNEO%L{sn|Zu!GB2Qme-hb*$6tjHDQ9~FYME~ ze{*}|5!eS!X`i*^hH?`F*N1(&Va=k>WD)K)p9>MDFFn|}@4abpRhb;*aqE0}f0(G- zR>&kPFd6H|{CgK<$}~&PRK%D0MiJKz^4cS_u?trrqloHFT+Rz0D3YlMJCBvJ1OFLF<2rOPxmo43p$?u z64JUeOc!$p10JIU_H%asLK+UgvjQ$j4Ie6jgxRzcY?9lfYV27L(aMmcq$M{+PSLK2 zqBa6y>zRF*mmF(bywN7#qm?9;TmkV<@}+6eG$RlSdQg~15kD~v8h6^pi!HtUle~0+ z%c=}hw=7S<8;q-qJ8hc@=jv+cmT2Zt7gcfIg%lCyd&?IZ0dpgkoD=dcGz&RzFeqnY zA{BDj^fqx-gxw$Nx5K>q%jWtm(u4}G0UGm$S5|R*4%x-6?Doj0nfFU^^(!)s^Se&R zRau+ZsUf7LVrY6rlPT~Jdu?Hx#nG&4T-DH}adkt!vQJ&cnTU4CyxY9&@`}lxK+rQEM!f2MQ{bW7ZLIE##2_7N$d6fZEP$Fau5Rdd zo2qvPTcE1j+>g85I56S8Z?e3UfhdJ&*vuL$F>}&JrF*6{^7>{y~NzXY@(VN zHKa^*?)GO^Az8L9uD3~>Jp16v69sN5?eK#KA81}N6!o#WTCs~Zpk@#IYVC2uC+_ku;=;8#Hz;@Ml4d8>Y)F@xCJ$|7(TlR?f6q{w92ws2DK1WF&aQ^=QjgA@#wAbu(!~2N2b#Z1XUII7da-MV37DK|POH zgfo*WndLJ&HlMRCkFI={R@vo34_Pi`h0qCJUvLf-x@M$Jq0wJofI^8=S)sgKY%Yz0 zVy^@%-_JqF$xK`bvPCI?ep$A}p9mi5WA5C7;&A=k5NU`5penghu8!HD^n3uJXhwJJ z=T0^+#!5#?Eo7P=D!8o2uXl*OsEF91eYF zsC=93a6MHjrI#L>{-Y=CmQRrT5(uyd21|`> zLfl4$r>~gOdUk31wvF<41CIR~y6$4L&Ui8oB~r^F%xzt+fh2-3;_TUObAldAi(s2P zY&K+><;t5Q6Ag||ea`FpzznXu0x*#qO9CB(;0)vlRS7s*SlT6n9!kYnEJRv|nUOs> zsFZTcZHL@CwKgrM2}Qu{is-48B(01imfUsD>r_*`m@}_*PoGsEfBzE)!uNR+hH8OPwEwK|VSLLXjuep>#Df^y7TFW7T2kXPdGlnlb5L7xA)6{y~-N z?ZN))(Up)a0f*w!qudFh5}2N4OEnvE1Mlo5k2Nfc85ePEj1rmVASOT2UBb0I3o<4m z52H}-s?ai8FKlmHx(oAw<5` zozo?R3W_~A3&AYje%3Z8BVK8enwtl9G9eV1=h1W-l5b;D6Oapvu*6AuIRfzl>k1(g zNdd~^Rm|D0&A|=wI9OJ1$N|mEt*wE({m43~+P?86o51d9PPQhdE2ZHksC`RNL_2TD 
zguqr{bga4q0_U26(UluM8<=Bwv0zeiL%5>#f~&bGkI9Yb+KTWvVSu>y2=C&4J_p^6 zd{t?kgSy+c?Yvxxv37|QpIfo#m0pXbyT_LwKD0A$Yh0JaZk$j1X41C#hX2}49IS=J z@n+eR&IgUwdmmSPC|E9i1m4yA18Ly+5ZcIRFz~dNYPqn<{UQ2~3ZT?*mB}mZy&5m( zs+y~#t9;+SwrveRTIC13YM>ET4P>RENtkw@^sU}UZL(ke%~C15S)JkpcBZvvU3^V~ z_gMzPO#-h9d31$R0hRr-*s4w5A{ua4V@Vv1uHK2An=Z69tV5yCFD-1Yl2j>I0_BLV z0A-1u>qWem8MIHS-BE^iK=cgr^`k4p3(@6yl+Tr6n3s675k4%hA$~FqfYGt*#bSo! z*!$W&#d%{Ed@>F1IU4!+0tuq?%t|TjA$y@Y{_`#)$D5aCHcK$PfshQNH#%)59{$hQ zAZMm&;`B@}j`nFQC^K4mrcqwZO zIEa_62_EkPe-N_gV#nTy0#hCqqCAturaWp*xh(6Jm`%a>E@R(BR|r8?Ac-%PC+Rrm zlzK$$_MRrUx9QgDPv}h_82dg9k-Mrb zD68b5LQ``!Ox#ho21g1VOZasgAA>wx*(IOr5?`7o=BGlmzH;AZ#SCTDuOV8FSujk< z@q`M!qv8XenH=geh@cZe`g0~<=|nD<#-7N7^`OTORvg>X+B`|}%a@U4{I$5Sj1Puz z?M$zy>mH~g9g$i@{~CHlhP7(dlB=oYIL7&FV*9bp&=u>Tec2McD7CEsE2Jazq*TTz zo$amg6w2NEP2+5SmH%x9l+NzTwg%n;nD30_B`w^Wlc8Kn4s}5%B-%5yZlx;ni~+~h z^Qel*fb7z3j|?h|OIo#UDKJNw?uLJl-98`O!$EI2s;hKEb)Ufgk2E?CA z+JHf)RdmL@v!!N{HA8O6U`3)V92UuQ(i72Gl+tl}fBVM$wh4HYj7}ja!Mrq2OXExN zE=%eq*8{!MPNDY#Bep6%}^1YkWSpyz zlRpI)KRP}RS7A-acM>#6diji=9edBWp54;gc&6h#3YF71ex_iv~{Gp-<~_a9I-^StsAxH>_zryD|2B+S(Jso$j(Mo++(WfcNSF zZ<)VD?s)t6w0Zrg`&PgpZnK_Aahr!~K9@Uyy)J#|kZ0Sy;B^EKp<012rQSGL#CHbR zEWnA({X5o!HB=sc${mgwM9tE(WARa$ykaA1CCM#IPubdM+#*(Z|0x|KINfYEWes* zY^jl_61Kg9b&uI$dKEH>KBnNcE>G%D;9Uakdstc9mpXQ_NQ`DwwA?uMX!OJoPWCR&Pg zgo7DmOu3bkpSGODi#B*b13G0SZZ&E*5P}EPJHyT{z9yW z+g^C2lkJdc`P~w{!y~=>*7G*&sfQLx>rLRek{TZ zIp7ZzS=QOYHa^VhJZ%r#8Bt-oUO_x&o|`55%M9!!@je8W9&o}Y@#r~nuj)1}?RNXH z7t!&S84{=CP{ojmh?+s!ly7vYDL+bCiC(iwXU7owc-!%p^iaCHq_}HjAT%Met$9V{ z+(ZEQViD%BH^MoqOg!k4FXq?6vE{dP z4cN-zql4y3n{WxeuK%Y0n+;O98)wm9v`FlFBt+8(9eVeR37!Qw&np~V;4kf*hOZHD zu#t}s6-zzk`K6xO{y_dbBOt$0t5tv8i|+utdUvJI40%fm^NlipL9s)f^3B+%CKP(g zJu^K)BVWG9Pj?x9ch=ZZiTFw~z0e32AW6v2{p2?wGe&TyE<5#H`SDtk$*VhOdFB|p z*)Q)nQipW9vA|P`cZqsgD1h(#5KH{Nl8WI83yUYVHLPmev8;W4L;H#s+fJ`d^OkjK q(A|wZQxt+z;gdT&7sJzh_y&`|1boHuoAiKDfZ8KJ=v`>k-ugdtYHEf6 diff --git 
a/modules/common/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-ISP.mmdb b/modules/common/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-ISP.mmdb index 24e9e1eae372b22328d1b5ab901a8d83a3152289..c06ee50d04da4ae86560b4bf909485959984be6e 100644 GIT binary patch literal 2507 zcmZA1WposG7{%c`A-ENHSX=`oxDz1|MG^?UOm-*9gzQYZLP=Wc?rv0|-coljOyqVBOZXAurqeSuGkH`V-M_!y|6d-!M@lJ`{Mu%;6NONgK-EB#bG!cN8m^tg`;r{ zX5d&HhX>+#%)|*e5hvkfoPtwv8cxRA=HYx?fcaQ}g;<2e zSb__25iZ6hSc+x16qjK+F2@zP5?A4BT!U*di0jal&XFpl^-_o?jFwa_Rg$Ybn=aXY zsPRLsbfUz99KUqEpgQRwLKI{EG%h9lyn)fdxKXMnH2Bj)I39|d@G$9ce|iMXk$9AJ zG~pP3daQID&GC3bsy8-CCrM{WC;OYHNT;%M8lLWNHhrlxDbK>QQzM-#T}N}CbiQ9+ zAYCY3AzkD*o7uS-FTqRkGTee&@p4b^N4pZQ!mIHbyw)?dbG?ja(l+`Vq;I7grMIP< zXl}+^q*tU{rN^b)Xl}BEID5O<(rItA2P*dfm@&NN=+CR;o#T>K*A_`uFgC z{6N}8_)ywO_{ekLrH`dg=s!)B`i$e}(r&-`g62!c z!cX`!{(`^aZ}>a@fq!~>xB3hJ#=XfGKfQrzxZhVWn$xtvme>kgV;gLX?XW#|z>diK z-{?f{j9suRcEj%21AAgG?2Ub}FZRR!H~<4U5C`F49D+k}7!Jn~I1)$UXdHtXI2Okt z?|)-FITI(~M4W_^aSBewX*eBcVAHQUi#!|W;9ShYY|O!2oQHWh9~WTqZuv3_$c0#h z#aJR^g^Yz{-v7p8@)9h?GF*zwupF0T)6*thDPxu2tR}3HvDR;bgmq}js33&=Y12(C zj+I!2)o5dljP-=%a&0Q;zkj=yn~AvVxDhuI3&!eWaVrvZ5|Iik+Bjx=jvcF8l9yW& z$Z;c)gky)yxa~TzA~#xPI(7r6vfR?5tl~gcEV#rZe;hW z3fnQG^}$Lv8ZqO+h*{%CgBz@9EP1!BY5$+v&cD+wg%t_AHXO9-+)#CTTF~6Pxj1ab z%?dMS1>^N~R>6GB&0mz6kzcx~Fl@y_QM=B2#+})gv!cw3#RL9!z^x1wPXfJdSItrbH&O#TVtI$p8F7yz33cZBhLLZ^8 z&`;Oj(;0RZuv!IctaGh0*8%ITbAy~4g-ya424%U|Hml27 zw%}~w968Sw&a<)efeWm4p_*L;Tx^_WOZL)MPH-o1iOMe(E(4say4=8A-xa`>a$Y4| zEnH($t_8Nqd7W^*jokojmve`l+-^?a8#($5z%C7Y6P=TsAAl>on|MstEx@geafNTA z^FH8qW0}l5fIF>q7jU<=?os)@1}t|v;C^8b@PINM=Yw)SBs>f}Vq=dQXIZZG<8*R% z+~^LT6rK|H3j2hog=Y-Ru+IX|$@#pnzZhdPXM>gHrM$$sybZieyb8QR95DW}Z=>u{ zvr1+szovKdAn-a<-UQw##>$mb>@Dz>dHD`g{s9gV-vjT;`JV88u?`;qhvodxIzLjg zkAY9D^N5_E3ZDs_-7%GaZZnT7c0%|9_);lf318dHZ`frreonw~uU*mM~G;#jtHdeWpp} 
zdb(;BTRU~NnNkm~Iz(MFI-P|s_33O-unNvbbhd@7G0_4p_H;GVteY05~qty|v_9n&E;s0cIf~zxAx)cfubLmFsP`J7i{ov|B^oFaa%AFB< z6$)H^ialL@3s>ao&y>M%4Il;+Ezg2nF6ODEGcQJ4hRWLi^-DU3O9&NMpDf@?al46Yf(Y!#d)%!F%Jv6)ja zhbar;noG=wYo6&+n9_p(u@*69iMlK<4C`7-=jjEjWS*|&bgqMI1+f~gm1aEGs{a{w zjht)$$GM&bn>4}(VPm1S>kP&=n>BNtSzNl3{dqQHSHng9b6o%z@8ZH@od?(XwD{(f zWM0UWOX0eR*b3Lh#Fj#=kaCI2OA0Py%9U_kPFzu#OJRAg`k%~en6eWtYM<*mO@3RU zwd;C1x664$u?t6V?I>8T8)@AN*Dm5_Q|G#=7~4(fEd{GE`fcXIT(@fydi$ND%XKHp z4<_z{>uz%dITi!1d+9t1*L}nTI>_!9_MrSmr93D+#FSSRdl;@qXgw$EQMev6DQ2L@ z<$MCJCsjxNa|WXRIoKyq{~YTX15@{`sVjfL!1cVaAFdaa`J(WW@UlStW34Gafbz$! z^%`6Ut@XN6-Vok|>n)YPEl~f;pLBlUIt16d)_M=F_l@PE_7zeN!}Xz@9|_dHLhKWD zIRe+GihX8K!8zGa!gY*_P!TTbUxiZ64_wFLIzjvl*B93LC0t(_%SHVwWPYQ9Z{hk* zvG0W+gdYXx8h%pdDOBiaoX$Xe>H4K;xqgL<`sen+^}BUaxu}24cKs#%E&L<=Yv6`J z{bQz^rUQ?GGmzVh3S;H;(_$8Gful83s3U_tJ+QD6;xU<|f z;jU$^+Hh0<+->2mYn}Dru5Yada5prTyODJ^HsiUQz}-}_W&-uEWP7_?z}-^LRzhnV zYh%hQmKLhRP5pCsK*h#3r6b&(tkoIrF6z?NpyF&6bc4IQoYX%K>+UJ^5_$`LguX&Q zp}#==bLs{PgM`7t5Mih=OrZWb1tSb9Zjd#Kb6ExVXkt0sV~7dn!rWuwrv4QcWIQTf zqk@UTB%tCxWln~BikwpoSOfPoxTniW{VR0Q;o+XCx>>?(xaTO7`sYk=o-kinU|@PK zgnN;ki-je^Qk!x*+{=nqB{$qFIG2@Xph^j$3*4*e+z2=I&#ec)O0zYWb#Sj&Pc2U+ zoyqP^%sdp;+~*Rn!F?Wa9o*dW?yKOwKq(g*m@B$S z!(J?GfqScsT>|%|*18OC>R(|YuTa6229@5Dbv4{2@6ok3bDL7Wg8O<-;w89mAZ~?w zJ8>i2nzfnBPLs*UKsWWTFwjkEeKXvq^!pb%&etr(BE4(MX5BCRlgu`%C|5$5A*YltIV_BcTeZCzL|{EA;#l?yuye{uP{h{&Rl|_jlI$y}JA${3x6hP6CGgPe84iz^Xb6vwC=HLl z*wYh$hx*6SJs}}%V-YiuCo05*xS#{VlSJh&*)t`i;mPog`%+d`$ib7h&Qiu%SuMSOZTDcxuX7OQ>yNVs+G|E9b2l{XWb3oV3}LMx#)JZ;#e6FhCzr5&vf@KFCakt!uK>sV|} zkEe6d@^n#`uEosmHr4~4p4RFGPj74Wfv2yrJpHV*znTqzNBU=0Vh}uol_~x643#DQ zb5g1_g@^j*xfq_2#5{PYe+-!N(eRA1)>t{m3FG0JU}F>IoP;V}*k!UX1)ixKp$k0I z3`}ggVlxE3{WEQBmYlPNIq=La#;Wwv2=j#n@GR7&EaG^pYd2|B5NeCb&$tP|D?(m&5eS(`ZQh47qV%GnQ|Go7~ZY=(#W$Ct)B8=iBl zbuK*AKL$MK%ej>W%p?j07nwR|5?dJCiYj-R@v3~IVX1$f1Mpl%JOB^%&vTB_Z=>}Fs(uet3&V4?*pnYoJ)cuT_#Pc6*1>av=nT&ngc}~7 z^VO=t^A)?Cgy(DGTX?wns?{>X5+3TG=LdM$n%Ci6isSszSbXJpJg15=&rk4B|2%)f 
zBmMLIs+8Ys%J1+<|Em4x0F?fD{)Xoto2lT!|OLG z-TZSJ8%DJm){_2J({bUA!<(?NB)qAjRqaA}sej&@@Mek1@aBke@KXPH z?cP#CX`xK9&RZ6R>6W+73T6ep72&O9oc~z@Zxwi@f8J`gpgO!Y{!eBtcxxM{w~o!M z3va#uW_lZNo{iycXj2-Y+RfH#qPaAMx0!Kzn+q*iC&lwh|2Ro+YsK0a6fTVV=PZHr z&)b1@o#5?goFyBj1l}%;O^3HD(G%WoL=TPL9o6ntPu@;WvtEpiR;)L?edO#*=U{mI z$?9+5wDt~wm-^=&WD2T%jp}^+88ERS@KXQ0BV-MOcX%;Yy|kPo;T>h1)ms^O#|UFt zHwE5tiVdPQ9^MH+^+C=Lyc6M_WGwGwImgT5ap;|9T36qI>UYxliUyhiuQOeJ`t{C) zca{d4EzA+-8kot?L-kKtFdsEy##y5!t%Yi~2;RlCR?1oe?^2bYZp)>A-sSMFFwXz( zvsLh}wwY_xWo==%n$h*!2=4}XH`>fi$~*(!GdcPVvNpqe7Okt{J)5`)-gAH&{S-SF z-t&y*Jzu!M#x67|%;XQe7Ykd2t->YnQvV7mm&tiKyjQ5%mBLlUlp1`)uQAg#J+Fm# zo3*ZkSNi9~c#Yoe!VY1laHFuxz^sJy&nx}&?uPdkPWM(bd49a(k7;&0ov*-q2k|1j zeBW7X%I|{rZfo5m=e@#x@ZPWTJ^Vn>1>Of({usOu8q1VF1nCp-=BGbZMJ)}}lM@AGo*7hWjFYRrT8C64g2nQo08sBr+OSqfgl z`5;ytKKk^Xt#P$u=y`=L_(s&xu0@CiX78@5%YTb$+0N!v;>l zN9^*AQws0L@P49#BPwv5pDOklyhp9`7`&e=^SE%r#=cO&m%>-V*QhyCncot;TIwHR zhW%dn0bc5#6FVuKvME2A6esoz;M)Q3uLRYJ`d4uuyuWjve`@Le(6Szo^%uNn9H_}+4>`brCBgt9_8f%@mG2ww%R(q-mq z3120}Dl^sqzA7e0KYUf;t7a@;b?dC*l)_h2s0APO&*@S}s4LX71@%$$8aW%9ft+$@ zo{fbj2F^X^)AOHC&woBW|M^;&dHPzy*UDzLhOdpv+gfKkWwsYO7}R_cKJG|gXIU@8 z*M)O=Q7K)8Zt!(ivmUltPsMt{*IThZHr5wD9bIPD{oxy+lz{?ei2)NE4BrqrhYG`N zY`9V+iZ%a$ZxrXb9lp`TIp$XL>9^dzvG7s z2EJ*uX2Ca|D4Eg>Q_kbkH?vqr%S=~aJ^JPpQ|775e3mbTZvn9wzJJ1ekpug%(-hax59Tx(JDF1ecbGq8>e&X z`>ufRN@Mx1vd*i`Iq$nhxK`K(-*qXDb;fi?3`EH|APiMZ{;iLYsq3_OOitjG??zYxF@ae;zxy<{tAomM< z;G_OI<_$cSMYtU*f$2|4fqzm@2vGbd_O4VN9#PP@>B5rq}b0k_6utBfOp3G z4JaqY^ZgFrA2vm5==%#k>7VZ(8`JZjpWlSKicY^9evh&I)IYX%=IQsLb_+SFfBxU# z4-yZ-A0lSJPyO?^fj>g%$;KarpZdpIf1J)n@F$4c@F$6?@TZ6h@TZ9~@MnlT{8<&` z3~JlM*k20%(sJ_1b9$B)${CmmmPc)!HvWp%SqXkEk4dS5+Lzi`HCXdXDWXkKT=FA4GUYp>S(FSI`G4PMI);KjA5B~&ZPPDN} z@K2U=iZE4}CTKG_J?q>Gzq3uxfuH*4Uk(2(Vm9j7Gs`~*{<*4~Cs6-bYo?^Mb^xasm7os_vpfzPD9sV6k*(uz}>D~wb zF5(vWZvyJhQ^C#f?=D(&`foLs|26|U;J+RIJLJ66I`48iz<;-J5B&Ek#wBpN+^=Ex z2oDGk8aQ>%f;`Ofcj148cusS9)HzAve@xcn@IPTP&GI}6|5Mi53;#ZKd0KeJ#-26h 
zb>D#hdF$K{KlP8DOf!98^}hr^_0O5hE5ZTcRpB+^pzt~`>;(L82yYtD0smX@bDQ#| zvEC658Pxq3{`Z*j1^n+5EdPKw{6BZchwy)7Mldt`82(Q*-Vy8kRI~m}I4T?yK8OE! zF|%GC^=cO_|Cd_Nuax;U{NJecw>ITF_`kQ-5AgqJt&{MdQo&Ew`Lmk+0{^c?Cl$*7 zI}kVze!hQ!PVoQ9%lt=~f5HEEG1Iw${$)xj1VDsMO2CDHyOOzfTtlkO)YH@_t2Cz2>?sQH^pa(HX&YnUq1bVAkA3IQA1p3L@-#Q1V zU?A!}tJq*R8-u_Qn=(`>!vsD51x6qs{R>F{%#=o>-aEE#tO}%m0X_c(CLmz-FE9y# z$+lpM3Z@Fv4C;M_zzogIS-MpS%p?{fFw54>MqrM$=BixhZeTtF3yQJ&F2(d17|`$k z0!!pvD(LwyunYm}AFnE~LZJT9svj*j3#>+f`WH9@0qI|0osF$WfN#H3zERjzjMXoV zz?pJx7S0mR7S1s+>v^u35|7xx`5Z{UxeHuioPi4wp#GJ7Bo17R0QE1hO|h*ATw=Ri zioj)R%{}N$=?dXW;VR*312gP3>T)gWbF(`IR{sLhzku{Fuw9uu5ZGxl>$g$CF3yFs zzKJN=!g?AE>_*@grQB*@#=A{jZWr!A;7+C7CERVmd%?`*UiN$(f%}YQ%Juvg*rW0X z5O`3T57`twVFn&S;8DdME5_>gL4f)fcoBgo6|?#m*sGrVgr|jP5YWA1Mwg}ro=0H6 z%3m-jd73aOFCifP3rPQrQ(xN!^!;Z*--ZSbBJjE?uRp@TWWI^OTh@9T0o`MPL)Q7O zX7--&zVHD8hfPZ0L!0>#0v{Wz{#3=Ne~~T-d0jU% z1f+j~-_(Wr=L}2z3wB0;`WH+g@D~w5{Y9Ma-w6Ce3#iZgDDW@pZ|BPg5OmS$MUeUz z^q~H$bbf~VEN_rQ&_`zwK|c{dgSvF~K!XWppkRp3FdFa>XmBYS@Z(m4=Mm%~5KN*0 zmo*qeFis@U;Afy=91Uv`9nf$XF$=*oV|5VB5arPD3?hqQjwp>_o+yQe*D~`yH017S zcmzS}U$871{sJ_Ln`>xP3&HYqRzjmrL^VoA;^mkHbvv>h9uq8h=EvXJa~>ph*m9QUgsmr6%JLY({5GG+9P8N0W<*7HD!iW6vPiidI_$TN7>2 zggdgycWBBbZ(0e#c64??)0RYgH0?|0X$W?t)d@{E(b9R=^dSVh=W+QyxK(A5Mb9(CjT*C()ccqzz$Iv+z&9|8@ z%^#%mH3Y}gIR!y(so+F3KVf3dtsR_XHhXX~T5!58G6+u9^-W`pD>0pzffjsoT69N? 
z$%@USa~4`Gr*jKh+#%;2I^RNYF0mWIdBhe3=QD+GzyjhE1Q*hIE`r=P!F32OCRQN0 z1Ze3(a4As}!P5y|*fRCx2%Kcgb#z{b;7a021XmG<5nQd5HNsjte>a!Xinr5xw%&|Z zEeS5jMxfOYrv|}I2=WeSH3zM(q;;k#VEf=%bUu#Y*}^#nt=^Y)9y8BJs~>>Yl?{Ry z2p0+$2^Sl*?ug)47F>bgCB$V~-AfHxFGrC2*LoZH%JBp_iPrBUcoj3HW5KHtyoNDm z@}_iB^fm^0TR7JBEcg?_8;sSaw46Iwev=s?xD&w}jg!d;?m`7n|JtlU8|RLb{snJGknd*jPE*i^d;Tt_{D9!y#J>ni|AP0b%YDVxZOb9Jhjj-L ze1LcfLGHTXqZ;-hwCzXhVFVum+Ac&}UbHib$LM?xZJ#5!@=p+Z(e?}CN!2}N(5@ze z`{;ZI?RpbWqup3IFN4%SqF_Bwi)*%@*o1amg5V2aDS~?bj9SI~wm0ogEN-m(Gt6 ze2*4q#%{;_tAarnJzIn-$=Zxi6^&4pne`E>M$|;8I#B~%bQgEIUn#ZdtcOr- zS#@aDMVI}?DuA28nRNrk8Y9$@(DzbZ{bszbbT*;0sjQypIvwb`*;t|G2(?goO9M7S zNZ%QUS|ij(v9`rn*WC!Um$L&iyQ@n_gea}fBsv!}LtPN!c5|HF47&10cP#GY(0YV= z5)%>XMGQfRH*BaMLOOIplvZZ)2VGxAs6S(a5E?)XMAxs1F}4j2wpK%ghSD<{p<%>G zgoYC%(2e7jH0w4Ap;6Yl3ZXGf8IKU}htN25d(FhUyNW5H3FuBab7nn>DGL#rOw2-P z3Na0#sm09h0}$e}hGrr(Lj{~X>$+3N99%2sY^Kb290<)pXfDuw4?@&GdK_o>HxXK( z0^NsrOzBb9Ahe1pYY|#atU-?<#qu7K zv(P&9xKo)Mm~sI^8;Nre+C-G>kTcB8LMq^DIP(mhh0xiy*|T!$`7d;ytn&?ee2UP8 zEVvAzi-@g?T`X)tPpx(6l47%v^si@Sgf3^n)d*do3G!?WU4@<#i=1SeHwwFin+%*TPTg)h&?^Yt%7OITxsdcPbi1zM4urUZ zA^mbMbQeN*7wbazAarlhat@@Ddz$*kQ9=(0x-HDGKd9%!2vPr>lt+cfgvSk>%+M3c zl>UXJe?5PbbDyB+ztA%XJ!|WpQ|x(!_FLx*a!UU~FUg|*IRo+9517o*s|LL)$T|pw zDS@LjB$Rlj^FI*boGA1)z))tgQ znEJ;~rdb8&U-~;Q!HH)44Ygc}oW5pJS7u64MjGMfp_g%$?4tFkqSG0ur^=~+&K318#=?D6(6=~je;XTs z@IX#tD8hrRb1=d~iWXa&1sNs`7e*jF^1mtJ(d@Da;W5N4gvSz75FTez!sCmX;Ry)a z12?SazwqS3%*;Sj5uRoXrXwu<3v(|y7shQ$a8|~m{?RgZa}cKfIVtmm`N9HWp@Ea> zEY@O9aHZ;&AiPv5)W1TrWe6{~)(VqZScz4tTaEA<)vdLe>l9lrY$())H*vf>5k7;E z@bx>6@Mc;!BYYNdIl^ZXmm+))aWTT@s`Yuo`E*{Wp8Ec?-;W4W|2WL24KOt=|#;MFqbK)W4EUr`aF;-`}eH z6XCy1tWf?BXJ#A-|7%hha7Kvew;!}39{w+lh?j^U;v<4e@goxW??91|u_9rEk`W@* zzeqv_(!YqF|D4uI6{P;Bfch6Hk4Tm%g-FiC%#`wl2}VjYrYF8gnZim$%Kf*CDX5@f zDFIU+4=GihU_6(X&zQ~DQat4nEzNP82DQ2!#GG4OJtBO;yD zbJu?n=?X+wBhrm2ClTQ-6S)$R9>jP=dMeYIL@z{o6T=bdW1ME``XbVg&S8l3H_pfa z8ykqoAjXDhN`o=*pi+kNq7N3Vfq&Y95y~8i$S7rwwy`mYQ2(4&8fP#lFN>d-B5M$l 
z{zWFyTB0tKg(-r5{}-8t$aK}s5S%SM8xiVXWEKW-B{)I}B6H}R>wH`0I2}ahso8u) z7SLLR$U^gNnzIs&M-~?|>2aKqrNZgLGDNKYMOGlPQj=IEtTu4!1}#NI`WIQJ*m_}u zu(6mK*@Va$9B2z7XAL2??s8ta?_zl_t{ut*4bY6tWg~l4hlikU@ z*uc!?VMMkvMyq-5U0F1Jg`u7`Y9R+u8bVMDE~vev8PRHsvnU zb8rHYd(`D#8`C4=;F9}j4@~W{SuUV%= zI3oRvyn%?-KYr|wQ2!#wG~IU)Ii&J;h4*Zi_YwJknV%qXSg{XB{niR7C5ULqI>W{}B%@dq(U^%v<2EIMXi`q;Uo_2G=Mkm;ap-7P z$Q28Qv_-TOQ_3P*TCp+)^g9L4bfe{j@&-;pMLO#tTFItVR!SA2Dx%eFtU97KtW^_H z>0ea(7p-Gr(YnBp$r@oY2KJS+0SD@bXhWj2S~o(pv0~J}Xd6VE%98$>>FW6}s^`CG zOGI0llp#F2qOIXzhz^e^FQuJ{nY#8Gp#!2Ft+SJ+q(dOuMJZho?PgQDBih4SJrV6? ztSI%bu!4I2i|YB$S;79C%K}6P5K|EyNQ_bILBe1`Y8V}g=rG%CIHDso!boA1jg7{T z{npZV)X{NjHeQ$@OcW*wlZ7dTGdVgRQ8hsHxN3nB@vFLe-o-ZeNk2CCr!bQTx z!WLnxApJAfr|&%W$%yIJMC1&L`?`WMxy7`+|QJ4~G^xKo)WTmSBYIz_2}QOQg6K1A=Q z^%SCeG_3S5`k<_bgolMkghvg`2#+E9xSTpThvwPrNjPBKr0f;;2~P{pAo{H8p0k}}y4;gF!ee~G?_ z==*9-{VQC<;X;=v^)Kc|^kb!b!U_J2=n+}czo_&tdQ{G1g7hzXT-FKJeTV25#xgVe z5>e@2^lPPjV`JZ9Xm4wMkLV9d`O!L0BC3sPn*C(1h(*Tw1<_wsP;$rpZtMO~%Abh- zrIf#ge|VXkqx7#tVV!0%mnop95;rY9F)xt34>4}q_~(fEnX(D70MQ1qAW;r6>R&8v z>SAHUBE`{TQN%b3yO`E`{);6LOIl|NLnl~Ex*5wNX7w+o=f7Af#7djYSQ%ST7DG>$ zvpgqJ53vfeD$=TiN?mMbWlci*7psa`HKx>*v${~jVCemb)ncp;Vzo^<9ZtYpL*3$d z^^G&uz{VOP*2r3oRnP=6>7OZYhFEi(BK?c$`7fs5|HWEs5?pKQd#t;<=)3z^d&D}} z@{WjgvQ}rrbhWXr*4Yh1wTWWXzt}j$dJ@Cbvln98b+JBl4wSPmV*PA+f5ZkDYuJA- zO!^m-{>7w!v7x4Xn3O9vT+K!xHqtg5h1h6g#l{F@)%qe?<2i}Nsw?R-5wS@oW|nTU zVp9;Cs)A`YHeIn90yphUHq*mhY!+hFzhU<{KZwl{=3*ERKzd^HO&1eepx8oTkpYK} zEzx+?zt}2`ApMIiLu|R7g!C`A62rc?&ee!f|JcP`!&=1FvC9To>oL4KFr4p-^l$hE z4Ri)?wYLyElbHt)+e}=6*jdDdh@GvJbA)qk>+=wk{+WUF{pau}5##E{E=NrI7u%vb z>7U8GM01h;#V#|}@DDJ8_dDaUD_PDi)W4WMk;Se?>>4}lwTMaoV%J&c^`^^+rig84 z-Q9@sD2d&M81*l9Gh#Q|Onu~t-GmXNtaCSFw^&R1H)1Pdw;N~d4ja1@vAc{lf~`4S zb4B+E_X_t3_X~T32N09~#U4^m>0eCx7kgCB#{}tL>_y=v;pJjx>=lgEAs2g9vDdgKo^V7cFL+%#RSG{>AS?>=O?3y_`o7`;^u(#6BaA z8t2G97|C07Q@~9m7Ax7nCoj~jhU{rU+z7(i`j`g)b{d25uh3^dLz$nTK z^)L1-Vm!KHKOuIKIE7J{G4=*V{RNC}fY{G;{({jQee?ww%{?*tAY#AK`6pt(6MtZg 
z7Z}qSW9AdP5My2JZ;aszj`<5?%hLJ>v444)4G;%W9&wk6#oe^BhY=iFkmqFycWuLk44~3M=J|G9~TQARZIqh$j@I{yEK322RS@yAjV=>jlJfO5s8} znWco%0=LH4Ul8XtII(gD3jW^NRw1y+zj~ImUy@~#a4^V8N!T8w*rfx9eLlhgT*tskihVeJb zIfB-BjW<#lC5#rvAWr>bC(~>k#y_XJ39LH}@rlGV#JRoWQxKnA%$(p>!Bk8r*&)+a zH^Zh3KzwG=iqAq^vz}1$R?TJ4GZCLhtVf)8M|`<@EVzUW+ zZ~1YHQ;sh)n81U2!al@T$jPlAU!!KL5MQm74-L#^u0@>s$Cneh@`Q12KzyUI;+ups z3`*8I&e13GZaj;vFI2(Vh@YcOE;(Do&oeO1&PV)$qBXH(Ctrm4#Vo%J@h!w9>arCR z`4&6nm)e4Lh+kf`;#Zi=iPs{2l`^jut`V+fvl}sSAF&Pb>xk`YeLdng6uV6PRL&iU z@8qSN!laV>c9*)`#JbxMznQoN@!bF~d=l@RN!(z!nv_W;OMbfwc-F=5#H2G7yU)Nm zw&OfQ?lEV1{9am*ASTSx>&cICKBg8+p)+cfvLHtu?ekL3h zjukT}^B_A8Bs#0$1ma)N3L*X_--kaC|B5(?_}9gPI9K;ub@@*C-o}1Vmme{CkaeCy z{3m7pY@NR-^HT-LZULEr>sO3B&sS_tyqw#jzkSPYg%V5jb0mxI*Qe` zv3iQt7a9l+k!Yk$>K|v8;9XppU{j?uL!!B2Ees~x9huPcUqa7+3H|;rq2F61^!%65 z_n(OlYSz(U^5+Kpw>F6`d{hr3(Un(w0*P)Ks5`BmD(Hd9+69STiuGpfCM5dE>MQi4 zvp=Tj?MMuub&bjgBEhYgSct@6Vj>blh>=K8{}RJ7g{zd%qvaGmA|^&)%3@&3ZAgq_ zYzz{k%?ML;hbP9;ISx~P(6HlaO~6#Xw^MszDi8Rndyt_1C1xTqnV5mZ6k<9OQ;BJ2 zN>h(vns(&0mg;hv!88^)Z@?^O&O>50F$dFlUeh^^M?zvQrkzsCd|C@IU7KM#$D4j0 z5{u|O7YR;2u^Ndb#BwdcQY22NwG7i=H09HEJ0(`oxe_z9Q8)+^t1x36FoRvz&^i-| zwZujwsDIAP)-!fH5*rL=@U(HPO-wlhr^SHN8sjuBh?7bEOPqrQ^)GR@nZ#);aoQ~= zcG@ROIgha|NSsexsCiz1nOAj>ILzrI@LMEOD7h znH2(NHAdnJ#;!EYW{pMSDq2@#7H7>avu@L{TF=BbS=TY;dd$*>PTW9iCuZ}2NNh)9 z2Qa%DW^NIXvLMdAtKDI}zSbGe1*{*HOAisgxYNIcDyXE1LS zFmF5NN%0cT(fK0g{XsmB#D3xh%&%9hn?DGNm*{*&SNk&NF9YW9MB)IQuNvq4{g}^d z=Oh>8={$(U>%<`>-XPvW;!VX$PO!J>d7I42L-qS$uBk=*P zPmnlVoJ-JfLTcVs9{eEr-Yx7_?iE%b-l5O#IHR00!aL(^51Q_ zzW+@8iNt>{$KvIJxQ>JHLFR*TGGE{ zLp5t8G)A(CNl7*}DESlSr1US@LNOiNN!>@u*0yIGB->i69g^);(7`%8BH77WossNP zv=+XMB=s*TAxi37)MO9Mtf$aR=#69_weDLiPxeEyzqJM+InY=Ob$28O+t?5!hjJkO z9xo~VOX~a2Yk-W%S7i&seklbpWmmqnmwJt;Qa%)|IMI~!}m2oDo zwlRGsN?wcPHeQ&1SC_m_Bk23j-i+k#qP1uxlD8`5 zHsSVSEP00p((nJ0cgeb2xChC5P39t9Sn__A?*X#hJr4jG-W?AD>8VIQ1f<3y`7lTS z0Le!PX-@J{HG2%n$LZXU_r=U z3CWkO^$L;)iq@h%NWNCIk_VA|-CBD7OX~SA`Ict(Hj?j{l;k0s`7V<0S?hi9VbRG> 
z9_A91L-Ipjri3B+5t7nB^R9e?)Mv(qF!!A0e%pZ|FX?sfll0PB&vvvNWOg;Z4_57FoUCuv{{L^Os zg~c8X^bapOh~&S72h<@aJLM{7rrf4{am*m4=f9Lsmh>+bz~Y9g3)#9bQq;ecbSxD` ziu%Xa&K0HNNF`K|w9b@b>B4_oB$ZW4PRI+TY(Z%ilo85e@mQsl=Lo%!qW+~CBBcW` zRY|eRw!8{bRpqQERJSn+bgHIW*FvhcjnzS_t_tc|r=I^(4X{|}S*j67*RxuxF;db$ zbL~x$YNqn$w!8&WE#+)wovk%`8$r*1i*G=xy-n$$n4bSqon&=Js*9R+wVBMmyw zp{K!Od#j~-Bh^Q-zMT9Zr1}}l%(}l?4?s%C_Tm%D9BfmDC}k*8!xS4XjIb#qks2lE zXpS((6f8-CKhDe4IG#iaq{cJ$DpC`ObCH@ztV3!Np+8wkO-5>p3Z@Fv49q;IYZ5br zlI=DVsaeXLZLmb|pDCED%y~#r|5B$TwLqCx|5DPw)M8~WL29Wa>- zSZk#URw1?8I@chz)>upa^Y*VtYJ+j6HrmWhNS$G=Gm+Y?l(U4h1?k_CPO{G9HEh?g z=OZQkOI=9k<#Jwx)Ws&#Om~Z%TZK!Il1!M`WmwWnJ+Bb16s{7af2nJXWeToEYMZsB ze<|r->IN*CuDTtZi?k}W6R8{J+-2)-(p+v9b|ZC*Qlx)oApQMI>UO2vfhAhA)Lm@- zl$>`Xb&u_NFH-tUYs&9e${yhXq#m@H56P+L&D0~Z9u*!F9!Kg4P3g&E-I5)O?G^SR z^>i_odIqUyt@WHzo=0lGb-sX<^e^?2ai;!rQ|f6jb->gu*^AU`yv%=*I!Jtp6!kA9 z1xiW(Qg0$9IZM55M|cORL(Ke8qrWS>ht&HvfKQ@;7igWoBsjrL9)Hg_dYpw58_dQZS7-#B7 z8`JY&>J(BxDfYAQi%pUKrG8VF-vvGYEjfwQUn=<97W@P1o0I0uI&EGd?LyjZUZy=Z z=0)0PEx!r^LJ;W?%cHWwNJoIB`YRJ6tr603Q%;AImgdQqu8A~t*ReQsx~$4GNN4Rp z`u;PWN4k`CmR4pNEN!atazc4_sf;xB&*>@sOII@1QXa96Q={`0(t7?&S3_F*m#(2q z9Z2a~OlgC3ZPnEgcz&i^AYG4WtVz^Ix`BzAo_hXE>-le~J|w1_Agy_tlxC`Hj-}eI z>6U8U%64gurT@yw=fCs_q}$1APiqj;9c*Stq&q3rnX$fV-395ca(1(s-I4BLt)58h zkTX-#A5o?InDW!hBi&Esdj3lfuyq4*`hR*3)_6mZ9;$+2HgouY|I>fzkw}kH3iU5N zL3Lx0)(4k#$(u92Sbq9MrA$P6lIkW4Q*6p=q^I$N{CcEy#HMGk^+}}p-twx_GimKZ zdKPgx(zA&Rke)-VLwYW8x<;6X^n6;2k(U0M1zD(=^zZcBkY1vmOYP{-D`lCm9O)Is zSb8PWeEv(XL7Mt^y3QUVz1F1gl~a_~=cTl^n;CW^(wl6TGmt)$UCz-6n}xHGKAS!N zHWtAjq|X)3L;8G;VDIqsg-Bnd@vQ!(w;;Wh|KIhz)h;oyW%b$RQlu}Vvp$w}H%L?e z(l=^YeGQtv3Tf$|neH`6U(1wjCS}DzP-x3k~@bGyRmX*I;=YSx*bkaP(J^ zepc3V!t=s@#`LG2=@-=VMT6y&WxXuCg7g7X;LLOR2BfKf>93Hc{-xhi%IkvkFRkal z<@ywqejDj`nEAd^4k7)nX||kKn|==t*mwCUT6`Z46Q3bnvN1n0vGm7u9#Q!xSkcfp z&6<5`oGT_EeN>spn0W%}&t)AqSfLLmX{&!}J^!t^4(YFz`Hg|;`7P4lvC9t{?|ZCx z7+CReptx^DU;SD^xT-q%98%AJPVmpw(e$R$`q}PbTd;9neyyXNp%&Fsc2GGK58>7 
zBP0D=`2jPlB2x|EOJh|>riQUHH3jM)0~y{cP6`)`FxOrenR+a5Z2qGLnfgKlp&>Gj z_}^L3fA?TjJ7k)e(KFJ&Ota!#Rt-g_1-o=YM*5d&rOeg>Z>LOqWZKG-{yC$sTBS*J z5IQ;z=VgZamzjl37g^N5%yeYB5yO$;d6tpJWqPQtCoGYy=c)ISGvn6eld zXS>Z;vv~%q56N19%tFQ_d}|U|!*^`WC}gC6nWgG-x`CP5GGvxBMKV}|%t~bVu4LA# z=W1lun1VH1Y~4Dnxs`P_UdeMdl)8to~)rK;}$auo;=NjFmatI?qAoTshAZ z&bP4(H2Q@GYraRu>R)CHGSt7!HOO3|x=V%2Z29FXzXF+(9deaYtp2U_A#<&q+icx+ z$XqYy4Z?Q8>R(3smyxutFLNt0(!b2@YIcWkr-6AN?n35n zIqwmue+-z|eaPHjwAOZ#^8sYEZ88t3;9=nrL8nIMG0yC16+Dj26U8o>Cy{wdnR~5s zpUGTXvdf=Q-LuF%XH)dCDYIWO>0gGs;H6^9+KI@#!tZIzB6EPFA4BF<6U)3-OvxNn z`RmBMVV%;yjI=SsyYU?xJA}-;>cX3Zi;{VtttBrR9&(3mrarB$R&bvj^)I&Ng-vsq>;^7D~oJSDS4rkjg>~W zjIq{gn`X;#X3deM{*|m?wgR#hRaZ%VCK>W*|u`F6WR+MY)VID zJIUEukp7wSuE=(ivpWasVJ28bS43o~f7y}9_O{LXAlp|3{RHY?;Zg=5JJ4E#kR7a) zA;M5$m@wSHOkxDq@>VN!8D%DsmHuVNAUjqC`dYUWM>hp)oeDhbL=K)@ z6j|wCc9~L^3oC?`Hggr$CFEQqtQFST6#e=>y8+paidp^3o}sy%iR@-uaF(2BBYTbt z&b6`gkUhU>acJ`fT!`#NMQ8S6IkyN~k-fymE|v2#WG^?)>=j(PtC77DSjYWxm4VZf z7Jrbv2H9&B+h(2DAxr%$q}-s)?a1z6!A-JuB6}lK8pzsZKr73W!5L_`${WbKmH+oi zIwP~UX@a*Sdxs`?r=9#=$V&grMc<>$dy&0QnfD8Oga>ThgUCK)t%s47{$+Krm}ZY5 z`*<;h|G&=JCwVDvYlNqe-K(Dagr|jPiUrwck$q0i=dDxvmzDlyUqn{=mwnl$yn^fj zYrTrB)xYdPjs7~aZ`c&+U-m7m`_G;F4zh=A%Dbw2Ptdbr_5)-Oo0RN_HuEE7rGI7# zK2gCDWIt8rXM*%Edra2n!f^vrcf!>1H^^D)U%ouDUjexrkfr|Rx+D7yQ4QH|397|+ zgcsTGxw?NK`vdVavOn5pCp8J_Usn3JZUM5t*c9ns_BUnzZm@0(vVYo?zmWZ#1C?mp zKgj-Dj7e`$d4#FUxsmgj*t%D_6~9H!XEXgu2?)}^ob)dz{ag2sVo~H`rY;xfu%+cp zAeXewQplyPl~GC-InBpe$Xs5|QU)de$1Axqa_adnS58)Wp#pN!KT}=_xytNP#l+S( zMy{$EkFOk0t~z5akgH)QQ4=}d;ZCeJV@=evj!+jl>0hqCEogvTLu)lsW@FUvW1UTr zlm6wJ7oF>SBiE86bU;q}XQta)TlG)819NL25l1xgm;4|4hL!#fA$b zkQ-@I*6X0ojaI=J15+>-xp8tz|8f&-OkacMCaKwEVG43nZHoR3G&fzb8Nz7>>+eEt zmd%{4*c{~MTIW3E=5q;FBDcUg7bsM>|3%{2~ywO z8d+vu*h~$dxSXd5Tg0a{8_^cL8!2%6XA3 zxEQ%Da&8qau`%gi?lR5ua^$FgytvxDbBG-E&%xEU;2LFKi`+I~Lv`e?GcXHsy@@%_ z?VQpR$n8-1PFfGE?ndNx$$1l0jR===)DP`Ged6;Z@{bGj)!W*{0xi z7N<> zGZh>~PWqSo+-4qE>;!UOSm&3>QU6$*``S9cLGD{?eTN+NuW)_(Hgv;F$erY6@(ws< 
zV?QDHv$cNFc)ud|n{`V6a(^iHr|=gx`bX1*MUX+GhGIntUAjgzBV!`IM}*&CF<^tdK*VQ{xQsr3^N< zu$g6$FRNI&qI2UY^Do}>6$=0GJzohZbtCe;i%V@sz6xhH4|(cezLiF(hCKDJWXbb2 zkgutp(m!+UwUsIT%hyG|o-L@4d;?%3CrJIvH$k3T%CRV|gj4QVO%3=8`DTs-`Q}0k zp(Qr*ByuuaYxFiu8K_~~3hj_@Z-?!Gd`D$=5;_ZAY)V(;yUE#Ip#C`v+0&-~5#|~= z3o@Ur-$b7Jm*+-VNSuTGB4VYMN2g+b3G&jvy!0=>%(ON;WVtd|U{glJu44H{}6{LUp+im6@$V>n7cUkA%$lqhE zO>AB0c^~rkbD)=y-$Oi#y!0>spiOy5!%CU+k6_ag6U#qlQyy2!6T*|oKV@Tk<=lt- z)2fsHnfKvY#iW1v=Vk3j{sqqE{}uKfaB>~>{ZGAEmSx$}u3Sfwbz0fFlPnwCG|Ss= zcW>)$xhAdGZ+G9_J>BlUW6PcHLUWZHAqgc30fH$3Lkx)xCO|0m#Xv&u9sV}efN2gr z!2kRE{odQRcM=I7qq#Tpo0;Ft{N~r@H?tqrZvH7(uoJHEU)s)(xlMmuDttoQ3Hf*P zAGDpH;)EgP{BP%HD4)P~ehy6TPHiXT-%db&0m;v$`WGenlD1RiA6Ng1WPVlK`87a( zSH8smxl??jJHM&z6#2JPCWPxfE|5=XJHIX6iu}9fA{XoLX*<8~e*Hk(`K0^xLv81e zq~4F+%%`-SKaotC5bo$_+AY^e<}W09+N~h=-_BnOF29oG*OL53+xc4;@^{+K-@9M` zqupY&u0LY3HflTnghZ@CNSIq>!&2{_)tYAw*Ym8EMC7Jty?kxp_mWSo1vBZTfo6a}=3k>UnO^Jg)q^^^=+>$dH~;x5BNW1$hRg zBawfesOE_Y zj7dk3f1aA=nLuI)$T3NdYaYlykFI$j|D0Azn&&2Iy6R?@HBZI;s%p0hOZR|wX>F(6 zvT4m@x?jTQJu}*E54)Ll&C@`)EkQ!;KaU&(dPM$tn%Zq&m8M1h@xpba!kp%r=d#;m z<9QYYQnr)lFXZd_=;0R4^8)_j>MzthFOsqsyO}p@w+lft$gP^^HUSa&=efg$JjK1= zepK_^B_MZ8at}KC8_jdC3;7bkQ~aNvm&(^)X&z`@=;e0k6G*tfmua4tODnICBL#Q}*U= zbH8<LAx|RR3oe&QT`VcjziJ-HKWBVmXx{#1&GRnK z1eE4^So6G_D|r4*lJ{`JW$)EI?~^ZZVW-XaOGh7&MC706L-O^AB#%lW^3Nmk&m;2B z^I<9bh$J7CMC?D0V6EMxd2E*V2~G39QS*Ef$!5*-DaP9KX-PgK$!8_`oFtHc7_H|E zl6+B;FQNK+=?G*03cglp9uTzW7nyOgdpET``A88N|l;A(@ z)DF$Nn#;UvFiUH-Q}J#`-gVlk0)NrKyc;y{MuFVK$tl_I-m_5lJk7gB${_!Mq<5<% zkbm^GO%ljI?{hTob|i;2FUIcOuX)c$a$0Ru}Nl}s;B`I-o z=biGUqiIv~mgTD=NmUXMA%hw&8@T#2By!O06{?TafmgF8z?)->+Vas@5Aszjt=6xw=a`nH`ynihqetdcLMVFoZNMmeEowYua@LBlDt-uf0X2PlH4cB>w(f6w7U-S z7xi#gQ1g=f8`8Z0gtGT*-Z#s%zD1JzC3&kPZ<9o}sP_R>cu4cUU7CJ}B>y7Gzjk}@ zKB(O#B+Mgvr{;Z^fIKY8yCwNIx59fg?|UWleUium++|bi2Ly=^O7bB|9+BixNtPt} z4Z8x@0V7-jm+!ickR%2?UXVP zCwB{F?ZSwuu?r-*P|7d`lzFivmvC~Ajf9fS-I6Jd?b<7umm#@Y+qDnLVQp6*61Y0| zOPP<8d#;wR%aOp;yF$LOebL`NS@{}}m~E3BsWOnmju+h3r1m3zCvzhSlcxyUlB>7lEgT1 
zh7y;|gd|Bxz&r@0v?P#!_##%aYgm%3BspzY9+_j>t^#w7T_ci=a&ph7IN^HZQei@p zW0D+~q$ml_|4L}AqV2j#TG1sbOJh&Cja9W>AaArwP2cmBw&H(J1kKj$L_VEYnNGQ8 zR#ecc&+3f@l`!jO+o&e2=9Jzr7xlJjHPpKAjOm@W)v)STr&Ve#wAx0!)aca9M)T3N zj}=#)Uf5eboi}Q&c2ifGHrj7jsq2iB7i+YQX2WPJrC!WPjUHht>dYOrw);6s z?S2k-8uri5)(m`BsNNctC~i#Zt(IPy?zB+5rDBa%+iZ8*hO#DA-l$ABteQ2oz|CMj zcDGUQ(R*K|665|Ey{?-sjA^tEqIzY3U~^|EMeUD0u2gYN!atf)$wDF#PN_Z>NvA_9 zmdb~-DStkePWcl&g`r3y5TMSse^(_Yf~jC6n=WKjaN2C>S6=f>6~H*lR>Pp)3Y@D7 zSQx7gYSFER*_cvl&oiGu16L{xb91_>H>M1gF`6x_q1RTfWY2Uyy5%=2kqa4}cB?XN zV1PAa2F>~-)YYDQ(d@k#hAiKx*+g-xg^Xjeo(m0qwoSBKvpQ83wJ@EHDVpr3R3e)+ znr2&pQ0A@XOn0)9%d^#<-_W=(dk|=!BO}Y3M&81R2p0^omjr`WbJl7yJraJ&RZdzD+{5_iDR%mYYGKj=Rlg2ifZq&@WLBn{v zN)&?Dl+kFfm_q7(?^~4G`xYB)?^~R#pU6bPM2<5 z8Gn1)Xy_`?wDf9OZ&XqF(FZTYT&>CIGiIx;H=Jp5_JG4b2%S(SA51898CN_05|CCV zKj5q}cJ_?kGO@R6otoYx_!y=ftKY)zSC_qHc(KnnUhwtVd_f9mIZ1d)0ncT;maOHgO73}nGvh1PlA?J=!lBc%TZafZf?q$ zR}%)7P6oTJPbH$f%-{pml(XhS97^rGj_|qU7rIGB# z7BSM8IcnxIs#YUj@#x~4vFAg&S+5yQ zn2frx?;t41Xj03YM^z$}i{<^)$iCkL|KC&g_SdRJG7dV`o%N*hzU@l&ZKv|^7L^#z zS(9xL!Hfz7u-eU8W;1imr&v8wPXAXgVK` zB+`K}D!)Xa3Sc2vNqZAARB=-PND;H?Q}$n&Rm>XJN(Dg$6}{OsF=MLlA+A7j2W{lq z)}*fdhjJ+R4i{t!nIPsB*rAw+040BrKF$u)3TBPQWYd^$O@rlNter;1tbq$Qo709~ z#c;|^y}6*&eh(vC+?eak8qE;><6zvg^v;qzj)}baqa=84{wR}D-kJyb$`r+v%Et93 zrUY!$08418L?VNUjKnQkJe|?o^8s+g4m+T+UL;g;ZMadjs%Q-y)8A@Y6;p4coAe|$%beb5=+kKFk#h@<gUCe30~N?zi`dEl@}Z?kL0Ma?|?!%GE{baV{MWl}Ys>tN@Wr*uWl7HJK=L&ky+TF#)@-$Lg2-x}KE5^2=HhrLmr&u8 z*w%GIV$Fo~ux4DJSy{&y)EjzLKY;w^EWyqStQMTcegypmAsI=$MXg#Dj1Nj5b`}A? 
zngNG$vrQAqL59}Vx8^aT(+j^&WOU)ziB7TbU=>YuQdy)Y=`|`6CXpQsrBsGuN0Ah7CTg7tc^zc&XB$TACiq}gtO$xWNHDirW{ zx3ifh+2`GolRCia5^4RbX!+l*Pd~5IT4*Aby4=0Qcq{ldTYywg# zRMwrb`PLqh%Gj?M2F+vi4w|g!9h9FBT65t0D>Q zRS!^0!A`p~gB}Tw=R8JhkOPZ1U|kFnXVcA=p{gB~)vKn}?rx>rfS*Pf%nfG28t^Y^ z6VF?v5<~u62>+>8vqmjamT&!Tf*o^9l&(mQSPI%4R$GlEL9Y!yM=mg$F{w?I8ziErP=NoumV(avX{ z4&mb}Dvq+$QvLu`qVVJ-YVc6R3$vA+_&+MglB36m@2N%V=uK}1j2mJ zLP$W}Y^jieMQK*Rv#Z!-Hio40w8Y^6u8o;O)3<#{CDOT0b8;4xi5aH_rYn-l7%9YV#_$14<{1+*yG$p z)^9fZQJ}(b48VGZ(a8D1k!Xbf7z*d8HQN1XCrYzE3+ar%V=>a>iJtK>8p!YA)T^sFo@% zeRk;~DhLX6)tEQT(j=--rl>usre3vLHGK}M;87h$itnuFF?9NbKGP|e&;e8*QeIJV zx3mNErKOk3HP+6~dLw6Mz!clfPSq%tJI%&4)vP)Amni@9(gQJP zI3r3GtJySoi=OpkX$<~|w#itvtj5-T&< zvQ%)J6rAY5aBGyxXvb>NI&66j7sRV@zfkrJnR&h~qWH#*x?Y-FdbqjtFgNmXh8>!L z6ROm1?T6zGpigjSx@y%;5K*aYO*NM8<3@hXCCQpTT{g>@Bcox^BAk6L=O-|!GjMvI zfZ|E5pM5}DZ!i6GW4biG^g4#UL0YeMz?@5Ev@iz-PW_%;;F7qG-B~J|Qy4GhTAZ7l z$L5#>E*)|VQU2|mpMvpOtuBBjsq}QN>F5O2( zLIDXFS6HOV^nR1)eCr)tBRSQCR!?M9g~>Y#&Jy;Ex$4bC27Ix5X~YLf)}UCvCOu=I+FU1Bz>Q%&jicU+Qq9{5>l0VtKG$p$&+ ze5o*LG@E({SYjJt%BbWDE{V?=P}AEW1M?*1hBSe4v# z&1qoHYyg;aD8SppHyVq2Y0|3IAYM3M)cthbguzBEq(yoYE%Q0=a>38@vfjh4%fn zi@7WbaSKA5*V{8@gX=s~8auJ{UToTuz6ji^g!Q&zE{LA!V1)1!Q`QO2N-UVA6Q)s` z-7hS1+Y2~54Jxkbl~UV0!D=H_c)4An1IALCA?=rjz3uIs9ji<)y}nbb9i_?M_88|4 zSEic~IH1N-)tVRIv~Sy2xD2UM85@9BnQU(%mGq3&0EaHYpI?J-ged#mZCoi+2iaP+ zdI`LE#wog-i;|N|_mxenL|#8$sB?t^QnO1BV6~0f(*0oSu$q167P&&IuAeOFa6Dn4 zW}6G}bU3PwA z5{DKgkWm#xN!f?FiAk$HeJfeP=C?5YVDNmYjd!IQ9E%>RTv zn^k>|=V1Fcquh{#4VLT6$M$1{!=zWXX9Uu9AY=3|4Qcj`au9RKs3>>^RWNqCT%z7g-dZC+ikrIuNf+@A`b=2SzkERI zSpt-VO*)M&!2-2Yfo2Ui06Za3^Xy3x zH$64-TZWy$#I)P`6k#Cldia9bl}{tsxsQQzk(yP;#B|pdCj(SE#HC0fnFXZonrg)j zGUwrHN4!jI`DcxmL57rL1X43EvYUAkjE6jYz*Ey5rZRiB+|uGXIZ{@2>?GPpSP^oW zwLz71me=(#5bZgJE5-**dvTK-MTCC^R)*A^hUGoYCr#c~+tM10D3twY$1>5WpPT8|F4c(I7iA zy|TQ+qoB!2MThh@fl()a$)LpzVX6yHCmdLOP=4CEY=V^w?^_vuJP4_tArEcW1K!i- zR~hDbyaR-waxY&L`{V&#|53VL7fiyn=UCP_6Cy$1serxndUC*o^6iq1Fg}7gu$n8{ 
zv{}tA@wpdwq{RgT#a3ltd?CTg)0f>%eE?r7WpyBhaWIU*U@CUaGd`osmkM5(#uP|~ zR%Dm(yT}kIA~l$%oG)=5q;T9IdpTZQre2KD2RrH_mmbUq{3*B`!ig{~Hy`n#oeM+_ zDxR0~r^eHNj^lRyGpOe>_Y1C+c|6zIRWwL4t- zVLCftnQ(#ZfnX{R{}lbpU1hr`xqQ9{`--IC1w!h@4Gtaw3u+T@f)606*ZE)NMjAXJo6?BlJGyeYzW)$b!E{rqC z}wmH0I1_nwDm@EY-zAE9CO*u7d_ehdvZn63gRk zW7*(&_Y$;i8iwGWQL!MjB_81J_j4oZ{4^c0^{SD(h1nI)@prJD4cAa`&6ohVeoco4 zGTozl?f!|NwKWWmGSlChh4I9~k96Y$y^BcPiP6{({Py%owaqD`GiNR;A93`Ii8B)z z928^*&SC;VHriU|0jeUFaZw9cQ&67#Q_vz|P$L?n=2%@~6zvgVHwH@2apWl<9xNN} z9x;SRM|7B?bJP*5=|aB%2M9M&KJ&oLQN0K9BFH=Xb|s&GD-ir6S+OWZ>)i+C7E5pcK8 zWm&FNC^TvY{QZQ(EJ9u(D`dJZldYeJy{bly<_zg~bd1PmIvST%FYXwE>$R;QT`Cer zVIRPPdS?{KNev>71?~)RThKDu2_uBsJGk>y632z;82@*AeM)9&-3V(gv5XQU&ezo% z5An#dEu<;2(-(_5wOzKUGM2Z|c zs?755t3>(p^$~0A6Y;N%)p%2e}yc-NI!tR-J2z7NFX%<-8#hI1R+% zT@|5BIrA>Ap`cJFILw_HY>4)PvnBdI!VRQ>D+L#K`8!$>&VSiMh!1^;fsU=5O*;y~ zrknjcOe<%P<@Q1>7l*{ZXyWoC++kY6CRD&`x8UE^sbr8#h9-1GgycF^W+<1- zi1$2z@YM&nJW8mR4THkoE`Kp+#d1cOM4{Ap8Rr$(4R`b^&Q?|+i7Osy#1@Gyi5A2c zP+TVm{_4uBmFs(Q?76NF>T;QuSPo*XW<)K*;R+dKQDgPYYSk@7kH8RWkSIw&gpc6h zlj!3L4($oPzd1gc)ATSf}Go@xiF*tHdH2<4l5uY$Hk2l)UYYN z3i3l788WirMy0s`BOV?Co9lB!xm>)5(RIc2LD6@MYl6uDX4@cVxho+_E3u7Z%;&)e zC~BoIgUGi{u4l~VUdzY_mDr%>;F!}um9Kvz;}lFqQ{-8SikV(qBc4@RsQieEgW{*) zx@QJWN*s{rmwi!O6J5aZIeaP0I;PMpTTBz|joeKpq*ttZ|7@p>twpgbRCG5N71!j; zr0c<^tGM0Bum=v-#GvnglfZ7w2eN5@i1i4Zh+#{s3M-5@?`j>i70Z5c3>4R{7|#$U zw+zyCdjci{tGV?=I)(5tW-lzt1`cs{DtMxQ8h%Qv>+~5Io3+LNYu{>F31OLf* zB@)n^lVM8TcmWr`Z;n*6mn3n(6`J_puxf*I7Oy@6a}ejz#)TI6}^MNYtJv zT;o;)i%4MCF<5bP$_Zq~YD2lD1?t1l*~(#3_u~#vh0MJ}ajr5hQ=j5(jaG#0 zg9M`JU#>9(KH^LTV8peVAS#@LsDXqggXgd<|8_VR@YpMXh{>gg5cY4W1@gd$TkQ@a zWpRo?*E;A9SBtWh?@C)zY<89fjtG!5A~+i4x{%vCI>9F43Pf|LLJ@M&h=J>sP9R@# zM}}@8ldb z^y(Ci4id5osf(CYt2qzXDz5p&6e9az&7vKXMAzE1N{r*Xp!Hz{xf;x`%j9=(Hh6KfyW@aEQin$nBnev2 zz~7@V^RqQ9|Cp&$M994n`I4%O=6M2^IZ%Qk3oI54l4gZ zg|_#I5PqLM25HyI@b*JSXv!IkGJnZ;Tkk$10&SrGtLue z9BV*X#r0iK6RCCj!tt(#;SAT=!dG6|S+?DScpFC+S& zNw7i&@KlN8&ZZUIk}~-e>RJ(o<15NZfz(fQu*cBGwa=3Ndih)L=xaTEq$?h-y-_Ni 
z*V}Ru2Rs(`CyXkfN!M9ZAR0vR(r45J`A%_%FO$063rs8LZ~|?BHraN$Rt$mS*}WZg z*9J#64tAKzBqSc94QWjO^MA}?zNVkVq|+Q;XY0RbMhBP=ci@HwO%d+b$+&aC1}tyo zKD*9d?9-QS=^?BK+QQi*P7w*k>+WY1eb?Os-UF8)Onq9 z1h_`=Y+F}jY9@CnNne89mE1q&&WCxS6l7pTXG z25MT6P;e4JSl|`_MIq*}&FTCNIstj7pu1EMwP)&XTVrv3b`~c+b>ei`CU#Jj(Q-is z^ZM*EEEB|fakx~2&#nipa4h9dhuwO|z=FUw@lQ+f^9eXPfKRq_5+;zEzWfQJW%;K2m~xR%y1pt86C;UIXKZL|^vj0A-ND8ANm1~R~X zuG6t}+e?QVP4<7#YCur9C8M}S$j0|1BH}Q=eh4ds3-&<3t3@7V&=@EW)LDTKfypY| z=*nE;p(v(34Oe(RkjHUptI|^zA*BsxFfKIiWFn-yCnGfo^>7w9L?ODN zgSn8yQ3-&_b`Sxt1`+w{mf`TNNivU)#<7+_3bDLBb(644=CNB>Eqi)LB~oB zL}!Y@3eHI0VA9F^VAc20ZLLfYK@l(lt|YCUN|6_f;3f|4^h~x{mmLP4RXWDv<7K)X zN?ja*ivjnsSi#@)BcQP9M~rzqO$V5^`$YMU+U7nyoa{@${^!M6dh^n|u)-L+j=D!B z7-Td_maa_W(tXTe34qksqu-QyIR|2r9Et25U3xQU4-?~<&h``mLvEPr-Jo&3(I^Ih zi;yNf9Jv_?*cm<}r@n6xcgO_03328NhzYP!fVXk_Ya@8WgmkZ^pnW_2tsDsVCy{}C zFph1BYm$mr68BgUZMbwlZ+}?lV$$F67HQU#EzVXt+3_j5NMK* zt@VDkd~f)o)L%0+$Of!Et7n`+|4xD3NTs$nbr~Rq>CPI!(;l;5d=sOYtM~}dUDW{M z32~sMe0~wVh)=6^>^tt{Eyi6WjvBK~2BnD>vQ{1Q z0rE@NxIr*z1KMAoBGt$4GdL=h*7#58%s6|I>)F^eYr%U3c2lxK!&rvXnR!7_ctLS( zb|5s68-V5n2~g`SF4ErZVu1s`Ff}iZkkPzV!!g)0a<-8T$-zhQEDo}P^9q`A&oW@7 zJyYY;9e8Fzr-fM0T2meyVerm84Ny_$;Ohnv7F13<0X*w z=VA$G%0jQUr!2|-o_q&41iKwCqaNi8NmS5qpn1gIa}H+0WQ*%X6{I2Ctd>S$!eb*n zVtPn)g*QmI=cLF4!#08=pWYX1)Nw2Cw{9Z(Xawt@ zMX-r%76uaGS={NLZNSn10u-fz#X4AppBttVi-Mw38NSkt?0NPp-s$hmZR6N!WnYv4gQ@B zXjQl`L8HO_0h$oZ5QT2_S^aSs($Jj2YHT*um4?*XlhWGiq&|x~mpJ5u*a|i~ ziy-VNxTNU1smdK9=@vO#K-bf`GCBc{%XrC(?HUnbSloc9Cm3Hf+Oz-L`rFe}3Je}_n zN9Fu0Utm?=5oKL9CbpW}^Tk$!XPwW~gZ^2ml?Sz8AN0E$z7IozstdKAxqXf+kLB^^ z1Jo^>&~NUgxtQ(k*}w!2mkv1>9OeUO6LeAdd}#0VGiQ$7EaTfo&T!l20DyxxxCw3L z6&s4!O6*^y;u`p~S>s%}tUY>T%=Uw8Fw@+%!LI6JW^d*pQGPNwi-tbNPq#Tb*pRGt zaox~V9fwS;+Vt{=rDc+N$W{c&lhJPJlWfrn55H?u^~0ej%b{__A&^Y@5kjR_!teR0;d5~ zm0l)2t36wZviRHba(nt*%4uTQ*G~W`dG~z@7^M8M84B zWy^ruTDHHfjU+ICjEbO#}g3Ts<1L6`tkG#!~^Q4w&%~OF!w39T+=9Mq!+W zKuZ7y$g`QC$NqRm>GeP5eq!+f?@N6Q|Kbu%wJmZ3U$8{lCLzzW!(&D62p%aknq#wI 
z;P8Svr%F1HBQc1#c1B}a+=j~K02`;;BcU8UO(JVb;j-{^0TC!5vgwsC9RO@5GmsgG z4j>wyT)P!KREsA|2h$li)p6oCJp=hpJbr{&q%qgfQ9S6cSFMVefg?~u_|d!W$)O6! z*;Crle%VU{1q2(m1-CNzpEF_LBzW2tvyhgc@bO$?)GcJQ0g+3XDcU7z(SHg7yudMy zQ?>?97N({Z-eR>Nmk>m0D?MEwJYtp+4@GQ_ZBjh-H$vU(1lx~cj7OmvCvp83D3Jw~ z)Nyei7u{_WxTuAvDEbGUPlw&%2_3p@;)K^_=6G_`c>N~_eB>P;C_HHYA8DF`Avlv} z@mFO2Dtdl-4!{5hQ5?iVfSu1%t50)Tf%Eay z7M+yp-K$T^H+&A4=W_^NhE*W-5!+z<9fl7972Msa)bJw=IL;BsBqCjwjI-rjlO}c8Isfjii^K@vSe>sN z^SLna;T4DE?BVOEo!!bk#PcQ}y5V%&Pr2f(Ev{jb#M>q;+IzW-8}FMfa|>Cg1-wDk zQ}~x$cszhW>KPoI11Iw&EGR58l#lKVW#7hN89(-N)c_6m6p8sW>};sqJV^t7TFRGw zimMgZcZYsPlJtcODLN<3ZDR1@LBvZ;T2|YkS(3~-QT7vbThu0H&K5>7VqcK%?ohv6 zZ$Z5oZZjU40n*bHphEMP6H6b@Fk)Xh44FtF_j(3TW)XM>${{+I3h(25U+#TiGhL@B zhxzdFyL0b}p+ZU8259WJ{5ck)2pmWqH;CH4>8*TuQGSqu#&=MBEnRtN(i1j(1Rdu1 zS_)QhBGdV>A3FsM1_Z|)z+V6x9UDH;&F8z`PL&CWScnIDFB(0XTqiwY*2{W%9?lba znGGec;gUfdT!Zcj^xd3s9RA@P&K@D?=yelaHfXEYT*Q^o(M25( zR!}SA7es{ejR^0<`(g7dtBqX2)sFSF z1vElXnZV6yJ+L5Xz>WnTw|3hoQl<#;6a`QVCDU7xRA3uAt0?rv37Md&gOeH_MDQML z91eFLc@_5_&r{U4tT@&G56&&f_QGo^2)>qE9gucZ{#7nd)4swhE2c|aBfqn2VjAh4 z^B!#+^rC#D=W}^1hTDpCPD#6T^lHukg$vyU&MWmoNSME~gH43_P3B{w`-`Nm}D7k4j20wrrXADOt8%a1SbkXRo@ zq~bsoJBOMXWqmC}A%4V*9cjtAqe5A@{jrR8g*2NHCSI24LSVqp0FDY~?k zNs#2301Ec)Tja$PkzguEfhbVo$zY1pyPr`wgJGGV{CHxaO{cDY3TEy~W_g!&L1rSD z%(?2hSWGz9rFb$s8~z|V4yL%vE6%03%PaUzMjR%}GxGglfV359=N)^5I+ZPL*K_4M zT_3Ow42%_bNodHZEZ6(ws=%>7@G@fKTMbafBu+egpYSDKOl#U3K#J{xO$Fhje8+7f zv9;@@zF;-?(+lH#1rFuMx%I(Z#+IYUpD*2Q3Z@f1uSp|JR_^S#90uOK(sukMTqBu7 z%(>nG{`x?RESclt?M>xK4D#zA_Uoy~W#gw~LB1S84coi;_|JF+BK{f}tNr^S6FDg4 z{ERB_hCW}X`?#W(qm!bCJ3m&K#Pd1LIzs9oDRJE!d^^=aK#nXz@i{W6RS`TcHc5to z{ebo9?TPk#+)wWy;>;L3D;PdQx1tahSLNuTEBNGbkXAIw9kfD)qHV3Og7}||;xSMFejfo*UFVEh%Lq@WO1PgvNL7#!O zM$c$ZVd4|8^TJR&>{q{TJSec|+53p)cG{nYsyJ2c;2A8d+E_3ZI3|T(khP7czN;2H zhHo6>x~U;F+l_|OF`gS=&IJjQ4TGR?+v>XU0nQ#Br(gE4Czld>r9Z(>H{B?@0D?!- zhz`fU^vvl+Ux3$RZLZUZ(4$=bIk>6xGHtaUdMo7oR=*d{Niu%w)aDQ1j@d6Uq6T5R(G#j3UF5<5!7wL?fJa;Kj5vXEnbk)Ex5 
zy4ZmEynPQCV}`pEOM<8aDwd5uhL?2PZG+!tg~`2a_vy!^Qw?ub!97d1DnI3Y8f(yB z$bebWg@Y8`PSo$XW%M_5;RIxUMQ283AYz@OynC3HZmYu~M7EOv!4Kq+Z^v!qNkL`R zAZ!ynF^^+e2OB(&OP(^PyRrbP(gxtT=!xhgZHz+FT2%f;JOY6{_){8)CW0 zz~c!l9^IEd6YDGA%`gSTlQ&^@uGs6!SAIK}PvrH6++2s1iJ&Wln89f1;{Qp(YE8aV zh52vy~Ty>h2?lEB6A1`YlUqM`Cuh8c^4;)t5g1Y(bV3+r@XW*z(}QP)0=nu`df zqgPd7Y~r243~V=$a^>gTZgDf6R5OPUS8=!s4?A3hRZq*eQf$8P$P zN`!|pxP*a?J_7$aInZGq6DP!6QS6dI43^-0)#Joq6C>eZ-k-{oEFw`}y@wjQU=y~aTV$LSF58IT38UXQ<6vAU&DaP2 z9QC{Qx9E3`xQnTWu2wiozcw~cKE~Nvg&;RQ4oCzX%qtwI-~(`$13 zk3llm=&#UODsenY!N#~yJ$H@6@|`ga93&w6{jELs;;%=Z!XJ-Q7X27d-*)<`CmMJI$*=U-HWzz5F;c2qmc8qPkx$sl-T(f7cBD8T{N?9+8P* zJb9Mp#rX0~z<%3}I?KO>#--l^4r_8&KZLosacSE5o>WiI z1W7*(weJf0i+<0>r@fH=T>NYIx7wByo+zZ>A&xP=-} z55t=Q{{-GvXp?sY;V+BI8nC$gt6oeqN(%`6kpz6iz^_u_A{?vdt%PKUaxkR0mCBc4qVnVg=Cyp9a|D+5$Q+5DZlJt{CQ1cW&X7O2lo~j58BkzK>rZ zgHHlF4yyR@R~x?{LAR?85bL387kmgNB{fRxpR$?Cm2bn3Uk&Mti_Ubg8p`0W0Ls(6 z*%l`;&aH8sf-~%&v!j+Nq6CqOP{$yJyF1XC`n~T#cVQflkxSFbq~GrFt$TvnKO2jY zr=@}06Svw_D6GP8^QZhg#Op8t0i!mh({T8a14wyrnG~b;c3Ma0of+E)V_%s^1W1=- zuARlsiQ@?fp;rvMtKL#Gdv*f@yg&+Ct(hlg3`L;M#9WXAk7I7sXL@GAT9ol+b4dCyPyags#2`SlgIGL2aGN} ze@(w$hR3N-Sj`gqFs#OXt5=rdRqbx+>*8g4tEObk;*4$W>akxQc|j6xavc%wMyb6p zYYasU3(+n8u+8JJo$%%ZUqpW#nq~2Tl=1J0<;lB2esQX)e3N*G7i3FS5%SlDmy66` z<0Q1Q8kltl1`U4oLE&s?;lPkN+eLPL3>1J1NmE8^)mrp1)PTi<_vNARJ-TZ$PQO2Z Nhn-vIqH*6v{~tI&R(Jpa From 698a5c46811d9bdf708eaafc317c3b441dc68089 Mon Sep 17 00:00:00 2001 From: Oguzhan Unlu Date: Wed, 16 Sep 2020 15:52:49 +0300 Subject: [PATCH 10/38] Stream: remove unused Maxmind database (close #352) --- .../enrichments/registry/GeoIP2-City.mmdb | Bin 19403 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 modules/stream/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb diff --git a/modules/stream/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb 
b/modules/stream/src/test/resources/com/snowplowanalytics/snowplow/enrich/common/enrichments/registry/GeoIP2-City.mmdb deleted file mode 100644 index ab8b82d2df33b110a891452132887384cfce43dc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19403 zcmZ`<349b)(tq7OprWVP9_N<<^bd{4oQ;?Br{_G9SJa0|n&47V{1U>L}tGsH0rVz`}QFvAdrp$x+qoD9PmMljsLFp^;u z!)S&v40kfz#W0p(9K+oV_b}Yc@Dqmn8183yfZ;)ghZr7ac!c3mhVcyX3<(U03`q>h z3@HpQhExVOLmERmLk7bHhD?Tu3|S11!9y_j+1$!u$Ysc5n8YxdVG6_J3{x4VF-&Ke z!SDn_KEsm?9wEdVLd*m_B}4%e3mFWCSp?EaM3E51O!P*Hv&n(P9EK8xQU;$8{^-au zCe9Ti5G4kMDCc{I80Im|XIMZ$Pp@G1g+lx^O8glS9hZf8ni+l$IoAqN$*rpB$ZBS< z5#kx{dp6p)NQmd6t;IqtiMEz9-!g{fLabo+mC+%qxNkMX8X=yizAIkhk?SB*gAnVv zwIRy4QHV`U+|00rVJpKnhV2Aof*nHaWa6$Uu~vxPOx(k;H`=#Pi2c!49Se9shOQJ`m!UO#BtYhYY`t4*7`h0=s<7eZOV+U3AFrdB`UO*E}Y~A9=_n z$oVDreJaFfQHB;FTBEHt9?~vE2lHKK=!_2eT!=q0@y}7>UzqQ&3}1+z+0nkg3GsJk z_=@4{Xx}$N{3F`>rx5?*A^&Fh55u<%-!XhoKoj?a5dZCJDTrnGj}AqR5;YP@aR{Xw z_jQlF?9`A^9b3guo-m2t^UVj1P5J<)H(3)hmWS=BXVR$^|E_kdmQz+8``9hh&L!O8kVj9a6 zLo5KpETI%KgF#>^joc@nR~*AI8w^W?G6%3wC?$ZPP)Y%1Lh&(!KjtoTnHV6j%u^1A z`9cYC-@F)x1x&0Uu&ne`FjNcWXMmpzS?pse_JrB80*&&oofUQE= z%tN*i+yKR-t+z33C$Jv06AZhBvWxp_WBT?maj%^y%$cquB8hZBDD^z@Aj2Vs7Z?sR zyvT5bfcP4Oax~gH#_Q;~P)?x0e@7@MAsOc*G;r$=41XlBwEYwetwQ1ar?gPtO{QXP%+Sunt=zf{hJOmB6Y!-_J_r0oD1YL< zKNH-X%B{aLd_mB!C%66vhOdQU#yZY_{Y=Te;US#=`oo^m2mb|z{|e>bfbWFzA0~cF za0`p8e9y!m2+UbQ!V{_{RE3Btnz)&HPu0Q1ZUnbhbL$E)j1cOTfLnxm72tZI_5k!0 z>eW2t8fIX7s@F2{Is$Xz)LvldBh(wXuQ!2dPxVIh-7M6;+;K$NsM5rSHO_D~ zs9At~p*{wfBGha^o=|i6*0}@&*lE>COq@($u0Qp0FiaQfRPLJ=(>H^OPY{?3Pkj;$ zMM8xvYN1eP@{j@o%a;r$&LSAd^Pm=kVUAF}+{a!p@DjI57)qJJB>KVdv{1_c^MyJW zP%hK}j|>u6W(_fM9)UG+3&8MGp;mC;LIVAHZvBkmDFVw9KL^7ip;iKFgj&Tzc#T;P zdWMP55?E$^4h&0$x|sWx5LowI#>C|@#Fbz;EYwwi9YS3V*eKLBfOSGO1za1$u%7R- zfxxoaCNOLj>Spd^U$P#%jfvX{%xI(T1jBxz?gH!)YAp}hO%Ml{k}kZLVIP5I$T~0_ z6zT!)t0%B#`VbReAh4ePA{fpI^$6f4p(5z0$Ax;7M;?nAiTo!Ydy=5{d2XEo!)c*5 za;u5Jy7d_*o+TJ`6}P~rz9!TQfLDZik%=!8SnDXv#8(L{3%(A9H--8J_x*yvQu-|> 
zzD;1=1^G{F7V3Kdtg-h2HwZO?yL=(kX22z(egODbsJ{gKTBw}=)DP({R?zu~Z~YsB zL8e`P3x-dG`aACXJ;Cj!@BV>_emcZhd62g5 z1jC<&`Z>4$L|`%eg^7P9uq68uYHC9L8~DBx>fZtX7V1}ke+cz!X84A{T3r8R;=c$6 z_uiamIDk|3#}XAN}+XU;uQpDp4QkWv>rq> zW0rOe7_JjqPiDB5;3o5)*E6vffjLK7Z!nA%8idzw7ut=0TZPsa&`)SLF~iLS=0ec= zGw~JzQ$OuCFz7-XzjaoDYU788A6-JeA5Z62R*^Wd;&{q z4-;oHaQ?IU4BV_lXtMxbp%w9vVuE4p<=Si}&LJ=}iB<}RGNJjn&re`RMQtt<0|b`W zm4l&1Xd%GQgf2h0_&cf|Fkz_h{%81 zdqR7M8QvwZlt%tTLeU~I49I_u$A$JwKwqK#3he(B+J}J4Li;t~6QO+s_^r@>!|Wdu zm^&%$cTD8`XYL!dKY*c4XnzEJCbUaD*&eEYYEJ!JFZ6yN1eU6?*;TgcxEHS}x&8=DrjHOO{k7x?_mxXk`mW24JFaOyD7z1ZMT?$YSDS z1lAnopfyQ2a=9;$z>Ecs$xNI=U@=TZYqM}n11uDd=>WWzn*k^kjwb*f;m8L-%_j*g zsb?~=fWXq-K&x0dW^rE;f#q&qCe9|Xo?e1hP&i5fWy0a(A)NoL^)Q!-0fJjOs5;8g znlBt7?wiMb)!bUZP!U7?DOz|f_cK6^a6AR56pp6>Q0(UfW`uTBF_CXJkb{on84#Zn zj%T@V5rL)HVkRyjuoPQ{*7L%#9DrASD|pCC0*h}o6W0(}-ntg84Z^XG`_>bf1*>Bt z6E_i9588rOlW=SW92AaifIY&o9k5F{b^xH?P6EpdYMHp3;0}I*V=r2D!m*G0_7n7$ z0Rrp?80rZuIS-+QN^&URFY|zbuZ#pEsJjwo1@y-fW_u>K)EsE3IoncmxTU71rDjcXS^21h zZ1e|08IkpoW6hP3^N|aYhUA6CqY^Vy+Fz(^KXJ@e;1BuA0~IZJj`NS3iWiauc**p3SrW-G1RYvUg(Ed>Dm4Y zPpH%w1p)-jXsO=YQvG5}&B2!HCOxEIhGf$t&qq#1&O{m`ry<=@$k^Ci9chT11Fq6n zq+DKhv~BCT)DnMzr`+rJwNCUD6odkvf{Ofrr_dV;))uCgpr<@kXcUikV?E=~UE=o@ zG3mnBL=oD5 z(LMI-^$982Y>>2+fX6q-vO$V$708Kj(UsQ&z2klhZSz`c*0 zG|@veE?nn<@|lsnP#DTKFEZ6_KfWG=GJ>I*h2DAIAT$oD4mmgZ2j14}6O%Jtdf!&* zFv-565|6JC&rNg7O9&m1k4}q6%aAQY4-J|wS;zj~1J=n2>3ZK5Ict-RV7Z=P1m*;b zy#WJa%upj6Aq89`assj}i!|wxHIb7L?R4{E2v(`D$hf@kLhIQhZO<<2tf_6&oh|yf zg!mlt%{$mE$?-w2;lurtZ~+$$4*E01kqraJaJN+NZmDj>uO3PcSIH59P-*+16W%?v3VbPj*Yq2I=(G7hvd) zmCMM)G-i(PinDp=xbOvQq2+~4E8nN;%i zz&PpLV;;QN|K8936jzmHhQFM0f5Dt$e@Q7%<;drAA!(8+X?l_oDi0PEbA<7^Tt)!C zT!G08##w3wS0^v6MM0vX%nNSzCy2JAcZY zShCr-hxGb*2=W;$pBLUB!*NmgrE)2Yo+yKI1#C4r(O+5~uGb5P#f~sn#K3-g^}g+x z$n?Zw1dF1A;@1wz2;xpG_Q+O9#*bl(VQP2h`cv(j_O&%uL%PgJ9Yy2kuprMhFNf20 zuD^g@-8rb4L(?rs--n$NGkHdn6N|k*nU05j1l!x$&EplacBzyolI;^ z`ify+*%nyxmwP_y@$g(ZN%^@4Ue0}UQCw9zMW9qqV5Zj>S2yf6>GaYz1AF(u)5nfw z=+no%i3N%&pK8}N5rm9 
z{z)C5sN_pi#|a3LC7!s=NWjVC?!I&EU2%0zF*he*lp$`R42mY=R_GrMID;=Gl~sox!*@~x=l*iyd2i25GPYtX1u&xV3cW^Oq5@{Ge$Cx@dWaxzh#@~M@>1C z4TwJi;~LbKe-Z0M9+k9R&>+OZs#cF8tZRJaTuk$o_B z+p324rt^Bpi9s?l?v}9&9xJO=L`J9Mm`$MjNC;lw<4u7$h~egCh8Ge!7TP3=4Wlx8 z`leRN@JkeTd_qL*8Lu=^=`=PN8swZNY+} z5+=TqC4wJJ9nY6_&TiM(#d$Rw21-SNEO%L{sn|Zu!GB2Qme-hb*$6tjHDQ9~FYME~ ze{*}|5!eS!X`i*^hH?`F*N1(&Va=k>WD)K)p9>MDFFn|}@4abpRhb;*aqE0}f0(G- zR>&kPFd6H|{CgK<$}~&PRK%D0MiJKz^4cS_u?trrqloHFT+Rz0D3YlMJCBvJ1OFLF<2rOPxmo43p$?u z64JUeOc!$p10JIU_H%asLK+UgvjQ$j4Ie6jgxRzcY?9lfYV27L(aMmcq$M{+PSLK2 zqBa6y>zRF*mmF(bywN7#qm?9;TmkV<@}+6eG$RlSdQg~15kD~v8h6^pi!HtUle~0+ z%c=}hw=7S<8;q-qJ8hc@=jv+cmT2Zt7gcfIg%lCyd&?IZ0dpgkoD=dcGz&RzFeqnY zA{BDj^fqx-gxw$Nx5K>q%jWtm(u4}G0UGm$S5|R*4%x-6?Doj0nfFU^^(!)s^Se&R zRau+ZsUf7LVrY6rlPT~Jdu?Hx#nG&4T-DH}adkt!vQJ&cnTU4CyxY9&@`}lxK+rQEM!f2MQ{bW7ZLIE##2_7N$d6fZEP$Fau5Rdd zo2qvPTcE1j+>g85I56S8Z?e3UfhdJ&*vuL$F>}&JrF*6{^7>{y~NzXY@(VN zHKa^*?)GO^Az8L9uD3~>Jp16v69sN5?eK#KA81}N6!o#WTCs~Zpk@#IYVC2uC+_ku;=;8#Hz;@Ml4d8>Y)F@xCJ$|7(TlR?f6q{w92ws2DK1WF&aQ^=QjgA@#wAbu(!~2N2b#Z1XUII7da-MV37DK|POH zgfo*WndLJ&HlMRCkFI={R@vo34_Pi`h0qCJUvLf-x@M$Jq0wJofI^8=S)sgKY%Yz0 zVy^@%-_JqF$xK`bvPCI?ep$A}p9mi5WA5C7;&A=k5NU`5penghu8!HD^n3uJXhwJJ z=T0^+#!5#?Eo7P=D!8o2uXl*OsEF91eYF zsC=93a6MHjrI#L>{-Y=CmQRrT5(uyd21|`> zLfl4$r>~gOdUk31wvF<41CIR~y6$4L&Ui8oB~r^F%xzt+fh2-3;_TUObAldAi(s2P zY&K+><;t5Q6Ag||ea`FpzznXu0x*#qO9CB(;0)vlRS7s*SlT6n9!kYnEJRv|nUOs> zsFZTcZHL@CwKgrM2}Qu{is-48B(01imfUsD>r_*`m@}_*PoGsEfBzE)!uNR+hH8OPwEwK|VSLLXjuep>#Df^y7TFW7T2kXPdGlnlb5L7xA)6{y~-N z?ZN))(Up)a0f*w!qudFh5}2N4OEnvE1Mlo5k2Nfc85ePEj1rmVASOT2UBb0I3o<4m z52H}-s?ai8FKlmHx(oAw<5` zozo?R3W_~A3&AYje%3Z8BVK8enwtl9G9eV1=h1W-l5b;D6Oapvu*6AuIRfzl>k1(g zNdd~^Rm|D0&A|=wI9OJ1$N|mEt*wE({m43~+P?86o51d9PPQhdE2ZHksC`RNL_2TD zguqr{bga4q0_U26(UluM8<=Bwv0zeiL%5>#f~&bGkI9Yb+KTWvVSu>y2=C&4J_p^6 zd{t?kgSy+c?Yvxxv37|QpIfo#m0pXbyT_LwKD0A$Yh0JaZk$j1X41C#hX2}49IS=J z@n+eR&IgUwdmmSPC|E9i1m4yA18Ly+5ZcIRFz~dNYPqn<{UQ2~3ZT?*mB}mZy&5m( 
zs+y~#t9;+SwrveRTIC13YM>ET4P>RENtkw@^sU}UZL(ke%~C15S)JkpcBZvvU3^V~ z_gMzPO#-h9d31$R0hRr-*s4w5A{ua4V@Vv1uHK2An=Z69tV5yCFD-1Yl2j>I0_BLV z0A-1u>qWem8MIHS-BE^iK=cgr^`k4p3(@6yl+Tr6n3s675k4%hA$~FqfYGt*#bSo! z*!$W&#d%{Ed@>F1IU4!+0tuq?%t|TjA$y@Y{_`#)$D5aCHcK$PfshQNH#%)59{$hQ zAZMm&;`B@}j`nFQC^K4mrcqwZO zIEa_62_EkPe-N_gV#nTy0#hCqqCAturaWp*xh(6Jm`%a>E@R(BR|r8?Ac-%PC+Rrm zlzK$$_MRrUx9QgDPv}h_82dg9k-Mrb zD68b5LQ``!Ox#ho21g1VOZasgAA>wx*(IOr5?`7o=BGlmzH;AZ#SCTDuOV8FSujk< z@q`M!qv8XenH=geh@cZe`g0~<=|nD<#-7N7^`OTORvg>X+B`|}%a@U4{I$5Sj1Puz z?M$zy>mH~g9g$i@{~CHlhP7(dlB=oYIL7&FV*9bp&=u>Tec2McD7CEsE2Jazq*TTz zo$amg6w2NEP2+5SmH%x9l+NzTwg%n;nD30_B`w^Wlc8Kn4s}5%B-%5yZlx;ni~+~h z^Qel*fb7z3j|?h|OIo#UDKJNw?uLJl-98`O!$EI2s;hKEb)Ufgk2E?CA z+JHf)RdmL@v!!N{HA8O6U`3)V92UuQ(i72Gl+tl}fBVM$wh4HYj7}ja!Mrq2OXExN zE=%eq*8{!MPNDY#Bep6%}^1YkWSpyz zlRpI)KRP}RS7A-acM>#6diji=9edBWp54;gc&6h#3YF71ex_iv~{Gp-<~_a9I-^StsAxH>_zryD|2B+S(Jso$j(Mo++(WfcNSF zZ<)VD?s)t6w0Zrg`&PgpZnK_Aahr!~K9@Uyy)J#|kZ0Sy;B^EKp<012rQSGL#CHbR zEWnA({X5o!HB=sc${mgwM9tE(WARa$ykaA1CCM#IPubdM+#*(Z|0x|KINfYEWes* zY^jl_61Kg9b&uI$dKEH>KBnNcE>G%D;9Uakdstc9mpXQ_NQ`DwwA?uMX!OJoPWCR&Pg zgo7DmOu3bkpSGODi#B*b13G0SZZ&E*5P}EPJHyT{z9yW z+g^C2lkJdc`P~w{!y~=>*7G*&sfQLx>rLRek{TZ zIp7ZzS=QOYHa^VhJZ%r#8Bt-oUO_x&o|`55%M9!!@je8W9&o}Y@#r~nuj)1}?RNXH z7t!&S84{=CP{ojmh?+s!ly7vYDL+bCiC(iwXU7owc-!%p^iaCHq_}HjAT%Met$9V{ z+(ZEQViD%BH^MoqOg!k4FXq?6vE{dP z4cN-zql4y3n{WxeuK%Y0n+;O98)wm9v`FlFBt+8(9eVeR37!Qw&np~V;4kf*hOZHD zu#t}s6-zzk`K6xO{y_dbBOt$0t5tv8i|+utdUvJI40%fm^NlipL9s)f^3B+%CKP(g zJu^K)BVWG9Pj?x9ch=ZZiTFw~z0e32AW6v2{p2?wGe&TyE<5#H`SDtk$*VhOdFB|p z*)Q)nQipW9vA|P`cZqsgD1h(#5KH{Nl8WI83yUYVHLPmev8;W4L;H#s+fJ`d^OkjK q(A|wZQxt+z;gdT&7sJzh_y&`|1boHuoAiKDfZ8KJ=v`>k-ugdtYHEf6 From ae1b94f4b06dc13fa0ed5b7fc452d5eca0c16c6f Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 16 Sep 2020 20:50:32 +0300 Subject: [PATCH 11/38] Common: bump YAUAA to 5.19 (close #314) --- .../enrichments/YauaaEnrichmentSpec.scala | 3 +- .../enrichments/registry/EnrichmentConf.scala | 5 
+- .../registry/YauaaEnrichment.scala | 18 +++---- .../registry/YauaaEnrichmentSpec.scala | 52 +++++++++++++------ project/Dependencies.scala | 2 +- 5 files changed, 50 insertions(+), 30 deletions(-) diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala index 1f938b4e5..1610c046b 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala @@ -36,8 +36,7 @@ object YauaaEnrichmentSpec { "event_format" -> "jsonschema", "event_version" -> "1-0-0", "event" -> "page_ping", - "derived_contexts" -> json"""{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-0","data":{"deviceBrand":"Unknown","deviceName":"Desktop","layoutEngineNameVersion":"Gecko 12.0","operatingSystemNameVersion":"Windows 7","layoutEngineBuild":"20100101","layoutEngineNameVersionMajor":"Gecko 12","operatingSystemName":"Windows NT","agentVersionMajor":"12","layoutEngineVersionMajor":"12","deviceClass":"Desktop","agentNameVersionMajor":"Firefox 12","deviceCpuBits":"64","operatingSystemClass":"Desktop","layoutEngineName":"Gecko","agentName":"Firefox","agentVersion":"12.0","layoutEngineClass":"Browser","agentNameVersion":"Firefox 12.0","operatingSystemVersion":"7","deviceCpu":"Intel x86_64","agentClass":"Browser","layoutEngineVersion":"12.0"}}]}""".noSpaces - ) + "derived_contexts" -> json"""{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-1","data":{"deviceBrand":"Unknown","deviceName":"Desktop","operatingSystemVersionMajor":"7","layoutEngineNameVersion":"Gecko 
12.0","operatingSystemNameVersion":"Windows 7","layoutEngineBuild":"20100101","layoutEngineNameVersionMajor":"Gecko 12","operatingSystemName":"Windows NT","agentVersionMajor":"12","layoutEngineVersionMajor":"12","deviceClass":"Desktop","agentNameVersionMajor":"Firefox 12","operatingSystemNameVersionMajor":"Windows 7","deviceCpuBits":"64","operatingSystemClass":"Desktop","layoutEngineName":"Gecko","agentName":"Firefox","agentVersion":"12.0","layoutEngineClass":"Browser","agentNameVersion":"Firefox 12.0","operatingSystemVersion":"7","deviceCpu":"Intel x86_64","agentClass":"Browser","layoutEngineVersion":"12.0"}}]}""".noSpaces) } class YauaaEnrichmentSpec extends PipelineSpec { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala index 9e8743ce9..4502ea59d 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EnrichmentConf.scala @@ -202,7 +202,10 @@ object EnrichmentConf { WeatherEnrichment[F](this) } - final case class YauaaConf(schemaKey: SchemaKey, cacheSize: Option[Int]) extends EnrichmentConf { + final case class YauaaConf( + schemaKey: SchemaKey, + cacheSize: Option[Int] + ) extends EnrichmentConf { def enrichment: YauaaEnrichment = YauaaEnrichment(cacheSize) } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala index 1a3efc8c0..a33406fb1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala +++ 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/YauaaEnrichment.scala @@ -29,7 +29,7 @@ import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils /** Companion object to create an instance of YauaaEnrichment from the configuration. */ object YauaaEnrichment extends ParseableEnrichment { - val supportedSchema = + val supportedSchema: SchemaCriterion = SchemaCriterion( "com.snowplowanalytics.snowplow.enrichments", "yauaa_enrichment_config", @@ -38,6 +38,11 @@ object YauaaEnrichment extends ParseableEnrichment { 0 ) + val DefaultDeviceClass = "Unknown" + val DefaultResult = Map(decapitalize(UserAgent.DEVICE_CLASS) -> DefaultDeviceClass) + + val outputSchema: SchemaKey = SchemaKey("nl.basjes", "yauaa_context", "jsonschema", SchemaVer.Full(1, 0, 1)) + /** * Creates a YauaaConf instance from a JValue containing the configuration of the enrichment. * @@ -81,18 +86,13 @@ final case class YauaaEnrichment(cacheSize: Option[Int]) extends Enrichment { a } - val outputSchema = SchemaKey("nl.basjes", "yauaa_context", "jsonschema", SchemaVer.Full(1, 0, 0)) - - val defaultDeviceClass = "Unknown" - val defaultResult = Map(decapitalize(UserAgent.DEVICE_CLASS) -> defaultDeviceClass) - /** * Gets the result of YAUAA user agent analysis as self-describing JSON, for a specific event. * @param userAgent User agent of the event. * @return Attributes retrieved thanks to the user agent (if any), as self-describing JSON. */ def getYauaaContext(userAgent: String): SelfDescribingData[Json] = - SelfDescribingData(outputSchema, parseUserAgent(userAgent).asJson) + SelfDescribingData(YauaaEnrichment.outputSchema, parseUserAgent(userAgent).asJson) /** * Gets the map of attributes retrieved by YAUAA from the user agent. 
@@ -102,10 +102,10 @@ final case class YauaaEnrichment(cacheSize: Option[Int]) extends Enrichment { def parseUserAgent(userAgent: String): Map[String, String] = userAgent match { case null | "" => - defaultResult + YauaaEnrichment.DefaultResult case _ => val parsedUA = uaa.parse(userAgent) - parsedUA.getAvailableFieldNames.asScala + parsedUA.getAvailableFieldNamesSorted.asScala .map(field => decapitalize(field) -> parsedUA.getValue(field)) .toMap } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala index 0e6f0e4af..fef14842f 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/YauaaEnrichmentSpec.scala @@ -12,15 +12,11 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry -import io.circe.parser._ import io.circe.literal._ import nl.basjes.parse.useragent.UserAgent - import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} -import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.YauaaConf - import org.specs2.matcher.ValidatedMatchers import org.specs2.mutable.Specification @@ -28,6 +24,7 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { import YauaaEnrichment.decapitalize + /** Default enrichment with 1-0-0 context */ val yauaaEnrichment = YauaaEnrichment(None) // Devices @@ -68,11 +65,11 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { "YAUAA enrichment should" >> { "return default value for null" >> { - yauaaEnrichment.parseUserAgent(null) shouldEqual yauaaEnrichment.defaultResult + yauaaEnrichment.parseUserAgent(null) shouldEqual 
YauaaEnrichment.DefaultResult } "return default value for empty user agent" >> { - yauaaEnrichment.parseUserAgent("") shouldEqual yauaaEnrichment.defaultResult + yauaaEnrichment.parseUserAgent("") shouldEqual YauaaEnrichment.DefaultResult } "detect correctly DeviceClass" >> { @@ -185,18 +182,41 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { ) } - "create a JSON with the schema and the data" >> { + "create a JSON with the schema 1-0-0 and the data" >> { val expected = SelfDescribingData( - yauaaEnrichment.outputSchema, - json"""{"deviceBrand":"Samsung","deviceName":"Samsung SM-G960F","layoutEngineNameVersion":"Blink 62.0","operatingSystemNameVersion":"Android 8.0.0","operatingSystemVersionBuild":"R16NW","layoutEngineNameVersionMajor":"Blink 62","operatingSystemName":"Android","agentVersionMajor":"62","layoutEngineVersionMajor":"62","deviceClass":"Phone","agentNameVersionMajor":"Chrome 62","operatingSystemClass":"Mobile","layoutEngineName":"Blink","agentName":"Chrome","agentVersion":"62.0.3202.84","layoutEngineClass":"Browser","agentNameVersion":"Chrome 62.0.3202.84","operatingSystemVersion":"8.0.0","agentClass":"Browser","layoutEngineVersion":"62.0"}""" + YauaaEnrichment.outputSchema, + json"""{ + "deviceBrand":"Samsung", + "deviceName":"Samsung SM-G960F", + "layoutEngineNameVersion":"Blink 62.0", + "operatingSystemNameVersion":"Android 8.0.0", + "operatingSystemVersionBuild":"R16NW", + "layoutEngineNameVersionMajor":"Blink 62", + "operatingSystemName":"Android", + "agentVersionMajor":"62", + "layoutEngineVersionMajor":"62", + "deviceClass":"Phone", + "agentNameVersionMajor":"Chrome 62", + "operatingSystemClass":"Mobile", + "layoutEngineName":"Blink", + "agentName":"Chrome", + "agentVersion":"62.0.3202.84", + "layoutEngineClass":"Browser", + "agentNameVersion":"Chrome 62.0.3202.84", + "operatingSystemVersion":"8.0.0", + "agentClass":"Browser", + "layoutEngineVersion":"62.0", + "operatingSystemNameVersionMajor":"Android 8", + 
"operatingSystemVersionMajor":"8" + }""" ) val actual = yauaaEnrichment.getYauaaContext(uaGalaxyS9) actual shouldEqual expected val defaultJson = SelfDescribingData( - yauaaEnrichment.outputSchema, + YauaaEnrichment.outputSchema, json"""{"deviceClass":"Unknown"}""" ) yauaaEnrichment.getYauaaContext("") shouldEqual defaultJson @@ -239,24 +259,22 @@ class YauaaEnrichmentSpec extends Specification with ValidatedMatchers { "successfully construct a YauaaEnrichment case class with the right cache size if specified" in { val cacheSize = 42 - val yauaaConfigJson = parse(s"""{ + val yauaaConfigJson = json"""{ "enabled": true, "parameters": { "cacheSize": $cacheSize } - }""").toOption.get + }""" - val expected = YauaaConf(schemaKey, Some(cacheSize)) + val expected = EnrichmentConf.YauaaConf(schemaKey, Some(cacheSize)) val actual = YauaaEnrichment.parse(yauaaConfigJson, schemaKey) actual must beValid(expected) } "successfully construct a YauaaEnrichment case class with a default cache size if none specified" in { - val yauaaConfigJson = parse(s"""{ - "enabled": true - }""").toOption.get + val yauaaConfigJson = json"""{"enabled": true }""" - val expected = YauaaConf(schemaKey, None) + val expected = EnrichmentConf.YauaaConf(schemaKey, None) val actual = YauaaEnrichment.parse(yauaaConfigJson, schemaKey) actual must beValid(expected) } diff --git a/project/Dependencies.scala b/project/Dependencies.scala index d989ca263..803b8d242 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -38,7 +38,7 @@ object Dependencies { val mysqlConnector = "8.0.16" val jaywayJsonpath = "2.4.0" val iabClient = "0.2.0" - val yauaa = "5.8" + val yauaa = "5.19" val guava = "28.1-jre" val slf4j = "1.7.26" From 873f143b53c81f7f2d06366a1cac25a363ec7047 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 26 Aug 2020 20:55:04 +0300 Subject: [PATCH 12/38] Common: bump snowplow-badrows to 2.1.0 (close #325) --- .../SpecHelpers.scala | 6 +++--- .../UtilsSpec.scala | 9 
++++++--- .../common/adapters/registry/IgluAdapter.scala | 17 ++++++++++------- .../adapters/registry/snowplow/Tp2Adapter.scala | 12 +++++------- .../common/enrichments/EnrichmentRegistry.scala | 2 +- .../apirequest/ApiRequestEnrichment.scala | 1 - .../common/utils/IgluUtils.scala | 5 ++--- .../SpecHelpers.scala | 7 ++++++- .../registry/MailchimpAdapterSpec.scala | 15 ++++++++++----- project/Dependencies.scala | 9 +++++---- 10 files changed, 48 insertions(+), 35 deletions(-) diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala index a0a88a2ef..d7914cebc 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/SpecHelpers.scala @@ -171,7 +171,7 @@ object SpecHelpers { def copyResource(resource: String, localFile: String): Unit = { Files.copy( - Paths.get(getClass.getResource(resource).toURI()), + Paths.get(getClass.getResource(resource).toURI), Paths.get(localFile) ) () @@ -180,7 +180,7 @@ object SpecHelpers { object CI extends Tag( - if (sys.env.get("CI").map(_ == "true").getOrElse(false)) "" else classOf[Ignore].getName + if (sys.env.get("CI").contains("true")) "" else classOf[Ignore].getName ) -object OER extends Tag(if (sys.env.get("OER_KEY").isDefined) "" else classOf[Ignore].getName) +object OER extends Tag(if (sys.env.contains("OER_KEY")) "" else classOf[Ignore].getName) diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala index ec81ba929..76ce5ce88 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/UtilsSpec.scala @@ -19,10 +19,13 @@ import 
java.time.Instant import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + import cats.implicits._ + import io.circe.parser + import com.snowplowanalytics.iglu.core.SelfDescribingData -import com.snowplowanalytics.iglu.core.circe.instances._ +import com.snowplowanalytics.iglu.core.circe.implicits._ import utils._ import org.scalatest.freespec.AnyFreeSpec @@ -111,7 +114,7 @@ class UtilsSpec extends AnyFreeSpec with Matchers { badRowSizeViolation.failure.maximumAllowedSizeBytes shouldEqual 150 badRowSizeViolation.failure.actualSizeBytes shouldEqual 267 badRowSizeViolation.failure.expectation shouldEqual "bad row exceeded the maximum size" - badRowSizeViolation.payload.line shouldEqual "{\"schema\":\"iglu" + badRowSizeViolation.payload.event shouldEqual "{\"schema\":\"iglu" badRowSizeViolation.processor shouldEqual processor } } @@ -124,7 +127,7 @@ class UtilsSpec extends AnyFreeSpec with Matchers { badRowSizeViolation.failure.maximumAllowedSizeBytes shouldEqual 400 badRowSizeViolation.failure.actualSizeBytes shouldEqual 100 badRowSizeViolation.failure.expectation shouldEqual "event passed enrichment but exceeded the maximum allowed size as a result" - badRowSizeViolation.payload.line shouldEqual ("a" * 40) + badRowSizeViolation.payload.event shouldEqual ("a" * 40) badRowSizeViolation.processor shouldEqual processor } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala index cb6de3554..4c5c9560b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} @@ -20,16 +18,21 @@ import cats.effect.Clock import cats.syntax.either._ import cats.syntax.option._ import cats.syntax.validated._ + +import com.snowplowanalytics.iglu.core.{SchemaKey, SelfDescribingData} +import com.snowplowanalytics.iglu.core.circe.implicits._ + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.iglu.core.{SchemaKey, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.instances._ + import com.snowplowanalytics.snowplow.badrows._ + import io.circe._ import io.circe.syntax._ -import loaders.CollectorPayload -import utils.{ConversionUtils, HttpClient, JsonUtils} +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils, HttpClient, JsonUtils} /** * Transforms a collector payload which either: diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala index 10051a596..6291851fb 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala @@ -24,7 +24,7 @@ import io.circe.Json import com.snowplowanalytics.iglu.client.Client import 
com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaCriterion, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.instances._ +import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows.FailureDetails @@ -41,9 +41,9 @@ import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils object Tp2Adapter extends Adapter { // Expected content types for a request body private object ContentTypes { - val list = + val list: List[String] = List("application/json", "application/json; charset=utf-8", "application/json; charset=UTF-8") - val str = list.mkString(", ") + val str: String = list.mkString(", ") } // Request body expected to validate against this JSON Schema @@ -105,7 +105,7 @@ object Tp2Adapter extends Adapter { case (None, None) => Monad[F].pure(NonEmptyList.one(qsParams).valid) case (Some(bdy), Some(_)) => // Build our NEL of parameters (for { - json <- extractAndValidateJson(PayloadDataSchema, bdy, "body", client) + json <- extractAndValidateJson(PayloadDataSchema, bdy, client) nel <- EitherT.fromEither[F](toParametersNel(json, qsParams)) } yield nel).toValidated } @@ -201,7 +201,6 @@ object Tp2Adapter extends Adapter { /** * Extract the JSON from a String, and validate it against the supplied JSON Schema. 
- * @param field The name of the field containing the JSON instance * @param schemaCriterion The schema that we expected this self-describing JSON to conform to * @param instance A JSON instance as String * @param client Our Iglu client, for schema lookups @@ -211,7 +210,6 @@ object Tp2Adapter extends Adapter { private def extractAndValidateJson[F[_]: Monad: RegistryLookup: Clock]( schemaCriterion: SchemaCriterion, instance: String, - field: String, client: Client[F, Json] ): EitherT[F, NonEmptyList[FailureDetails.TrackerProtocolViolation], Json] = (for { @@ -220,7 +218,7 @@ object Tp2Adapter extends Adapter { .leftMap(e => NonEmptyList.one( FailureDetails.TrackerProtocolViolation - .NotJson(field, instance.some, e) + .NotJson("body", instance.some, e) ) ) ) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala index 015e3f5c8..fa6e36ef1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentRegistry.scala @@ -22,7 +22,7 @@ import io.circe._ import io.circe.syntax._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} -import com.snowplowanalytics.iglu.core.circe.instances._ +import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala index fd589350a..9b9ed39b8 100644 --- 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala @@ -147,7 +147,6 @@ final case class ApiRequestEnrichment[F[_]: Monad: HttpClient]( * @return validated list of lookups, whole lookup will be failed if any of outputs were failed */ private[apirequest] def getOutputs(validInputs: Option[Map[String, String]]): EitherT[F, NonEmptyList[String], List[Json]] = { - import cats.instances.parallel._ val result: List[F[Either[Throwable, Json]]] = for { templateContext <- validInputs.toList diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala index e8c1db21d..b445c1a7c 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala @@ -24,10 +24,9 @@ import java.time.Instant import com.snowplowanalytics.iglu.client.{Client, ClientError} import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup -import com.snowplowanalytics.iglu.core.circe.instances._ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SelfDescribingData} +import com.snowplowanalytics.iglu.core.circe.implicits._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -281,7 +280,7 @@ object IgluUtils { pee: Payload.PartiallyEnrichedEvent, re: Payload.RawEvent, processor: Processor - ) = + ): BadRow.SchemaViolations = BadRow.SchemaViolations( processor, Failure.SchemaViolations(Instant.now(), vs), diff --git 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala index d0f147d5d..24e73c7bb 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala @@ -14,14 +14,19 @@ package com.snowplowanalytics.snowplow.enrich.common import cats.Id import cats.implicits._ + import com.snowplowanalytics.iglu.client.Client import com.snowplowanalytics.iglu.core.SelfDescribingData -import com.snowplowanalytics.iglu.core.circe.instances._ +import com.snowplowanalytics.iglu.core.circe.implicits._ + import com.snowplowanalytics.lrumap.CreateLruMap._ + import io.circe.Json import io.circe.literal._ + import org.apache.http.NameValuePair import org.apache.http.message.BasicNameValuePair + import com.snowplowanalytics.snowplow.enrich.common.utils.JsonUtils object SpecHelpers { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala index 88c92c9e2..990b33f79 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala @@ -10,21 +10,26 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.data.NonEmptyList import cats.syntax.option._ + import com.snowplowanalytics.snowplow.badrows._ + import io.circe._ import io.circe.literal._ + import org.joda.time.DateTime + import org.specs2.Specification import org.specs2.matcher.{DataTables, ValidatedMatchers} -import loaders._ -import utils.Clock._ +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.loaders._ +import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ + +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers class MailchimpAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 803b8d242..64dc9f7dd 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -44,15 +44,15 @@ object Dependencies { val refererParser = "1.0.0" val maxmindIplookups = "0.6.1" - val circe = "0.11.1" - val circeOptics = "0.11.0" - val circeJackson = "0.11.1" + val circe = "0.13.0" + val circeOptics = "0.13.0" + val circeJackson = "0.13.0" val scalaForex = "0.7.0" val scalaWeather = "0.5.0" val gatlingJsonpath = "0.6.14" val scalaUri = "1.4.5" val scalaLruMap = "0.3.1" - val badRows = "1.0.0" + val badRows = "2.1.0" val snowplowRawEvent = "0.1.0" val collectorPayload = "0.0.0" @@ -98,6 +98,7 @@ object Dependencies { val circeParser = "io.circe" %% "circe-parser" % V.circe val circeLiteral = "io.circe" %% "circe-literal" % V.circe val circeJava8 = "io.circe" %% "circe-java8" % V.circe + val circeJawn = "io.circe" %% "circe-jawn" % V.circe val circeOptics = "io.circe" %% "circe-optics" % V.circeOptics val circeJackson = "io.circe" %% "circe-jackson29" % V.circeJackson val scalaUri = "io.lemonlabs" %% "scala-uri" % V.scalaUri From 
4e718b0274ae6c36b11425761f03c751697ea4d7 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 26 Aug 2020 21:01:28 +0300 Subject: [PATCH 13/38] Common: bump scala-maxmind-iplookups to 0.7.1 (close #323) --- project/Dependencies.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 64dc9f7dd..648c8398f 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -43,7 +43,7 @@ object Dependencies { val slf4j = "1.7.26" val refererParser = "1.0.0" - val maxmindIplookups = "0.6.1" + val maxmindIplookups = "0.7.1" val circe = "0.13.0" val circeOptics = "0.13.0" val circeJackson = "0.13.0" @@ -63,7 +63,7 @@ object Dependencies { val kinesisClient = "1.13.3" val kafka = "2.2.1" val nsqClient = "1.2.0" - val jackson = "2.9.9" + val jackson = "2.10.3" val config = "1.3.4" val scopt = "3.7.1" @@ -100,7 +100,7 @@ object Dependencies { val circeJava8 = "io.circe" %% "circe-java8" % V.circe val circeJawn = "io.circe" %% "circe-jawn" % V.circe val circeOptics = "io.circe" %% "circe-optics" % V.circeOptics - val circeJackson = "io.circe" %% "circe-jackson29" % V.circeJackson + val circeJackson = "io.circe" %% "circe-jackson210" % V.circeJackson val scalaUri = "io.lemonlabs" %% "scala-uri" % V.scalaUri val gatlingJsonpath = "io.gatling" %% "jsonpath" % V.gatlingJsonpath val scalaForex = "com.snowplowanalytics" %% "scala-forex" % V.scalaForex From f5363682fa6fd21fdcd5cf6133fe24d4369edd1f Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Thu, 27 Aug 2020 15:24:57 +0300 Subject: [PATCH 14/38] Common: bump iglu-scala-client to 1.0.2 (close #52) --- build.sbt | 2 +- project/Dependencies.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build.sbt b/build.sbt index 457bdaeb5..09db15707 100644 --- a/build.sbt +++ b/build.sbt @@ -54,8 +54,8 @@ lazy val common = project Dependencies.Libraries.scalaForex, Dependencies.Libraries.scalaWeather, 
Dependencies.Libraries.gatlingJsonpath, - Dependencies.Libraries.scalaLruMap, Dependencies.Libraries.badRows, + Dependencies.Libraries.igluClient, Dependencies.Libraries.snowplowRawEvent, Dependencies.Libraries.collectorPayload, Dependencies.Libraries.schemaSniffer, diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 648c8398f..2911b660b 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -51,8 +51,8 @@ object Dependencies { val scalaWeather = "0.5.0" val gatlingJsonpath = "0.6.14" val scalaUri = "1.4.5" - val scalaLruMap = "0.3.1" val badRows = "2.1.0" + val igluClient = "1.0.2" val snowplowRawEvent = "0.1.0" val collectorPayload = "0.0.0" @@ -107,8 +107,8 @@ object Dependencies { val refererParser = "com.snowplowanalytics" %% "scala-referer-parser" % V.refererParser val maxmindIplookups = "com.snowplowanalytics" %% "scala-maxmind-iplookups" % V.maxmindIplookups val scalaWeather = "com.snowplowanalytics" %% "scala-weather" % V.scalaWeather - val scalaLruMap = "com.snowplowanalytics" %% "scala-lru-map" % V.scalaLruMap val badRows = "com.snowplowanalytics" %% "snowplow-badrows" % V.badRows + val igluClient = "com.snowplowanalytics" %% "iglu-scala-client" % V.igluClient val snowplowRawEvent = "com.snowplowanalytics" % "snowplow-thrift-raw-event" % V.snowplowRawEvent val collectorPayload = "com.snowplowanalytics" % "collector-payload-1" % V.collectorPayload val schemaSniffer = "com.snowplowanalytics" % "schema-sniffer-1" % V.schemaSniffer From 0fb0c060dd44105c9793744ef67dc2d5c56acc01 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 16 Sep 2020 01:14:16 +0300 Subject: [PATCH 15/38] Common: bump scala-weather to 1.0.0 (close #347) --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 2911b660b..64557ea40 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -48,7 +48,7 @@ object 
Dependencies { val circeOptics = "0.13.0" val circeJackson = "0.13.0" val scalaForex = "0.7.0" - val scalaWeather = "0.5.0" + val scalaWeather = "1.0.0" val gatlingJsonpath = "0.6.14" val scalaUri = "1.4.5" val badRows = "2.1.0" From 25797d4192e12e930e0b9f1e11d1a53e65c9d78c Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 16 Sep 2020 01:15:43 +0300 Subject: [PATCH 16/38] Common: bump scala-referer-parser to 1.1.0 (close #348) --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 64557ea40..2e7983739 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -42,7 +42,7 @@ object Dependencies { val guava = "28.1-jre" val slf4j = "1.7.26" - val refererParser = "1.0.0" + val refererParser = "1.1.0" val maxmindIplookups = "0.7.1" val circe = "0.13.0" val circeOptics = "0.13.0" From 5f16d665772cabe30d7b07d7c260ccbce91db8af Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 16 Sep 2020 01:17:09 +0300 Subject: [PATCH 17/38] Common: bump scala-forex to 1.0.0 (close #349) --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 2e7983739..a45685850 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -47,7 +47,7 @@ object Dependencies { val circe = "0.13.0" val circeOptics = "0.13.0" val circeJackson = "0.13.0" - val scalaForex = "0.7.0" + val scalaForex = "1.0.0" val scalaWeather = "1.0.0" val gatlingJsonpath = "0.6.14" val scalaUri = "1.4.5" From 109500af0122040ecf8f60256b5f75160f0107b0 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Tue, 8 Sep 2020 12:35:13 +0200 Subject: [PATCH 18/38] Common: fix PiiPseudonymizerEnrichment for arrays and improve unit tests coverage (close #334) --- .../pii/PiiPseudonymizerEnrichment.scala | 10 +- .../schemas/com.test/array/jsonschema/1-0-0 | 24 +++ 
.../pii/PiiPseudonymizerEnrichmentSpec.scala | 148 ++++++++++++++++-- 3 files changed, 163 insertions(+), 19 deletions(-) create mode 100644 modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index 9c5aaa2e6..3d851969a 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -24,6 +24,7 @@ import io.circe.syntax._ import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.node.{ArrayNode, ObjectNode, TextNode} +import com.fasterxml.jackson.databind.ObjectMapper import com.jayway.jsonpath.{Configuration, JsonPath => JJsonPath} import com.jayway.jsonpath.MapFunction @@ -290,7 +291,9 @@ private final case class ScrambleMapFunction( val _ = modifiedFields += JsonModifiedField(fieldName, s, newValue, jsonPath, schema) newValue case a: ArrayNode => - a.elements.asScala.map { + val mapper = new ObjectMapper() + val arr = mapper.createArrayNode() + a.elements.asScala.foreach { case t: TextNode => val originalValue = t.asText() val newValue = strategy.scramble(originalValue) @@ -301,9 +304,10 @@ private final case class ScrambleMapFunction( jsonPath, schema ) - newValue - case default: AnyRef => default + arr.add(newValue) + case default: AnyRef => arr.add(default) } + arr case default: AnyRef => default } } diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 new file mode 
100644 index 000000000..2a342e2c0 --- /dev/null +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 @@ -0,0 +1,24 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema to test scrambling of array in PII enrichment", + "self": { + "vendor": "com.test", + "name": "array", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "field": { + "type": "array", + "items": { + "type": ["string", "null" ] + } + }, + "field2": { + "type": ["string", "null"] + } + }, + "required": ["field"], + "additionalProperties": false +} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 60d14e364..3fab13138 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -37,6 +37,7 @@ import com.snowplowanalytics.snowplow.badrows.{BadRow, Processor} import com.snowplowanalytics.snowplow.enrich.common.{EtlPipeline, SpecHelpers} import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.IpLookupsEnrichment +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CampaignAttributionEnrichment import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -76,6 +77,17 @@ class PiiPseudonymizerEnrichmentSpec 
extends Specification with ValidatedMatcher "uid" -> "john@acme.com", "ip" -> "70.46.123.145", "fp" -> "its_you_again!", + "url" -> "http://foo.bar?utm_term=hello&utm_content=world&msclkid=500&_sp=duid", + "dnuid" -> "gfhdgjfgndf", + "nuid" -> "kuykyfkfykukfuy", + "tr_id" -> "t5465463", + "ti_id" -> "6546b56356b354bbv", + "se_ca" -> "super category", + "se_ac" -> "great action", + "se_la" -> "awesome label", + "se_pr" -> "good property", + "duid" -> "786d1b69-a603-4eb8-9178-fed2a195a1ed", + "sid" -> "87857856-a603-4eb8-9178-fed2a195a1ed", "co" -> """ |{ @@ -100,6 +112,12 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher | "someInt": 1 | }, | "schema": "iglu:com.acme/email_sent/jsonschema/1-1-0" + | }, + | { + | "schema": "iglu:com.test/array/jsonschema/1-0-0", + | "data": { + | "field" : ["hello", "world"] + | } | } | ] |} @@ -178,19 +196,75 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher IpLookupsEnrichment.parse(js, schemaKey, true).toOption.get.enrichment[Id] } + private val campaignAttributionEnrichment = { + val js = json"""{ + "enabled": true, + "parameters": { + "mapping": "static", + "fields": { + "mktMedium": ["utm_medium"], + "mktSource": ["utm_source"], + "mktTerm": ["utm_term"], + "mktContent": ["utm_content"], + "mktCampaign": ["utm_campaign"] + } + } + }""" + val schemaKey = SchemaKey( + "com.snowplowanalytics.snowplow", + "campaign_attribution", + "jsonschema", + SchemaVer.Full(1, 0, 1) + ) + CampaignAttributionEnrichment.parse(js, schemaKey).toOption.get.enrichment + } + def e1 = { val enrichmentReg = EnrichmentRegistry[Id]( ipLookups = ipEnrichment.some, + campaignAttribution = campaignAttributionEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiScalar(fieldMutator = ScalarMutators("user_id")), PiiScalar( fieldMutator = ScalarMutators("user_ipaddress") ), - PiiScalar(fieldMutator = ScalarMutators("ip_domain")), PiiScalar( fieldMutator = 
ScalarMutators("user_fingerprint") - ) + ), + PiiScalar( + fieldMutator = ScalarMutators("domain_userid") + ), + PiiScalar( + fieldMutator = ScalarMutators("network_userid") + ), + PiiScalar( + fieldMutator = ScalarMutators("ip_organization") + ), + PiiScalar( + fieldMutator = ScalarMutators("ip_domain") + ), + PiiScalar( + fieldMutator = ScalarMutators("tr_orderid") + ), + PiiScalar( + fieldMutator = ScalarMutators("ti_orderid") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_term") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_clickid") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_content") + ), + PiiScalar(fieldMutator = ScalarMutators("se_category")), + PiiScalar(fieldMutator = ScalarMutators("se_action")), + PiiScalar(fieldMutator = ScalarMutators("se_label")), + PiiScalar(fieldMutator = ScalarMutators("se_property")), + PiiScalar(fieldMutator = ScalarMutators("refr_domain_userid")), + PiiScalar(fieldMutator = ScalarMutators("domain_sessionid")) ), false, PiiStrategyPseudonymize( @@ -203,24 +277,52 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher val output = commonSetup(enrichmentReg) val expected = new EnrichedEvent() expected.app_id = "ads" - expected.user_id = "7d8a4beae5bc9d314600667d2f410918f9af265017a6ade99f60a9c8f3aac6e9" - expected.user_ipaddress = "dd9720903c89ae891ed5c74bb7a9f2f90f6487927ac99afe73b096ad0287f3f5" - expected.ip_domain = null - expected.user_fingerprint = "27abac60dff12792c6088b8d00ce7f25c86b396b8c3740480cd18e21068ecff4" expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" + expected.user_id = "7d8a4beae5bc9d314600667d2f410918f9af265017a6ade99f60a9c8f3aac6e9" + expected.user_ipaddress = "dd9720903c89ae891ed5c74bb7a9f2f90f6487927ac99afe73b096ad0287f3f5" + expected.user_fingerprint = "27abac60dff12792c6088b8d00ce7f25c86b396b8c3740480cd18e21068ecff4" + expected.domain_userid = 
"e97d86d49b16397e8fd654b32a0ed03cfe3a4d8d867d913620ce08e3ca855d6d" + expected.network_userid = "47453d3c4428207d22005463bb3d945b137f9342d445b7114776e88311bbe648" + expected.ip_organization = "4d5dd7eebeb9d47f9ebff5993502c0380a110c34711ef5062fdb84a563759f3b" + expected.ip_domain = null + expected.tr_orderid = "5139219b15f3d1ab0c5056296cf5246eeb0b934ee5d1c96cb2027e694005bbce" + expected.ti_orderid = "326c0bfc5857f21695406ebd93068341c9f2d975cf00d117479e01e9012e196c" + expected.mkt_term = "b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb" + expected.mkt_clickid = "fae3733fa03cdf57d82e89ac63026afd8782d07ba3c918acb415a4343457785f" + expected.mkt_content = "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" + expected.se_category = "f33daec1ed4cb688f4f1762390735fd78f6a06083f855422a7303ed63707c962" + expected.se_action = "53f3e1ca4a0dccce4a1b2900a6bcfd21b22a0f444253067e2fe022948a0b3be7" + expected.se_label = "b243defc0d3b86333a104fb2b3a2f43371b8d73359c429b9177dfc5bb3840efd" + expected.se_property = "eb19004c52cd4557aacfa0b30035160c417c3a6a5fad44b96f03c9e2bebaf0b3" + expected.refr_domain_userid = "f3e68fd96eaef0cafc1257ec7132b4b3dbae20b1073155531f909999e5da9b2c" + expected.domain_sessionid = "7378a72b0183f456df98453b2ff9ed5685206a67f312edb099dc74aed76e1b34" val size = output.size must_== 1 val validOut = output.head must beValid.like { case enrichedEvent => (enrichedEvent.app_id must_== expected.app_id) and + (enrichedEvent.geo_city must_== expected.geo_city) and + (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and + (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) and (enrichedEvent.user_id must_== expected.user_id) and (enrichedEvent.user_ipaddress must_== expected.user_ipaddress) and - (enrichedEvent.ip_domain must_== expected.ip_domain) and (enrichedEvent.user_fingerprint must_== expected.user_fingerprint) and - (enrichedEvent.geo_city must_== expected.geo_city) and - (enrichedEvent.etl_tstamp must_== 
expected.etl_tstamp) and - (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) + (enrichedEvent.domain_userid must_== expected.domain_userid) and + (enrichedEvent.network_userid must_== expected.network_userid) and + (enrichedEvent.ip_organization must_== expected.ip_organization) and + (enrichedEvent.ip_domain must_== expected.ip_domain) and + (enrichedEvent.tr_orderid must_== expected.tr_orderid) and + (enrichedEvent.ti_orderid must_== expected.ti_orderid) and + (enrichedEvent.mkt_term must_== expected.mkt_term) and + (enrichedEvent.mkt_clickid must_== expected.mkt_clickid) and + (enrichedEvent.mkt_content must_== expected.mkt_content) and + (enrichedEvent.se_category must_== expected.se_category) and + (enrichedEvent.se_action must_== expected.se_action) and + (enrichedEvent.se_label must_== expected.se_label) and + (enrichedEvent.se_property must_== expected.se_property) and + (enrichedEvent.refr_domain_userid must_== expected.refr_domain_userid) and + (enrichedEvent.domain_sessionid must_== expected.domain_sessionid) } size and validOut } @@ -240,6 +342,11 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 1, 0), jsonPath = "$.data.emailAddress2" ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field" + ), PiiJson( fieldMutator = JsonMutators("unstruct_event"), schemaCriterion = SchemaCriterion("com.mailgun", "message_clicked", "jsonschema", 1, 0, 0), @@ -248,7 +355,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( - "SHA-256", + "MD5", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -271,10 +378,10 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher val contextJ = 
parse(enrichedEvent.contexts).toOption.get.hcursor val contextJFirstElement = contextJ.downField("data").downArray val contextJSecondElement = contextJFirstElement.right + val contextJThirdElement = contextJSecondElement.right val unstructEventJ = parse(enrichedEvent.unstruct_event).toOption.get.hcursor .downField("data") .downField("data") - val first = (contextJFirstElement .downField("data") .get[String]("emailAddress") must beRight( @@ -311,7 +418,16 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher )) and (unstructEventJ.get[String]("myVar2") must beRight("awesome")) - first and second + val third = contextJThirdElement + .downField("data") + .get[List[String]]("field") must + beRight( + List[String]("b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb", + "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" + ) + ) + + first and second and third } size and validOut @@ -330,7 +446,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( - "SHA-256", + "SHA-384", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -375,7 +491,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( - "SHA-256", + "SHA-512", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -423,7 +539,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( - "SHA-256", + "MD-2", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) From 687cb0a4190ff2c1de5370e9310caa69794edaf6 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Tue, 8 Sep 2020 12:36:47 +0200 Subject: [PATCH 19/38] Common: improve unit tests coverage (close #335) --- .../EtlPipelineSpec.scala | 30 ++++++++++ .../enrichments/EnrichmentManagerSpec.scala | 21 +++++++ .../utils/JsonUtilsSpec.scala | 55 +++++++++++++++++++ 
.../utils/conversionUtilsSpecs.scala | 44 +++++++++++++++ 4 files changed, 150 insertions(+) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala index 2d8527e2f..7e71e3b46 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala @@ -22,6 +22,7 @@ import com.snowplowanalytics.iglu.client.resolver.registries.Registry import com.snowplowanalytics.iglu.client.validator.CirceValidator import com.snowplowanalytics.snowplow.badrows.Processor +import com.snowplowanalytics.snowplow.badrows.BadRow import org.apache.thrift.TSerializer @@ -44,6 +45,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { def is = s2""" EtlPipeline should always produce either bad or good row for each event of the payload $e1 Processing of events with malformed query string should be supported $e2 + Processing of invalid CollectorPayload (CPFormatViolation bad row) should be supported $e3 + Absence of CollectorPayload (None) should be supported $e4 """ val adapterRegistry = new AdapterRegistry() @@ -88,6 +91,33 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { case res => ko(s"[$res] doesn't contain one enriched event") } } + + def e3 = { + val invalidCollectorPayload = ThriftLoader.toCollectorPayload(Array(1.toByte), processor) + EtlPipeline.processEvents[Id]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + invalidCollectorPayload + ) must be like { + case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok + case other => ko(s"One invalid CPFormatViolation expected, got ${other}") + } + } + + def e4 = { + val collectorPayload: Option[CollectorPayload] = None + EtlPipeline.processEvents[Id]( + adapterRegistry, + 
enrichmentReg, + client, + processor, + dateTime, + collectorPayload.validNel[BadRow] + ) must beEqualTo(Nil) + } } object EtlPipelineSpec { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index bbf15578c..b76e01c12 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -353,6 +353,27 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { EnrichmentManager.getIabContext(input, iabEnrichment) must beRight.like { case ctx => ctx must beSome } } } + + "getCollectorVersionSet" should { + "return an enrichment failure if v_collector is null or empty" >> { + val input = new EnrichedEvent() + EnrichmentManager.getCollectorVersionSet(input) must beLeft.like { + case _: FailureDetails.EnrichmentFailure => ok + case other => ko(s"expected EnrichmentFailure but got $other") + } + input.v_collector = "" + EnrichmentManager.getCollectorVersionSet(input) must beLeft.like { + case _: FailureDetails.EnrichmentFailure => ok + case other => ko(s"expected EnrichmentFailure but got $other") + } + } + + "return Unit if v_collector is set" >> { + val input = new EnrichedEvent() + input.v_collector = "v42" + EnrichmentManager.getCollectorVersionSet(input) must beRight(()) + } + } } object EnrichmentManagerSpec { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala index 5586b55b8..a3e020e18 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala +++ 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala @@ -15,12 +15,19 @@ package utils import org.specs2.Specification +import org.joda.time.format.DateTimeFormat + import io.circe.Json +import cats.data.NonEmptyList + class JsonUtilsSpec extends Specification { def is = s2""" toJson can deal with non-null String $e1 toJson can deal with null String $e2 + toJson can deal with booleans $e3 + toJson can deal with integers $e4 + toJson can deal with dates $e5 """ def e1 = { @@ -36,4 +43,52 @@ class JsonUtilsSpec extends Specification { JsonUtils.toJson(key, value, Nil, Nil, None) must beEqualTo((key, Json.Null)) } + + def e3 = { + val key = "field" + + val truE = "true" + val exp1 = JsonUtils.toJson(key, truE, List(key), Nil, None) must + beEqualTo(key -> Json.True) + + val falsE = "false" + val exp2 = JsonUtils.toJson(key, falsE, List(key), Nil, None) must + beEqualTo(key -> Json.False) + + val foo = "foo" + val exp3 = JsonUtils.toJson(key, foo, List(key), Nil, None) must + beEqualTo(key -> Json.fromString(foo)) + + exp1 and exp2 and exp3 + } + + def e4 = { + val key = "field" + + val number = 123 + val exp1 = JsonUtils.toJson(key, number.toString(), Nil, List(key), None) must + beEqualTo(key -> Json.fromBigInt(number)) + + val notNumber = "abc" + val exp2 = JsonUtils.toJson(key, notNumber, Nil, List(key), None) must + beEqualTo(key -> Json.fromString(notNumber)) + + exp1 and exp2 + } + + def e5 = { + val key = "field" + + val formatter = DateTimeFormat.forPattern("yyyy-MM-dd") + val malformedDate = "2020-09-02" + val correctDate = "2020-09-02T22:00:00.000Z" + + val exp1 = JsonUtils.toJson(key, malformedDate, Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + be !== (key -> Json.fromString(malformedDate)) + + val exp2 = JsonUtils.toJson(key, correctDate, Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + beEqualTo(key -> Json.fromString(correctDate)) + + exp1 and exp2 + } } diff --git 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala index be3ea91db..5cf98ed60 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala @@ -15,6 +15,7 @@ package utils import java.net.{Inet6Address, InetAddress, URI} import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets import cats.syntax.either._ import cats.syntax.option._ @@ -275,6 +276,49 @@ class ValidateUuidSpec extends Specification with DataTables with ScalaCheck { } } +class ValidateIntegerSpec extends Specification { + def is = s2""" + validateInteger should return the original string if it contains an integer $e1 + validateInteger should return an enrichment failure for a string not containing a valid integer $e2 + """ + + val FieldName = "integer" + + def e1 = ConversionUtils.validateInteger(FieldName, "123") must beRight("123") + + def e2 = { + val str = "abc" + ConversionUtils.validateInteger(FieldName, str) must beLeft( + FailureDetails.EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.InputData( + FieldName, + Some(str), + "not a valid integer" + ) + ) + ) + } +} + +class DecodeStringSpec extends Specification { + def is = s2""" + decodeString should decode a correctly URL-encoded string $e1 + decodeString should fail decoding a string not correctly URL-encoded $e2 + """ + + val utf8 = StandardCharsets.UTF_8 + + def e1 = { + val clear = "12 ++---=&&3abc%%%34%2234%$#@%^PLLPbgfxbf$#%$@#@^" + val encoded = ConversionUtils.encodeString(utf8.toString(), clear) + ConversionUtils.decodeString(utf8, encoded) must beRight(clear) + } + + def e2 = + ConversionUtils.decodeString(utf8, "%%23") must beLeft +} + class StringToDoubleLikeSpec 
extends Specification with DataTables { def is = s2""" stringToDoublelike should fail if the supplied String is not parseable as a number $e1 From 73c9ebf4940eeae3e82a6d33bec3fde746ef9b3d Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Wed, 9 Sep 2020 14:54:32 +0200 Subject: [PATCH 20/38] Common: fix pattern matching against null in ScrambleMapFunction (close #338) --- .../registry/pii/PiiPseudonymizerEnrichment.scala | 5 +++-- .../registry/pii/PiiPseudonymizerEnrichmentSpec.scala | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index 3d851969a..414a183ae 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -23,7 +23,7 @@ import io.circe.jackson._ import io.circe.syntax._ import com.fasterxml.jackson.databind.JsonNode -import com.fasterxml.jackson.databind.node.{ArrayNode, ObjectNode, TextNode} +import com.fasterxml.jackson.databind.node.{ArrayNode, NullNode, ObjectNode, TextNode} import com.fasterxml.jackson.databind.ObjectMapper import com.jayway.jsonpath.{Configuration, JsonPath => JJsonPath} @@ -306,8 +306,9 @@ private final case class ScrambleMapFunction( ) arr.add(newValue) case default: AnyRef => arr.add(default) + case null => arr.add(NullNode.getInstance()) } arr - case default: AnyRef => default + case _ => currentValue } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 3fab13138..3c124570e 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -116,7 +116,8 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher | { | "schema": "iglu:com.test/array/jsonschema/1-0-0", | "data": { - | "field" : ["hello", "world"] + | "field" : ["hello", "world"], + | "field2" : null | } | } | ] @@ -347,6 +348,11 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), jsonPath = "$.field" ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field2" + ), PiiJson( fieldMutator = JsonMutators("unstruct_event"), schemaCriterion = SchemaCriterion("com.mailgun", "message_clicked", "jsonschema", 1, 0, 0), From f4b38955cfd03e3168826f340b1c244a9802b4a4 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Wed, 9 Sep 2020 19:11:07 +0200 Subject: [PATCH 21/38] Common: fix PathNotFoundException in PII enrichment (close #339) --- .../pii/PiiPseudonymizerEnrichment.scala | 22 +++++++++++-------- .../schemas/com.test/array/jsonschema/1-0-0 | 11 ++++++++++ .../pii/PiiPseudonymizerEnrichmentSpec.scala | 20 ++++++++++++++--- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index 
414a183ae..25f8286e4 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -265,15 +265,19 @@ final case class PiiJson( val objectNode = io.circe.jackson.mapper.valueToTree[ObjectNode](json) val documentContext = JJsonPath.using(JsonPathConf).parse(objectNode) val modifiedFields = MutableList[JsonModifiedField]() - val documentContext2 = documentContext.map( - jsonPath, - new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema) - ) - // make sure it is a structure preserving method, see #3636 - //val transformedJValue = JsonMethods.fromJsonNode(documentContext.json[JsonNode]()) - //val Diff(_, erroneouslyAdded, _) = jValue diff transformedJValue - //val Diff(_, withoutCruft, _) = erroneouslyAdded diff transformedJValue - (jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList) + Option(documentContext.read[AnyRef](jsonPath)) match { // check that json object not null + case None => (jacksonToCirce(documentContext.json[JsonNode]()), modifiedFields.toList) + case _ => + val documentContext2 = documentContext.map( + jsonPath, + new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema) + ) + // make sure it is a structure preserving method, see #3636 + //val transformedJValue = JsonMethods.fromJsonNode(documentContext.json[JsonNode]()) + //val Diff(_, erroneouslyAdded, _) = jValue diff transformedJValue + //val Diff(_, withoutCruft, _) = erroneouslyAdded diff transformedJValue + (jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList) + } } } diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 index 
2a342e2c0..97e2490a3 100644 --- a/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 @@ -17,6 +17,17 @@ }, "field2": { "type": ["string", "null"] + }, + "field3": { + "type": ["object", "null"], + "properties": { + "a": { + "type": "string" + }, + "b": { + "type": "string" + } + } } }, "required": ["field"], diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 3c124570e..4837631f1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -117,7 +117,8 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher | "schema": "iglu:com.test/array/jsonschema/1-0-0", | "data": { | "field" : ["hello", "world"], - | "field2" : null + | "field2" : null, + | "field3": null | } | } | ] @@ -353,6 +354,11 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), jsonPath = "$.field2" ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field3.a" + ), PiiJson( fieldMutator = JsonMutators("unstruct_event"), schemaCriterion = SchemaCriterion("com.mailgun", "message_clicked", "jsonschema", 1, 0, 0), @@ -424,14 +430,22 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher )) and (unstructEventJ.get[String]("myVar2") must 
beRight("awesome")) - val third = contextJThirdElement + val third = (contextJThirdElement .downField("data") .get[List[String]]("field") must beRight( List[String]("b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb", "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" ) - ) + )) and + (contextJThirdElement + .downField("data") + .downField("field2") + .focus must beSome.like { case json => json.isNull }) and + (contextJThirdElement + .downField("data") + .downField("field3") + .focus must beSome.like { case json => json.isNull }) first and second and third } From 2e1986681607b6e92671a5f454fd16226afbd692 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Thu, 17 Sep 2020 12:02:10 +0200 Subject: [PATCH 22/38] Common: replace deprecated constructors in EnrichedEventSpec (close #354) --- .../outputs/EnrichedEventSpec.scala | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala index 5c118448c..e36a02b43 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala @@ -56,14 +56,14 @@ class EnrichedEventSpec extends Specification { testField(_.user_ipaddress = "user_ipaddress", _.user_ipaddress) testField(_.user_fingerprint = "user_fingerprint", _.user_fingerprint) testField(_.domain_userid = "domain_userid", _.domain_userid) - testField(_.domain_sessionidx = new JInteger(0), _.domain_sessionidx) + testField(_.domain_sessionidx = JInteger.valueOf(0), _.domain_sessionidx) testField(_.network_userid = "network_userid", _.network_userid) testField(_.geo_country = "geo_country", _.geo_country) testField(_.geo_region = 
"geo_region", _.geo_region) testField(_.geo_city = "geo_city", _.geo_city) testField(_.geo_zipcode = "geo_zipcode", _.geo_zipcode) - testField(_.geo_latitude = new JFloat(0.0), _.geo_latitude) - testField(_.geo_longitude = new JFloat(0.0), _.geo_longitude) + testField(_.geo_latitude = JFloat.valueOf("0.0"), _.geo_latitude) + testField(_.geo_longitude = JFloat.valueOf("0.0"), _.geo_longitude) testField(_.geo_region_name = "geo_region_name", _.geo_region_name) testField(_.ip_isp = "ip_isp", _.ip_isp) testField(_.ip_organization = "ip_organization", _.ip_organization) @@ -74,13 +74,13 @@ class EnrichedEventSpec extends Specification { testField(_.page_referrer = "page_referrer", _.page_referrer) testField(_.page_urlscheme = "page_urlscheme", _.page_urlscheme) testField(_.page_urlhost = "page_urlhost", _.page_urlhost) - testField(_.page_urlport = new JInteger(0), _.page_urlport) + testField(_.page_urlport = JInteger.valueOf(0), _.page_urlport) testField(_.page_urlpath = "page_urlpath", _.page_urlpath) testField(_.page_urlquery = "page_urlquery", _.page_urlquery) testField(_.page_urlfragment = "page_urlfragment", _.page_urlfragment) testField(_.refr_urlscheme = "refr_urlscheme", _.refr_urlscheme) testField(_.refr_urlhost = "refr_urlhost", _.refr_urlhost) - testField(_.refr_urlport = new JInteger(0), _.refr_urlport) + testField(_.refr_urlport = JInteger.valueOf(0), _.refr_urlport) testField(_.refr_urlpath = "refr_urlpath", _.refr_urlpath) testField(_.refr_urlquery = "refr_urlquery", _.refr_urlquery) testField(_.refr_urlfragment = "refr_urlfragment", _.refr_urlfragment) @@ -112,11 +112,11 @@ class EnrichedEventSpec extends Specification { testField(_.ti_name = "ti_name", _.ti_name) testField(_.ti_category = "ti_category", _.ti_category) testField(_.ti_price = "ti_price", _.ti_price) - testField(_.ti_quantity = new JInteger(0), _.ti_quantity) - testField(_.pp_xoffset_min = new JInteger(0), _.pp_xoffset_min) - testField(_.pp_xoffset_max = new JInteger(0), _.pp_xoffset_max) 
- testField(_.pp_yoffset_min = new JInteger(0), _.pp_yoffset_min) - testField(_.pp_yoffset_max = new JInteger(0), _.pp_yoffset_max) + testField(_.ti_quantity = JInteger.valueOf(0), _.ti_quantity) + testField(_.pp_xoffset_min = JInteger.valueOf(0), _.pp_xoffset_min) + testField(_.pp_xoffset_max = JInteger.valueOf(0), _.pp_xoffset_max) + testField(_.pp_yoffset_min = JInteger.valueOf(0), _.pp_yoffset_min) + testField(_.pp_yoffset_max = JInteger.valueOf(0), _.pp_yoffset_max) testField(_.useragent = "useragent", _.useragent) testField(_.br_name = "br_name", _.br_name) testField(_.br_family = "br_family", _.br_family) @@ -124,30 +124,30 @@ class EnrichedEventSpec extends Specification { testField(_.br_type = "br_type", _.br_type) testField(_.br_renderengine = "br_renderengine", _.br_renderengine) testField(_.br_lang = "br_lang", _.br_lang) - testField(_.br_features_pdf = new JByte(Byte.MinValue), _.br_features_pdf) - testField(_.br_features_flash = new JByte(Byte.MinValue), _.br_features_flash) - testField(_.br_features_java = new JByte(Byte.MinValue), _.br_features_java) - testField(_.br_features_director = new JByte(Byte.MinValue), _.br_features_director) - testField(_.br_features_quicktime = new JByte(Byte.MinValue), _.br_features_quicktime) - testField(_.br_features_realplayer = new JByte(Byte.MinValue), _.br_features_realplayer) - testField(_.br_features_windowsmedia = new JByte(Byte.MinValue), _.br_features_windowsmedia) - testField(_.br_features_gears = new JByte(Byte.MinValue), _.br_features_gears) - testField(_.br_features_silverlight = new JByte(Byte.MinValue), _.br_features_silverlight) - testField(_.br_cookies = new JByte(Byte.MinValue), _.br_cookies) + testField(_.br_features_pdf = JByte.valueOf(Byte.MinValue), _.br_features_pdf) + testField(_.br_features_flash = JByte.valueOf(Byte.MinValue), _.br_features_flash) + testField(_.br_features_java = JByte.valueOf(Byte.MinValue), _.br_features_java) + testField(_.br_features_director = 
JByte.valueOf(Byte.MinValue), _.br_features_director) + testField(_.br_features_quicktime = JByte.valueOf(Byte.MinValue), _.br_features_quicktime) + testField(_.br_features_realplayer = JByte.valueOf(Byte.MinValue), _.br_features_realplayer) + testField(_.br_features_windowsmedia = JByte.valueOf(Byte.MinValue), _.br_features_windowsmedia) + testField(_.br_features_gears = JByte.valueOf(Byte.MinValue), _.br_features_gears) + testField(_.br_features_silverlight = JByte.valueOf(Byte.MinValue), _.br_features_silverlight) + testField(_.br_cookies = JByte.valueOf(Byte.MinValue), _.br_cookies) testField(_.br_colordepth = "br_colordepth", _.br_colordepth) - testField(_.br_viewwidth = new JInteger(0), _.br_viewwidth) - testField(_.br_viewheight = new JInteger(0), _.br_viewheight) + testField(_.br_viewwidth = JInteger.valueOf(0), _.br_viewwidth) + testField(_.br_viewheight = JInteger.valueOf(0), _.br_viewheight) testField(_.os_name = "os_name", _.os_name) testField(_.os_family = "os_family", _.os_family) testField(_.os_manufacturer = "os_manufacturer", _.os_manufacturer) testField(_.os_timezone = "os_timezone", _.os_timezone) testField(_.dvce_type = "dvce_type", _.dvce_type) - testField(_.dvce_ismobile = new JByte(Byte.MinValue), _.dvce_ismobile) - testField(_.dvce_screenwidth = new JInteger(0), _.dvce_screenwidth) - testField(_.dvce_screenheight = new JInteger(0), _.dvce_screenheight) + testField(_.dvce_ismobile = JByte.valueOf(Byte.MinValue), _.dvce_ismobile) + testField(_.dvce_screenwidth = JInteger.valueOf(0), _.dvce_screenwidth) + testField(_.dvce_screenheight = JInteger.valueOf(0), _.dvce_screenheight) testField(_.doc_charset = "doc_charset", _.doc_charset) - testField(_.doc_width = new JInteger(0), _.doc_width) - testField(_.doc_height = new JInteger(0), _.doc_height) + testField(_.doc_width = JInteger.valueOf(0), _.doc_width) + testField(_.doc_height = JInteger.valueOf(0), _.doc_height) testField(_.tr_currency = "tr_currency", _.tr_currency) testField(_.tr_total_base 
= "tr_total_base", _.tr_total_base) testField(_.tr_tax_base = "tr_tax_base", _.tr_tax_base) From 8d4726876f8daedc416e4009d1a45bbc02bc9076 Mon Sep 17 00:00:00 2001 From: Dilyan Damyanov Date: Tue, 15 Sep 2020 12:32:48 +0100 Subject: [PATCH 23/38] Common: fix PII enrichment adding empty objects instead of missing properties (close #351) --- .../pii/PiiPseudonymizerEnrichment.scala | 26 +- .../com.acme/email_sent/jsonschema/1-0-0 | 3 + .../com.acme/email_sent/jsonschema/2-0-0 | 24 ++ .../schemas/com.test/array/jsonschema/1-0-0 | 4 + .../enrichments/EnrichmentManagerSpec.scala | 324 +++++++++++++++++- .../pii/PiiPseudonymizerEnrichmentSpec.scala | 92 +++-- 6 files changed, 440 insertions(+), 33 deletions(-) create mode 100644 modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index 25f8286e4..1282281e8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -133,6 +133,25 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { .get(fieldName) .map(_.asRight) .getOrElse(s"The specified json field $fieldName is not supported".asLeft) + + /** Helper to remove fields that were wrongly added and are not in the original JSON. See #351. 
*/ + private[pii] def removeAddedFields(hashed: Json, original: Json): Json = { + val fixedObject = for { + hashedFields <- hashed.asObject + originalFields <- original.asObject + newFields = hashedFields.toList.flatMap { + case (k, v) => originalFields(k).map(origV => (k, removeAddedFields(v, origV))) + } + } yield Json.fromFields(newFields) + + lazy val fixedArray = for { + hashedArr <- hashed.asArray + originalArr <- original.asArray + newArr = hashedArr.zip(originalArr).map { case (hashed, orig) => removeAddedFields(hashed, orig) } + } yield Json.fromValues(newArr) + + fixedObject.orElse(fixedArray).getOrElse(hashed) + } } /** @@ -204,7 +223,8 @@ final case class PiiJson( ) } .getOrElse((parsed, List.empty[JsonModifiedField])) - } yield (substituted.noSpaces, modifiedFields.toList)).getOrElse((null, List.empty)) + } yield (PiiPseudonymizerEnrichment.removeAddedFields(substituted, parsed).noSpaces, modifiedFields.toList)) + .getOrElse((null, List.empty)) /** Map context top fields with strategy if they match. 
*/ private def mapContextTopFields(tuple: (String, Json), strategy: PiiStrategy): (String, (Json, List[JsonModifiedField])) = @@ -272,10 +292,6 @@ final case class PiiJson( jsonPath, new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema) ) - // make sure it is a structure preserving method, see #3636 - //val transformedJValue = JsonMethods.fromJsonNode(documentContext.json[JsonNode]()) - //val Diff(_, erroneouslyAdded, _) = jValue diff transformedJValue - //val Diff(_, withoutCruft, _) = erroneouslyAdded diff transformedJValue (jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList) } } diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 index 087d4e6cd..18dd216f5 100644 --- a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 @@ -14,6 +14,9 @@ }, "emailAddress2": { "type": "string" + }, + "emailAddress3": { + "type": "string" } }, "required": ["emailAddress", "emailAddress2"], diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 new file mode 100644 index 000000000..eca4ca19d --- /dev/null +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 @@ -0,0 +1,24 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for acme stuff", + "self": { + "vendor": "com.acme", + "name": "email_sent", + "format": "jsonschema", + "version": "1-1-0" + }, + "type": "object", + "properties": { + "emailAddress": { + "type": "string" + }, + "emailAddress2": { + "type": "string" + }, + 
"emailAddress3": { + "type": ["string", "null"] + } + }, + "required": ["emailAddress", "emailAddress2"], + "additionalProperties": false +} diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 index 97e2490a3..b2310754d 100644 --- a/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 @@ -28,6 +28,10 @@ "type": "string" } } + }, + "field4": { + "type": "string", + "maxLength": 64 } }, "required": ["field"], diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index b76e01c12..7fd1a4289 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -17,21 +17,23 @@ package enrichments import cats.Id import cats.implicits._ import cats.data.NonEmptyList - import io.circe.literal._ - import org.joda.time.DateTime - import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} - +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} import loaders._ import adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.{ + JsonMutators, + PiiJson, + PiiPseudonymizerEnrichment, + PiiStrategyPseudonymize +} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import utils.Clock._ import utils.ConversionUtils import enrichments.registry.{IabEnrichment, JavascriptScriptEnrichment, YauaaEnrichment} - +import 
org.apache.commons.codec.digest.DigestUtils import org.specs2.mutable.Specification import org.specs2.matcher.EitherMatchers @@ -87,7 +89,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "data": { "emailAddress": "hello@world.com", "emailAddress2": "foo@bar.org", - "emailAddress3": "foo@bar.org" + "unallowedAdditionalField": "foo@bar.org" } } }""" @@ -267,6 +269,314 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { enriched.value must beRight } + "emit an EnrichedEvent if a PII value that needs to be hashed is an empty string" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": "" + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beRight + } + + "emit an EnrichedEvent if a PII value that needs to be hashed is null" >> { + val parameters = Map( + "e" -> "ue", + "tv" 
-> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/2-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": null + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beRight + } + + "fail to emit an EnrichedEvent if a PII value that needs to be hashed is an empty object" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + }""" + ) + val rawEvent = 
RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + + "fail to emit an EnrichedEvent if a context PII value that needs to be hashed is an empty object" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + def enriched = + EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + 
} + + "fail to emit an EnrichedEvent if a PII value needs to be hashed in both co and ue and is invalid in one of them" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": "" + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ), + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + def enriched = + EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + "have a preference of 'ua' query string parameter over user agent of HTTP header" >> { val qs_ua = "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0" val parameters = Map( diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 4837631f1..32afa53a5 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -56,6 +56,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher Hashing configured JSON fields in POJO should silently ignore unsupported types $e6 Hashing configured JSON and scalar fields in POJO emits a correct pii_transformation event $e7 Hashing configured JSON fields in POJO should not create new fields $e8 + removeAddedFields should remove fields added by PII enrichment $e9 """ def commonSetup(enrichmentReg: EnrichmentRegistry[Id]): List[Validated[BadRow, EnrichedEvent]] = { @@ -118,7 +119,8 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher | "data": { | "field" : ["hello", "world"], | "field2" : null, - | "field3": null + | "field3": null, + | "field4": "" | } | } | ] @@ -363,6 +365,11 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher fieldMutator = JsonMutators("unstruct_event"), schemaCriterion = SchemaCriterion("com.mailgun", "message_clicked", "jsonschema", 1, 0, 0), jsonPath = "$.ip" + ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field4" ) ), false, @@ -447,7 +454,12 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher .downField("field3") .focus must beSome.like { case json => json.isNull }) - first and second and third + // Test that empty string in Pii field gets hashed + val fourth = contextJThirdElement + .downField("data") + .get[String]("field4") must 
beRight("7a3477dad66e666bd203b834c54b6dfe8b546bdbc5283462ad14052abfb06600") + + first and second and third and fourth } size and validOut @@ -729,30 +741,68 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ).some ) val output = commonSetup(enrichmentReg) - val expected = new EnrichedEvent() - expected.app_id = "ads" - expected.user_id = "john@acme.com" - expected.user_ipaddress = "70.46.123.145" - expected.ip_domain = null - expected.user_fingerprint = "its_you_again!" - expected.geo_city = "Delray Beach" - expected.etl_tstamp = "1970-01-18 08:40:00.000" - expected.collector_tstamp = "2017-07-14 03:39:39.000" val size = output.size must_== 1 val validOut = output.head must beValid.like { case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") + val context = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data").downArray + val data = context.downField("data") - (firstElem.get[String]("emailAddress") must beRight( - "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" - )) and - (firstElem.downField("data").get[String]("nonExistentEmailAddress") must beLeft) and - (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and - (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) + val one = data.get[String]("emailAddress") must beRight("72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6") + val two = data.get[String]("emailAddress2") must beRight("bob@acme.com") + val three = data.downField("nonExistentEmailAddress").focus must beNone + + one and two and three } size and validOut } + + def e9 = { + val orig = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + 
"schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "foo@bar.com", + "emailAddress2" : "bob@acme.com" + } + } + ] + } + """ + + val hashed = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6", + "emailAddress2" : "bob@acme.com", + "nonExistentEmailAddress" : {} + } + } + ] + } + """ + + val expected = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6", + "emailAddress2" : "bob@acme.com" + } + } + ] + } + """ + + PiiPseudonymizerEnrichment.removeAddedFields(hashed, orig) must beEqualTo(expected) + } } From 295221e0d4d0ee5dc0b55492ed5c3ccfc99c37a0 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Thu, 10 Sep 2020 17:35:11 +0200 Subject: [PATCH 24/38] Common: handle empty query string parameters in adapters (close #341) --- .../common/adapters/RawEvent.scala | 2 +- .../common/adapters/registry/Adapter.scala | 28 +++++----- .../registry/CloudfrontAccessLogAdapter.scala | 4 +- .../registry/GoogleAnalyticsAdapter.scala | 36 ++++++++----- .../adapters/registry/IgluAdapter.scala | 12 ++--- .../adapters/registry/MailchimpAdapter.scala | 16 +++--- .../adapters/registry/MailgunAdapter.scala | 1 + .../adapters/registry/MandrillAdapter.scala | 1 + .../adapters/registry/OlarkAdapter.scala | 1 + .../adapters/registry/PingdomAdapter.scala | 30 +++++------ .../adapters/registry/RemoteAdapter.scala | 2 +- .../adapters/registry/UnbounceAdapter.scala | 1 + .../registry/UrbanAirshipAdapter.scala | 2 +- .../registry/snowplow/RedirectAdapter.scala | 53 ++++++++++--------- .../registry/snowplow/Tp2Adapter.scala | 4 +- 
.../enrichments/EnrichmentManager.scala | 4 +- .../common/enrichments/Transform.scala | 2 +- .../registry/EventFingerprintEnrichment.scala | 6 +-- .../common/utils/ConversionUtils.scala | 4 +- .../common/utils/JsonUtils.scala | 16 +++--- .../package.scala | 2 +- .../SpecHelpers.scala | 4 ++ .../adapters/registry/AdapterSpec.scala | 20 +++---- .../registry/CallrailAdapterSpec.scala | 6 +-- .../CloudfrontAccessLogAdapterSpec.scala | 20 +++---- .../registry/GoogleAnalyticsAdapterSpec.scala | 28 +++++----- .../registry/HubSpotAdapterSpec.scala | 4 +- .../adapters/registry/IgluAdapterSpec.scala | 22 ++++---- .../registry/MailchimpAdapterSpec.scala | 19 +++---- .../registry/MailgunAdapterSpec.scala | 12 +++-- .../registry/MandrillAdapterSpec.scala | 20 +++---- .../registry/MarketoAdapterSpec.scala | 4 +- .../adapters/registry/OlarkAdapterSpec.scala | 6 ++- .../registry/PagerdutyAdapterSpec.scala | 4 +- .../registry/PingdomAdapterSpec.scala | 4 +- .../adapters/registry/RemoteAdapterSpec.scala | 4 +- .../registry/SendgridAdapterSpec.scala | 4 +- .../registry/StatusGatorAdapterSpec.scala | 4 +- .../registry/UnbounceAdapterSpec.scala | 4 +- .../registry/UrbanAirshipAdapterSpec.scala | 8 +-- .../adapters/registry/VeroAdapterSpec.scala | 20 +++---- .../snowplow/SnowplowAdapterSpec.scala | 50 ++++++++++++----- .../enrichments/EnrichmentManagerSpec.scala | 26 ++++----- .../EventFingerprintEnrichmentSpec.scala | 22 ++++---- .../utils/IgluUtilsSpec.scala | 2 +- .../utils/JsonUtilsSpec.scala | 18 +++---- 46 files changed, 322 insertions(+), 240 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala index e16bd5517..8a8c7e1b0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala +++ 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/RawEvent.scala @@ -38,7 +38,7 @@ object RawEvent { RE( re.api.vendor, re.api.version, - re.parameters.toList.map { case (k, v) => NVP(k, Option(v)) }, + re.parameters.toList.map { case (k, v) => NVP(k, v) }, re.contentType, re.source.name, re.source.encoding, diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala index dd4639258..a18d48fd8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala @@ -126,8 +126,8 @@ trait Adapter { * @param parameters A NonEmptyList of name:value pairs * @return the name:value pairs in Map form */ - protected[registry] def toMap(parameters: List[NameValuePair]): Map[String, String] = - parameters.map(p => p.getName -> p.getValue).toMap + protected[registry] def toMap(parameters: List[NameValuePair]): Map[String, Option[String]] = + parameters.map(p => p.getName -> Option(p.getValue)).toMap /** * Convenience function to build a simple formatter of RawEventParameters. 
@@ -169,10 +169,10 @@ trait Adapter { val params = formatter(parameters - ("nuid", "aid", "cv", "p")) val json = toUnstructEvent(SelfDescribingData(schema, params)).noSpaces Map( - "tv" -> tracker, - "e" -> "ue", - "p" -> parameters.getOrElse("p", platform), // Required field - "ue_pr" -> json + "tv" -> Option(tracker), + "e" -> Some("ue"), + "p" -> parameters.getOrElse("p", Option(platform)), // Required field + "ue_pr" -> Option(json) ) ++ parameters.filterKeys(AcceptedQueryParameters) } @@ -224,10 +224,10 @@ trait Adapter { ): RawEventParameters = { val json = toUnstructEvent(SelfDescribingData(schema, eventJson.asJson)).noSpaces Map( - "tv" -> tracker, - "e" -> "ue", - "p" -> qsParams.getOrElse("p", platform), // Required field - "ue_pr" -> json + "tv" -> Option(tracker), + "e" -> Some("ue"), + "p" -> qsParams.getOrElse("p", Option(platform)), // Required field + "ue_pr" -> Option(json) ) ++ qsParams.filterKeys(AcceptedQueryParameters) } @@ -253,10 +253,10 @@ trait Adapter { val json = toUnstructEvent(SelfDescribingData(schema, eventJson)).noSpaces Map( - "tv" -> tracker, - "e" -> "ue", - "p" -> qsParams.getOrElse("p", platform), // Required field - "ue_pr" -> json + "tv" -> Option(tracker), + "e" -> Some("ue"), + "p" -> qsParams.getOrElse("p", Option(platform)), // Required field + "ue_pr" -> Option(json) ) ++ qsParams.filterKeys(AcceptedQueryParameters) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala index 6b99367dd..a5fe05b77 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/CloudfrontAccessLogAdapter.scala @@ -135,9 +135,9 @@ object 
CloudfrontAccessLogAdapter extends Adapter { case "" => None case nonempty => nonempty.some } - val qsParams: Map[String, String] = schemaCompatibleFields(8) match { + val qsParams: Map[String, Option[String]] = schemaCompatibleFields(8) match { case "" => Map() - case url => Map("url" -> url) + case url => Map("url" -> Option(url)) } val userAgent = schemaCompatibleFields(9) match { case "" => None diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala index 9c87b8e5c..b5578251b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/GoogleAnalyticsAdapter.scala @@ -503,10 +503,14 @@ object GoogleAnalyticsAdapter extends Adapter { NonEmptyList .one(FailureDetails.AdapterFailure.InputData("body", bodyPart.some, e)) ) - hitType <- params.get("t").toRight { - val msg = "no t parameter provided: cannot determine hit type" - NonEmptyList - .one(FailureDetails.AdapterFailure.InputData("body", bodyPart.some, msg)) + hitType <- params.get("t") match { + case Some(Some(t)) => Right(t) + case _ => + val msg = "no t parameter provided: cannot determine hit type" + Left( + NonEmptyList + .one(FailureDetails.AdapterFailure.InputData("body", bodyPart.some, msg)) + ) } // direct mappings mappings = translatePayload(params, directMappings(hitType)) @@ -542,9 +546,9 @@ object GoogleAnalyticsAdapter extends Adapter { case (s, d) if hitType != PageViewHitType || s != unstructEventData(PageViewHitType).schemaKey => SelfDescribingData(s, d.asJson) } - val contextParam: Map[String, String] = + val contextParam: Map[String, Option[String]] = if (contextJsons.isEmpty) Map.empty - else Map("co" -> 
toContexts(contextJsons).noSpaces) + else Map("co" -> Some(toContexts(contextJsons).noSpaces)) (trTable, schema, contextParam) }.toEither payload <- translatePayload(params, result._1) @@ -553,7 +557,7 @@ object GoogleAnalyticsAdapter extends Adapter { RawEvent( api = payload.api, parameters = result._3 ++ mappings ++ - Map("e" -> "ue", "ue_pr" -> unstructEvent, "tv" -> Protocol, "p" -> "srv"), + Map("e" -> Some("ue"), "ue_pr" -> Some(unstructEvent), "tv" -> Some(Protocol), "p" -> Some("srv")), contentType = payload.contentType, source = payload.source, context = payload.context @@ -569,10 +573,11 @@ object GoogleAnalyticsAdapter extends Adapter { * @return a translated params */ private def translatePayload( - originalParams: Map[String, String], + originalParams: Map[String, Option[String]], translationTable: Map[String, KVTranslation] ): Either[FailureDetails.AdapterFailure, Map[String, FieldType]] = { val m = originalParams + .collect { case (k, Some(v)) => (k, v) } .foldLeft(Map.empty[String, Either[FailureDetails.AdapterFailure, FieldType]]) { case (m, (fieldName, value)) => translationTable @@ -592,8 +597,11 @@ object GoogleAnalyticsAdapter extends Adapter { * @param translationTable mapping between original params and the wanted format * @return a translated params */ - private def translatePayload(originalParams: Map[String, String], translationTable: Map[String, String]): Map[String, String] = - originalParams.foldLeft(Map.empty[String, String]) { + private def translatePayload( + originalParams: Map[String, Option[String]], + translationTable: Map[String, String] + ): Map[String, Option[String]] = + originalParams.foldLeft(Map.empty[String, Option[String]]) { case (m, (fieldName, value)) => translationTable .get(fieldName) @@ -610,11 +618,12 @@ object GoogleAnalyticsAdapter extends Adapter { * @return a map containing the discovered contexts keyed by schema */ private def buildContexts( - originalParams: Map[String, String], + originalParams: Map[String, 
Option[String]], referenceTable: Map[SchemaKey, Map[String, KVTranslation]], fieldToSchemaMap: Map[String, SchemaKey] ): ValidatedNel[FailureDetails.AdapterFailure, Map[SchemaKey, Map[String, FieldType]]] = { val m = originalParams + .collect { case (k, Some(v)) => (k, v) } .foldLeft( Map.empty[SchemaKey, Map[String, ValidatedNel[FailureDetails.AdapterFailure, FieldType]]] ) { @@ -648,7 +657,7 @@ object GoogleAnalyticsAdapter extends Adapter { * @return a map containing the composite contexts keyed by schema */ private def buildCompositeContexts( - originalParams: Map[String, String], + originalParams: Map[String, Option[String]], referenceTable: List[MPData], schemasWithCU: List[SchemaKey], nrCompFieldsPerSchema: Map[SchemaKey, Int], @@ -657,6 +666,7 @@ object GoogleAnalyticsAdapter extends Adapter { for { // composite params have digits in their key composite <- originalParams + .collect { case (k, Some(v)) => (k, v) } .filterKeys(k => k.exists(_.isDigit)) .asRight brokenDown <- composite.toList.sorted.map { @@ -693,7 +703,7 @@ object GoogleAnalyticsAdapter extends Adapter { case (k, m) => val values = transpose(m.values.map(_.toList).toList) k -> (originalParams.get("cu") match { - case Some(currency) if schemasWithCU.contains(k) => + case Some(Some(currency)) if schemasWithCU.contains(k) => values .map(m.keys zip _) .map(l => ("currencyCode" -> StringType(currency) :: l.toList).toMap) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala index 4c5c9560b..eeeaa6f12 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala @@ -69,7 +69,7 @@ object IgluAdapter extends Adapter { ] = { val _ = client val 
params = toMap(payload.querystring) - (params.get("schema"), payload.body, payload.contentType) match { + (params.get("schema").flatten, payload.body, payload.contentType) match { case (_, Some(_), None) => val msg = s"expected one of $contentTypesStr" Monad[F].pure( @@ -104,7 +104,7 @@ object IgluAdapter extends Adapter { payload: CollectorPayload, body: String, contentType: String, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = contentType match { case contentTypes._1 => sdJsonBodyToEvent(payload, body, params) @@ -125,7 +125,7 @@ object IgluAdapter extends Adapter { private[registry] def sdJsonBodyToEvent( payload: CollectorPayload, body: String, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = JsonUtils.extractJson(body) match { case Right(parsed) => @@ -166,7 +166,7 @@ object IgluAdapter extends Adapter { private[registry] def payloadToEventWithSchema( payload: CollectorPayload, schemaUri: String, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = SchemaKey.fromUri(schemaUri) match { case Left(parseError) => @@ -224,7 +224,7 @@ object IgluAdapter extends Adapter { payload: CollectorPayload, body: String, schemaUri: SchemaKey, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = { def buildRawEvent(e: Json): RawEvent = RawEvent( @@ -271,7 +271,7 @@ object IgluAdapter extends Adapter { payload: CollectorPayload, body: String, schemaUri: SchemaKey, - params: Map[String, String] + params: Map[String, Option[String]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = (for { bodyMap <- ConversionUtils diff --git 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala index f406a2d3f..187302896 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailchimpAdapter.scala @@ -101,9 +101,11 @@ object MailchimpAdapter extends Adapter { params <- ConversionUtils .parseUrlEncodedForm(body) .leftMap(e => FailureDetails.AdapterFailure.InputData("body", body.some, e)) - eventType <- params.get("type").toRight { - val msg = "no `type` parameter provided: cannot determine event type" - FailureDetails.AdapterFailure.InputData("body", body.some, msg) + eventType <- params.get("type") match { + case Some(Some(typE)) => Right(typE) + case _ => + val msg = "no `type` parameter provided: cannot determine event type" + Left(FailureDetails.AdapterFailure.InputData("body", body.some, msg)) } schema <- lookupSchema(eventType.some, EventSchemaMap) allParams = toMap(payload.querystring) ++ reformatParameters(params) @@ -130,7 +132,7 @@ object MailchimpAdapter extends Adapter { */ private[registry] def toJsons(parameters: RawEventParameters): List[(String, Json)] = for { - (k, v) <- parameters.toList + (k, v) <- parameters.toList.collect { case (k, Some(v)) => (k, v) } } yield toNestedJson(toKeys(k), v) /** @@ -180,8 +182,8 @@ object MailchimpAdapter extends Adapter { */ private[registry] def reformatParameters(parameters: RawEventParameters): RawEventParameters = parameters.get("fired_at") match { - case Some(firedAt) => - parameters.updated("fired_at", JU.toJsonSchemaDateTime(firedAt, MailchimpDateTimeFormat)) - case None => parameters + case Some(Some(firedAt)) => + parameters.updated("fired_at", Some((JU.toJsonSchemaDateTime(firedAt, 
MailchimpDateTimeFormat)))) + case _ => parameters } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala index 1d04a2bd7..aa50b50b0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MailgunAdapter.scala @@ -111,6 +111,7 @@ object MailgunAdapter extends Adapter { toMap( URLEncodedUtils.parse(URI.create("http://localhost/?" + body), UTF_8).asScala.toList ) + .collect { case (k, Some(v)) => (k, v) } ) } match { case TF(e) => diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala index 23cd49cd1..89ee8c443 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/MandrillAdapter.scala @@ -140,6 +140,7 @@ object MandrillAdapter extends Adapter { for { bodyMap <- ConversionUtils .parseUrlEncodedForm(rawEventString) + .map(_.collect { case (k, Some(v)) => (k, v) }) .leftMap(e => FailureDetails.AdapterFailure.InputData("body", rawEventString.some, e)) res <- bodyMap match { case map if map.size != 1 => diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala index 7b8bca26d..62123b71f 100644 --- 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/OlarkAdapter.scala @@ -106,6 +106,7 @@ object OlarkAdapter extends Adapter { toMap( URLEncodedUtils.parse(URI.create("http://localhost/?" + body), UTF_8).asScala.toList ) + .collect { case (k, Some(v)) => (k, v) } } match { case TF(e) => val msg = s"could not parse body: ${JU.stripInstanceEtc(e.getMessage).orNull}" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala index 4d689ea2e..2e72871e6 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/PingdomAdapter.scala @@ -76,16 +76,7 @@ object PingdomAdapter extends Adapter { case Left(f) => Monad[F].pure(f.invalid) case Right(s) => s.get("message") match { - case None => - val msg = "no `message` parameter provided" - val formattedQS = s.map { case (k, v) => s"$k=$v" }.mkString("&") - val failure = FailureDetails.AdapterFailure.InputData( - "querystring", - formattedQS.some, - msg - ) - Monad[F].pure(failure.invalidNel) - case Some(event) => + case Some(Some(event)) => Monad[F].pure((for { parsedEvent <- JsonUtils .extractJson(event) @@ -117,6 +108,15 @@ object PingdomAdapter extends Adapter { ) ) }).toValidatedNel) + case _ => + val msg = "no `message` parameter provided" + val formattedQS = s.map { case (k, v) => s"$k=${v.getOrElse("null")}" }.mkString("&") + val failure = FailureDetails.AdapterFailure.InputData( + "querystring", + formattedQS.some, + msg + ) + Monad[F].pure(failure.invalidNel) } } } @@ -133,10 +133,10 @@ object PingdomAdapter 
extends Adapter { */ private[registry] def reformatMapParams( params: List[NameValuePair] - ): Either[NonEmptyList[FailureDetails.AdapterFailure], Map[String, String]] = { - val formatted = params.map { value => - (value.getName, value.getValue) match { - case (k, PingdomValueRegex(v)) => + ): Either[NonEmptyList[FailureDetails.AdapterFailure], Map[String, Option[String]]] = { + val formatted = params.map { nvp => + (nvp.getName, Option(nvp.getValue)) match { + case (k, Some(PingdomValueRegex(v))) => FailureDetails.AdapterFailure .InputData(k, v.some, s"should not pass regex $PingdomValueRegex") .asLeft @@ -144,7 +144,7 @@ object PingdomAdapter extends Adapter { } } - val successes: List[(String, String)] = formatted.collect { case Right(s) => s } + val successes: List[(String, Option[String])] = formatted.collect { case Right(s) => s } val failures: List[FailureDetails.AdapterFailure] = formatted.collect { case Left(f) => f } (successes, failures) match { diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala index 53a07e539..b34bcd9e4 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/RemoteAdapter.scala @@ -115,7 +115,7 @@ final case class RemoteAdapter( rawEvents = nonEmptyEvents.map { e => RawEvent( api = payload.api, - parameters = e, + parameters = e.map { case (k, v) => (k, Option(v)) }, contentType = payload.contentType, source = payload.source, context = payload.context diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala index bdf7b39b1..90474da98 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UnbounceAdapter.scala @@ -97,6 +97,7 @@ object UnbounceAdapter extends Adapter { toMap( URLEncodedUtils.parse(URI.create("http://localhost/?" + body), UTF_8).asScala.toList ) + .collect { case (k, Some(v)) => (k, v) } } match { case TF(e) => val msg = s"could not parse body: ${JU.stripInstanceEtc(e.getMessage).orNull}" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala index c6cb50cc7..7d8aabe52 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/UrbanAirshipAdapter.scala @@ -136,7 +136,7 @@ object UrbanAirshipAdapter extends Adapter { api = payload.api, parameters = toUnstructEventParams( TrackerVersion, - toMap(payload.querystring) ++ Map("ttm" -> toTtmFormat(tts), "eid" -> id), + toMap(payload.querystring) ++ Map("ttm" -> Option(toTtmFormat(tts)), "eid" -> Option(id)), schema, json, "srv" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala index 83af6f59c..902302d17 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala +++ 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/RedirectAdapter.scala @@ -86,35 +86,26 @@ object RedirectAdapter extends Adapter { Monad[F].pure(failure.invalidNel) } else originalParams.get("u") match { - case None => - val msg = "missing `u` parameter: not a valid URI redirect" - val qs = originalParams.map(t => s"${t._1}=${t._2}").mkString("&") - val failure = - FailureDetails.TrackerProtocolViolation.InputData( - "querystring", - qs.some, - msg - ) - Monad[F].pure(failure.invalidNel) - case Some(u) => + case Some(Some(u)) => val json = buildUriRedirect(u) - val newParams: Either[FailureDetails.TrackerProtocolViolation, Map[String, String]] = - if (originalParams.contains("e")) { - // Already have an event so add the URI redirect as a context (more fiddly) - def newCo = Map("co" -> toContext(json).noSpaces) - (originalParams.get("cx"), originalParams.get("co")) match { - case (None, None) => newCo.asRight - case (None, Some(co)) if co == "" => newCo.asRight - case (None, Some(co)) => addToExistingCo(json, co).map(str => Map("co" -> str)) - case (Some(cx), _) => addToExistingCx(json, cx).map(str => Map("cx" -> str)) - } - } else - // Add URI redirect as an unstructured event - Map("e" -> "ue", "ue_pr" -> toUnstructEvent(json).noSpaces).asRight + val newParams: Either[FailureDetails.TrackerProtocolViolation, Map[String, Option[String]]] = + (if (originalParams.contains("e")) { + // Already have an event so add the URI redirect as a context (more fiddly) + def newCo = Map("co" -> toContext(json).noSpaces) + (originalParams.get("cx"), originalParams.get("co")) match { + case (None, None) => newCo.asRight + case (None, Some(Some(co))) if co == "" => newCo.asRight + case (None, Some(Some(co))) => addToExistingCo(json, co).map(str => Map("co" -> str)) + case (Some(Some(cx)), _) => addToExistingCx(json, cx).map(str => Map("cx" -> str)) + } + } else + // Add URI redirect as an unstructured event + Map("e" -> "ue", 
"ue_pr" -> toUnstructEvent(json).noSpaces).asRight) + .map(_.map { case (k, v) => (k, Option(v)) }) val fixedParams = Map( - "tv" -> TrackerVersion, - "p" -> originalParams.getOrElse("p", TrackerPlatform) // Required field + "tv" -> Some(TrackerVersion), + "p" -> originalParams.getOrElse("p", Some(TrackerPlatform)) // Required field ) Monad[F].pure((for { @@ -129,6 +120,16 @@ object RedirectAdapter extends Adapter { ) ) } yield ev).leftMap(e => NonEmptyList.one(e)).toValidated) + case _ => + val msg = "missing `u` parameter: not a valid URI redirect" + val qs = originalParams.map(t => s"${t._1}=${t._2.getOrElse("null")}").mkString("&") + val failure = + FailureDetails.TrackerProtocolViolation.InputData( + "querystring", + qs.some, + msg + ) + Monad[F].pure(failure.invalidNel) } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala index 6291851fb..f15b64a7f 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/snowplow/Tp2Adapter.scala @@ -133,11 +133,11 @@ object Tp2Adapter extends Adapter { FailureDetails.TrackerProtocolViolation ], NonEmptyList[RawEventParameters]] = { val events: Option[ - Vector[Vector[Validated[FailureDetails.TrackerProtocolViolation, (String, String)]]] + Vector[Vector[Validated[FailureDetails.TrackerProtocolViolation, (String, Option[String])]]] ] = for { topLevel <- instance.asArray fields <- topLevel.map(_.asObject).sequence - res = fields.map(_.toVector.map(toParameter)) + res = fields.map(_.toVector.map(toParameter).map(_.map { case (k, v) => (k, Some(v)) })) } yield res events match { diff --git 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala index 35c3a773b..04509e3c0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala @@ -633,11 +633,11 @@ object EnrichmentManager { def setEventFingerprint( event: EnrichedEvent, - sourceMap: Map[String, String], + parameters: RawEventParameters, eventFingerprint: Option[EventFingerprintEnrichment] ): Unit = eventFingerprint match { - case Some(efe) => event.event_fingerprint = efe.getEventFingerprint(sourceMap) + case Some(efe) => event.event_fingerprint = efe.getEventFingerprint(parameters) case _ => () } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala index d49e21356..9a4a6d7e1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala @@ -35,7 +35,7 @@ object Transform { * @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\ */ private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[FailureDetails.EnrichmentFailure, Unit] = { - val sourceMap: SourceMap = raw.parameters + val sourceMap: SourceMap = raw.parameters.collect { case (k, Some(v)) => (k, v) } val firstPassTransform = enriched.transform(sourceMap, firstPassTransformMap) val secondPassTransform = enriched.transform(sourceMap, secondPassTransformMap) diff --git 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala index 91696c001..df34a5621 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/EventFingerprintEnrichment.scala @@ -23,6 +23,7 @@ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey} import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.EventFingerprintConf import com.snowplowanalytics.snowplow.enrich.common.utils.CirceUtils +import com.snowplowanalytics.snowplow.enrich.common.`package`.RawEventParameters /** Lets us create an EventFingerprintEnrichment from a Json. */ object EventFingerprintEnrichment extends ParseableEnrichment { @@ -86,12 +87,11 @@ final case class EventFingerprintEnrichment(algorithm: String => String, exclude /** * Calculate an event fingerprint using all querystring fields except the excludedParameters - * @param parameterMap * @return Event fingerprint */ - def getEventFingerprint(parameterMap: Map[String, String]): String = { + def getEventFingerprint(parameters: RawEventParameters): String = { val builder = new StringBuilder - parameterMap.toList.sortWith(_._1 < _._1).foreach { + parameters.toList.collect { case (k, Some(v)) => (k, v) }.sortWith(_._1 < _._1).foreach { case (key, value) => if (!excludedParameters.contains(key)) { builder.append(key) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala index 454807efe..728108240 100644 --- 
a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala @@ -465,13 +465,13 @@ object ConversionUtils { def booleanToJByte(bool: Boolean): JByte = (if (bool) 1 else 0).toByte - def parseUrlEncodedForm(s: String): Either[String, Map[String, String]] = + def parseUrlEncodedForm(s: String): Either[String, Map[String, Option[String]]] = for { r <- Either .catchNonFatal(URLEncodedUtils.parse(URI.create("http://localhost/?" + s), UTF_8)) .leftMap(_.getMessage) nvps = r.asScala.toList - pairs = nvps.map(p => p.getName() -> p.getValue()) + pairs = nvps.map(p => p.getName() -> Option(p.getValue())) } yield pairs.toMap /** Extract valid IP (v4 or v6) address from a string */ diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala index 9bc32a1bc..0f124451d 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala @@ -117,19 +117,19 @@ object JsonUtils { */ def toJson( key: String, - value: String, + value: Option[String], bools: List[String], ints: List[String], dateTimes: DateTimeFields ): (String, Json) = { val v = (value, dateTimes) match { - case (null, _) => Json.Null - case ("", _) => Json.Null - case _ if bools.contains(key) => booleanToJson(value) - case _ if ints.contains(key) => integerToJson(value) - case (_, Some((nel, fmt))) if nel.toList.contains(key) => - Json.fromString(toJsonSchemaDateTime(value, fmt)) - case _ => Json.fromString(value) + case (Some(""), _) => Json.Null + case (None, _) => Json.Null + case (Some(bool), _) if bools.contains(key) => booleanToJson(bool) + case (Some(nb), _) if 
ints.contains(key) => integerToJson(nb) + case (Some(datetime), Some((nel, fmt))) if nel.toList.contains(key) => + Json.fromString(toJsonSchemaDateTime(datetime, fmt)) + case (Some(str), _) => Json.fromString(str) } (key, v) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala index 7366b2ac4..617a7c1b3 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/package.scala @@ -24,7 +24,7 @@ package object common { type EnrichmentMap = Map[String, Enrichment] /** Parameters inside of a raw event */ - type RawEventParameters = Map[String, String] + type RawEventParameters = Map[String, Option[String]] /** Parameters extracted from query string */ type QueryStringParameters = List[(String, Option[String])] diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala index 24e73c7bb..5df90e241 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/SpecHelpers.scala @@ -103,4 +103,8 @@ object SpecHelpers { .leftMap(err => s"Can't parse [$rawJson] as Json, error: [$err]") .flatMap(SelfDescribingData.parse[Json]) .leftMap(err => s"Can't parse Json [$rawJson] as as SelfDescribingData, error: [$err]") + + implicit class MapOps[A, B](underlying: Map[A, B]) { + def toOpt: Map[A, Option[B]] = underlying.map { case (a, b) => (a, Option(b)) } + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala index 6328a83a8..3fb7dbc7a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/AdapterSpec.scala @@ -84,13 +84,13 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers { def e1 = { val pairs = toNameValuePairs("a" -> "1", "b" -> "2", "c" -> "3") - BaseAdapter.toMap(pairs) must_== Map("a" -> "1", "b" -> "2", "c" -> "3") + BaseAdapter.toMap(pairs) must_== Map("a" -> "1", "b" -> "2", "c" -> "3").toOpt } def e2 = { val params = BaseAdapter.toUnstructEventParams( "tv", - Map[String, String](), + Map.empty[String, Option[String]], SchemaKey("com.acme", "foo", "jsonschema", SchemaVer.Full(1, 0, 1)), _ => Json.fromJsonObject(JsonObject.empty), "app" @@ -100,7 +100,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers { "e" -> "ue", "p" -> "app", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/foo/jsonschema/1-0-1","data":{}}}""" - ) + ).toOpt } def e3 = { @@ -112,7 +112,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers { "eid" -> "321", "ttm" -> "2015-11-13T16:31:52.393Z", "url" -> "http://localhost" - ) + ).toOpt val params = BaseAdapter.toUnstructEventParams( "tv", shared, @@ -127,7 +127,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers { "ttm" -> "2015-11-13T16:31:52.393Z", "url" -> "http://localhost", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/foo/jsonschema/1-0-1","data":{}}}""" - ) + ).toOpt } def e4 = { @@ -168,7 +168,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers { def 
e7 = { val rawEvent = RawEvent( Shared.api, - Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"), + Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt, Shared.contentType.some, Shared.cljSource, Shared.context @@ -195,7 +195,7 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers { def e8 = { val rawEvent = RawEvent( Shared.api, - Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"), + Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt, Shared.contentType.some, Shared.cljSource, Shared.context @@ -205,21 +205,21 @@ class AdapterSpec extends Specification with DataTables with ValidatedMatchers { val expected = NonEmptyList.of( RawEvent( Shared.api, - Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"), + Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt, Shared.contentType.some, Shared.cljSource, Shared.context ), RawEvent( Shared.api, - Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"), + Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt, Shared.contentType.some, Shared.cljSource, Shared.context ), RawEvent( Shared.api, - Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv"), + Map("tv" -> "com.adapter-v1", "e" -> "ue", "p" -> "srv").toOpt, Shared.contentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala index 702213bde..b063077eb 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CallrailAdapterSpec.scala @@ -52,8 +52,8 @@ class CallrailAdapterSpec extends Specification with DataTables with ValidatedMa "tv" -> "com.callrail-v1", "e" -> "ue", 
"cv" -> "clj-0.6.0-tom-0.0.4" - ) - val static = staticNoPlatform + ("p" -> "srv") + ).toOpt + val static = staticNoPlatform ++ Map("p" -> "srv").toOpt } def e1 = { @@ -148,7 +148,7 @@ class CallrailAdapterSpec extends Specification with DataTables with ValidatedMa NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson, "nuid" -> "-"), + Expected.static ++ Map("ue_pr" -> expectedJson, "nuid" -> "-").toOpt, None, Shared.source, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala index aac2f9a7a..0ebf505e0 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/CloudfrontAccessLogAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with ValidatedMatchers { val processor = Processor("CloudfrontAccessLogAdapterSpec", "v1") @@ -72,10 +74,10 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with "tv" -> "com.amazon.aws.cloudfront/wd_access_log", "e" -> "ue", "url" -> url - ) + ).toOpt val static = staticNoPlatform ++ Map( "p" -> "srv" - ) + ).toOpt } def e1 = { @@ -115,7 +117,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -166,7 +168,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification 
with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -220,7 +222,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -275,7 +277,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -334,7 +336,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -394,7 +396,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context @@ -456,7 +458,7 @@ class CloudfrontAccessLogAdapterSpec extends Specification with DataTables with NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.source, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala index 2fd29dd21..2eb9f0785 100644 --- 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/GoogleAnalyticsAdapterSpec.scala @@ -29,6 +29,8 @@ import loaders._ import GoogleAnalyticsAdapter._ import utils.Clock._ +import SpecHelpers._ + class GoogleAnalyticsAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" @@ -66,7 +68,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "tv" -> "com.google.analytics.measurement-protocol-v1", "e" -> "ue", "p" -> "srv" - ) + ).toOpt val hitContext = (hitType: String) => s""" |{ @@ -137,7 +139,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", |"data":[${hitContext("pageview")}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -167,7 +169,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"protocolVersion":"version"} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -193,7 +195,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"ipOverride":"ip"} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO, "ip" -> "ip") + val expectedParams = static ++ 
Map("ue_pr" -> expectedUE, "co" -> expectedCO, "ip" -> "ip").toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -229,7 +231,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "ti_pr" -> "12.228", "ti_qu" -> "12", "ti_nm" -> "name" - ) + ).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -254,7 +256,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"documentHostName":"host"} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -287,7 +289,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "co" -> expectedCO, "tr_cu" -> "EUR", "tr_id" -> "tr" - ) + ).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -315,7 +317,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"productIndex":42,"sku":"s","listIndex":12} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -340,7 +342,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"value":"dim","index":12} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, 
expectedParams, None, source, context))) } @@ -374,7 +376,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"sku":"s2","index":2} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -402,7 +404,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"data":{"index":12,"id":"id"} |}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedUE, "co" -> expectedCO).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } @@ -427,7 +429,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali |"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1", |"data":[${hitContext("pageview")}] |}""".stripMargin.replaceAll("[\n\r]", "") - val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO) + val expectedParams = static ++ Map("ue_pr" -> expectedJson, "co" -> expectedCO).toOpt val event = RawEvent(api, expectedParams, None, source, context) actual must beValid(NonEmptyList.of(event, event)) } @@ -467,7 +469,7 @@ class GoogleAnalyticsAdapterSpec extends Specification with DataTables with Vali "ue_pr" -> expectedJson, "co" -> expectedCO, "ti_cu" -> "EUR" - ) + ).toOpt actual must beValid(NonEmptyList.one(RawEvent(api, expectedParams, None, source, context))) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala index 
460b98f3e..0b886b59b 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/HubSpotAdapterSpec.scala @@ -26,6 +26,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" payloadBodyToEvents must return a Success list of event JSON's from a valid payload body $e1 @@ -91,7 +93,7 @@ class HubSpotAdapterSpec extends Specification with DataTables with ValidatedMat "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.hubspot/contact_creation/jsonschema/1-0-0","data":{"eventId":1,"subscriptionId":25458,"portalId":4737818,"occurredAt":"2018-10-10T04:23:19.845Z","attemptNumber":0,"objectId":123,"changeSource":"CRM","changeFlag":"NEW","appId":177698}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala index 2c776553d..623b0aa11 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/IgluAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class IgluAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents should return a NEL containing one RawEvent if the CloudFront 
querystring is minimally populated $e1 @@ -63,10 +65,10 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche val staticNoPlatform = Map( "tv" -> "com.snowplowanalytics.iglu-v1", "e" -> "ue" - ) + ).toOpt val static = staticNoPlatform ++ Map( "p" -> "app" - ) + ).toOpt } def e1 = { @@ -102,7 +104,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche NonEmptyList.one( RawEvent( Shared.api, - Expected.static ++ Map("ue_pr" -> expectedJson), + Expected.static ++ Map("ue_pr" -> expectedJson).toOpt, None, Shared.cfSource, Shared.context @@ -142,7 +144,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche Map( "ue_pr" -> json, "aid" -> "webhooks" - ) + ).toOpt } actual must beValid( @@ -187,7 +189,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "aid" -> "my webhook project", "cv" -> "clj-0.5.0-tom-0.0.4", "nuid" -> "" - ) + ).toOpt } actual must beValid( @@ -223,7 +225,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche NonEmptyList.one( RawEvent( Shared.api, - Expected.staticNoPlatform ++ Map("p" -> "mob", "ue_pr" -> expectedJson), + Expected.staticNoPlatform ++ Map("p" -> "mob", "ue_pr" -> expectedJson).toOpt, None, Shared.cfSource, Shared.context @@ -304,7 +306,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "mob", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"key":"value","everwets":"processed"}}}""" - ), + ).toOpt, "application/json".some, Shared.cljSource, Shared.context @@ -409,7 +411,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "mob", "ue_pr" -> 
"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"some_param":"foo"}}}""" - ), + ).toOpt, "application/json".some, Shared.cljSource, Shared.context @@ -490,7 +492,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"some_param":"foo","hello":"world"}}}""" - ), + ).toOpt, "application/x-www-form-urlencoded".some, Shared.cljSource, Shared.context @@ -525,7 +527,7 @@ class IgluAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "mob", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.acme/campaign/jsonschema/1-0-1","data":{"key":"value","everwets":"processed"}}}""" - ), + ).toOpt, "application/json".some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala index 990b33f79..17c880aa6 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailchimpAdapterSpec.scala @@ -30,6 +30,7 @@ import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.utils.Clock._ import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" 
@@ -83,7 +84,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM val map = Map( "data[merges][LNAME]" -> "Beemster", "data[merges][FNAME]" -> "Joshua" - ) + ).toOpt val expected = List( ("data", json"""{ "merges": { "LNAME": "Beemster" }}"""), ("data", json"""{ "merges": { "FNAME": "Joshua" }}""") @@ -109,14 +110,14 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM def e5 = "SPEC NAME" || "PARAMS" | "EXPECTED OUTPUT" | - "Return Updated Params" !! Map("type" -> "subscribe", "fired_at" -> "2014-10-22 13:50:00") ! Map( + "Return Updated Params" !! Map("type" -> "subscribe", "fired_at" -> "2014-10-22 13:50:00").toOpt ! Map( "type" -> "subscribe", "fired_at" -> "2014-10-22T13:50:00.000Z" - ) | - "Return Same Params" !! Map("type" -> "subscribe", "id" -> "some_id") ! Map( + ).toOpt | + "Return Same Params" !! Map("type" -> "subscribe", "id" -> "some_id").toOpt ! Map( "type" -> "subscribe", "id" -> "some_id" - ) |> { (_, params, expected) => + ).toOpt |> { (_, params, expected) => val actual = MailchimpAdapter.reformatParameters(params) actual mustEqual expected } @@ -152,7 +153,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -193,7 +194,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -226,7 +227,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM 
NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailchimp-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -310,7 +311,7 @@ class MailchimpAdapterSpec extends Specification with DataTables with ValidatedM "p" -> "srv", "ue_pr" -> expectedJson, "nuid" -> "123" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala index 01209c1a9..fafb668b1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MailgunAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class MailgunAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a Success Nel if every event 'delivered' in the payload is successful $e1 @@ -90,7 +92,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -139,7 +141,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" 
-> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -189,7 +191,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -239,7 +241,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -286,7 +288,7 @@ class MailgunAdapterSpec extends Specification with DataTables with ValidatedMat val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.mailgun-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, Some("multipart/form-data; boundary=353d603f-eede-4b49-97ac-724fbc54ea3c"), Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala index 0996ccfca..30456c6cc 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MandrillAdapterSpec.scala @@ -25,6 +25,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import 
SpecHelpers._ + class MandrillAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" payloadBodyToEvents must return a Success List[JValue] for a valid events string $e1 @@ -110,7 +112,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_sent/jsonschema/1-0-0","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -122,7 +124,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_delayed/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"deferred","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa1","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","smtp_events":[{"size":0,"destination_ip":"127.0.0.1","diag":"451 4.3.5 Temporarily unavailable, try again later.","ts":"2013-04-04T21:31:51.000Z","source_ip":"127.0.0.1","type":"deferred"}],"clicks":[],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa1","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -134,7 +136,7 @@ class 
MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_bounced/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"bounced","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa2","tags":["webhook-example"],"diag":"smtp;550 5.1.1 The email account that you tried to reach does not exist. Please try double-checking the recipient's email address for typos or unnecessary spaces.","ts":"2013-04-04T21:13:19.000Z","metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","bounce_description":"bad_mailbox","bgtools_code":10},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa2","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -146,7 +148,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_soft_bounced/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"soft-bounced","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa3","tags":["webhook-example"],"diag":"smtp;552 5.2.2 Over Quota","ts":"2013-04-04T21:13:19.000Z","metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","bounce_description":"mailbox_full","bgtools_code":22},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa3","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -158,7 +160,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> 
"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_opened/jsonschema/1-0-1","data":{"ip":"127.0.0.1","location":{"city":"Oklahoma City","latitude":35.4675598145,"timezone":"-05:00","country":"United States","longitude":-97.5164337158,"country_short":"US","postal_code":"73101","region":"Oklahoma"},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa4","ts":"2014-11-06T09:49:26.000Z","user_agent":"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.8) Gecko/20100317 Postbox/1.1.3","msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa4","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"user_agent_parsed":{"os_company_url":"http://www.apple.com/","os_family":"OS X","os_company":"Apple Computer, Inc.","os_url":"http://www.apple.com/osx/","ua_url":"http://www.postbox-inc.com/","ua_icon":"http://cdn.mandrill.com/img/email-client-icons/postbox.png","ua_version":"1.1.3","os_name":"OS X 10.6 Snow Leopard","ua_company":"Postbox, Inc.","ua_family":"Postbox","os_icon":"http://cdn.mandrill.com/img/email-client-icons/macosx.png","ua_company_url":"http://www.postbox-inc.com/","ua_name":"Postbox 1.1.3","type":"Email Client","mobile":false}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -170,7 +172,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_clicked/jsonschema/1-0-1","data":{"ip":"127.0.0.1","location":{"city":"Oklahoma 
City","latitude":35.4675598145,"timezone":"-05:00","country":"United States","longitude":-97.5164337158,"country_short":"US","postal_code":"73101","region":"Oklahoma"},"url":"http://mandrill.com","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa5","ts":"2014-11-06T09:49:26.000Z","user_agent":"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.1.8) Gecko/20100317 Postbox/1.1.3","msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa5","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"user_agent_parsed":{"os_company_url":"http://www.apple.com/","os_family":"OS X","os_company":"Apple Computer, Inc.","os_url":"http://www.apple.com/osx/","ua_url":"http://www.postbox-inc.com/","ua_icon":"http://cdn.mandrill.com/img/email-client-icons/postbox.png","ua_version":"1.1.3","os_name":"OS X 10.6 Snow Leopard","ua_company":"Postbox, Inc.","ua_family":"Postbox","os_icon":"http://cdn.mandrill.com/img/email-client-icons/macosx.png","ua_company_url":"http://www.postbox-inc.com/","ua_name":"Postbox 1.1.3","type":"Email Client","mobile":false}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -182,7 +184,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_marked_as_spam/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook 
message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa6","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa6","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -194,7 +196,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/recipient_unsubscribed/jsonschema/1-0-1","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook message","email":"example.webhook@mandrillapp.com","state":"sent","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa7","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[{"ts":"2013-04-04T21:31:51.000Z","url":"http://mandrill.com"}],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[{"ts":"2013-04-04T21:31:51.000Z"}]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa7","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -206,7 +208,7 @@ class MandrillAdapterSpec extends Specification with DataTables with ValidatedMa "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.mandrill/message_rejected/jsonschema/1-0-0","data":{"msg":{"_version":"exampleaaaaaaaaaaaaaaa","subject":"This an example webhook 
message","email":"example.webhook@mandrillapp.com","state":"rejected","_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa8","tags":["webhook-example"],"ts":"2013-04-04T21:13:19.000Z","clicks":[],"metadata":{"user_id":111},"sender":"example.sender@mandrillapp.com","opens":[]},"_id":"exampleaaaaaaaaaaaaaaaaaaaaaaaaa8","ts":"2014-11-06T09:49:26.000Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala index fa753f28d..4c848777a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/MarketoAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class MarketoAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a success for a valid "event" type payload body being passed $e1 @@ -64,7 +66,7 @@ class MarketoAdapterSpec extends Specification with DataTables with ValidatedMat "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.marketo/event/jsonschema/2-0-0","data":{"lead":{"first_name":"the hulk","acquisition_date":"2010-11-11T11:11:11.000Z","black_listed":false,"last_interesting_moment_date":"2018-09-26T20:26:40.000Z","created_at":"2018-06-16T11:23:58.000Z","updated_at":""},"name":"webhook for A","step":6,"campaign":{"id":987,"name":"triggered event"},"datetime":"2018-03-07T14:28:16.000Z","company":{"name":"iron man","notes":"the something dog leapt over the lazy fox"}}}}""" - ), + ).toOpt, 
ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala index bb24ed646..d50431194 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/OlarkAdapterSpec.scala @@ -27,6 +27,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a Success Nel if the transcript event in the payload is successful $e1 @@ -134,7 +136,7 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.olark-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.olark-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -198,7 +200,7 @@ class OlarkAdapterSpec extends Specification with DataTables with ValidatedMatch val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.olark-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.olark-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala index 61573c9d5..c0436b40c 100644 --- 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PagerdutyAdapterSpec.scala @@ -25,6 +25,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" reformatParameters must return an updated JSON whereby all null Strings have been replaced by null $e1 @@ -127,7 +129,7 @@ class PagerdutyAdapterSpec extends Specification with DataTables with ValidatedM "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.pagerduty/incident/jsonschema/1-0-0","data":{"type":"trigger","data":{"incident":{"assigned_to_user":{"id":"P9L426X","name":"Yali Sassoon","email":"yali@snowplowanalytics.com","html_url":"https://snowplow.pagerduty.com/users/P9L426X"},"incident_key":"srv01/HTTP","trigger_summary_data":{"description":"FAILURE for production/HTTP on machine srv01.acme.com","client":"Sample Monitoring Service","client_url":"https://monitoring.service.com"},"last_status_change_by":null,"incident_number":139,"service":{"id":"PTHO4FF","name":"Webhooks Test","html_url":"https://snowplow.pagerduty.com/services/PTHO4FF","deleted_at":null},"trigger_details_html_url":"https://snowplow.pagerduty.com/incidents/P9WY9U9/log_entries/P5AWPTR","id":"P9WY9U9","assigned_to":[{"at":"2014-11-12T18:53:47Z","object":{"id":"P9L426X","name":"Yali 
Sassoon","email":"yali@snowplowanalytics.com","html_url":"https://snowplow.pagerduty.com/users/P9L426X","type":"user"}}],"number_of_escalations":0,"last_status_change_on":"2014-11-12T18:53:47Z","status":"triggered","escalation_policy":{"id":"P8ETVHU","name":"Default","deleted_at":null},"created_on":"2014-11-12T18:53:47+00:00","trigger_type":"trigger_svc_event","html_url":"https://snowplow.pagerduty.com/incidents/P9WY9U9"}},"id":"3c3e8ee0-6a9d-11e4-b3d5-22000ae31361","created_on":"2014-11-12T18:53:47Z"}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala index 2272bf37a..535e0404a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/PingdomAdapterSpec.scala @@ -29,6 +29,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class PingdomAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" reformatParameters should return either an updated JSON without the 'action' field or the same JSON $e1 @@ -84,7 +86,7 @@ class PingdomAdapterSpec extends Specification with DataTables with ValidatedMat "e" -> "ue", "p" -> "apps", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.pingdom/incident_assign/jsonschema/1-0-0","data":{"check":"1421338","checkname":"Webhooks_Test","host":"7eef51c2.ngrok.com","incidentid":3,"description":"down"}}}""" - ), + ).toOpt, None, Shared.cljSource, Shared.context diff --git 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala index 4ca4ef5da..605af85f1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/RemoteAdapterSpec.scala @@ -42,6 +42,8 @@ import org.specs2.specification.BeforeAfter import loaders.CollectorPayload import utils.Clock._ +import SpecHelpers._ + class RemoteAdapterSpec extends Specification with ValidatedMatchers { def is = @@ -189,7 +191,7 @@ class RemoteAdapterSpec extends Specification with ValidatedMatchers { "e" -> "ue", "p" -> mockPlatform, "ue_pr" -> s"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:$mockSchemaVendor/$mockSchemaName/$mockSchemaFormat/$mockSchemaVersion","data":$evtJson}}""" - ), + ).toOpt, None, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala index 0e59562e2..7d4156550 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/SendgridAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.mutable.Specification import loaders._ import utils.Clock._ +import SpecHelpers._ + class SendgridAdapterSpec extends Specification with ValidatedMatchers { object Shared { val api = CollectorPayload.Api("com.sendgrid", "v3") @@ -459,7 +461,7 @@ class SendgridAdapterSpec extends Specification with 
ValidatedMatchers { "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson // NB this includes removing the "event" keypair as redundant - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala index a26af86ab..02f518144 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/StatusGatorAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class StatusGatorAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a Success Nel if every event in the payload is successful $e1 @@ -80,7 +82,7 @@ class StatusGatorAdapterSpec extends Specification with DataTables with Validate val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.statusgator-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.statusgator-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala index 12f2ae232..f9164d046 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala +++ 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UnbounceAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a Success Nel if the query string is valid $e1 @@ -109,7 +111,7 @@ class UnbounceAdapterSpec extends Specification with DataTables with ValidatedMa val expected = NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.unbounce-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.unbounce-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala index d88228647..5d4fdd9ca 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/UrbanAirshipAdapterSpec.scala @@ -113,7 +113,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { correctType must be equalTo (Right("CLOSE")) val items = actual.toList.head.toList - val sentSchema = parse(items.head.parameters("ue_pr")) + val sentSchema = parse(items.head.parameters("ue_pr").getOrElse("{}")) .leftMap(_.getMessage) .flatMap(_.hcursor.downField("data").get[String]("schema").leftMap(_.getMessage)) sentSchema must beRight("""iglu:com.urbanairship.connect/CLOSE/jsonschema/1-0-0""") @@ -193,7 +193,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { actual match { case 
Validated.Valid(successes) => val event = successes.head - parse(event.parameters("ue_pr")) must beRight(expectedUnstructEventJson) + parse(event.parameters("ue_pr").getOrElse("{}")) must beRight(expectedUnstructEventJson) case _ => ko("payload was not accepted") } } @@ -203,7 +203,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { case Validated.Valid(successes) => val event = successes.head // "occurred" field value in ms past epoch (2015-11-13T16:31:52.393Z) - event.parameters("ttm") must beEqualTo("1447432312393") + event.parameters("ttm") must beEqualTo(Some("1447432312393")) case _ => ko("payload was not populated") } } @@ -213,7 +213,7 @@ class UrbanAirshipAdapterSpec extends Specification with ValidatedMatchers { case Validated.Valid(successes) => val event = successes.head // id field value - event.parameters("eid") must beEqualTo("e3314efb-9058-dbaf-c4bb-b754fca73613") + event.parameters("eid") must beEqualTo(Some("e3314efb-9058-dbaf-c4bb-b754fca73613")) case _ => ko("payload was not populated") } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala index 980b19b84..e4de9aa76 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/VeroAdapterSpec.scala @@ -24,6 +24,8 @@ import org.specs2.matcher.{DataTables, ValidatedMatchers} import loaders._ import utils.Clock._ +import SpecHelpers._ + class VeroAdapterSpec extends Specification with DataTables with ValidatedMatchers { def is = s2""" toRawEvents must return a success for a valid "sent" type payload body being passed $e1 @@ -72,7 +74,7 @@ class VeroAdapterSpec extends Specification with DataTables with 
ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/sent/jsonschema/1-0-0","data":{"event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"sent_at":"2015-06-22T23:37:18.000Z","campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -100,7 +102,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/delivered/jsonschema/1-0-0","data":{"event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"delivered_at":"2015-06-22T23:37:18.000Z","message_id":"20130920062934.21270.53268@vero.com","sender_ip":"127.0.0.1","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -128,7 +130,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/opened/jsonschema/1-0-0","data":{"opened_at":"2015-06-22T23:37:18.000Z","event":{"name":"Test 
event","triggered_at":"2015-02-15T14:57:18.000Z"},"campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"user_agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","message_id":"20130920062934.21270.53268@vero.com","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -156,7 +158,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/clicked/jsonschema/1-0-0","data":{"clicked_at":"2015-06-22T23:37:18.000Z","event":{"name":"Test event","triggered_at":"2015-02-15T14:57:18.000Z"},"campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","tags":"tag 1, tag 2","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"user_agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","message_id":"20130920062934.21270.53268@vero.com","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -184,7 +186,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/bounced/jsonschema/1-0-0","data":{"bounce_message":"521 5.2.1 : AOL will not accept delivery of this message.","event":{"name":"Test 
event","triggered_at":"2015-02-15T14:57:18.000Z"},"bounced_at":"2015-06-22T23:37:18.000Z","campaign":{"name":"Order confirmation","permalink":"http://app.getvero.com/view/1/341d64944577ac1f70f560e37db54a25","subject":"Your order is being processed","variation":"Variation A","trigger-event":"purchased item","id":987,"type":"transactional"},"message_id":"20130920062934.21270.53268@vero.com","bounce_type":"hard","bounce_code":"521","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -212,7 +214,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/unsubscribed/jsonschema/1-0-0","data":{"unsubscribed_at":"2015-06-22T23:37:18.000Z","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -240,7 +242,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/created/jsonschema/1-0-0","data":{"role":"Bot","firstname":"Steve","company":"Vero","user":{"id":123,"email":"steve@getvero.com"}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -268,7 +270,7 @@ class VeroAdapterSpec extends Specification with DataTables with ValidatedMatche "e" -> "ue", "p" -> "srv", "ue_pr" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.getvero/updated/jsonschema/1-0-0","data":{"user":{"id":123,"email":"steve@getvero.com"},"changes":{"tags":{"add":["active-customer"],"remove":["unactive-180-days"]}}}}}""" - ), + ).toOpt, ContentType.some, Shared.cljSource, Shared.context @@ -303,7 +305,7 @@ class 
VeroAdapterSpec extends Specification with DataTables with ValidatedMatche NonEmptyList.one( RawEvent( Shared.api, - Map("tv" -> "com.getvero-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson), + Map("tv" -> "com.getvero-v1", "e" -> "ue", "p" -> "srv", "ue_pr" -> expectedJson).toOpt, ContentType.some, Shared.cljSource, Shared.context diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala index 3726c1a4a..5764127f3 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/adapters/registry/snowplow/SnowplowAdapterSpec.scala @@ -35,6 +35,8 @@ import loaders._ import utils.{ConversionUtils => CU} import utils.Clock._ +import SpecHelpers._ + class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { def is = s2""" Tp1.toRawEvents should return a NEL containing one RawEvent if the querystring is populated $e1 @@ -56,6 +58,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa Redirect.toRawEvents should return a Validation Failure if the querystring does not contain a u parameter $e17 Redirect.toRawEvents should return a Validation Failure if the event type is specified and the co JSON is corrupted $e18 Redirect.toRawEvents should return a Validation Failure if the event type is specified and the cx Base64 is corrupted $e19 + Redirect.toRawEvents should return a Validation Failure if the URI is null (&u param without a value) $e20 """ object Snowplow { @@ -93,7 +96,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa val actual = Tp1Adapter.toRawEvents(payload, SpecHelpers.client) 
actual must beValid( NonEmptyList - .one(RawEvent(Snowplow.Tp1, Map("aid" -> "test"), None, Shared.source, Shared.context)) + .one(RawEvent(Snowplow.Tp1, Map("aid" -> "test").toOpt, None, Shared.source, Shared.context)) ) } @@ -125,7 +128,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa NonEmptyList.one( RawEvent( Snowplow.Tp2, - Map("aid" -> "tp2", "e" -> "se"), + Map("aid" -> "tp2", "e" -> "se").toOpt, None, Shared.source, Shared.context @@ -151,7 +154,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa NonEmptyList.one( RawEvent( Snowplow.Tp2, - Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se"), + Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, ApplicationJsonWithCharset.some, Shared.source, Shared.context @@ -185,9 +188,9 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa ) actual must beValid( NonEmptyList.of( - rawEvent(Map("tv" -> "0", "p" -> "1", "e" -> "1", "nuid" -> "123")), - rawEvent(Map("tv" -> "0", "p" -> "2", "e" -> "2", "nuid" -> "123")), - rawEvent(Map("tv" -> "0", "p" -> "3", "e" -> "3", "nuid" -> "123")) + rawEvent(Map("tv" -> "0", "p" -> "1", "e" -> "1", "nuid" -> "123").toOpt), + rawEvent(Map("tv" -> "0", "p" -> "2", "e" -> "2", "nuid" -> "123").toOpt), + rawEvent(Map("tv" -> "0", "p" -> "3", "e" -> "3", "nuid" -> "123").toOpt) ) ) } @@ -208,7 +211,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa NonEmptyList.one( RawEvent( Snowplow.Tp2, - Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se"), + Map("tv" -> "ios-0.1.0", "p" -> "mob", "e" -> "se").toOpt, ApplicationJsonWithCapitalCharset.some, Shared.source, Shared.context @@ -461,7 +464,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "ue_pr" -> 
"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}}""", "p" -> "web", "cx" -> "dGVzdHRlc3R0ZXN0" - ), + ).toOpt, None, Shared.source, Shared.context @@ -494,7 +497,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "tv" -> "r-tp2", "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -528,7 +531,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "tv" -> "r-tp2", "co" -> """{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}}]}""", "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -560,7 +563,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa "tv" -> "r-tp2", "co" -> 
"""{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""", "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -597,7 +600,7 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa """{"data":[{"schema":"iglu:com.snowplowanalytics.snowplow/uri_redirect/jsonschema/1-0-0","data":{"uri":"https://github.com/snowplow/snowplow"}},{"data":{"osType":"OSX","appleIdfv":"some_appleIdfv","openIdfa":"some_Idfa","carrier":"some_carrier","deviceModel":"large","osVersion":"3.0.0","appleIdfa":"some_appleIdfa","androidIdfa":"some_androidIdfa","deviceManufacturer":"Amstrad"},"schema":"iglu:com.snowplowanalytics.snowplow/mobile_context/jsonschema/1-0-0"},{"data":{"longitude":10,"bearing":50,"speed":16,"altitude":20,"altitudeAccuracy":0.3,"latitudeLongitudeAccuracy":0.5,"latitude":7},"schema":"iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-0-0"}],"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0"}""" ), "p" -> "web" - ), + ).toOpt, None, Shared.source, Shared.context @@ -689,4 +692,27 @@ class SnowplowAdapterSpec extends Specification with DataTables with ValidatedMa ) } + def e20 = { + val payload = CollectorPayload( + Snowplow.Tp2, + SpecHelpers.toNameValuePairs( + "u" -> null, // happens with &u in the query 
string + "cx" -> "dGVzdHRlc3R0ZXN0" + ), + None, + None, + Shared.source, + Shared.context + ) + val actual = RedirectAdapter.toRawEvents(payload, SpecHelpers.client) + actual must beInvalid( + NonEmptyList.one( + FailureDetails.TrackerProtocolViolation.InputData( + "querystring", + "u=null&cx=dGVzdHRlc3R0ZXN0".some, + "missing `u` parameter: not a valid URI redirect" + ) + ) + ) + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index 7fd1a4289..070f835e1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -37,6 +37,8 @@ import org.apache.commons.codec.digest.DigestUtils import org.specs2.mutable.Specification import org.specs2.matcher.EitherMatchers +import SpecHelpers._ + class EnrichmentManagerSpec extends Specification with EitherMatchers { import EnrichmentManagerSpec._ @@ -60,7 +62,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { ] } """ - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -93,7 +95,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -135,7 +137,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "pp", "tv" -> "js-0.13.1", "p" -> "web" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -198,7 +200,7 @@ class EnrichmentManagerSpec extends 
Specification with EitherMatchers { "e" -> "pp", "tv" -> "js-0.13.1", "p" -> "web" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -257,7 +259,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enriched = EnrichmentManager.enrichEvent( enrichmentReg, @@ -300,7 +302,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enrichmentReg = EnrichmentRegistry[Id]( piiPseudonymizer = PiiPseudonymizerEnrichment( @@ -360,7 +362,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enrichmentReg = EnrichmentRegistry[Id]( piiPseudonymizer = PiiPseudonymizerEnrichment( @@ -420,7 +422,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enrichmentReg = EnrichmentRegistry[Id]( piiPseudonymizer = PiiPseudonymizerEnrichment( @@ -480,7 +482,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enrichmentReg = EnrichmentRegistry[Id]( piiPseudonymizer = PiiPseudonymizerEnrichment( @@ -542,7 +544,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { } } }""" - ) + ).toOpt val rawEvent = RawEvent(api, parameters, None, source, context) val enrichmentReg = EnrichmentRegistry[Id]( piiPseudonymizer = PiiPseudonymizerEnrichment( @@ -584,7 +586,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "tv" -> "js-0.13.1", "ua" -> qs_ua, "p" -> "web" - ) + ).toOpt val contextWithUa = 
context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) val enriched = EnrichmentManager.enrichEvent( @@ -603,7 +605,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "e" -> "pp", "tv" -> "js-0.13.1", "p" -> "web" - ) + ).toOpt val contextWithUa = context.copy(useragent = Some("header-useragent")) val rawEvent = RawEvent(api, parameters, None, source, contextWithUa) val enriched = EnrichmentManager.enrichEvent( diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala index 256b171f7..bfef628a6 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/EventFingerprintEnrichmentSpec.scala @@ -14,6 +14,8 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry import org.specs2.Specification +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers._ + class EventFingerprintEnrichmentSpec extends Specification { def is = s2""" getEventFingerprint should combine fields into a hash $e1 @@ -43,7 +45,7 @@ class EventFingerprintEnrichmentSpec extends Specification { "stm" -> "1000000000000", "e" -> "se", "se_ac" -> "buy" - ) + ).toOpt ) must_== "15" } @@ -53,14 +55,14 @@ class EventFingerprintEnrichmentSpec extends Specification { "se_ac" -> "action", "se_ca" -> "category", "se_pr" -> "property" - ) + ).toOpt val permutedVersion = Map( "se_ca" -> "category", "se_ac" -> "action", "se_pr" -> "property", "e" -> "se" - ) + ).toOpt standardConfig.getEventFingerprint(permutedVersion) must_== standardConfig.getEventFingerprint( initialVersion @@ -73,12 
+75,12 @@ class EventFingerprintEnrichmentSpec extends Specification { "eid" -> "123e4567-e89b-12d3-a456-426655440000", "e" -> "se", "se_ac" -> "buy" - ) + ).toOpt val delayedVersion = Map( "stm" -> "9999999999999", "e" -> "se", "se_ac" -> "buy" - ) + ).toOpt standardConfig.getEventFingerprint(delayedVersion) must_== standardConfig.getEventFingerprint( initialVersion @@ -88,7 +90,7 @@ class EventFingerprintEnrichmentSpec extends Specification { def e4 = { val initialVersion = Map( "prefix" -> "suffix" - ) + ).toOpt standardConfig.getEventFingerprint(initialVersion) should not be standardConfig .getEventFingerprint(initialVersion) @@ -104,7 +106,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha1Config.getEventFingerprint(initialVersion).length() must_== 40 } @@ -119,7 +121,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha256Config.getEventFingerprint(initialVersion).length() must_== 64 } @@ -134,7 +136,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha384Config.getEventFingerprint(initialVersion).length() must_== 96 } @@ -149,7 +151,7 @@ class EventFingerprintEnrichmentSpec extends Specification { val initialVersion = Map( "e" -> "se", "se_ac" -> "action" - ) + ).toOpt sha512Config.getEventFingerprint(initialVersion).length() must_== 128 } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala index 6ac16b54b..7bf6b20f0 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala +++ 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala @@ -34,7 +34,7 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers { val raw = RawEvent( CollectorPayload.Api("vendor", "version"), - Map.empty[String, String], + Map.empty[String, Option[String]], None, CollectorPayload.Source("source", "enc", None), CollectorPayload.Context(None, None, None, None, Nil, None) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala index a3e020e18..9b004e11c 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala @@ -33,14 +33,14 @@ class JsonUtilsSpec extends Specification { def e1 = { val key = "key" val value = "value" - JsonUtils.toJson(key, value, Nil, Nil, None) must + JsonUtils.toJson(key, Option(value), Nil, Nil, None) must beEqualTo((key, Json.fromString(value))) } def e2 = { val key = "key" val value: String = null - JsonUtils.toJson(key, value, Nil, Nil, None) must + JsonUtils.toJson(key, Option(value), Nil, Nil, None) must beEqualTo((key, Json.Null)) } @@ -48,15 +48,15 @@ class JsonUtilsSpec extends Specification { val key = "field" val truE = "true" - val exp1 = JsonUtils.toJson(key, truE, List(key), Nil, None) must + val exp1 = JsonUtils.toJson(key, Option(truE), List(key), Nil, None) must beEqualTo(key -> Json.True) val falsE = "false" - val exp2 = JsonUtils.toJson(key, falsE, List(key), Nil, None) must + val exp2 = JsonUtils.toJson(key, Option(falsE), List(key), Nil, None) must beEqualTo(key -> Json.False) val foo = "foo" - val exp3 = JsonUtils.toJson(key, foo, List(key), Nil, None) must + val exp3 = JsonUtils.toJson(key, Option(foo), List(key), Nil, None) must beEqualTo(key -> 
Json.fromString(foo)) exp1 and exp2 and exp3 @@ -66,11 +66,11 @@ class JsonUtilsSpec extends Specification { val key = "field" val number = 123 - val exp1 = JsonUtils.toJson(key, number.toString(), Nil, List(key), None) must + val exp1 = JsonUtils.toJson(key, Option(number.toString()), Nil, List(key), None) must beEqualTo(key -> Json.fromBigInt(number)) val notNumber = "abc" - val exp2 = JsonUtils.toJson(key, notNumber, Nil, List(key), None) must + val exp2 = JsonUtils.toJson(key, Option(notNumber), Nil, List(key), None) must beEqualTo(key -> Json.fromString(notNumber)) exp1 and exp2 @@ -83,10 +83,10 @@ class JsonUtilsSpec extends Specification { val malformedDate = "2020-09-02" val correctDate = "2020-09-02T22:00:00.000Z" - val exp1 = JsonUtils.toJson(key, malformedDate, Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + val exp1 = JsonUtils.toJson(key, Option(malformedDate), Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must be !== (key -> Json.fromString(malformedDate)) - val exp2 = JsonUtils.toJson(key, correctDate, Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + val exp2 = JsonUtils.toJson(key, Option(correctDate), Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must beEqualTo(key -> Json.fromString(correctDate)) exp1 and exp2 From c99b2973d7b3b4d002b8bff289612d1dff6dc769 Mon Sep 17 00:00:00 2001 From: Oguzhan Unlu Date: Mon, 21 Sep 2020 14:15:53 +0300 Subject: [PATCH 25/38] Common: bump to JDK 11 (close #362) --- .github/workflows/test.yml | 12 +- .jvmopts | 1 - build.sbt | 13 +- .../enrichments/YauaaEnrichmentSpec.scala | 3 +- .../outputs/EnrichedEventSpec.scala | 1 - .../KafkaIntegrationSpec.scala | 146 --------------- .../KafkaTestUtils.scala | 167 ----------------- .../PiiEmitSpec.scala | 172 ------------------ .../sinks/KafkaSink.scala | 4 +- .../sources/KafkaSource.scala | 2 +- project/BuildSettings.scala | 2 +- 11 files changed, 13 insertions(+), 510 deletions(-) delete mode 100644 
modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaIntegrationSpec.scala delete mode 100644 modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala delete mode 100644 modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/PiiEmitSpec.scala diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 616b62a30..ab63058e7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,10 +20,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.8 + - name: Set up JDK 11 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 11 - name: Prepare Mock server for SCE ApiRequestEnrichmentIntegrationTest (launch in background) run: python integration-tests/sce-api-lookup-test.py 8001 & - name: Prepare Postgres for SCE SqlLookupEnrichmentIntegrationTest (create entities) @@ -55,10 +55,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.8 + - name: Set up JDK 11 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 11 - name: Compare SBT version with git tag run: .github/check_tag.sh ${GITHUB_REF##*/} - name: Docker login @@ -81,10 +81,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up JDK 1.8 + - name: Set up JDK 11 uses: actions/setup-java@v1 with: - java-version: 1.8 + java-version: 11 - name: Compare SBT version with git tag run: .github/check_tag.sh ${GITHUB_REF##*/} - name: Deploy SCE on Bintray Maven and Maven Central diff --git a/.jvmopts b/.jvmopts index ef0f90285..00c1707db 100644 --- a/.jvmopts +++ b/.jvmopts @@ -8,6 +8,5 @@ -XX:+TieredCompilation -XX:-UseGCOverheadLimit # effectively adds GC to Perm space --XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled # must be enabled for CMSClassUnloadingEnabled to work diff --git a/build.sbt b/build.sbt index 09db15707..7e8930f9b 100644 --- 
a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ lazy val root = project.in(file(".")) .settings(name := "enrich") .settings(BuildSettings.basicSettings) - .aggregate(common, beam, stream, kinesis, kafka, nsq, stdin, integrationTests) + .aggregate(common, beam, stream, kinesis, kafka, nsq, stdin) lazy val common = project .in(file("modules/common")) @@ -178,13 +178,4 @@ lazy val beam = ) .enablePlugins(JavaAppPackaging, DockerPlugin, BuildInfoPlugin) -lazy val integrationTests = project - .in(file("modules/integration-tests")) - .settings(moduleName := "integration-tests") - .settings(allStreamSettings) - .settings(BuildSettings.addExampleConfToTestCp) - .settings(libraryDependencies ++= Seq( - Dependencies.Libraries.kafka, - Dependencies.Libraries.jinJava - )) - .dependsOn(stream % "test->test", kafka % "test->compile") +Global / onChangedBuildSource := ReloadOnSourceChanges diff --git a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala index 1610c046b..ac0288786 100644 --- a/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala +++ b/modules/beam/src/test/scala/com.snowplowanalytics.snowplow.enrich.beam/enrichments/YauaaEnrichmentSpec.scala @@ -36,7 +36,8 @@ object YauaaEnrichmentSpec { "event_format" -> "jsonschema", "event_version" -> "1-0-0", "event" -> "page_ping", - "derived_contexts" -> json"""{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-1","data":{"deviceBrand":"Unknown","deviceName":"Desktop","operatingSystemVersionMajor":"7","layoutEngineNameVersion":"Gecko 12.0","operatingSystemNameVersion":"Windows 7","layoutEngineBuild":"20100101","layoutEngineNameVersionMajor":"Gecko 12","operatingSystemName":"Windows 
NT","agentVersionMajor":"12","layoutEngineVersionMajor":"12","deviceClass":"Desktop","agentNameVersionMajor":"Firefox 12","operatingSystemNameVersionMajor":"Windows 7","deviceCpuBits":"64","operatingSystemClass":"Desktop","layoutEngineName":"Gecko","agentName":"Firefox","agentVersion":"12.0","layoutEngineClass":"Browser","agentNameVersion":"Firefox 12.0","operatingSystemVersion":"7","deviceCpu":"Intel x86_64","agentClass":"Browser","layoutEngineVersion":"12.0"}}]}""".noSpaces) + "derived_contexts" -> json"""{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-1","data":{"deviceBrand":"Unknown","deviceName":"Desktop","operatingSystemVersionMajor":"7","layoutEngineNameVersion":"Gecko 12.0","operatingSystemNameVersion":"Windows 7","layoutEngineBuild":"20100101","layoutEngineNameVersionMajor":"Gecko 12","operatingSystemName":"Windows NT","agentVersionMajor":"12","layoutEngineVersionMajor":"12","deviceClass":"Desktop","agentNameVersionMajor":"Firefox 12","operatingSystemNameVersionMajor":"Windows 7","deviceCpuBits":"64","operatingSystemClass":"Desktop","layoutEngineName":"Gecko","agentName":"Firefox","agentVersion":"12.0","layoutEngineClass":"Browser","agentNameVersion":"Firefox 12.0","operatingSystemVersion":"7","deviceCpu":"Intel x86_64","agentClass":"Browser","layoutEngineVersion":"12.0"}}]}""".noSpaces + ) } class YauaaEnrichmentSpec extends PipelineSpec { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala index e36a02b43..74cd42d4b 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala @@ -171,7 +171,6 @@ class EnrichedEventSpec 
extends Specification { testField(_.event_version = "event_version", _.event_version) testField(_.event_fingerprint = "event_fingerprint", _.event_fingerprint) testField(_.true_tstamp = "true_tstamp", _.true_tstamp) - } } diff --git a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaIntegrationSpec.scala b/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaIntegrationSpec.scala deleted file mode 100644 index 08eaaf015..000000000 --- a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaIntegrationSpec.scala +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2013-2020 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. 
- */ - -package com.snowplowanalytics.snowplow.enrich.stream - -import java.time.{Duration => JDuration} -import java.util.Properties -import java.util.concurrent.ForkJoinPool - -import scala.concurrent._ -import scala.concurrent.duration.Duration -import scala.util.Try -import scala.collection.JavaConverters._ - -import cats.Id -import com.snowplowanalytics.iglu.client.Client -import com.snowplowanalytics.snowplow.badrows.Processor -import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry -import com.snowplowanalytics.snowplow.scalatracker.Tracker -import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer} -import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} -import org.specs2.matcher.{TraversableMatchers, TryMatchers} -import io.circe.Json - -import model.StreamsConfig - -/* - * Extending this trait creates a new integration test with a new instance of kafka - * See PiiEmitSpec for an example of how to use it - */ -trait KafkaIntegrationSpec extends TryMatchers with TraversableMatchers { - - import KafkaIntegrationSpecValues._ - implicit val ec = ExecutionContext.fromExecutor(new ForkJoinPool(16)) - val kafkaTopics = Set(testGoodIn, testGood, testBad, testPii) - - def expectedGood: Int - def expectedBad: Int - def expectedPii: Int - - def inputGood: List[Array[Byte]] - - def getMainApplicationFuture( - configuration: StreamsConfig, - client: Client[Id, Json], - adapterRegistry: AdapterRegistry, - registry: EnrichmentRegistry[Id], - tracker: Option[Tracker[Id]] - ): Future[Unit] = - Future { - val p = Processor("test", "1.0.0") - KafkaEnrich - .getSource(configuration, None, client, adapterRegistry, registry, tracker, p) - .toOption - .get - .run() - } - - def producerTimeoutSec: Int - def inputProduced(address: String): Try[Unit] = - Try(Await.result(produce(address: String), Duration(s"$producerTimeoutSec sec"))) - def 
testKafkaPropertiesProducer(address: String) = { - val props = new Properties() - props.put("bootstrap.servers", address) - props.put("client.id", "producer-george") - props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") - props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer") - props - } - def produce(address: String): Future[Unit] = - Future { - val testProducer = new KafkaProducer[String, Array[Byte]](testKafkaPropertiesProducer(address)) - val events = inputGood - events.foreach { r => - testProducer.send(new ProducerRecord(testGoodIn, "key", r)) - } - testProducer.flush - testProducer.close - } - private def getListOfRecords(cr: ConsumerRecords[String, String]): List[String] = - cr.asScala.map(_.value).toList - - val POLL_TIME_MSEC = 100L - - def getRecords( - topic: String, - expectedRecords: Int, - timeoutSec: Int, - address: String - ): Future[List[String]] = - Future { - val started = System.currentTimeMillis - val testKafkaPropertiesConsumer = { - val props = new Properties() - props.put("bootstrap.servers", address) - props.put("auto.offset.reset", "earliest") - props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") - props - .put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") - props.put("group.id", s"consumer-$topic") - props - } - val testConsumerPii = new KafkaConsumer[String, String](testKafkaPropertiesConsumer) - testConsumerPii.subscribe(List(topic).asJava) - var records = getListOfRecords(testConsumerPii.poll(JDuration.ofMillis(POLL_TIME_MSEC))) - while (((System.currentTimeMillis - started) / 1000 < timeoutSec - 1) && records.size < expectedRecords) - records = records ++ getListOfRecords( - testConsumerPii.poll(JDuration.ofMillis(POLL_TIME_MSEC)) - ) - testConsumerPii.close() - records - } - - def consumerExecutionTimeoutSec: Int - def producedBadRecords(address: String): Future[List[String]] = - 
getRecords(testBad, expectedBad, consumerExecutionTimeoutSec, address) - def producedGoodRecords(address: String): Future[List[String]] = - getRecords(testGood, expectedGood, consumerExecutionTimeoutSec, address) - def producedPiiRecords(address: String): Future[List[String]] = - getRecords(testPii, expectedPii, consumerExecutionTimeoutSec, address) - def allResults(address: String): Future[(List[String], List[String], List[String])] = - for { - good <- producedGoodRecords(address) - bad <- producedBadRecords(address) - pii <- producedPiiRecords(address) - } yield (good, bad, pii) - -} - -object KafkaIntegrationSpecValues { - val (testGoodIn, testGood, testBad, testPii) = - ("testGoodIn", "testEnrichedGood", "testEnrichedBad", "testEnrichedUglyPii") -} diff --git a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala b/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala deleted file mode 100644 index 6ae18b881..000000000 --- a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/KafkaTestUtils.scala +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2013-2020 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. 
- */ - -package com.snowplowanalytics.snowplow.enrich.stream - -import java.io.File -import java.net.InetSocketAddress -import java.util.Properties - -import scala.collection.JavaConverters._ -import scala.util.Random - -import kafka.server.{KafkaConfig, KafkaServerStartable} -import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} -import org.apache.kafka.clients.admin.AdminClient -import org.apache.kafka.clients.admin.NewTopic - -class KafkaTestUtils { - // zk - private val zkHost = "localhost" - private val zkPort = 2181 - private var zk: EmbeddedZookeeper = _ - private var zkReady = false - - // kafka - private val brokerHost = "localhost" - private val brokerPort = 9092 - private var kafkaServer: KafkaServerStartable = _ - private var adminClient: AdminClient = _ - private var topicCountMap = Map.empty[String, Int] - private var brokerReady = false - - /** Zookeeper address */ - def zkAddress: String = { - assert(zkReady, "Zk not ready, cannot get address") - s"$zkHost:$zkPort" - } - - /** Kafka broker address */ - def brokerAddress: String = { - assert(brokerReady, "Broker not ready, cannot get address") - s"$brokerHost:$brokerPort" - } - - /** Start the Zookeeper and Kafka servers */ - def setup(): Unit = { - setupEmbeddedZookeeper() - setupEmbeddedKafkaServer() - } - - private def setupEmbeddedZookeeper(): Unit = { - zk = new EmbeddedZookeeper(zkHost, zkPort) - zkReady = true - } - - private def setupEmbeddedKafkaServer(): Unit = { - assert(zkReady, "Zk should be setup beforehand") - val kafkaConfig = new KafkaConfig(brokerProps) - kafkaServer = new KafkaServerStartable(kafkaConfig) - kafkaServer.startup() - brokerReady = true - val adminProps = { - val props = new Properties() - props.put("bootstrap.servers", brokerAddress) - props - } - adminClient = AdminClient.create(adminProps) - } - - /** Close the Kafka as well as the Zookeeper client and server */ - def tearDown(): Unit = { - brokerReady = false - zkReady = false - - if 
(adminClient != null) { - adminClient.close() - adminClient = null - } - - if (kafkaServer != null) { - kafkaServer.shutdown() - kafkaServer = null - } - - if (zk != null) { - zk.shutdown() - zk = null - } - - topicCountMap = Map.empty - } - - /** Create one or more topics */ - @scala.annotation.varargs - def createTopics(topics: String*): Unit = - for (topic <- topics) { - adminClient.createTopics(List(new NewTopic(topic, 1, 1)).asJava) - Thread.sleep(1000) - topicCountMap = topicCountMap + (topic -> 1) - } - - private def brokerProps: Properties = { - val props = new Properties - props.put("broker.id", "0") - props.put("host.name", brokerHost) - props.put("offsets.topic.replication.factor", "1") - props.put( - "log.dir", { - val dir = System.getProperty("java.io.tmpdir") + - "/logDir-" + new Random().nextInt(Int.MaxValue) - val f = new File(dir) - f.mkdirs() - dir - } - ) - props.put("port", brokerPort.toString) - props.put("zookeeper.connect", zkAddress) - props.put("zookeeper.connection.timeout.ms", "10000") - props - } - - private class EmbeddedZookeeper(hostname: String, port: Int) { - private val snapshotDir = { - val f = new File( - System.getProperty("java.io.tmpdir"), - "snapshotDir-" + Random.nextInt(Int.MaxValue) - ) - f.mkdirs() - f - } - private val logDir = { - val f = - new File(System.getProperty("java.io.tmpdir"), "logDir-" + Random.nextInt(Int.MaxValue)) - f.mkdirs() - f - } - - private val factory = { - val zkTickTime = 500 - val zk = new ZooKeeperServer(snapshotDir, logDir, zkTickTime) - val f = new NIOServerCnxnFactory - val maxCnxn = 16 - f.configure(new InetSocketAddress(hostname, port), maxCnxn) - f.startup(zk) - f - } - - def shutdown(): Unit = { - factory.shutdown() - snapshotDir.delete() - logDir.delete() - () - } - } -} diff --git a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/PiiEmitSpec.scala 
b/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/PiiEmitSpec.scala deleted file mode 100644 index d058015a7..000000000 --- a/modules/integration-tests/src/test/scala/com.snowplowanalytics.snowplow.enrich.stream/PiiEmitSpec.scala +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2013-2020 Snowplow Analytics Ltd. All rights reserved. - * - * This program is licensed to you under the Apache License Version 2.0, and - * you may not use this file except in compliance with the Apache License - * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at - * http://www.apache.org/licenses/LICENSE-2.0. - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License Version 2.0 is distributed on an "AS - * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - * implied. See the Apache License Version 2.0 for the specific language - * governing permissions and limitations there under. 
- */ -package com.snowplowanalytics.snowplow.enrich.stream - -import java.util.regex.Pattern -import java.util.concurrent.TimeUnit - -import scala.util.{Failure, Success, Try} -import scala.collection.JavaConverters._ -import scala.concurrent.duration.FiniteDuration -import scala.io.Source -import com.hubspot.jinjava.Jinjava -import com.typesafe.config.ConfigFactory -import org.apache.commons.codec.binary.Base64 -import org.specs2.concurrent.ExecutionEnv -import org.specs2.mutable.Specification -import org.specs2.matcher.{FutureMatchers, Matcher} -import org.specs2.specification.BeforeAfterAll -import pureconfig._ -import pureconfig.generic.auto._ -import pureconfig.generic.{FieldCoproductHint, ProductHint} -import good._ -import model.{StreamsConfig, TargetPlatformConfig} - -class PiiEmitSpec(implicit ee: ExecutionEnv) extends Specification with FutureMatchers with KafkaIntegrationSpec with BeforeAfterAll { - - var ktu: KafkaTestUtils = _ - override def beforeAll(): Unit = { - ktu = new KafkaTestUtils - ktu.setup() - ktu.createTopics(kafkaTopics.toList: _*) - } - override def afterAll(): Unit = - if (ktu != null) - ktu = null - - import KafkaIntegrationSpecValues._ - - def configValues = - Map( - "sinkType" -> "kafka", - "streamsInRaw" -> s"$testGoodIn", - "outEnriched" -> s"$testGood", - "outPii" -> s"$testPii", - "outBad" -> s"$testBad", - "partitionKeyName" -> "\"\"", - "kafkaBrokers" -> ktu.brokerAddress, - "bufferTimeThreshold" -> "1", - "bufferRecordThreshold" -> "1", - "bufferByteThreshold" -> "100000", - "enrichAppName" -> "Jim", - "enrichStreamsOutMaxBackoff" -> "1000", - "enrichStreamsOutMinBackoff" -> "1000", - "appName" -> "jim" - ) - - def config: String = - Try { - val configRes = getClass.getResourceAsStream("/config.hocon.sample") - Source.fromInputStream(configRes).getLines.mkString("\n") - } match { - case Failure(t) => - println(s"Unable to get config.hocon.sample: $t"); throw new Exception(t) - case Success(s) => s - } - - def configInstance: 
String = { - val jinJava = new Jinjava() - jinJava.render(config, configValues.asJava) - } - - private def decode(s: String): Array[Byte] = Base64.decodeBase64(s) - - // Input - override val inputGood = List( - decode(PagePingWithContextSpec.raw), - decode(PageViewWithContextSpec.raw), - decode(StructEventSpec.raw), - decode(StructEventWithContextSpec.raw), - decode(TransactionItemSpec.raw), - decode(TransactionSpec.raw) - ) - // Expected output counts - override val (expectedGood, expectedBad, expectedPii) = (inputGood.size, 0, inputGood.size) - - // Timeout for the producer - override val producerTimeoutSec = 5 - - // Timeout for all the consumers (good, bad, and pii) (running in parallel) - // You may want to adjust this if you are doing lots of slow work in the app - // Ordinarily the consumers return in less than 1 sec - override val consumerExecutionTimeoutSec = 15 - - implicit def hint[T]: ProductHint[T] = - ProductHint[T](ConfigFieldMapping(CamelCase, CamelCase)) - implicit val _: FieldCoproductHint[TargetPlatformConfig] = - new FieldCoproductHint[TargetPlatformConfig]("enabled") - - "Pii" should { - "emit all events" in { - - val parsedConfig = ConfigFactory.parseString(configInstance).resolve() - val configObject = Try { - loadConfigOrThrow[StreamsConfig](parsedConfig.getConfig("enrich.streams")) - } - configObject aka "enrichment config loading" must not beAFailedTry - - getMainApplicationFuture( - configObject.get, - SpecHelpers.client, - SpecHelpers.adapterRegistry, - SpecHelpers.enrichmentRegistry, - None - ) - inputProduced(ktu.brokerAddress) aka "sending input" must beSuccessfulTry - - def spaceJoinResult(expected: List[StringOrRegex]) = - expected - .flatMap({ - case JustRegex(r) => Some(r.toString) - case JustString(s) if s.nonEmpty => Some(Pattern.quote(s)) - case _ => None - }) - .mkString("\\s*") - - val expectedMatcher: Matcher[(List[String], List[String], List[String])] = beLike { - case (good: List[String], bad: List[String], pii: 
List[String]) => - bad aka "bad result list" must have size expectedBad - pii aka "pii result list" must have size expectedPii - good aka "good result list" must have size expectedGood - good aka "good result list" must containMatch( - spaceJoinResult(PagePingWithContextSpec.expected) - ) - pii aka "pii result list" must containMatch(spaceJoinResult(PagePingWithContextSpec.pii)) - good aka "good result list" must containMatch( - spaceJoinResult(PageViewWithContextSpec.expected) - ) - pii aka "pii result list" must containMatch(spaceJoinResult(PageViewWithContextSpec.pii)) - good aka "good result list" must containMatch(spaceJoinResult(StructEventSpec.expected)) - pii aka "pii result list" must containMatch(spaceJoinResult(StructEventSpec.pii)) - good aka "good result list" must containMatch( - spaceJoinResult(StructEventWithContextSpec.expected) - ) - pii aka "pii result list" must containMatch( - spaceJoinResult(StructEventWithContextSpec.pii) - ) - good aka "good result list" must containMatch( - spaceJoinResult(TransactionItemSpec.expected) - ) - pii aka "pii result list" must containMatch(spaceJoinResult(TransactionItemSpec.pii)) - good aka "good result list" must containMatch(spaceJoinResult(TransactionSpec.expected)) - pii aka "pii result list" must containMatch(spaceJoinResult(TransactionSpec.pii)) - } - allResults(ktu.brokerAddress) must expectedMatcher.await( - retries = 0, - timeout = FiniteDuration(consumerExecutionTimeoutSec.toLong, TimeUnit.SECONDS) - ) - } - } -} diff --git a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala index 889911594..b3175b060 100644 --- a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala +++ b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sinks/KafkaSink.scala @@ -22,8 +22,6 @@ package sinks import 
java.util.Properties -import scala.collection.JavaConverters._ - import cats.syntax.either._ import org.apache.kafka.clients.producer._ @@ -41,7 +39,7 @@ object KafkaSink { */ private def createProducer(kafkaConfig: Kafka, bufferConfig: BufferConfig): KafkaProducer[String, String] = { val properties = createProperties(kafkaConfig, bufferConfig) - properties.putAll(kafkaConfig.producerConf.getOrElse(Map()).asJava) + kafkaConfig.producerConf.getOrElse(Map()).foreach { case (k, v) => properties.setProperty(k, v) } new KafkaProducer[String, String](properties) } diff --git a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala index 1f6fe48d7..214754dfe 100644 --- a/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala +++ b/modules/kafka/src/main/scala/com.snowplowanalytics.snowplow.enrich.stream/sources/KafkaSource.scala @@ -133,7 +133,7 @@ class KafkaSource private ( private def createConsumer(brokers: String, groupId: String): KafkaConsumer[String, Array[Byte]] = { val properties = createProperties(brokers, groupId) - properties.putAll(kafkaConfig.consumerConf.getOrElse(Map()).asJava) + kafkaConfig.consumerConf.getOrElse(Map()).foreach { case (k, v) => properties.setProperty(k, v) } new KafkaConsumer[String, Array[Byte]](properties) } diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala index 21f92c32b..f4ac99775 100644 --- a/project/BuildSettings.scala +++ b/project/BuildSettings.scala @@ -37,7 +37,7 @@ object BuildSettings { organization := "com.snowplowanalytics", scalaVersion := "2.12.11", version := "1.3.2", - javacOptions := Seq("-source", "1.8", "-target", "1.8"), + javacOptions := Seq("-source", "11", "-target", "11"), resolvers ++= Dependencies.resolutionRepos ) From c95ee76c00933b4631b6c317546aa9ee04eb069b Mon Sep 17 00:00:00 2001 From: Oguzhan 
Unlu Date: Tue, 22 Sep 2020 15:10:36 +0300 Subject: [PATCH 26/38] Stream: bump base-debian to 0.2.1 (close #359) --- project/BuildSettings.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala index f4ac99775..51e8e8dfb 100644 --- a/project/BuildSettings.scala +++ b/project/BuildSettings.scala @@ -120,7 +120,7 @@ object BuildSettings { /** Docker settings, used by SE */ lazy val dockerSettings = Seq( maintainer in Docker := "Snowplow Analytics Ltd. ", - dockerBaseImage := "snowplow-docker-registry.bintray.io/snowplow/base-debian:0.1.0", + dockerBaseImage := "snowplow-docker-registry.bintray.io/snowplow/base-debian:0.2.1", daemonUser in Docker := "snowplow", dockerUpdateLatest := true, dockerVersion := Some(DockerVersion(18, 9, 0, Some("ce"))), From 9e2b6af6f1a98ff4f984d72ba27c36aa9feaf58d Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Fri, 7 Aug 2020 23:53:46 +0300 Subject: [PATCH 27/38] Common: add toThrift and toRaw methods to CollectorPayload (close #345) --- .../common/adapters/AdapterRegistry.scala | 7 +- .../common/adapters/registry/Adapter.scala | 53 +-- .../adapters/registry/IgluAdapter.scala | 4 +- .../common/loaders/CollectorPayload.scala | 114 ++++- .../common/loaders/ThriftLoader.scala | 6 +- .../loaders/CollectorPayloadSpec.scala | 87 +++- .../loaders/ThriftLoaderSpec.scala | 400 ++++++++++-------- 7 files changed, 440 insertions(+), 231 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala index 6133d4095..b8b06dfe5 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala @@ -97,10 +97,9 @@ class 
AdapterRegistry(remoteAdapters: Map[(String, String), RemoteAdapter] = Map processor: Processor ): F[Validated[BadRow, NonEmptyList[RawEvent]]] = (adapters.get((payload.api.vendor, payload.api.version)) match { - case Some(adapter) => - adapter.toRawEvents(payload, client) - case _ => - val f: FailureDetails.AdapterFailureOrTrackerProtocolViolation = FailureDetails.AdapterFailure.InputData( + case Some(adapter) => adapter.toRawEvents(payload, client) + case None => + val f = FailureDetails.AdapterFailure.InputData( "vendor/version", Some(s"${payload.api.vendor}/${payload.api.version}"), "vendor/version combination is not supported" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala index a18d48fd8..346dfc6b1 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} @@ -29,23 +27,23 @@ import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails - import io.circe._ import io.circe.syntax._ import org.apache.http.NameValuePair import org.joda.time.{DateTime, DateTimeZone} -import org.joda.time.format.DateTimeFormat - -import loaders.CollectorPayload -import utils.{HttpClient, JsonUtils => JU} +import org.joda.time.format.{DateTimeFormat, DateTimeFormatter} +import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.snowplow.enrich.common.RawEventParameters +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils => JU} trait Adapter { // Signature for a Formatter function - type FormatterFunc = (RawEventParameters) => Json + type FormatterFunc = RawEventParameters => Json // The encoding type to be used val EventEncType = "UTF-8" @@ -53,7 +51,7 @@ trait Adapter { private val AcceptedQueryParameters = Set("nuid", "aid", "cv", "eid", "ttm", "url") // Datetime format we need to convert timestamps to - val JsonSchemaDateTimeFormat = + val JsonSchemaDateTimeFormat: DateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZone(DateTimeZone.UTC) private def toStringField(seconds: Long): String = { @@ -112,7 +110,7 @@ trait Adapter { /** * Converts a CollectorPayload instance into raw events. 
- * @param payload The CollectorPaylod containing one or more raw events as collected by a + * @param payload The `CollectorPayload` containing one or more raw events as collected by a * Snowplow collector * @param client The Iglu client used for schema lookup and validation * @return a Validation boxing either a NEL of RawEvents on Success, or a NEL of Failure Strings @@ -168,6 +166,15 @@ trait Adapter { ): RawEventParameters = { val params = formatter(parameters - ("nuid", "aid", "cv", "p")) val json = toUnstructEvent(SelfDescribingData(schema, params)).noSpaces + buildUnstructEventParams(tracker, platform, parameters, json) + } + + def buildUnstructEventParams( + tracker: String, + platform: String, + parameters: RawEventParameters, + json: String + ): Map[String, Option[String]] = Map( "tv" -> Option(tracker), "e" -> Some("ue"), @@ -175,7 +182,6 @@ trait Adapter { "ue_pr" -> Option(json) ) ++ parameters.filterKeys(AcceptedQueryParameters) - } /** * Creates a Snowplow unstructured event by nesting the provided JValue in a self-describing @@ -223,13 +229,7 @@ trait Adapter { platform: String ): RawEventParameters = { val json = toUnstructEvent(SelfDescribingData(schema, eventJson.asJson)).noSpaces - Map( - "tv" -> Option(tracker), - "e" -> Some("ue"), - "p" -> qsParams.getOrElse("p", Option(platform)), // Required field - "ue_pr" -> Option(json) - ) ++ - qsParams.filterKeys(AcceptedQueryParameters) + buildUnstructEventParams(tracker, platform, qsParams, json) } /** @@ -251,14 +251,7 @@ trait Adapter { platform: String ): RawEventParameters = { val json = toUnstructEvent(SelfDescribingData(schema, eventJson)).noSpaces - - Map( - "tv" -> Option(tracker), - "e" -> Some("ue"), - "p" -> qsParams.getOrElse("p", Option(platform)), // Required field - "ue_pr" -> Option(json) - ) ++ - qsParams.filterKeys(AcceptedQueryParameters) + buildUnstructEventParams(tracker, platform, qsParams, json) } /** @@ -411,7 +404,7 @@ trait Adapter { object Adapter { /** The Iglu schema URI 
for a Snowplow unstructured event */ - val UnstructEvent = SchemaKey( + val UnstructEvent: SchemaKey = SchemaKey( "com.snowplowanalytics.snowplow", "unstruct_event", "jsonschema", @@ -419,7 +412,7 @@ object Adapter { ) /** The Iglu schema URI for a Snowplow custom contexts */ - val Contexts = SchemaKey( + val Contexts: SchemaKey = SchemaKey( "com.snowplowanalytics.snowplow", "contexts", "jsonschema", diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala index eeeaa6f12..5ba21985b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala @@ -77,9 +77,7 @@ object IgluAdapter extends Adapter { ) case (None, Some(body), Some(contentType)) => Monad[F].pure(payloadSdJsonToEvent(payload, body, contentType, params)) - case (Some(schemaUri), Some(_), Some(_)) => - Monad[F].pure(payloadToEventWithSchema(payload, schemaUri, params)) - case (Some(schemaUri), None, _) => + case (Some(schemaUri), _, _) => // Ignore body Monad[F].pure(payloadToEventWithSchema(payload, schemaUri, params)) case (None, None, _) => val nel = NonEmptyList.of( diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala index f6408ebc4..9c5e28363 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala @@ -10,37 +10,55 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders +package com.snowplowanalytics.snowplow.enrich.common.loaders import java.util.UUID +import scala.collection.JavaConverters._ + import cats.syntax.either._ import cats.syntax.option._ -import com.snowplowanalytics.snowplow.badrows -import com.snowplowanalytics.snowplow.badrows.{FailureDetails, NVP} - import org.apache.http.NameValuePair +import org.apache.http.client.utils.URIBuilder +import org.apache.thrift.TSerializer + import org.joda.time.DateTime +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.CollectorPayload.thrift.model1.{CollectorPayload => CollectorPayload1} + +import com.snowplowanalytics.snowplow.badrows.{FailureDetails, NVP, Payload} + /** * The canonical input format for the ETL process: it should be possible to convert any collector * input format to this format, ready for the main, collector-agnostic stage of the ETL. 
* * Unlike `RawEvent`, where `parameters` contain a single event, - * [[CollectorPayload]]'s `body` can contain a POST payload with multiple events + * [[CollectorPayload]]'s `body` can contain a POST payload with multiple events, + * hence [[CollectorPayload]] with `body` is potentially identical to `List[RawEvent]` + * or [[CollectorPayload]] with `querystring` is identical to single `RawEvent` + * + * @param api collector's endpoint + * @param querystring GET parameters, would be empty for buffered events and most webhooks, + * an actual payload of `body` is empty + * @param contentType derived from HTTP header (should be in `Context`) + * @param body POST body, for buffered events and most webhooks, + * an actual payload if `querystring` is empty + * @param source information to identify the collector + * @param context event's meta-information, some properties can be used to augment payload */ final case class CollectorPayload( api: CollectorPayload.Api, - querystring: List[NameValuePair], // Could be empty in future trackers - contentType: Option[String], // Not always set - body: Option[String], // Not set for GETs + querystring: List[NameValuePair], + contentType: Option[String], + body: Option[String], source: CollectorPayload.Source, context: CollectorPayload.Context ) { - def toBadRowPayload: badrows.Payload.CollectorPayload = - badrows.Payload.CollectorPayload( + def toBadRowPayload: Payload.CollectorPayload = + Payload.CollectorPayload( api.vendor, api.version, querystring.map(nvp => NVP(nvp.getName, Option(nvp.getValue))), @@ -56,12 +74,46 @@ final case class CollectorPayload( context.headers, context.userId ) + + /** + * Cast back to Thrift-generated `CollectorPayload` class, coming from collector + * Reverse of [[ThriftLoader.toCollectorPayload]] + * Used for tests and debugging + */ + def toThrift: CollectorPayload1 = { + // Timestamp must be always set, otherwise long will fallback it to 1970-01-01 + val timestamp: Long = 
context.timestamp.map(_.getMillis.asInstanceOf[java.lang.Long]).orNull + + new CollectorPayload1(CollectorPayload.IgluUri.toSchemaUri, context.ipAddress.orNull, timestamp, source.encoding, source.name) + .setQuerystring((new URIBuilder).setParameters(querystring.asJava).build().getQuery) + .setHostname(source.hostname.orNull) + .setRefererUri(context.refererUri.orNull) + .setContentType(contentType.orNull) + .setUserAgent(context.useragent.orNull) + .setBody(body.orNull) + .setNetworkUserId(context.userId.map(_.toString).orNull) + .setHeaders(context.headers.asJava) + .setPath(api.toRaw) + } + + /** + * Transform back to array of bytes coming from collector topic + * Used for tests and debugging + */ + def toRaw: Array[Byte] = + CollectorPayload.serializer.serialize(toThrift) } object CollectorPayload { + /** Latest payload SchemaKey */ + val IgluUri: SchemaKey = SchemaKey("com.snowplowanalytics.snowplow", "CollectorPayload", "thrift", SchemaVer.Full(1, 0, 0)) + /** * Unambiguously identifies the collector source of this input line. + * @param name kind and version of the collector (e.g. ssc-1.0.1-kafka) + * @param encoding usually "UTF-8" + * @param hostname the actual host the collector was running on */ final case class Source( name: String, @@ -69,26 +121,41 @@ object CollectorPayload { hostname: Option[String] ) - /** Context derived by the collector. 
*/ + /** + * Information *derived* by the collector to be used as meta-data (meta-payload) + * Everything else in [[CollectorPayload]] is directly payload (body and queryparams) + * @param timestamp collector_tstamp (not optional in fact) + * @param ipAddress client's IP address, can be later overwritten by `ip` param in + * `enrichments.Transform` + * @param useragent UA header, can be later overwritten by `ua` param in `entichments.Transform` + * @param refererUri extracted from corresponding HTTP header + * @param headers all headers, including UA and referer URI + * @param userId generated by collector-set third-party cookie + */ final case class Context( - timestamp: Option[DateTime], // Must have a timestamp + timestamp: Option[DateTime], ipAddress: Option[String], useragent: Option[String], refererUri: Option[String], - headers: List[String], // Could be empty - userId: Option[UUID] // User ID generated by collector-set third-party cookie + headers: List[String], + userId: Option[UUID] ) - /** Define the vendor and version of the payload. */ - final case class Api(vendor: String, version: String) + /** + * Define the vendor and version of the payload, defined by collector endpoint + * Coming from [[com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry]] + */ + final case class Api(vendor: String, version: String) { - // Defaults for the tracker vendor and version before we implemented this into Snowplow. - // TODO: make private once the ThriftLoader is updated - val SnowplowTp1 = Api("com.snowplowanalytics.snowplow", "tp1") + /** Reverse back to collector's endpoint */ + def toRaw: String = if (this == SnowplowTp1) "/i" else s"$vendor/$version" + } + + /** Defaults for the tracker vendor and version before we implemented this into Snowplow */ + val SnowplowTp1: Api = Api("com.snowplowanalytics.snowplow", "tp1") // To extract the API vendor and version from the the path to the requested object. 
- // TODO: move this to somewhere not specific to this collector - private val ApiPathRegex = """^[\/]?([^\/]+)\/([^\/]+)[\/]?$""".r + private val ApiPathRegex = """^[/]?([^/]+)/([^/]+)[/]?$""".r /** * Parses the requested URI path to determine the specific API version this payload follows. @@ -115,4 +182,7 @@ object CollectorPayload { path.startsWith("/ice.png") || // Legacy name for /i path.equals("/i") || // Legacy name for /com.snowplowanalytics.snowplow/tp1 path.startsWith("/i?") + + /** Thrift serializer, used for tests and debugging with `toThrift` */ + private[loaders] lazy val serializer = new TSerializer() } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala index c3456b7d5..71479efec 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala @@ -41,7 +41,7 @@ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, ParseError = object ThriftLoader extends Loader[Array[Byte]] { private val thriftDeserializer = new TDeserializer - private val ExpectedSchema = + private[loaders] val ExpectedSchema = SchemaCriterion("com.snowplowanalytics.snowplow", "CollectorPayload", "thrift", 1, 0) /** Parse Error -> Collector Payload violation */ @@ -133,7 +133,7 @@ object ThriftLoader extends Loader[Array[Byte]] { val headers = Option(collectorPayload.headers).map(_.asScala.toList).getOrElse(Nil) - val ip = IpAddressExtractor.extractIpAddress(headers, collectorPayload.ipAddress).some // Required + val ip = Option(IpAddressExtractor.extractIpAddress(headers, collectorPayload.ipAddress)) // Required val api = Option(collectorPayload.path) match { case None => @@ -196,7 +196,7 @@ object ThriftLoader extends Loader[Array[Byte]] { val 
headers = Option(snowplowRawEvent.headers).map(_.asScala.toList).getOrElse(Nil) - val ip = IpAddressExtractor.extractIpAddress(headers, snowplowRawEvent.ipAddress).some // Required + val ip = Option(IpAddressExtractor.extractIpAddress(headers, snowplowRawEvent.ipAddress)) // Required (querystring.toValidatedNel, networkUserId).mapN { (q, nuid) => val timestamp = Some(new DateTime(snowplowRawEvent.timestamp, DateTimeZone.UTC)) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala index 0b6da214f..253f0494a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala @@ -12,10 +12,23 @@ */ package com.snowplowanalytics.snowplow.enrich.common.loaders +import cats.syntax.option._ + +import org.apache.http.NameValuePair +import org.apache.http.message.BasicNameValuePair +import org.apache.thrift.TSerializer + +import org.joda.time.{DateTimeZone, LocalDate} + +import org.scalacheck.{Arbitrary, Gen} + +import org.specs2.ScalaCheck import org.specs2.mutable.Specification -import org.specs2.matcher.DataTables +import org.specs2.matcher.{DataTables, ValidatedMatchers} + +import com.snowplowanalytics.snowplow.badrows.Processor -class CollectorPayloadSpec extends Specification with DataTables { +class CollectorPayloadSpec extends Specification with DataTables with ScalaCheck with ValidatedMatchers { // TODO: let's abstract this up to a CollectorApi.parse test // (then we can make isIceRequest private again). 
@@ -31,4 +44,74 @@ class CollectorPayloadSpec extends Specification with DataTables { } } } + + "toThrift" should { + implicit val arbitraryPayload: Arbitrary[CollectorPayload] = + Arbitrary(CollectorPayloadSpec.collectorPayloadGen) + + "be isomorphic to ThriftLoader.toCollectorPayload" >> { + prop { payload: CollectorPayload => + val bytes = CollectorPayloadSpec.thriftSerializer.serialize(payload.toThrift) + val result = ThriftLoader.toCollectorPayload(bytes, Processor("test", "0.0.1")) + result must beValid(Some(payload)) + } + } + } +} + +object CollectorPayloadSpec { + + val thriftSerializer = new TSerializer() + + val apiGen = Gen.oneOf( + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1"), + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp2"), + CollectorPayload.Api("r", "tp2"), + CollectorPayload.Api("com.snowplowanalytics.iglu", "v1"), + CollectorPayload.Api("com.mailchimp", "v1") + ) + + val nameValuePair = for { + k <- Gen.oneOf("qkey", "key2", "key_3", "key-4", "key 5") + v <- Gen.option(Gen.oneOf("iglu:com.acme/under_score/jsonschema/1-0-3", "foo", "1", "null")) + } yield new BasicNameValuePair(k, v.orNull) + val queryParametersGen: Gen[List[NameValuePair]] = + for { + n <- Gen.chooseNum(0, 4) + list <- Gen.listOfN[NameValuePair](n, nameValuePair) + } yield list + + val contentTypeGen: Gen[String] = Gen.oneOf("text/plain", "application/json", "application/json; encoding=utf-8") + + val source: CollectorPayload.Source = CollectorPayload.Source("host", "UTF-8", "localhost".some) + + val localDateGen: Gen[LocalDate] = Gen.calendar.map(LocalDate.fromCalendarFields) + val ipGen: Gen[String] = for { + part1 <- Gen.choose(2, 255) + part2 <- Gen.choose(0, 255) + part3 <- Gen.choose(0, 255) + part4 <- Gen.choose(0, 255) + } yield s"$part1.$part2.$part3.$part4" + val headerGen: Gen[String] = for { + first <- Gen.asciiPrintableStr.map(_.capitalize) + second <- Gen.option(Gen.asciiPrintableStr.map(_.capitalize)) + key = second.fold(first)(s => 
s"$first-$s") + value <- Gen.identifier + } yield s"$key: $value" + val contextGen: Gen[CollectorPayload.Context] = for { + timestamp <- localDateGen.map(_.toDateTimeAtStartOfDay(DateTimeZone.UTC)).map(Option.apply) + ip <- Gen.option(ipGen) + userAgent <- Gen.option(Gen.identifier) + headersN <- Gen.chooseNum(0, 8) + headers <- Gen.listOfN(headersN, headerGen) + userId <- Gen.option(Gen.uuid) + } yield CollectorPayload.Context(timestamp, ip, userAgent, None, headers, userId) + + val collectorPayloadGen: Gen[CollectorPayload] = for { + api <- apiGen + kvlist <- queryParametersGen + contentType <- Gen.option(contentTypeGen) + body <- Gen.option(Gen.asciiPrintableStr.suchThat(_.nonEmpty)) + context <- contextGen + } yield CollectorPayload(api, kvlist, contentType, body, source, context) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala index 4a8c0e81c..13e8817dd 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala @@ -10,171 +10,255 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package loaders +package com.snowplowanalytics.snowplow.enrich.common.loaders import java.util.UUID import cats.data.NonEmptyList import cats.syntax.option._ -import com.snowplowanalytics.snowplow.badrows._ - import org.apache.commons.codec.binary.Base64 - import org.joda.time.DateTime -import org.specs2.{ScalaCheck, Specification} -import org.specs2.matcher.{DataTables, ValidatedMatchers} +import com.snowplowanalytics.snowplow.badrows.{BadRow, Failure, FailureDetails, Payload, Processor} -import SpecHelpers._ +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification +import org.specs2.matcher.ValidatedMatchers -class ThriftLoaderSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { - val Process = Processor("ThriftLoaderSpec", "v1") +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers.toNameValuePairs +import com.snowplowanalytics.snowplow.enrich.common.loaders.ThriftLoaderSpec._ - def is = s2""" - toCollectorPayload should return a CollectorPayload for a valid Thrift CollectorPayload (even if parameterless) $e1 - toCollectorPayload should return a Validation Failure for an invalid or corrupted Thrift CollectorPayload $e2 - """ +class ThriftLoaderSpec extends Specification with ValidatedMatchers with ScalaCheck { + "toCollectorPayload" should { + "tolerate fake tracker protocol GET parameters" >> { + val raw = + 
"CgABAAABQ5iGqAYLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAkxMjcuMC4wLjEMACkIAAEAAAABCAACAAAAAQsAAwAAABh0ZXN0UGFyYW09MyZ0ZXN0UGFyYW0yPTQACwAtAAAACTEyNy4wLjAuMQsAMgAAAGhNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNg8ARgsAAAAIAAAAL0Nvb2tpZTogc3A9YzVmM2EwOWYtNzVmOC00MzA5LWJlYzUtZmVhNTYwZjc4NDU1AAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAJEFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZSwgc2RjaAAAAHRVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNgAAAFZBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksIGltYWdlL3dlYnAsICovKjtxPTAuOAAAABhDYWNoZS1Db250cm9sOiBtYXgtYWdlPTAAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAABRIb3N0OiAxMjcuMC4wLjE6ODA4MAsAUAAAACRjNWYzYTA5Zi03NWY4LTQzMDktYmVjNS1mZWE1NjBmNzg0NTUA" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) - object Expected { - val encoding = "UTF-8" - val collector = "ssc-0.0.1-Stdout" // Note we have since fixed -stdout to be lowercase - val api = CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1") - } + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-16T00:49:58.278+00:00").some, + ipAddress = "127.0.0.1".some, + useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36".some, + refererUri = None, + headers = List( + "Cookie: sp=c5f3a09f-75f8-4309-bec5-fea560f78455", + "Accept-Language: en-US, en", + "Accept-Encoding: gzip, deflate, sdch", + "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36", + "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, */*;q=0.8", + "Cache-Control: max-age=0", + "Connection: keep-alive", + "Host: 127.0.0.1:8080" + ), + userId = 
UUID.fromString("c5f3a09f-75f8-4309-bec5-fea560f78455").some + ) + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs("testParam" -> "3", "testParam2" -> "4"), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "127.0.0.1".some), + context = context + ) - def e1 = - "SPEC NAME" || "RAW" | "EXP. TIMESTAMP" | "EXP. PAYLOAD" | "EXP. HOSTNAME" | "EXP. IP ADDRESS" | "EXP. USER AGENT" | "EXP. REFERER URI" | "EXP. HEADERS" | "EXP. USER ID" | - "Fake params" !! "CgABAAABQ5iGqAYLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAkxMjcuMC4wLjEMACkIAAEAAAABCAACAAAAAQsAAwAAABh0ZXN0UGFyYW09MyZ0ZXN0UGFyYW0yPTQACwAtAAAACTEyNy4wLjAuMQsAMgAAAGhNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNg8ARgsAAAAIAAAAL0Nvb2tpZTogc3A9YzVmM2EwOWYtNzVmOC00MzA5LWJlYzUtZmVhNTYwZjc4NDU1AAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAJEFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZSwgc2RjaAAAAHRVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNgAAAFZBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksIGltYWdlL3dlYnAsICovKjtxPTAuOAAAABhDYWNoZS1Db250cm9sOiBtYXgtYWdlPTAAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAABRIb3N0OiAxMjcuMC4wLjE6ODA4MAsAUAAAACRjNWYzYTA5Zi03NWY4LTQzMDktYmVjNS1mZWE1NjBmNzg0NTUA" ! - DateTime.parse("2014-01-16T00:49:58.278+00:00") ! toNameValuePairs( - "testParam" -> "3", - "testParam2" -> "4" - ) ! "127.0.0.1".some ! "127.0.0.1".some ! "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36".some ! None ! 
List( - "Cookie: sp=c5f3a09f-75f8-4309-bec5-fea560f78455", - "Accept-Language: en-US, en", - "Accept-Encoding: gzip, deflate, sdch", - "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36", - "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, */*;q=0.8", - "Cache-Control: max-age=0", - "Connection: keep-alive", - "Host: 127.0.0.1:8080" - ) ! UUID.fromString("c5f3a09f-75f8-4309-bec5-fea560f78455").some | - "Page ping" !! "CgABAAABQ9pNXggLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACZmU9cHAmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJnBwX21peD0wJnBwX21heD0wJnBwX21peT0wJnBwX21heT0wJmNvPSU3QiUyMnBhZ2UlMjI6JTdCJTIycGFnZV90eXBlJTIyOiUyMnRlc3QlMjIsJTIybGFzdF91cGRhdGVkJHRtcyUyMjoxMzkzMzcyODAwMDAwJTdELCUyMnVzZXIlMjI6JTdCJTIydXNlcl90eXBlJTIyOiUyMnRlc3RlciUyMiU3RCU3RCZkdG09MTM5MDkzNjkzODg1NSZ0aWQ9Nzk3NzQzJnZwPTI1NjB4OTYxJmRzPTI1NjB4OTYxJnZpZD03JmR1aWQ9M2MxNzU3NTQ0ZTM5YmNhNCZwPW1vYiZ0dj1qcy0wLjEzLjEmZnA9MjY5NTkzMDgwMyZhaWQ9Q0ZlMjNhJmxhbmc9ZW4tVVMmY3M9VVRGLTgmdHo9RXVyb3BlL0xvbmRvbiZ1aWQ9YWxleCsxMjMmZl9wZGY9MCZmX3F0PTEmZl9yZWFscD0wJmZfd21hPTAmZl9kaXI9MCZmX2ZsYT0xJmZfamF2YT0wJmZfZ2VhcnM9MCZmX2FnPTAmcmVzPTI1NjB4MTQ0MCZjZD0yNCZjb29raWU9MSZ1cmw9ZmlsZTovL2ZpbGU6Ly8vVXNlcnMvYWxleC9EZXZlbG9wbWVudC9kZXYtZW52aXJvbm1lbnQvZGVtby8xLXRyYWNrZXIvZXZlbnRzLmh0bWwvb3ZlcnJpZGRlbi11cmwvAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAcAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAAnBDb29raWU6IF9fdXRtYT0xMTE4NzIyODEuODc4MDg0NDg3LjEzOTAyMzcxMDcuMTM5MDg0ODQ4Ny4xMzkwOTMxNTIxLjY7IF9fdXRtej0xMTE4NzIyODEuMTM5MDIzNzEwNy4xLjEudXRtY3NyPShkaXJlY3QpfHV0bWNjbj0oZGlyZWN0KXx1dG1jbWQ9KG5vbmUpOyBfc3BfaWQuMWZmZj1iODlhNmZhNjMxZWVmYWMyLjEzOTAyMzcxMDcuNi4xMzkwOTMxNTQ1LjEzOTA4NDg2NDE7IGhibGlkPUNQamp1aHZGMDV6a3RQN0o3TTVWbzNOSUdQTEp5MVNGOyBvbGZzaz1vbGZzazU2Mjk
yMzYzNTYxNzU1NDsgX191dG1jPTExMTg3MjI4MTsgd2NzaWQ9dU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9va2x2PTEzOTA5MzE1ODU0NDUlMkN1TWxvZzFRSlZEN2p1aEZaN001Vm9CQ3lQUHlpQnlTUzsgX29rPTk3NTItNTAzLTEwLTUyMjc7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MDkzMTUyMTEyMyUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQAAAAeQWNjZXB0LUVuY29kaW5nOiBnemlwLCBkZWZsYXRlAAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAK0FjY2VwdDogaW1hZ2UvcG5nLCBpbWFnZS8qO3E9MC44LCAqLyo7cT0wLjUAAABdVXNlci1BZ2VudDogTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wAAAAFEhvc3Q6IGxvY2FsaG9zdDo0MDAxCwBQAAAAJDc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NAA=" ! - DateTime.parse("2014-01-28T19:22:20.040+00:00") ! toNameValuePairs( - "e" -> "pp", - "page" -> "Asynchronous website/webapp examples for snowplow.js", - "pp_mix" -> "0", - "pp_max" -> "0", - "pp_miy" -> "0", - "pp_may" -> "0", - "co" -> """{"page":{"page_type":"test","last_updated$tms":1393372800000},"user":{"user_type":"tester"}}""", - "dtm" -> "1390936938855", - "tid" -> "797743", - "vp" -> "2560x961", - "ds" -> "2560x961", - "vid" -> "7", - "duid" -> "3c1757544e39bca4", - "p" -> "mob", - "tv" -> "js-0.13.1", - "fp" -> "2695930803", - "aid" -> "CFe23a", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "Europe/London", - "uid" -> "alex 123", - "f_pdf" -> "0", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "0", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "2560x1440", - "cd" -> "24", - "cookie" -> "1", - "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" - ) ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! 
List( - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: image/png, image/*;q=0.8, */*;q=0.5", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some | - "Unstructured event" !! "CgABAAABQ9qNGa4LABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACeWU9dWUmdWVfbmE9Vmlld2VkK1Byb2R1Y3QmdWVfcHI9JTdCJTIycHJvZHVjdF9pZCUyMjolMjJBU08wMTA0MyUyMiwlMjJjYXRlZ29yeSUyMjolMjJEcmVzc2VzJTIyLCUyMmJyYW5kJTIyOiUyMkFDTUUlMjIsJTIycmV0dXJuaW5nJTIyOnRydWUsJTIycHJpY2UlMjI6NDkuOTUsJTIyc2l6ZXMlMjI6JTVCJTIyeHMlMjIsJTIycyUyMiwlMjJsJTIyLCUyMnhsJTIyLCUyMnh4bCUyMiU1RCwlMjJhdmFpbGFibGVfc2luY2UkZHQlMjI6MTU4MDElN0QmZHRtPTEzOTA5NDExMTUyNjMmdGlkPTY0NzYxNSZ2cD0yNTYweDk2MSZkcz0yNTYweDk2MSZ2aWQ9OCZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD1tb2ImdHY9anMtMC4xMy4xJmZwPTI2OTU5MzA4MDMmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0yNTYweDE0NDAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGE
vNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" ! - DateTime.parse("2014-01-28T20:31:56.846+00:00") ! 
toNameValuePairs( - "e" -> "ue", - "ue_na" -> "Viewed Product", - "ue_pr" -> """{"product_id":"ASO01043","category":"Dresses","brand":"ACME","returning":true,"price":49.95,"sizes":["xs","s","l","xl","xxl"],"available_since$dt":15801}""", - "dtm" -> "1390941115263", - "tid" -> "647615", - "vp" -> "2560x961", - "ds" -> "2560x961", - "vid" -> "8", - "duid" -> "3c1757544e39bca4", - "p" -> "mob", - "tv" -> "js-0.13.1", - "fp" -> "2695930803", - "aid" -> "CFe23a", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "Europe/London", - "uid" -> "alex 123", - "f_pdf" -> "0", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "0", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "2560x1440", - "cd" -> "24", - "cookie" -> "1", - "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" - ) ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! List( - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: image/png, image/*;q=0.8, */*;q=0.5", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! 
UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some | - "Parameterless" !! "CgABAAABQ9o8zYULABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAgAAAAYQ2FjaGUtQ29udHJvbDogbWF4LWFnZT0wAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAAEpBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksICovKjtxPTAuOAAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" ! - DateTime.parse( - "2014-01-28T19:04:14.469+00:00" - ) ! toNameValuePairs() ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! 
List( - "Cache-Control: max-age=0", - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some |> { - - (_, raw, timestamp, payload, hostname, ipAddress, userAgent, refererUri, headers, userId) => - val canonicalEvent = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) - - val expected = CollectorPayload( - api = Expected.api, - querystring = payload, - body = None, - contentType = None, - source = CollectorPayload.Source(Expected.collector, Expected.encoding, hostname), - context = CollectorPayload - .Context(timestamp.some, ipAddress, userAgent, refererUri, headers, userId) - ) - - canonicalEvent must beValid(expected.some) + result must beValid(expected.some) } + "parse valid page ping GET payload" >> { + val raw = + 
"CgABAAABQ9pNXggLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACZmU9cHAmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJnBwX21peD0wJnBwX21heD0wJnBwX21peT0wJnBwX21heT0wJmNvPSU3QiUyMnBhZ2UlMjI6JTdCJTIycGFnZV90eXBlJTIyOiUyMnRlc3QlMjIsJTIybGFzdF91cGRhdGVkJHRtcyUyMjoxMzkzMzcyODAwMDAwJTdELCUyMnVzZXIlMjI6JTdCJTIydXNlcl90eXBlJTIyOiUyMnRlc3RlciUyMiU3RCU3RCZkdG09MTM5MDkzNjkzODg1NSZ0aWQ9Nzk3NzQzJnZwPTI1NjB4OTYxJmRzPTI1NjB4OTYxJnZpZD03JmR1aWQ9M2MxNzU3NTQ0ZTM5YmNhNCZwPW1vYiZ0dj1qcy0wLjEzLjEmZnA9MjY5NTkzMDgwMyZhaWQ9Q0ZlMjNhJmxhbmc9ZW4tVVMmY3M9VVRGLTgmdHo9RXVyb3BlL0xvbmRvbiZ1aWQ9YWxleCsxMjMmZl9wZGY9MCZmX3F0PTEmZl9yZWFscD0wJmZfd21hPTAmZl9kaXI9MCZmX2ZsYT0xJmZfamF2YT0wJmZfZ2VhcnM9MCZmX2FnPTAmcmVzPTI1NjB4MTQ0MCZjZD0yNCZjb29raWU9MSZ1cmw9ZmlsZTovL2ZpbGU6Ly8vVXNlcnMvYWxleC9EZXZlbG9wbWVudC9kZXYtZW52aXJvbm1lbnQvZGVtby8xLXRyYWNrZXIvZXZlbnRzLmh0bWwvb3ZlcnJpZGRlbi11cmwvAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAcAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAAnBDb29raWU6IF9fdXRtYT0xMTE4NzIyODEuODc4MDg0NDg3LjEzOTAyMzcxMDcuMTM5MDg0ODQ4Ny4xMzkwOTMxNTIxLjY7IF9fdXRtej0xMTE4NzIyODEuMTM5MDIzNzEwNy4xLjEudXRtY3NyPShkaXJlY3QpfHV0bWNjbj0oZGlyZWN0KXx1dG1jbWQ9KG5vbmUpOyBfc3BfaWQuMWZmZj1iODlhNmZhNjMxZWVmYWMyLjEzOTAyMzcxMDcuNi4xMzkwOTMxNTQ1LjEzOTA4NDg2NDE7IGhibGlkPUNQamp1aHZGMDV6a3RQN0o3TTVWbzNOSUdQTEp5MVNGOyBvbGZzaz1vbGZzazU2MjkyMzYzNTYxNzU1NDsgX191dG1jPTExMTg3MjI4MTsgd2NzaWQ9dU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9va2x2PTEzOTA5MzE1ODU0NDUlMkN1TWxvZzFRSlZEN2p1aEZaN001Vm9CQ3lQUHlpQnlTUzsgX29rPTk3NTItNTAzLTEwLTUyMjc7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MDkzMTUyMTEyMyUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQAAAAeQWNjZXB0LUVuY29kaW5nOiBnemlwLCBkZWZsYXR
lAAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAK0FjY2VwdDogaW1hZ2UvcG5nLCBpbWFnZS8qO3E9MC44LCAqLyo7cT0wLjUAAABdVXNlci1BZ2VudDogTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wAAAAFEhvc3Q6IGxvY2FsaG9zdDo0MDAxCwBQAAAAJDc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NAA=" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T19:22:20.040+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: image/png, image/*;q=0.8, */*;q=0.5", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs( + "e" -> "pp", + "page" -> "Asynchronous website/webapp examples for snowplow.js", + "pp_mix" -> "0", + "pp_max" -> "0", + "pp_miy" -> "0", + "pp_may" -> "0", + "co" -> 
"""{"page":{"page_type":"test","last_updated$tms":1393372800000},"user":{"user_type":"tester"}}""", + "dtm" -> "1390936938855", + "tid" -> "797743", + "vp" -> "2560x961", + "ds" -> "2560x961", + "vid" -> "7", + "duid" -> "3c1757544e39bca4", + "p" -> "mob", + "tv" -> "js-0.13.1", + "fp" -> "2695930803", + "aid" -> "CFe23a", + "lang" -> "en-US", + "cs" -> "UTF-8", + "tz" -> "Europe/London", + "uid" -> "alex 123", + "f_pdf" -> "0", + "f_qt" -> "1", + "f_realp" -> "0", + "f_wma" -> "0", + "f_dir" -> "0", + "f_fla" -> "1", + "f_java" -> "0", + "f_gears" -> "0", + "f_ag" -> "0", + "res" -> "2560x1440", + "cd" -> "24", + "cookie" -> "1", + "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" + ), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "parse valid unstructured event GET payload" >> { + val raw = + 
"CgABAAABQ9qNGa4LABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACeWU9dWUmdWVfbmE9Vmlld2VkK1Byb2R1Y3QmdWVfcHI9JTdCJTIycHJvZHVjdF9pZCUyMjolMjJBU08wMTA0MyUyMiwlMjJjYXRlZ29yeSUyMjolMjJEcmVzc2VzJTIyLCUyMmJyYW5kJTIyOiUyMkFDTUUlMjIsJTIycmV0dXJuaW5nJTIyOnRydWUsJTIycHJpY2UlMjI6NDkuOTUsJTIyc2l6ZXMlMjI6JTVCJTIyeHMlMjIsJTIycyUyMiwlMjJsJTIyLCUyMnhsJTIyLCUyMnh4bCUyMiU1RCwlMjJhdmFpbGFibGVfc2luY2UkZHQlMjI6MTU4MDElN0QmZHRtPTEzOTA5NDExMTUyNjMmdGlkPTY0NzYxNSZ2cD0yNTYweDk2MSZkcz0yNTYweDk2MSZ2aWQ9OCZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD1tb2ImdHY9anMtMC4xMy4xJmZwPTI2OTU5MzA4MDMmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0yNTYweDE0NDAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmN
vZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T20:31:56.846+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: image/png, image/*;q=0.8, */*;q=0.5", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs( + "e" -> "ue", + "ue_na" -> "Viewed Product", + "ue_pr" -> 
"""{"product_id":"ASO01043","category":"Dresses","brand":"ACME","returning":true,"price":49.95,"sizes":["xs","s","l","xl","xxl"],"available_since$dt":15801}""", + "dtm" -> "1390941115263", + "tid" -> "647615", + "vp" -> "2560x961", + "ds" -> "2560x961", + "vid" -> "8", + "duid" -> "3c1757544e39bca4", + "p" -> "mob", + "tv" -> "js-0.13.1", + "fp" -> "2695930803", + "aid" -> "CFe23a", + "lang" -> "en-US", + "cs" -> "UTF-8", + "tz" -> "Europe/London", + "uid" -> "alex 123", + "f_pdf" -> "0", + "f_qt" -> "1", + "f_realp" -> "0", + "f_wma" -> "0", + "f_dir" -> "0", + "f_fla" -> "1", + "f_java" -> "0", + "f_gears" -> "0", + "f_ag" -> "0", + "res" -> "2560x1440", + "cd" -> "24", + "cookie" -> "1", + "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" + ), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "parse valid parameterless payload" >> { + val raw = + 
"CgABAAABQ9o8zYULABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAgAAAAYQ2FjaGUtQ29udHJvbDogbWF4LWFnZT0wAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAAEpBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksICovKjtxPTAuOAAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T19:04:14.469+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Cache-Control: max-age=0", + 
"Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs(), + contentType = None, + body = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "fail to parse random bytes" >> { + prop { (raw: String) => + ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) must beInvalid.like { + case NonEmptyList( + BadRow.CPFormatViolation( + Process, + Failure.CPFormatViolation(_, "thrift", f), + Payload.RawPayload(_) + ), + List() + ) => + (f must beEqualTo(violation1byte)) or (f must beEqualTo(violation2bytes)) + } + } + } + } +} + +object ThriftLoaderSpec { + val Encoding = "UTF-8" + val Collector = "ssc-0.0.1-Stdout" // Note we have since fixed -stdout to be lowercase + val Api = CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1") + val Process = Processor("ThriftLoaderSpec", "v1") + val 
DeserializeMessage = + "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" + val violation1byte: FailureDetails.CPFormatViolationMessage = FailureDetails.CPFormatViolationMessage.Fallback( "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" @@ -183,22 +267,4 @@ class ThriftLoaderSpec extends Specification with DataTables with ValidatedMatch FailureDetails.CPFormatViolationMessage.Fallback( "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" ) - - // A bit of fun: the chances of generating a valid Thrift CollectorPayload at random are - // so low that we can just use ScalaCheck here - def e2 = - prop { (raw: String) => - ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "thrift", f), - Payload.RawPayload(_) - ), - List() - ) => - (f must beEqualTo(violation1byte)) or (f must beEqualTo(violation2bytes)) - } - } - } From 2302e07b264b41fc5caddb3a5373db1b79f05be2 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Fri, 18 Sep 2020 17:19:56 +0300 Subject: [PATCH 28/38] Common: add sbt publishLocal operation to test action (close #357) --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ab63058e7..883a345d9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,9 @@ jobs: run: sbt coveralls env: COVERALLS_REPO_TOKEN: ${{ 
secrets.COVERALLS_REPO_TOKEN }} + - name: Check assets can be published + if: ${{ always() }} + run: sbt publishLocal deploy: needs: test From 0c4bd7a5bae9cd5fd94fc24e8dfff1a6663c5c5d Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Fri, 18 Sep 2020 17:26:57 +0300 Subject: [PATCH 29/38] Common: disable formatting on compile (close #358) --- .github/workflows/test.yml | 3 +++ project/BuildSettings.scala | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 883a345d9..6db9dae66 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,9 @@ jobs: run: sbt coveralls env: COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + - name: Check Scala formatting + if: ${{ always() }} + run: sbt scalafmtCheck - name: Check assets can be published if: ${{ always() }} run: sbt publishLocal diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala index 51e8e8dfb..6b73f77a0 100644 --- a/project/BuildSettings.scala +++ b/project/BuildSettings.scala @@ -82,7 +82,7 @@ object BuildSettings { lazy val formatting = Seq( scalafmtConfig := file(".scalafmt.conf"), - scalafmtOnCompile := true + scalafmtOnCompile := false ) lazy val scoverageSettings = Seq( From 86c86a2de535cb95b172298846fa29f645977246 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Tue, 6 Oct 2020 20:36:41 +0700 Subject: [PATCH 30/38] Common: bump jackson-databind to 2.10.5 (close #367) --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index a45685850..d7aaa97fa 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -63,7 +63,7 @@ object Dependencies { val kinesisClient = "1.13.3" val kafka = "2.2.1" val nsqClient = "1.2.0" - val jackson = "2.10.3" + val jackson = "2.10.5" val config = "1.3.4" val scopt = "3.7.1" From aae1bd91fe41a09af1dd010552d531b2c684bad0 Mon Sep 17 
00:00:00 2001 From: Anton Parkhomenko Date: Tue, 6 Oct 2020 20:47:03 +0700 Subject: [PATCH 31/38] Stream: bump log4j-core to 2.13.3 (close #368) --- build.sbt | 6 +++++- project/Dependencies.scala | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 7e8930f9b..6c5f9ab99 100644 --- a/build.sbt +++ b/build.sbt @@ -130,7 +130,11 @@ lazy val nsq = project .settings( packageName in Docker := "snowplow/stream-enrich-nsq", ) - .settings(libraryDependencies ++= Seq(Dependencies.Libraries.nsqClient)) + .settings(libraryDependencies ++= Seq( + Dependencies.Libraries.log4j, + Dependencies.Libraries.log4jApi, + Dependencies.Libraries.nsqClient + )) .enablePlugins(JavaAppPackaging, DockerPlugin) .dependsOn(stream) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index d7aaa97fa..05e6e05e5 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -41,6 +41,7 @@ object Dependencies { val yauaa = "5.19" val guava = "28.1-jre" val slf4j = "1.7.26" + val log4j = "2.13.3" val refererParser = "1.1.0" val maxmindIplookups = "0.7.1" @@ -92,6 +93,8 @@ object Dependencies { val jaywayJsonpath = "com.jayway.jsonpath" % "json-path" % V.jaywayJsonpath val yauaa = "nl.basjes.parse.useragent" % "yauaa" % V.yauaa val guava = "com.google.guava" % "guava" % V.guava + val log4j = "org.apache.logging.log4j" % "log4j-core" % V.log4j + val log4jApi = "org.apache.logging.log4j" % "log4j-api" % V.log4j val circeCore = "io.circe" %% "circe-core" % V.circe val circeGeneric = "io.circe" %% "circe-generic" % V.circe From 35f9e6d4996fbf6089d166858da14462edff1240 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Tue, 6 Oct 2020 20:48:51 +0700 Subject: [PATCH 32/38] Common: bump postgresql to 42.2.16 (close #369) --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 05e6e05e5..3f545aa97 100644 --- 
a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -34,7 +34,7 @@ object Dependencies { val jodaTime = "2.10.1" val useragent = "1.21" val uaParser = "1.4.3" - val postgresDriver = "42.2.5" + val postgresDriver = "42.2.16" val mysqlConnector = "8.0.16" val jaywayJsonpath = "2.4.0" val iabClient = "0.2.0" From 9683e923c47b7e4ac4bcc2bbc3c384490d190388 Mon Sep 17 00:00:00 2001 From: Oguzhan Unlu Date: Mon, 12 Oct 2020 13:59:02 +0300 Subject: [PATCH 33/38] Common: fix API Request Enrichment output deserialization (closes #374) --- .../common/utils/CirceUtils.scala | 18 +++- .../common/utils/JsonPath.scala | 4 +- .../enrich/CirceJsonDeserializer.scala | 89 +++++++++++++++++++ .../jackson/enrich/CirceJsonModule.scala | 46 ++++++++++ .../registry/apirequest/ValidatorSpec.scala | 65 ++++++++++++++ .../utils/JsonPathSpec.scala | 38 +++++++- 6 files changed, 256 insertions(+), 4 deletions(-) create mode 100644 modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonDeserializer.scala create mode 100644 modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala create mode 100644 modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ValidatorSpec.scala diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala index fabf5e819..9d5818bd0 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/CirceUtils.scala @@ -15,8 +15,9 @@ package utils import cats.data.Validated import cats.syntax.either._ - +import com.fasterxml.jackson.databind.ObjectMapper import io.circe._ +import io.circe.jackson.enrich.CirceJsonModule object CirceUtils { @@ -43,4 +44,19 @@ object CirceUtils { s"Could not extract $pathStr 
as $clas from supplied JSON due to ${e.getMessage}" } } + + /** + * A custom ObjectMapper specific to Circe JSON AST + * + * The only difference from the original mapper `io.circe.jackson.mapper` is + * how `Long` is deserialized. The original mapper maps a `Long` to `JsonBigDecimal` + * whereas this custom mapper deserializes a `Long` to `JsonLong`. + * + * This customization saves Snowplow events from failing when derived contexts are + * validated post-enrichment. If output schema of API Request Enrichment has an integer + * field, `JsonBigDecimal` representation of a Long results in a bad row + * with message `number found, integer expected` in Iglu Scala Client, since jackson + * treats `DecimalNode` as number in all cases. + */ + final val mapper: ObjectMapper = (new ObjectMapper).registerModule(CirceJsonModule) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala index fe5dcce26..9eef3e623 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonPath.scala @@ -77,7 +77,7 @@ object JsonPath { * @param any raw JVM type representing JSON * @return Json */ - private def anyToJson(any: Any): Json = + private[utils] def anyToJson(any: Any): Json = if (any == null) Json.Null - else io.circe.jackson.mapper.convertValue(any, classOf[Json]) + else CirceUtils.mapper.convertValue(any, classOf[Json]) } diff --git a/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonDeserializer.scala b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonDeserializer.scala new file mode 100644 index 000000000..1c8edcfdc --- /dev/null +++ b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonDeserializer.scala @@ -0,0 +1,89 @@ +package io.circe.jackson.enrich + 
+import java.util + +import com.fasterxml.jackson.core.{JsonParser, JsonTokenId} +import com.fasterxml.jackson.databind.{DeserializationContext, JsonDeserializer} +import io.circe.jackson.{DeserializerContext, JacksonCompat, ReadingList, ReadingMap} +import io.circe.{Json, JsonBigDecimal, JsonLong} + +import scala.annotation.{switch, tailrec} +import scala.collection.JavaConverters._ + +private[jackson] final class CirceJsonDeserializer(klass: Class[_]) extends JsonDeserializer[Object] with JacksonCompat { + override def isCachable: Boolean = true + + override def deserialize(jp: JsonParser, ctxt: DeserializationContext): Json = { + val value = deserialize(jp, ctxt, List()) + if (!klass.isAssignableFrom(value.getClass)) handleUnexpectedToken(ctxt)(klass, jp) + + value + } + + @tailrec + def deserialize( + jp: JsonParser, + ctxt: DeserializationContext, + parserContext: List[DeserializerContext] + ): Json = { + if (jp.getCurrentToken == null) jp.nextToken() + + val (maybeValue, nextContext) = (jp.getCurrentToken.id(): @switch) match { + case JsonTokenId.ID_NUMBER_INT => (Some(Json.JNumber(JsonLong(jp.getLongValue))), parserContext) + case JsonTokenId.ID_NUMBER_FLOAT => (Some(Json.JNumber(JsonBigDecimal(jp.getDecimalValue))), parserContext) + case JsonTokenId.ID_STRING => (Some(Json.JString(jp.getText)), parserContext) + case JsonTokenId.ID_TRUE => (Some(Json.JBoolean(true)), parserContext) + case JsonTokenId.ID_FALSE => (Some(Json.JBoolean(false)), parserContext) + case JsonTokenId.ID_NULL => (Some(Json.JNull), parserContext) + case JsonTokenId.ID_START_ARRAY => (None, ReadingList(new util.ArrayList) +: parserContext) + + case JsonTokenId.ID_END_ARRAY => + parserContext match { + case ReadingList(content) :: stack => + (Some(Json.fromValues(content.asScala)), stack) + case _ => throw new IllegalStateException("Jackson read ']' but parser context is not an array") + } + + case JsonTokenId.ID_START_OBJECT => (None, ReadingMap(new util.ArrayList) +: parserContext) + + 
case JsonTokenId.ID_FIELD_NAME => + parserContext match { + case (c: ReadingMap) :: stack => (None, c.setField(jp.getCurrentName) +: stack) + case _ => + throw new IllegalStateException("Jackson read a String field name but parser context is not a json object") + } + + case JsonTokenId.ID_END_OBJECT => + parserContext match { + case ReadingMap(content) :: stack => + ( + Some(Json.fromFields(content.asScala)), + stack + ) + case _ => throw new IllegalStateException("Jackson read '}' but parser context is not a json object") + } + + case JsonTokenId.ID_NOT_AVAILABLE => + throw new IllegalStateException("Jackson can't return the json token yet") + + case JsonTokenId.ID_EMBEDDED_OBJECT => + throw new IllegalStateException("Jackson read embedded object but json object was expected") + } + + maybeValue match { + case Some(v) if nextContext.isEmpty => v + case maybeValue => + jp.nextToken() + val toPass = maybeValue + .map { v => + val previous :: stack = nextContext + previous.addValue(v) +: stack + } + .getOrElse(nextContext) + + deserialize(jp, ctxt, toPass) + } + } + + override def getNullValue = Json.JNull +} diff --git a/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala new file mode 100644 index 000000000..7a2f4d3a3 --- /dev/null +++ b/modules/common/src/main/scala/io/circe/jackson/enrich/CirceJsonModule.scala @@ -0,0 +1,46 @@ +package io.circe.jackson.enrich + +import com.fasterxml.jackson.core.Version +import com.fasterxml.jackson.databind.Module.SetupContext +import com.fasterxml.jackson.databind._ +import com.fasterxml.jackson.databind.deser.Deserializers +import com.fasterxml.jackson.databind.module.SimpleModule +import com.fasterxml.jackson.databind.ser.Serializers +import io.circe.Json +import io.circe.jackson.CirceJsonSerializer + +object CirceJsonModule extends SimpleModule("SPCirceJson", Version.unknownVersion()) { + override final def 
setupModule(context: SetupContext): Unit = { + context.addDeserializers( + new Deserializers.Base { + override final def findBeanDeserializer( + javaType: JavaType, + config: DeserializationConfig, + beanDesc: BeanDescription + ): CirceJsonDeserializer = { + val klass = javaType.getRawClass + if (classOf[Json].isAssignableFrom(klass) || klass == Json.JNull.getClass) + new CirceJsonDeserializer(klass) + else null + } + } + ) + + context.addSerializers( + new Serializers.Base { + override final def findSerializer( + config: SerializationConfig, + javaType: JavaType, + beanDesc: BeanDescription + ): JsonSerializer[Object] = { + val ser: Object = + if (classOf[Json].isAssignableFrom(beanDesc.getBeanClass)) + CirceJsonSerializer + else null + + ser.asInstanceOf[JsonSerializer[Object]] + } + } + ) + } +} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ValidatorSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ValidatorSpec.scala new file mode 100644 index 000000000..1dd0beb89 --- /dev/null +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/apirequest/ValidatorSpec.scala @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ + +package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest + +import com.snowplowanalytics.iglu.client.CirceValidator +import com.snowplowanalytics.snowplow.enrich.common.utils.JsonPath.query +import io.circe.Json +import io.circe.literal.JsonStringContext +import org.specs2.Specification +import org.specs2.matcher.ValidatedMatchers +import org.specs2.specification.core.SpecStructure + +class ValidatorSpec extends Specification with ValidatedMatchers { + override def is: SpecStructure = s2""" + validate integer field using a valid long value (maximum long) $e1 + validate integer field using a valid long value (minimum long) $e2 + validate number field using a positive float value $e3 + validate number field using a negative float value $e4 + validate number field using a negative double value $e5 + validate number field using a positive double value $e6 + invalidate integer field using a positive double value $e7 + """ + + val schema = + json"""{ "type": "object", "properties": { "orderID": { "type": "integer" }, "price": { "type": "number" } }, "additionalProperties": false }""" + + def e1 = + query("$", json"""{"orderID": 9223372036854775807 }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e2 = + query("$", json"""{"orderID": -9223372036854775808 }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e3 = + query("$", json"""{"price": ${Json.fromFloatOrString(88.92f)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e4 = + query("$", json"""{"price": ${Json.fromFloatOrString(-34345328.72f)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e5 = + query("$", json"""{"price": ${Json.fromDoubleOrString(-34345488.72)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) 
must beRight + + def e6 = + query("$", json"""{"price": ${Json.fromDoubleOrString(32488.72)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beRight + + def e7 = + query("$", json"""{"orderID": ${Json.fromDoubleOrString(32488.72)} }""") + .flatMap(fb => CirceValidator.validate(fb.head, schema)) must beLeft +} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala index 951e9a491..7ac889c9e 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonPathSpec.scala @@ -13,6 +13,7 @@ package com.snowplowanalytics.snowplow.enrich.common.utils import io.circe._ +import io.circe.literal.JsonStringContext import io.circe.syntax._ import org.specs2.Specification @@ -21,9 +22,14 @@ class JsonPathSpec extends Specification { test JSONPath query $e1 test query of non-exist value $e2 test query of empty array $e3 - test primtive JSON type (JString) $e6 + test primitive JSON type (JString) $e6 invalid JSONPath (JQ syntax) must fail $e4 invalid JSONPath must fail $e5 + test query of long $e7 + test query of integer $e8 + test query of string $e9 + test query of double $e10 + test query of big decimal $e11 """ val someJson = Json.obj( @@ -88,4 +94,34 @@ class JsonPathSpec extends Specification { def e6 = JsonPath.query("$.store.book[2]", Json.fromString("somestring")) must beRight(List()) + + def e7 = { + val q1 = JsonPath.query("$.empId", json"""{ "empId": 2147483649 }""") must beRight(List(Json.fromLong(2147483649L))) + val q2 = JsonPath.query("$.empId", json"""{ "empId": ${Json.fromLong(2147483649L)} }""") must beRight(List(Json.fromLong(2147483649L))) + q1 and q2 + } + + def e8 = { + val q1 = JsonPath.query("$.empId", json"""{ "empId": 1086 }""") must 
beRight(List(Json.fromInt(1086))) + val q2 = JsonPath.query("$.empId", json"""{ "empId": ${Json.fromInt(-1086)} }""") must beRight(List(Json.fromInt(-1086))) + q1 and q2 + } + + def e9 = { + val q1 = JsonPath.query("$.empName", json"""{ "empName": "ABC" }""") must beRight(List(Json.fromString("ABC"))) + val q2 = JsonPath.query("$.empName", json"""{ "empName": ${Json.fromString("XYZ")} }""") must beRight(List(Json.fromString("XYZ"))) + q1 and q2 + } + + def e10 = { + val q1 = JsonPath.query("$.id", json"""{ "id": ${Json.fromDouble(44.54)} }""") must beRight(List(Json.fromDoubleOrNull(44.54))) + val q2 = JsonPath.query("$.id", json"""{ "id": ${Json.fromDouble(20.20)} }""") must beRight(List(Json.fromDoubleOrString(20.20))) + q1 and q2 + } + + def e11 = { + val q1 = JsonPath.query("$.id", json"""{ "id": ${Json.fromBigDecimal(44.54)} }""") must beRight(List(Json.fromBigDecimal(44.54))) + val q2 = JsonPath.query("$.id", json"""{ "id": ${Json.fromBigDecimal(20.20)} }""") must beRight(List(Json.fromBigDecimal(20.20))) + q1 and q2 + } } From b5daeeedf9e4b493bb12225d9c7d59a01dbe6402 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Tue, 1 Sep 2020 14:35:13 +0300 Subject: [PATCH 34/38] Stream FS2: add (close #346) --- .github/workflows/test.yml | 4 +- build.sbt | 52 ++- config/config.fs2.hocon.sample | 47 +++ .../common/adapters/registry/Adapter.scala | 21 +- .../apirequest/ApiRequestEnrichment.scala | 23 +- .../snowplow/enrich/fs2/Assets.scala | 296 ++++++++++++++++++ .../snowplow/enrich/fs2/Enrich.scala | 159 ++++++++++ .../snowplow/enrich/fs2/Environment.scala | 197 ++++++++++++ .../snowplow/enrich/fs2/Main.scala | 69 ++++ .../snowplow/enrich/fs2/Payload.scala | 77 +++++ .../enrich/fs2/config/Base64Hocon.scala | 53 ++++ .../enrich/fs2/config/Base64Json.scala | 46 +++ .../enrich/fs2/config/CliConfig.scala | 70 +++++ .../enrich/fs2/config/ConfigFile.scala | 81 +++++ .../snowplow/enrich/fs2/config/Sentry.scala | 38 +++ .../snowplow/enrich/fs2/config/io.scala | 101 
++++++ .../snowplow/enrich/fs2/config/package.scala | 27 ++ .../snowplow/enrich/fs2/io/Clients.scala | 132 ++++++++ .../snowplow/enrich/fs2/io/FileSystem.scala | 57 ++++ .../snowplow/enrich/fs2/io/Metrics.scala | 121 +++++++ .../snowplow/enrich/fs2/io/Sinks.scala | 109 +++++++ .../snowplow/enrich/fs2/io/Source.scala | 63 ++++ .../snowplow/enrich/fs2/io/package.scala | 31 ++ .../snowplow/enrich/fs2/package.scala | 36 +++ .../assets-refresh/geoip2-city-1.mmdb | Bin 0 -> 1462 bytes .../assets-refresh/geoip2-city-2.mmdb | Bin 0 -> 1460 bytes .../assets-refresh/geoip2-city-3.mmdb | Bin 0 -> 1457 bytes .../test/resources/simplelogger.properties | 13 + .../snowplow/enrich/fs2/AssetsSpec.scala | 165 ++++++++++ .../snowplow/enrich/fs2/EnrichSpec.scala | 229 ++++++++++++++ .../snowplow/enrich/fs2/PayloadGen.scala | 113 +++++++ .../snowplow/enrich/fs2/PayloadSpec.scala | 90 ++++++ .../snowplow/enrich/fs2/SpecHelpers.scala | 60 ++++ .../enrich/fs2/config/Base64HoconSpec.scala | 35 +++ .../enrich/fs2/config/CliConfigSpec.scala | 71 +++++ .../enrich/fs2/config/ConfigFileSpec.scala | 79 +++++ .../ApiRequestEnrichmentSpec.scala | 107 +++++++ .../fs2/enrichments/IabEnrichmentSpec.scala | 102 ++++++ .../fs2/enrichments/YauaaEnrichmentSpec.scala | 98 ++++++ .../snowplow/enrich/fs2/test/Counter.scala | 47 +++ .../snowplow/enrich/fs2/test/HttpServer.scala | 137 ++++++++ .../enrich/fs2/test/SchemaRegistry.scala | 231 ++++++++++++++ .../enrich/fs2/test/TestEnvironment.scala | 144 +++++++++ .../snowplow/enrich/fs2/test/package.scala | 24 ++ project/BuildSettings.scala | 7 +- project/Dependencies.scala | 33 ++ 46 files changed, 3675 insertions(+), 20 deletions(-) create mode 100644 config/config.fs2.hocon.sample create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Assets.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Enrich.scala create mode 100644 
modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Environment.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Main.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Payload.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Hocon.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Json.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfig.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFile.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Sentry.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/io.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/package.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Clients.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/FileSystem.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Metrics.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Sinks.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Source.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/package.scala create mode 100644 modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/package.scala create mode 100644 modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-1.mmdb create mode 100644 
modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-2.mmdb create mode 100644 modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb create mode 100644 modules/fs2/src/test/resources/simplelogger.properties create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/AssetsSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/EnrichSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadGen.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/SpecHelpers.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64HoconSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfigSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFileSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/ApiRequestEnrichmentSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/IabEnrichmentSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/YauaaEnrichmentSpec.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/Counter.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/HttpServer.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/SchemaRegistry.scala create mode 100644 
modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/TestEnvironment.scala create mode 100644 modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/package.scala diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6db9dae66..266d70251 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -78,8 +78,10 @@ jobs: run: sbt "project kinesis" docker:publish - name: Build and publish Stream Kafka Docker image run: sbt "project kafka" docker:publish - - name: Build and publish Stream NSQ Docker images + - name: Build and publish Stream NSQ Docker image run: sbt "project nsq" docker:publish + - name: Build and publish Stream NH Docker image + run: sbt "project fs2" docker:publish deploy_sce: needs: test diff --git a/build.sbt b/build.sbt index 6c5f9ab99..d2a5d921c 100644 --- a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ lazy val root = project.in(file(".")) .settings(name := "enrich") .settings(BuildSettings.basicSettings) - .aggregate(common, beam, stream, kinesis, kafka, nsq, stdin) + .aggregate(common, beam, stream, kinesis, kafka, nsq, stdin, fs2) lazy val common = project .in(file("modules/common")) @@ -183,3 +183,53 @@ lazy val beam = .enablePlugins(JavaAppPackaging, DockerPlugin, BuildInfoPlugin) Global / onChangedBuildSource := ReloadOnSourceChanges + +lazy val fs2 = project + .in(file("modules/fs2")) + .dependsOn(common) + .settings(BuildSettings.basicSettings) + .settings(BuildSettings.formatting) + .settings(BuildSettings.scoverageSettings) + .settings(BuildSettings.addExampleConfToTestCp) + .settings(BuildSettings.sbtAssemblySettings) + .settings( + name := "fs2-enrich", + description := "High-performance streaming Snowplow Enrich job built on top of functional streams", + buildInfoKeys := Seq[BuildInfoKey](organization, name, version, description), + buildInfoPackage := "com.snowplowanalytics.snowplow.enrich.fs2.generated", + packageName in Docker := "snowplow/fs2-enrich", + ) 
+ .settings(parallelExecution in Test := false) + .settings( + libraryDependencies ++= Seq( + Dependencies.Libraries.decline, + Dependencies.Libraries.fs2PubSub, + Dependencies.Libraries.circeExtras, + Dependencies.Libraries.circeLiteral, + Dependencies.Libraries.circeConfig, + Dependencies.Libraries.catsEffect, + Dependencies.Libraries.fs2, + Dependencies.Libraries.fs2Io, + Dependencies.Libraries.slf4j, + Dependencies.Libraries.sentry, + Dependencies.Libraries.log4cats, + Dependencies.Libraries.catsRetry, + Dependencies.Libraries.http4sClient, + Dependencies.Libraries.fs2BlobS3, + Dependencies.Libraries.fs2BlobGcs, + Dependencies.Libraries.metrics, + Dependencies.Libraries.pureconfig.withRevision(Dependencies.V.pureconfig013), + Dependencies.Libraries.pureconfigCats.withRevision(Dependencies.V.pureconfig013), + Dependencies.Libraries.pureconfigCirce.withRevision(Dependencies.V.pureconfig013), + Dependencies.Libraries.specs2, + Dependencies.Libraries.specs2CE, + Dependencies.Libraries.scalacheck, + Dependencies.Libraries.specs2Scalacheck, + Dependencies.Libraries.http4sDsl, + Dependencies.Libraries.http4sServer + ), + addCompilerPlugin("com.olegpy" %% "better-monadic-for" % "0.3.1") + ) + .enablePlugins(BuildInfoPlugin) + .settings(BuildSettings.dockerSettings) + .enablePlugins(BuildInfoPlugin, JavaAppPackaging, DockerPlugin) diff --git a/config/config.fs2.hocon.sample b/config/config.fs2.hocon.sample new file mode 100644 index 000000000..c2b2f0e26 --- /dev/null +++ b/config/config.fs2.hocon.sample @@ -0,0 +1,47 @@ +// "Gcp" is the only valid option now +auth = { + type = "Gcp" +} + +// Collector input +input = { + type = "PubSub" + subscription = "projects/test-project/subscriptions/inputSub" + + // Local FS supported for testing purposes + // type = "FileSystem" + // dir = "/var/collector" +} + +// Enriched events output +good = { + type = "PubSub" + topic = "projects/test-project/topics/good-topic" + + // Local FS supported for testing purposes + // type = 
"FileSystem" + // dir = "/var/enriched" +} + +// Bad rows output +bad = { + type = "PubSub" + topic = "projects/test-project/topics/bad-topic" + + // Local FS supported for testing purposes + // type = "FileSystem" + // dir = "/var/bad" +} + +// Optional, for tracking runtime exceptions +sentry = { + dsn = "http://sentry.acme.com" +} + +// Optional, period after which enrich assets should be checked for updates +// no assets will be updated if the key is absent +assetsUpdatePeriod = "7 days" + +// Optional, period after Dropwizard will print out its metrics +// no metrics will be printed if the key is absent +metricsReportPeriod = "1 second" \ No newline at end of file diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala index 346dfc6b1..cd9a85738 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala @@ -13,12 +13,9 @@ package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.Monad -import cats.data.{NonEmptyList, ValidatedNel} +import cats.data.{NonEmptyList, Validated, ValidatedNel} import cats.data.Validated._ -import cats.syntax.either._ -import cats.syntax.eq._ -import cats.syntax.option._ -import cats.syntax.validated._ +import cats.implicits._ import cats.effect.Clock @@ -265,17 +262,11 @@ trait Adapter { * or Failures */ protected[registry] def rawEventsListProcessor( - rawEventsList: List[ValidatedNel[FailureDetails.AdapterFailure, RawEvent]] + rawEventsList: List[Validated[NonEmptyList[FailureDetails.AdapterFailure], RawEvent]] ): ValidatedNel[FailureDetails.AdapterFailure, NonEmptyList[RawEvent]] = { - val successes: List[RawEvent] = - for { - Valid(s) <- rawEventsList - } yield s 
- - val failures: List[FailureDetails.AdapterFailure] = - (for { - Invalid(NonEmptyList(h, t)) <- rawEventsList - } yield h :: t).flatten + val (failures, successes) = rawEventsList.separate match { + case (nel, list) => (nel.flatMap(_.toList), list) + } (successes, failures) match { // No Failures collected. diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala index 9b9ed39b8..2d2341897 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala @@ -18,6 +18,8 @@ import cats.{Id, Monad} import cats.data.{EitherT, NonEmptyList, ValidatedNel} import cats.implicits._ +import cats.effect.Sync + import io.circe._ import io.circe.generic.auto._ @@ -26,7 +28,6 @@ import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.lrumap._ import com.snowplowanalytics.snowplow.badrows.FailureDetails - import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.{Enrichment, ParseableEnrichment} import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -231,4 +232,24 @@ object CreateApiRequestEnrichment { ) ) } + + implicit def syncCreateApiRequestEnrichment[F[_]: Sync]( + implicit CLM: CreateLruMap[F, String, (Either[Throwable, Json], Long)], + HTTP: HttpClient[F] + ): CreateApiRequestEnrichment[F] = + new CreateApiRequestEnrichment[F] { + def create(conf: ApiRequestConf): F[ApiRequestEnrichment[F]] = + CLM + .create(conf.cache.size) + .map(c => + ApiRequestEnrichment( + 
conf.schemaKey, + conf.inputs, + conf.api, + conf.outputs, + conf.cache.ttl, + c + ) + ) + } } diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Assets.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Assets.scala new file mode 100644 index 000000000..78453adf3 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Assets.scala @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.net.URI +import java.nio.file.{Path, Paths} + +import scala.concurrent.duration._ +import scala.util.control.NonFatal + +import cats.{Applicative, Parallel} +import cats.implicits._ + +import cats.effect.{Blocker, Concurrent, ConcurrentEffect, ContextShift, Resource, Sync, Timer} +import cats.effect.concurrent.Ref + +import retry.{RetryDetails, RetryPolicies, RetryPolicy, retryingOnSomeErrors} + +import fs2.Stream +import fs2.hash.md5 +import fs2.io.file.{copy, deleteIfExists, exists, readAll, tempFileResource, writeAll} + +import _root_.io.chrisdavenport.log4cats.Logger +import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +import com.snowplowanalytics.snowplow.enrich.fs2.io.Clients + +/** + * Functions responsible for periodic assets (such as MaxMind/IAB DBs) updates + * The common logic is to periodically invoke a function that: + * 1. Downloads a file (in background) to a temp location + * 2. Compares file's checksum with existing one (stored in a mutable hashmap) + * 3. If checksums match - delete the temp file, return + * 4. If checksums don't match - send a signal to stop raw stream + * (via `SignallingRef` in [[Environment]]) + * 5. 
Once raw stream is stopped - delete an old file and move + temp file to the old's file location + * If any of those URIs have been updated and stopped the raw stream, it will be + * immediately resumed once the above procedure traversed all files + */ +object Assets { + + private implicit def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + + /** + * State of the [[updateStream]], containing information about tracked URIs + * and `stop` signal from [[Environment]] as well as all clients necessary + * to download URIs + * + * @param files mutable hash map of URIs and their latest known state + * @param pauseEnrich stop signal coming from [[Environment]] and that can be used + * to stop the raw stream consumption + * @param clients HTTP, GCS, S3 clients if necessary + */ + final case class State[F[_]]( + files: Ref[F, Map[URI, Hash]], + pauseEnrich: Ref[F, Boolean], + clients: Clients[F] + ) + + object State { + + /** Test pair is used in tests to initialize HTTP client, will be ignored during initialization */ + private val TestPair: Asset = URI.create("http://localhost:8080") -> "index" + + /** + * Initialize an assets state. Try to find them on local FS + * or download if they're missing. 
Also initializes all necessary + * clients (S3, GCP, HTTP etc) + * @param blocker thread pool for downloading and reading files + * @param stop global stop signal from [[Environment]] + * @param assets all assets that have to be tracked + */ + def make[F[_]: ConcurrentEffect: Timer: ContextShift]( + blocker: Blocker, + stop: Ref[F, Boolean], + assets: List[Asset] + ): Resource[F, State[F]] = + for { + clients <- Clients.make[F](blocker, assets.map(_._1)) + map <- Resource.liftF(build[F](blocker, clients, assets.filterNot(asset => asset == TestPair))) + files <- Resource.liftF(Ref.of[F, Map[URI, Hash]](map)) + } yield State(files, stop, clients) + + def build[F[_]: Concurrent: Timer: ContextShift]( + blocker: Blocker, + clients: Clients[F], + assets: List[Asset] + ): F[Map[URI, Hash]] = + Logger[F].info("Preparing enrichment assets") *> + buildFromLocal(blocker, assets) + .flatMap { hashes => + hashes.traverse { + case (uri, path, Some(hash)) => + Logger[F].info(s"Asset from $uri is found on local system at $path").as(uri -> hash) + case (uri, path, None) => + downloadAndHash[F](clients, blocker, uri, Paths.get(path)).map(hash => uri -> hash) + } + } + .map(_.toMap) + + def buildFromLocal[F[_]: Sync: ContextShift](blocker: Blocker, assets: List[Asset]): F[List[(URI, String, Option[Hash])]] = + assets.traverse { case (uri, path) => local[F](blocker, path).map(hash => (uri, path, hash)) } + + /** Check if file already exists */ + def local[F[_]: Sync: ContextShift](blocker: Blocker, path: String): F[Option[Hash]] = { + val fpath = Paths.get(path) + exists(blocker, fpath).ifM( + Hash.fromStream(readAll(fpath, blocker, 1024)).map(_.some), + Sync[F].pure(none) + ) + } + } + + /** Valid MD5 hash */ + final case class Hash private (s: String) extends AnyVal + + object Hash { + private[this] def fromBytes(bytes: Array[Byte]): Hash = { + val bi = new java.math.BigInteger(1, bytes) + Hash(String.format("%0" + (bytes.length << 1) + "x", bi)) + } + + def fromStream[F[_]: 
Sync](stream: Stream[F, Byte]): F[Hash] = + stream.through(md5).compile.to(Array).map(fromBytes) + } + + /** Pair of a tracked `URI` and destination path on local FS (`java.nio.file.Path` is not serializable) */ + type Asset = (URI, String) + + /** Initialise the [[updateStream]] with all necessary resources if refresh period is specified */ + def run[F[_]: ConcurrentEffect: ContextShift: Timer: Parallel](env: Environment[F]): Stream[F, Unit] = + env.assetsUpdatePeriod match { + case Some(duration) => + val init = for { + curDir <- getCurDir + _ <- Logger[F].info(s"Initializing assets refresh stream in $curDir, ticking every $duration") + assets <- env.enrichments.get.map(_.configs.flatMap(_.filesToCache)) + } yield updateStream[F](env.blocker, env.assetsState, env.enrichments, curDir, duration, assets) + Stream.eval(init).flatten + case None => + Stream.empty.covary[F] + } + + def getCurDir[F[_]: Sync]: F[Path] = + Sync[F].delay(Paths.get("").toAbsolutePath) + + /** + * At the end of every update, the stop signal will be resumed to `false` + * Create an update stream that ticks periodically and can invoke an update action, + * which will download a URI and check if it has been updated. 
If it has the + * raw stream will be stopped via `stop` signal from [[Environment]] and assets updated + */ + def updateStream[F[_]: ConcurrentEffect: ContextShift: Parallel: Timer]( + blocker: Blocker, + state: State[F], + enrichments: Ref[F, Environment.Enrichments[F]], + curDir: Path, + duration: FiniteDuration, + assets: List[Asset] + ): Stream[F, Unit] = + Stream.fixedDelay[F](duration).evalMap { _ => + val log = Logger[F].debug(show"Checking remote assets: ${assets.map(_._1).mkString(", ")}") + val reinitialize: F[Unit] = + for { + // side-effecting get-set is inherently not thread-safe + // we need to be sure the state.stop is set to true + // before re-initializing enrichments + _ <- Logger[F].info("Resuming enrich stream") + old <- enrichments.get + _ <- Logger[F].info(show"Reinitializing enrichments: ${old.configs.map(_.schemaKey.name).mkString(", ")}") + fresh <- old.reinitialize + _ <- enrichments.set(fresh) + _ <- state.pauseEnrich.set(false) + } yield () + + val updated = downloadAndPause[F](blocker, state, curDir, assets) + log *> updated.ifM(reinitialize, Logger[F].debug("No assets have been updated since last check")) + } + + /** + * Download list of assets, return false if none has been downloaded + * It also can set `pauseEnrich` into `true` - a caller should make sure it's unpaused + */ + def downloadAndPause[F[_]: ConcurrentEffect: ContextShift: Timer]( + blocker: Blocker, + state: State[F], + dir: Path, + assets: List[Asset] + ): F[Boolean] = + assets + .traverse { + case (uri, path) => + update(blocker, state, dir, uri, Paths.get(path)) + } + .map(_.contains(true)) + + /** + * Update a file in current directory if it has been updated on remote storage + * If a new file has been discovered - stops the enriching streams (signal in `state`) + * Do nothing if file hasn't been updated + * + * Note: this function has a potential to be thread-unsafe if download time + * exceeds tick period. 
We assume that no two threads will be downloading the same URI + * + * @param blocker a thread pool to execute download/copy operations + * @param state a map of URI to MD5 hash to keep track latest state of remote files + * @param curDir a local FS destination for temporary files + * @param uri a remote file (S3, GCS or HTTP), the URI is used as an identificator + * @param path a static file name that enrich clients will access + * file itself is placed in current dir (`dir`) + * @return true if file has been updated + */ + def update[F[_]: ConcurrentEffect: ContextShift: Timer]( + blocker: Blocker, + state: State[F], + curDir: Path, + uri: URI, + path: Path + ): F[Boolean] = + tempFileResource[F](blocker, curDir).use { tmp => + // Set stop signal and replace old file with temporary + def stopAndCopy(hash: Hash, delete: Boolean): F[Unit] = + for { + _ <- Logger[F].info(s"An asset at $uri has been updated since last check, pausing the enrich stream to reinitialize") + _ <- state.pauseEnrich.set(true) + _ <- if (delete) { + val deleted = Logger[F].info(s"Deleted outdated asset $path") + val notDeleted = Logger[F].warn(s"Couldn't delete $path, file didn't exist") + deleteIfExists(blocker, path).ifM(deleted, notDeleted) + } else Sync[F].unit + _ <- copy(blocker, tmp, path) + _ <- state.files.update(_.updated(uri, hash)) + _ <- Logger[F].debug(s"Replaced $uri in Assets.State") + } yield () + + for { + hash <- downloadAndHash(state.clients, blocker, uri, tmp) + localFiles <- state.files.get + updated <- localFiles.get(uri) match { + case Some(known) if known == hash => + Sync[F].pure(false) + case Some(_) => + stopAndCopy(hash, true).as(true) + case None => + stopAndCopy(hash, false).as(true) + } + } yield updated + } + + def downloadAndHash[F[_]: Concurrent: ContextShift: Timer]( + clients: Clients[F], + blocker: Blocker, + uri: URI, + destination: Path + ): F[Hash] = { + val stream = clients.download(uri).observe(writeAll[F](destination, blocker)) + 
Logger[F].info(s"Downloading $uri") *> retryDownload(Hash.fromStream(stream)) + } + + def retryDownload[F[_]: Sync: Timer, A](download: F[A]): F[A] = + retryingOnSomeErrors[A](retryPolicy[F], worthRetrying, onError[F])(download) + + def retryPolicy[F[_]: Applicative]: RetryPolicy[F] = + RetryPolicies.fullJitter[F](1500.milliseconds).join(RetryPolicies.limitRetries[F](5)) + + def worthRetrying(e: Throwable): Boolean = + e match { + case _: Clients.DownloadingFailure => true + case _: IllegalArgumentException => false + case NonFatal(_) => false + } + + def onError[F[_]: Sync](error: Throwable, details: RetryDetails): F[Unit] = + if (details.givingUp) + Logger[F].error(show"Failed to download an asset after ${details.retriesSoFar}. ${error.getMessage}. Aborting the job") + else if (details.retriesSoFar == 0) + Logger[F].warn(show"Failed to download an asset. ${error.getMessage}. Keep retrying") + else + Logger[F].warn( + show"Failed to download an asset after ${details.retriesSoFar} retries, " + + show"waiting for ${details.cumulativeDelay.toMillis} ms. ${error.getMessage}. " + + show"Keep retrying" + ) +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Enrich.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Enrich.scala new file mode 100644 index 000000000..4983aca9c --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Enrich.scala @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.time.Instant +import java.util.Base64 +import java.util.concurrent.TimeUnit + +import org.joda.time.DateTime +import cats.data.{NonEmptyList, ValidatedNel} +import cats.implicits._ + +import cats.effect.{Blocker, Clock, Concurrent, ContextShift, Sync} + +import fs2.Stream + +import _root_.io.sentry.SentryClient +import _root_.io.circe.Json +import _root_.io.circe.syntax._ + +import _root_.io.chrisdavenport.log4cats.Logger +import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger +import com.snowplowanalytics.iglu.client.Client + +import com.snowplowanalytics.snowplow.badrows.{Processor, BadRow, Failure, Payload => BadRowPayload} +import com.snowplowanalytics.snowplow.enrich.common.EtlPipeline +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry +import com.snowplowanalytics.snowplow.enrich.common.loaders.{CollectorPayload, ThriftLoader} +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry + +object Enrich { + + /** + * Parallelism of an enrich stream. 
+ * Unlike for thread pools it doesn't make much sense to use `CPUs x 2` formulae + * as we're not sizing threads, but fibers and memory is the only cost of them + */ + val ConcurrencyLevel = 64 + + /** Default adapter registry, can be constructed dynamically in future */ + val adapterRegistry = new AdapterRegistry() + + val processor: Processor = Processor(generated.BuildInfo.name, generated.BuildInfo.version) + + private implicit def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + + /** + * Run a primary enrichment stream, reading from [[Environment]] source, enriching + * via [[enrichWith]] and sinking into [[GoodSink]] and [[BadSink]] respectively. + * Can be stopped via _stop signal_ from [[Environment]] + * + * The stream won't download any enrichment DBs, it is responsibility of [[Assets]] + * [[Assets.State.make]] downloads assets for the first time unconditionally during + * [[Environment]] initialisation, then if `assetsUpdatePeriod` has been specified - + * they'll be refreshed periodically by [[Assets.updateStream]] + */ + def run[F[_]: Concurrent: ContextShift: Clock](env: Environment[F]): Stream[F, Unit] = { + val registry: F[EnrichmentRegistry[F]] = env.enrichments.get.map(_.registry) + val enrich: Enrich[F] = enrichWith[F](registry, env.blocker, env.igluClient, env.sentry, env.metrics.enrichLatency) + val badSink: BadSink[F] = _.evalTap(_ => env.metrics.badCount).through(env.bad) + val goodSink: GoodSink[F] = _.evalTap(_ => env.metrics.goodCount).through(env.good) + + env.source + .pauseWhen(env.pauseEnrich) + .evalTap(_ => env.metrics.rawCount) + .parEvalMapUnordered(ConcurrencyLevel)(enrich) + .flatMap(_.decompose[BadRow, EnrichedEvent]) + .observeEither(badSink, goodSink) + .void + } + + /** + * Enrich a single `CollectorPayload` to get list of bad rows and/or enriched events + * + * Along with actual `ack` the `enrichLatency` gauge will be updated + */ + def enrichWith[F[_]: Clock: Sync: ContextShift]( + enrichRegistry: 
F[EnrichmentRegistry[F]], + blocker: Blocker, + igluClient: Client[F, Json], + sentry: Option[SentryClient], + enrichLatency: Option[Long] => F[Unit] + )( + row: Payload[F, Array[Byte]] + ): F[Result[F]] = { + val payload = ThriftLoader.toCollectorPayload(row.data, processor) + val collectorTstamp = payload.toOption.flatMap(_.flatMap(_.context.timestamp).map(_.getMillis)) + + val result = + for { + _ <- Logger[F].debug(payloadToString(payload)) + etlTstamp <- Clock[F].realTime(TimeUnit.MILLISECONDS).map(millis => new DateTime(millis)) + registry <- enrichRegistry + enrich = EtlPipeline.processEvents[F](adapterRegistry, registry, igluClient, processor, etlTstamp, payload) + enriched <- blocker.blockOn(enrich) + trackLatency = enrichLatency(collectorTstamp) + } yield Payload(enriched, trackLatency *> row.finalise) + + result.handleErrorWith(sendToSentry[F](row, sentry)) + } + + /** Stringify `ThriftLoader` result for debugging purposes */ + def payloadToString(payload: ValidatedNel[BadRow.CPFormatViolation, Option[CollectorPayload]]): String = + payload.fold(_.asJson.noSpaces, _.map(_.toBadRowPayload.asJson.noSpaces).getOrElse("None")) + + private val EnrichedFields = + classOf[EnrichedEvent].getDeclaredFields + .filterNot(_.getName.equals("pii")) + .map { field => field.setAccessible(true); field } + .toList + + /** Transform enriched event into canonical TSV */ + def encodeEvent(enrichedEvent: EnrichedEvent): String = + EnrichedFields + .map { field => + val prop = field.get(enrichedEvent) + if (prop == null) "" else prop.toString + } + .mkString("\t") + + /** Log an error, turn the problematic `CollectorPayload` into `BadRow` and notify Sentry if configured */ + def sendToSentry[F[_]: Sync: Clock](original: Payload[F, Array[Byte]], sentry: Option[SentryClient])(error: Throwable): F[Result[F]] = + for { + _ <- Logger[F].error("Runtime exception during payload enrichment. 
CollectorPayload converted to generic_error and ack'ed") + now <- Clock[F].realTime(TimeUnit.MILLISECONDS).map(Instant.ofEpochMilli) + _ <- original.finalise + badRow = genericBadRow(original.data, now, error) + _ <- sentry match { + case Some(client) => + Sync[F].delay(client.sendException(error)) + case None => + Sync[F].unit + } + } yield Payload(List(badRow.invalid), Sync[F].unit) + + /** Build a `generic_error` bad row for unhandled runtime errors */ + def genericBadRow( + row: Array[Byte], + time: Instant, + error: Throwable + ): BadRow.GenericError = { + val base64 = new String(Base64.getEncoder.encode(row)) + val rawPayload = BadRowPayload.RawPayload(base64) + val failure = Failure.GenericFailure(time, NonEmptyList.one(error.toString)) + BadRow.GenericError(processor, failure, rawPayload) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Environment.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Environment.scala new file mode 100644 index 000000000..1a08c1519 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Environment.scala @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import scala.concurrent.duration.FiniteDuration + +import cats.Show +import cats.data.EitherT +import cats.implicits._ + +import cats.effect.{Async, Blocker, Clock, Concurrent, ConcurrentEffect, ContextShift, Resource, Sync, Timer} +import cats.effect.concurrent.Ref + +import fs2.concurrent.SignallingRef + +import _root_.io.circe.Json +import _root_.io.circe.syntax._ + +import _root_.io.sentry.{Sentry, SentryClient} +import _root_.io.chrisdavenport.log4cats.Logger +import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.iglu.core.circe.implicits._ + +import com.snowplowanalytics.iglu.client.Client + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf +import com.snowplowanalytics.snowplow.enrich.fs2.config.{CliConfig, ConfigFile} +import com.snowplowanalytics.snowplow.enrich.fs2.io.{FileSystem, Metrics, Sinks, Source} + +/** + * All allocated resources, configs and mutable variables necessary for running Enrich process + * Also responsible for initial assets downloading (during `assetsState` initialisation) + * + * @param igluClient Iglu Client + * @param enrichments enrichment registry with all clients and parsed configuration files + * it's wrapped in mutable variable because all resources need to be + * reinitialized after DB assets are updated via [[Assets]] stream + * @param pauseEnrich a signalling reference that can pause a raw stream and enrichment, + * should be used only by [[Assets]] + * @param assetsState a main entity from [[Assets]] stream, controlling when assets + * have to be replaced with newer ones + * @param blocker thread pool for blocking operations and enrichments themselves + * @param source a stream of raw collector payloads + * @param good a sink 
for successfully enriched events + * @param bad a sink for events that failed validation or enrichment + * @param sentry optional sentry client + * @param metrics common counters + * @param assetsUpdatePeriod time after which enrich assets should be refreshed + * @param metricsReportPeriod period after which metrics are updated + */ +final case class Environment[F[_]]( + igluClient: Client[F, Json], + enrichments: Ref[F, Environment.Enrichments[F]], + pauseEnrich: SignallingRef[F, Boolean], + assetsState: Assets.State[F], + blocker: Blocker, + source: RawSource[F], + good: GoodSink[F], + bad: BadSink[F], + sentry: Option[SentryClient], + metrics: Metrics[F], + assetsUpdatePeriod: Option[FiniteDuration], + metricsReportPeriod: Option[FiniteDuration] +) + +object Environment { + + private implicit def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + + type Parsed[F[_], A] = EitherT[F, String, A] + + type Allocated[F[_]] = Parsed[F, Resource[F, Environment[F]]] + + /** Registry with all allocated clients (MaxMind, IAB etc) and their original configs */ + final case class Enrichments[F[_]](registry: EnrichmentRegistry[F], configs: List[EnrichmentConf]) { + + /** Initialize same enrichments, specified by configs (in case DB files updated) */ + def reinitialize(implicit A: Async[F]): F[Enrichments[F]] = + Enrichments.buildRegistry(configs).map(registry => Enrichments(registry, configs)) + } + + object Enrichments { + def make[F[_]: Async: Clock](configs: List[EnrichmentConf]): Resource[F, Ref[F, Enrichments[F]]] = + Resource.liftF { + for { + registry <- buildRegistry[F](configs) + ref <- Ref.of(Enrichments[F](registry, configs)) + } yield ref + } + + def buildRegistry[F[_]: Async](configs: List[EnrichmentConf]) = + EnrichmentRegistry.build[F](configs).value.flatMap { + case Right(reg) => Async[F].pure(reg) + case Left(error) => Async[F].raiseError[EnrichmentRegistry[F]](new RuntimeException(error)) + } + } + + /** Schema for all enrichments combined */ + 
val EnrichmentsKey: SchemaKey = + SchemaKey("com.snowplowanalytics.snowplow", "enrichments", "jsonschema", SchemaVer.Full(1, 0, 0)) + + /** Initialize and allocate all necessary resources */ + def make[F[_]: ConcurrentEffect: ContextShift: Clock: Timer](config: CliConfig): Allocated[F] = + parse[F](config).map { parsedConfigs => + val file = parsedConfigs.configFile + for { + client <- Client.parseDefault[F](parsedConfigs.igluJson).resource + blocker <- Blocker[F] + metrics <- Metrics.resource[F] + rawSource = Source.read[F](blocker, file.auth, file.input) + goodSink <- Sinks.goodSink[F](blocker, file.auth, file.good) + badSink <- Sinks.badSink[F](blocker, file.auth, file.bad) + assets = parsedConfigs.enrichmentConfigs.flatMap(_.filesToCache) + pauseEnrich <- makePause[F] + assets <- Assets.State.make[F](blocker, pauseEnrich, assets) + enrichments <- Enrichments.make[F](parsedConfigs.enrichmentConfigs) + sentry <- file.sentry.map(_.dsn) match { + case Some(dsn) => Resource.liftF[F, Option[SentryClient]](Sync[F].delay(Sentry.init(dsn.toString).some)) + case None => Resource.pure[F, Option[SentryClient]](none[SentryClient]) + } + _ <- Resource.liftF(pauseEnrich.set(false) *> Logger[F].info("Enrich environment initialized")) + } yield Environment[F](client, + enrichments, + pauseEnrich, + assets, + blocker, + rawSource, + goodSink, + badSink, + sentry, + metrics, + file.assetsUpdatePeriod, + file.metricsReportPeriod + ) + } + + /** + * Make sure `enrichPause` gets into paused state before destroying pipes + * Initialised into `true` because enrich stream should not start until + * [[Assets.State]] is constructed - it will download all assets + */ + def makePause[F[_]: Concurrent]: Resource[F, SignallingRef[F, Boolean]] = + Resource.make(SignallingRef(true))(_.set(true)) + + /** Decode base64-encoded configs, passed via CLI. 
Read files, validate and parse */ + def parse[F[_]: Async: Clock: ContextShift](config: CliConfig): Parsed[F, ParsedConfigs] = + for { + igluJson <- config.resolver.fold(b => EitherT.rightT[F, String](b.value), p => FileSystem.readJson[F](p)) + enrichmentJsons <- config.enrichments match { + case Left(base64) => + EitherT.rightT[F, String](base64.value) + case Right(path) => + FileSystem + .readJsonDir[F](path) + .map(jsons => Json.arr(jsons: _*)) + .map(json => SelfDescribingData(EnrichmentsKey, json).asJson) + } + configFile <- ConfigFile.parse[F](config.config) + client <- Client.parseDefault[F](igluJson).leftMap(x => show"Cannot decode Iglu Client. $x") + _ <- EitherT.liftF( + Logger[F].info(show"Parsed Iglu Client with following registries: ${client.resolver.repos.map(_.config.name).mkString(", ")}") + ) + configs <- EitherT(EnrichmentRegistry.parse[F](enrichmentJsons, client, false).map(_.toEither)).leftMap { x => + show"Cannot decode enrichments ${x.mkString_(", ")}" + } + _ <- EitherT.liftF(Logger[F].info(show"Parsed following enrichments: ${configs.map(_.schemaKey.name).mkString(", ")}")) + } yield ParsedConfigs(igluJson, configs, configFile) + + private[fs2] final case class ParsedConfigs( + igluJson: Json, + enrichmentConfigs: List[EnrichmentConf], + configFile: ConfigFile + ) + + private implicit class EitherTOps[F[_], E: Show, A](eitherT: EitherT[F, E, A]) { + def resource(implicit F: Sync[F]): Resource[F, A] = { + val action: F[A] = eitherT.value.flatMap { + case Right(a) => Sync[F].pure(a) + case Left(error) => Sync[F].raiseError(new RuntimeException(error.show)) // Safe since we already parsed it + } + Resource.liftF[F, A](action) + } + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Main.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Main.scala new file mode 100644 index 000000000..497192676 --- /dev/null +++ 
b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Main.scala @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import cats.syntax.flatMap._ +import cats.effect.{ExitCode, IO, IOApp} + +import _root_.io.sentry.SentryClient + +import _root_.io.chrisdavenport.log4cats.Logger +import _root_.io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +import com.snowplowanalytics.snowplow.enrich.fs2.io.Metrics + +object Main extends IOApp { + + private implicit val logger: Logger[IO] = + Slf4jLogger.getLogger[IO] + + def run(args: List[String]): IO[ExitCode] = + config.CliConfig.command.parse(args) match { + case Right(cfg) => + for { + _ <- logger.info("Initialising resources for Enrich job") + environment <- Environment.make[IO](cfg).value + exit <- environment match { + case Right(e) => + e.use { env => + val log = logger.info("Running enrichment stream") + val enrich = Enrich.run[IO](env) + val updates = Assets.run[IO](env) + val reporting = Metrics.run[IO](env) + val flow = enrich.merge(updates).merge(reporting) + log >> flow.compile.drain.attempt.flatMap { + case Left(exception) => + unsafeSendSentry(exception, env.sentry) + IO.raiseError[ExitCode](exception).as(ExitCode.Error) + case Right(_) => + IO.pure(ExitCode.Success) + } + } + 
case Left(error) => + logger.error(s"Cannot initialise enrichment resources\n$error").as(ExitCode.Error) + } + } yield exit + case Left(error) => + IO(System.err.println(error)).as(ExitCode.Error) + } + + /** Last attempt to notify about an exception (possibly just interruption) */ + private def unsafeSendSentry(error: Throwable, sentry: Option[SentryClient]): Unit = { + sentry match { + case Some(client) => + client.sendException(error) + case None => () + } + logger.error(s"The Enrich job has stopped ${sentry.fold("")(_ => "Sentry report has been sent")}").unsafeRunSync() + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Payload.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Payload.scala new file mode 100644 index 000000000..10bd2b725 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/Payload.scala @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import scala.annotation.tailrec + +import cats.Applicative +import cats.syntax.either._ +import cats.data.Validated + +import fs2.{Pure, Stream} + +import com.snowplowanalytics.snowplow.enrich.fs2.Payload.Parsed + +/** + * Anything that has been read from [[RawSource]] and needs to be acknowledged + * or a derivative (parsed `A`) that can be used to acknowledge the original message + * @param data original data or anything it has been transformed to + * @param finalise a side-effect to acknowledge (commit, log on-finish) the message or + * no-op in case the original message has been flattened into + * multiple rows and only last row contains the actual side-effect + */ +case class Payload[F[_], A](data: A, finalise: F[Unit]) { + + /** + * Flatten all payloads from a list and replace an `ack` action to no-op everywhere + * except last message, so that original collector payload (with multiple events) + * will be ack'ed only when last event has sunk into good or bad sink + */ + def decompose[L, R](implicit ev: A <:< List[Validated[L, R]], F: Applicative[F]): Stream[F, Parsed[F, L, R]] = { + val _ = ev + val noop: F[Unit] = Applicative[F].unit + def use(op: F[Unit])(v: Validated[L, R]): Parsed[F, L, R] = + v.fold(a => Payload(a, op).asLeft, b => Payload(b, op).asRight) + + Payload.mapWithLast(use(noop), use(finalise))(data) + } +} + +object Payload { + + /** + * Original [[Payload]] that has been transformed into either `A` or `B` + * Regardless of the result (`A` or `B`) the original one still has to be acknowledged + * + * If original contained only one row (good or bad), the `Parsed` must have a real + * `ack` action, otherwise if it has been accompanied by other rows, only the last + * element from the original will contain the `ack`, all others just `noop` + */ + type Parsed[F[_], A, B] = Either[Payload[F, A], Payload[F, B]] + + /** Apply `f` function to all elements in a list, except last one, where `lastF` 
applied */ + def mapWithLast[A, B](f: A => B, lastF: A => B)(as: List[A]): Stream[Pure, B] = { + @tailrec + def go(aas: List[A], accum: Vector[B]): Vector[B] = + aas match { + case Nil => + accum + case last :: Nil => + accum :+ lastF(last) + case a :: remaining => + go(remaining, accum :+ f(a)) + } + + Stream.emits(go(as, Vector.empty)) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Hocon.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Hocon.scala new file mode 100644 index 000000000..e40037ba3 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Hocon.scala @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.util.Base64 + +import cats.data.ValidatedNel +import cats.syntax.either._ + +import com.typesafe.config.{ConfigException, ConfigFactory} + +import _root_.io.circe.Json + +import pureconfig.syntax._ +import pureconfig.module.circe._ + +import com.monovore.decline.Argument + +final case class Base64Hocon(value: Json) extends AnyVal + +object Base64Hocon { + + private val base64 = Base64.getDecoder + + implicit val base64Hocon: Argument[Base64Hocon] = + new Argument[Base64Hocon] { + def read(string: String): ValidatedNel[String, Base64Hocon] = { + val result = for { + bytes <- Either.catchOnly[IllegalArgumentException](base64.decode(string)).leftMap(_.getMessage) + hocon <- parseHocon(new String(bytes)) + } yield hocon + result.toValidatedNel + } + + def defaultMetavar: String = "base64" + } + + def parseHocon(str: String): Either[String, Base64Hocon] = + for { + configValue <- Either.catchOnly[ConfigException](ConfigFactory.parseString(str)).leftMap(_.toString).map(_.toConfig) + json <- configValue.to[Json].leftMap(_.toString) + } yield Base64Hocon(json) +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Json.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Json.scala new file mode 100644 index 000000000..ab8fc4879 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64Json.scala @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.util.Base64 + +import cats.data.ValidatedNel +import cats.syntax.show._ +import cats.syntax.either._ + +import _root_.io.circe.Json +import _root_.io.circe.parser.parse + +import com.monovore.decline.Argument + +final case class Base64Json(value: Json) extends AnyVal + +object Base64Json { + + private val base64 = Base64.getDecoder + + implicit val base64Json: Argument[Base64Json] = + new Argument[Base64Json] { + + def read(string: String): ValidatedNel[String, Base64Json] = { + val result = for { + bytes <- Either.catchOnly[IllegalArgumentException](base64.decode(string)).leftMap(_.getMessage) + str = new String(bytes) + json <- parse(str).leftMap(_.show) + } yield Base64Json(json) + result.toValidatedNel + } + + def defaultMetavar: String = "base64" + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfig.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfig.scala new file mode 100644 index 000000000..d15a00dab --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfig.scala @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.nio.file.Path + +import cats.data.{NonEmptyList, ValidatedNel} +import cats.implicits._ + +import com.monovore.decline.{Argument, Command, Opts} + +import com.snowplowanalytics.snowplow.enrich.fs2.generated.BuildInfo + +final case class CliConfig( + config: EncodedHoconOrPath, + resolver: EncodedOrPath, + enrichments: EncodedOrPath +) + +object CliConfig { + + implicit val encodedOrPathArgument: Argument[EncodedOrPath] = + new Argument[EncodedOrPath] { + def read(string: String): ValidatedNel[String, EncodedOrPath] = { + val encoded = Argument[Base64Json].read(string).map(_.asLeft) + val path = Argument[Path].read(string).map(_.asRight) + val error = show"Value $string cannot be parsed as Base64 JSON neither as FS path" + encoded.orElse(path).leftMap(_ => NonEmptyList.one(error)) + } + + def defaultMetavar: String = "input" + } + + implicit val encodedHoconOrPathArgument: Argument[EncodedHoconOrPath] = + new Argument[EncodedHoconOrPath] { + def read(string: String): ValidatedNel[String, EncodedHoconOrPath] = { + val encoded = Argument[Base64Hocon].read(string).map(_.asLeft) + val path = Argument[Path].read(string).map(_.asRight) + val error = show"Value $string cannot be parsed as Base64 JSON neither as FS path" + encoded.orElse(path).leftMap(_ => NonEmptyList.one(error)) + } + + def defaultMetavar: String = "input" + } + + val configFile: Opts[EncodedHoconOrPath] = + Opts.option[EncodedHoconOrPath]("config", "Base64-encoded HOCON string with enrichment configurations", "c", "base64") + + val enrichments: Opts[EncodedOrPath] = + 
Opts.option[EncodedOrPath]("enrichments", "Base64-encoded JSON string with enrichment configurations", "e", "base64") + + val igluConfig: Opts[EncodedOrPath] = + Opts.option[EncodedOrPath]("iglu-config", "Iglu resolver configuration JSON", "r", "base64") + + val enrichedJobConfig: Opts[CliConfig] = + (configFile, igluConfig, enrichments).mapN(CliConfig.apply) + + val command: Command[CliConfig] = + Command(show"${BuildInfo.name}", show"${BuildInfo.name} ${BuildInfo.version}\n${BuildInfo.description}")(enrichedJobConfig) +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFile.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFile.scala new file mode 100644 index 000000000..21ba8d8e6 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFile.scala @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import scala.concurrent.duration.FiniteDuration + +import cats.data.EitherT +import cats.implicits._ + +import cats.effect.{Blocker, ContextShift, Sync} + +import _root_.io.circe.{Decoder, Encoder, Json} +import _root_.io.circe.config.syntax._ +import _root_.io.circe.generic.extras.semiauto.{deriveConfiguredDecoder, deriveConfiguredEncoder} + +import com.snowplowanalytics.snowplow.enrich.fs2.config.io.{Authentication, Input, Output} + +import pureconfig.ConfigSource +import pureconfig.module.catseffect.syntax._ +import pureconfig.module.circe._ + +/** + * Parsed HOCON configuration file + * + * @param auth authentication details, such as credentials + * @param input input (PubSub, Kinesis etc) + * @param good good enriched output (PubSub, Kinesis, FS etc) + * @param bad bad rows output (PubSub, Kinesis, FS etc) + * @param assetsUpdatePeriod time after which assets should be updated, in minutes + */ +final case class ConfigFile( + auth: Authentication, + input: Input, + good: Output, + bad: Output, + assetsUpdatePeriod: Option[FiniteDuration], + sentry: Option[Sentry], + metricsReportPeriod: Option[FiniteDuration] +) + +object ConfigFile { + + // Missing in circe-config + implicit val finiteDurationEncoder: Encoder[FiniteDuration] = + implicitly[Encoder[String]].contramap(_.toString) + + implicit val configFileDecoder: Decoder[ConfigFile] = + deriveConfiguredDecoder[ConfigFile].emap { + case ConfigFile(_, _, _, _, Some(aup), _, _) if aup._1 <= 0L => + "assetsUpdatePeriod in config file cannot be less than 0".asLeft // TODO: use newtype + case ConfigFile(_, _, _, _, _, _, Some(mrp)) if mrp._1 <= 0L => + "metricsReportPeriod in config file cannot be less than 0".asLeft + case other => other.asRight + } + implicit val configFileEncoder: Encoder[ConfigFile] = + deriveConfiguredEncoder[ConfigFile] + + def parse[F[_]: Sync: ContextShift](in: EncodedHoconOrPath): EitherT[F, String, ConfigFile] = + in match { 
+ case Right(path) => + val result = Blocker[F].use { blocker => + ConfigSource + .default(ConfigSource.file(path)) + .loadF[F, Json](blocker) + .map(_.as[ConfigFile].leftMap(f => show"Couldn't parse the config $f")) + } + result.attemptT.leftMap(_.getMessage).subflatMap(identity) + case Left(encoded) => + EitherT.fromEither[F](encoded.value.as[ConfigFile].leftMap(failure => show"Couldn't parse a base64-encoded config file:\n$failure")) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Sentry.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Sentry.scala new file mode 100644 index 000000000..3dbe4e6fc --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Sentry.scala @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.net.URI + +import cats.syntax.either._ + +import _root_.io.circe.{Decoder, Encoder} +import _root_.io.circe.generic.extras.semiauto._ + +case class Sentry(dsn: URI) + +object Sentry { + + implicit val javaNetUriDecoder: Decoder[URI] = + Decoder[String].emap { str => + Either.catchOnly[IllegalArgumentException](URI.create(str)).leftMap(_.getMessage) + } + + implicit val javaNetUriEncoder: Encoder[URI] = + Encoder[String].contramap(_.toString) + + implicit val authenticationDecoder: Decoder[Sentry] = + deriveConfiguredDecoder[Sentry] + implicit val authenticationEncoder: Encoder[Sentry] = + deriveConfiguredEncoder[Sentry] +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/io.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/io.scala new file mode 100644 index 000000000..7cb28bcb6 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/io.scala @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.nio.file.{InvalidPathException, Path, Paths} + +import cats.syntax.either._ + +import _root_.io.circe.{Decoder, Encoder} +import _root_.io.circe.generic.extras.semiauto._ + +object io { + + implicit val javaPathDecoder: Decoder[Path] = + Decoder[String].emap { s => + Either.catchOnly[InvalidPathException](Paths.get(s)).leftMap(_.getMessage) + } + implicit val javaPathEncoder: Encoder[Path] = + Encoder[String].contramap(_.toString) + + sealed trait Authentication extends Product with Serializable + + object Authentication { + case object Gcp extends Authentication + + implicit val authenticationDecoder: Decoder[Authentication] = + deriveConfiguredDecoder[Authentication] + implicit val authenticationEncoder: Encoder[Authentication] = + deriveConfiguredEncoder[Authentication] + } + + /** Source of raw collector data (only PubSub supported atm) */ + sealed trait Input + + object Input { + + case class PubSub private (subscription: String) extends Input { + val (project, name) = + subscription.split("/").toList match { + case List("projects", project, "subscriptions", name) => + (project, name) + case _ => + throw new IllegalArgumentException(s"Cannot construct Input.PubSub from $subscription") + } + } + case class FileSystem(dir: Path) extends Input + + implicit val inputDecoder: Decoder[Input] = + deriveConfiguredDecoder[Input].emap { + case s @ PubSub(sub) => + sub.split("/").toList match { + case List("projects", _, "subscriptions", _) => + s.asRight + case _ => + s"Subscription must conform projects/project-name/subscriptions/subscription-name format, $s given".asLeft + } + case other => other.asRight + } + implicit val inputEncoder: Encoder[Input] = + deriveConfiguredEncoder[Input] + } + + sealed trait Output + + object Output { + case class PubSub private (topic: String) extends Output { + val (project, name) = + topic.split("/").toList match { + case List("projects", project, "topics", 
name) => + (project, name) + case _ => + throw new IllegalArgumentException(s"Cannot construct Output.PubSub from $topic") + } + } + case class FileSystem(dir: Path) extends Output + + implicit val outputDecoder: Decoder[Output] = + deriveConfiguredDecoder[Output].emap { + case s @ PubSub(top) => + top.split("/").toList match { + case List("projects", _, "topics", _) => + s.asRight + case _ => + s"Topic must conform projects/project-name/topics/topic-name format, $s given".asLeft + } + case other => other.asRight + } + implicit val outputEncoder: Encoder[Output] = + deriveConfiguredEncoder[Output] + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/package.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/package.scala new file mode 100644 index 000000000..543b9214b --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/package.scala @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.nio.file.Path + +import _root_.io.circe.generic.extras.Configuration + +package object config { + + type EncodedOrPath = Either[Base64Json, Path] + type EncodedHoconOrPath = Either[Base64Hocon, Path] + + private[config] implicit def customCodecConfig: Configuration = + Configuration.default.withDiscriminator("type") + +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Clients.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Clients.scala new file mode 100644 index 000000000..1496217ce --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Clients.scala @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import java.net.URI + +import cats.syntax.option._ +import cats.syntax.functor._ +import cats.syntax.flatMap._ + +import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource, Sync} + +import fs2.{RaiseThrowable, Stream} + +import blobstore.Path +import blobstore.s3.S3Store +import blobstore.gcs.GcsStore + +import com.google.cloud.storage.StorageOptions + +import org.http4s.{Request, Uri} +import org.http4s.client.{Client => HttpClient} +import org.http4s.client.blaze.BlazeClientBuilder + +import software.amazon.awssdk.services.s3.S3AsyncClient + +case class Clients[F[_]]( + s3Store: Option[S3Store[F]], + gcsStore: Option[GcsStore[F]], + http: Option[HttpClient[F]] +) { + + /** Download an `uri` as a stream of bytes, using the appropriate client */ + def download(uri: URI)(implicit RT: RaiseThrowable[F]): Stream[F, Byte] = + Clients.Client.getByUri(uri) match { + case Some(Clients.Client.S3) => + for { + s3 <- s3Store match { + case Some(c) => Stream.emit(c) + case None => Stream.raiseError(new IllegalStateException(s"S3 client is not initialized to download $uri")) + } + data <- s3.get(Path(uri.toString), 16 * 1024) + } yield data + case Some(Clients.Client.GCS) => + for { + gcs <- gcsStore match { + case Some(c) => Stream.emit(c) + case None => Stream.raiseError(new IllegalStateException(s"GCS client is not initialized to download $uri")) + } + data <- gcs.get(Path(uri.toString), 16 * 1024) + } yield data + case Some(Clients.Client.HTTP) => + http match { + case Some(c) => + val request = Request[F](uri = Uri.unsafeFromString(uri.toString)) + for { + response <- c.stream(request) + body <- if (response.status.isSuccess) response.body + else Stream.raiseError[F](Clients.DownloadingFailure(uri)) + } yield body + case None => + Stream.raiseError(new IllegalStateException(s"HTTP client is not initialized to download $uri")) + } + case None => + Stream.raiseError(new IllegalStateException(s"No 
client initialized to download $uri")) + } +} + +object Clients { + + sealed trait Client + object Client { + case object S3 extends Client + case object GCS extends Client + case object HTTP extends Client + + def getByUri(uri: URI): Option[Client] = + uri.getScheme match { + case "http" | "https" => + Some(HTTP) + case "gs" => + Some(GCS) + case "s3" => + Some(S3) + case _ => + None + } + + def required(uris: List[URI]): Set[Client] = + uris.foldLeft(Set.empty[Client]) { (acc, uri) => + getByUri(uri) match { + case Some(client) => acc + client + case None => acc // This should short-circuit on initialisation + } + } + } + + def mkS3[F[_]: ConcurrentEffect]: F[S3Store[F]] = + Sync[F].delay(S3AsyncClient.builder().build()).flatMap(client => S3Store[F](client)) + + def mkGCS[F[_]: ConcurrentEffect: ContextShift](blocker: Blocker): F[GcsStore[F]] = + Sync[F].delay(StorageOptions.getDefaultInstance.getService).map { storage => + GcsStore(storage, blocker, List.empty) + } + + def mkHTTP[F[_]: ConcurrentEffect]: Resource[F, HttpClient[F]] = + BlazeClientBuilder[F](concurrent.ExecutionContext.global).resource + + /** Initialise all necessary clients capable of fetching provides `uris` */ + def make[F[_]: ConcurrentEffect: ContextShift](blocker: Blocker, uris: List[URI]): Resource[F, Clients[F]] = { + val toInit = Client.required(uris) + for { + s3 <- if (toInit.contains(Client.S3)) Resource.liftF(mkS3[F]).map(_.some) else Resource.pure[F, Option[S3Store[F]]](none) + gcs <- if (toInit.contains(Client.GCS)) Resource.liftF(mkGCS[F](blocker).map(_.some)) else Resource.pure[F, Option[GcsStore[F]]](none) + http <- if (toInit.contains(Client.HTTP)) mkHTTP[F].map(_.some) else Resource.pure[F, Option[HttpClient[F]]](none) + } yield Clients(s3, gcs, http) + } + + case class DownloadingFailure(uri: URI) extends Throwable { + override def getMessage: String = s"Cannot download $uri" + } +} diff --git 
a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/FileSystem.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/FileSystem.scala new file mode 100644 index 000000000..58a80b3aa --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/FileSystem.scala @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import java.nio.file.{Files, Path} + +import scala.collection.JavaConverters._ + +import cats.data.EitherT + +import cats.effect.Sync +import cats.implicits._ + +import fs2.Stream + +import _root_.io.circe.Json +import _root_.io.circe.parser.parse + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object FileSystem { + + private implicit def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + + def list[F[_]: Sync](dir: Path): Stream[F, Path] = + for { + paths <- Stream.eval(Sync[F].delay(Files.list(dir))) + path <- Stream.fromIterator(paths.iterator().asScala) + } yield path + + def readJson[F[_]: Sync](path: Path): EitherT[F, String, Json] = + Sync[F] + .delay[String](Files.readString(path)) + .attemptT + .leftMap(e => show"Error reading ${path.toAbsolutePath.toString} JSON file from filesystem: ${e.getMessage}") + .subflatMap(str => parse(str).leftMap(e => show"Cannot parse JSON in ${path.toAbsolutePath.toString}: ${e.getMessage()}")) + + def readJsonDir[F[_]: Sync](dir: Path): EitherT[F, String, List[Json]] = + list(dir).compile.toList.attemptT + .leftMap(e => show"Cannot list ${dir.toAbsolutePath.toString} directory with JSON: ${e.getMessage}") + .flatMap { paths => + EitherT.liftF[F, String, Unit](Logger[F].info(s"Files found in $dir: ${paths.mkString(", ")}")) *> + paths.filter(_.toString.endsWith(".json")).traverse(readJson[F]) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Metrics.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Metrics.scala new file mode 100644 index 000000000..18f189858 --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Metrics.scala @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. 
+ * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import cats.syntax.applicativeError._ +import cats.effect.{Resource, Sync, Timer} + +import fs2.Stream + +import com.codahale.metrics.{Gauge, MetricRegistry, Slf4jReporter} + +import org.slf4j.LoggerFactory + +import com.snowplowanalytics.snowplow.enrich.fs2.Environment + +trait Metrics[F[_]] { + + /** Send latest metrics to reporter */ + def report: F[Unit] + + /** + * Track latency between collector hit and enrichment + * This function gets current timestamp by itself + */ + def enrichLatency(collectorTstamp: Option[Long]): F[Unit] + + /** Increment raw payload count */ + def rawCount: F[Unit] + + /** Increment good enriched events */ + def goodCount: F[Unit] + + /** Increment bad events */ + def badCount: F[Unit] +} + +object Metrics { + + val LoggerName = "enrich.metrics" + val LatencyGaugeName = "enrich.metrics.latency" + val RawCounterName = "enrich.metrics.raw.count" + val GoodCounterName = "enrich.metrics.good.count" + val BadCounterName = "enrich.metrics.bad.count" + + def run[F[_]: Sync: Timer](env: Environment[F]): Stream[F, Unit] = + env.metricsReportPeriod match { + case Some(period) => + Stream.fixedRate[F](period).evalMap(_ => env.metrics.report) + case None => + Stream.empty.covary[F] + } + + /** + * Technically `Resource` doesn't give us much as we don't allocate a 
thread pool, + * but it will make sure the last report is issued + */ + def resource[F[_]: Sync]: Resource[F, Metrics[F]] = + Resource + .make(init) { case (res, _) => Sync[F].delay(res.close()) } + .map { case (res, reg) => make[F](res, reg) } + + /** Initialise backend resources */ + def init[F[_]: Sync]: F[(Slf4jReporter, MetricRegistry)] = + Sync[F].delay { + val registry = new MetricRegistry() + val logger = LoggerFactory.getLogger(LoggerName) + val reporter = Slf4jReporter.forRegistry(registry).outputTo(logger).build() + (reporter, registry) + } + + def make[F[_]: Sync](reporter: Slf4jReporter, registry: MetricRegistry): Metrics[F] = + new Metrics[F] { + val rawCounter = registry.counter(RawCounterName) + val goodCounter = registry.counter(GoodCounterName) + val badCounter = registry.counter(BadCounterName) + + def report: F[Unit] = + Sync[F].delay(reporter.report()) + + def enrichLatency(collectorTstamp: Option[Long]): F[Unit] = + collectorTstamp match { + case Some(tstamp) => + Sync[F] + .delay { + registry.remove(LatencyGaugeName) + val now = System.currentTimeMillis() + val _ = registry.register(LatencyGaugeName, getGauge(now, tstamp)) + } + .handleError { + // Two threads can run into a race condition registering a gauge + case _: IllegalArgumentException => () + } + case None => + Sync[F].unit + } + + def rawCount: F[Unit] = + Sync[F].delay(rawCounter.inc()) + + def goodCount: F[Unit] = + Sync[F].delay(goodCounter.inc()) + + def badCount: F[Unit] = + Sync[F].delay(badCounter.inc()) + + private def getGauge(now: Long, collectorTstamp: Long): Gauge[Long] = + new Gauge[Long] { + def getValue: Long = now - collectorTstamp + } + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Sinks.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Sinks.scala new file mode 100644 index 000000000..230c9dd6d --- /dev/null +++ 
b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Sinks.scala @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import java.nio.file.{Path, StandardOpenOption} + +import scala.concurrent.duration._ + +import cats.syntax.flatMap._ +import cats.syntax.functor._ + +import cats.effect.{Blocker, Concurrent, ContextShift, Resource, Sync} + +import fs2.{Pipe, Stream, text} +import fs2.io.file.writeAll + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +import com.permutive.pubsub.producer.Model.{ProjectId, Topic} +import com.permutive.pubsub.producer.encoder.MessageEncoder +import com.permutive.pubsub.producer.grpc.{GooglePubsubProducer, PubsubProducerConfig} + +import com.snowplowanalytics.snowplow.badrows.BadRow + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +import com.snowplowanalytics.snowplow.enrich.fs2.{BadSink, Enrich, GoodSink, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.config.io.{Authentication, Output} + +object Sinks { + + /** + * Set the delay threshold to use for batching. After this amount of time has elapsed (counting + * from the first element added), the elements will be wrapped up in a batch and sent. 
This + * value should not be set too high, usually on the order of milliseconds. Otherwise, calls + * might appear to never complete. + */ + val DelayThreshold: FiniteDuration = 200.milliseconds + + private implicit def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + + def goodSink[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + auth: Authentication, + output: Output + ): Resource[F, GoodSink[F]] = + (auth, output) match { + case (Authentication.Gcp, o: Output.PubSub) => + pubsubSink[F, EnrichedEvent](o) + case (_, o: Output.FileSystem) => + Resource.pure(goodFileSink(o.dir, blocker)) + } + + def badSink[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + auth: Authentication, + output: Output + ): Resource[F, BadSink[F]] = + (auth, output) match { + case (Authentication.Gcp, o: Output.PubSub) => + pubsubSink[F, BadRow](o) + case (_, o: Output.FileSystem) => + Resource.pure(badFileSink(o.dir, blocker)) + } + + def pubsubSink[F[_]: Concurrent, A: MessageEncoder]( + output: Output.PubSub + ): Resource[F, Pipe[F, Payload[F, A], Unit]] = { + val config = PubsubProducerConfig[F]( + batchSize = 5, + delayThreshold = DelayThreshold, + onFailedTerminate = err => Logger[F].error(err)("PubSub sink termination error") + ) + + GooglePubsubProducer + .of[F, A](ProjectId(output.project), Topic(output.name), config) + .map(producer => + (s: Stream[F, Payload[F, A]]) => s.parEvalMapUnordered(Enrich.ConcurrencyLevel)(row => producer.produce(row.data) >> row.finalise) + ) + } + + def goodFileSink[F[_]: Sync: ContextShift](goodOut: Path, blocker: Blocker): GoodSink[F] = + goodStream => + goodStream + .evalMap(p => p.finalise.as(Enrich.encodeEvent(p.data))) + .intersperse("\n") + .through(text.utf8Encode) + .through(writeAll[F](goodOut, blocker, List(StandardOpenOption.CREATE_NEW))) + + def badFileSink[F[_]: Sync: ContextShift](badOut: Path, blocker: Blocker): BadSink[F] = + badStream => + badStream + .evalMap(p => p.finalise.as(p.data.compact)) + 
.intersperse("\n") + .through(text.utf8Encode) + .through(writeAll[F](badOut, blocker, List(StandardOpenOption.CREATE_NEW))) +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Source.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Source.scala new file mode 100644 index 000000000..ce9b734bb --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/Source.scala @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2019 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.io + +import cats.effect.{Blocker, Concurrent, ContextShift, Sync} +import cats.implicits._ + +import fs2.Stream +import fs2.io.file.{directoryStream, readAll} + +import com.permutive.pubsub.consumer.Model +import com.permutive.pubsub.consumer.grpc.{PubsubGoogleConsumer, PubsubGoogleConsumerConfig} + +import com.snowplowanalytics.snowplow.enrich.fs2.{Payload, RawSource} +import com.snowplowanalytics.snowplow.enrich.fs2.config.io.{Authentication, Input} + +import com.google.pubsub.v1.PubsubMessage + +object Source { + + def read[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + auth: Authentication, + input: Input + ): RawSource[F] = + (auth, input) match { + case (Authentication.Gcp, p: Input.PubSub) => + pubSub(blocker, p) + case (_, p: Input.FileSystem) => + directoryStream(blocker, p.dir).evalMap { file => + readAll[F](file, blocker, 4096).compile + .to(Array) + .map(bytes => Payload(bytes, Sync[F].unit)) + } + } + + def pubSub[F[_]: Concurrent: ContextShift]( + blocker: Blocker, + input: Input.PubSub + ): Stream[F, Payload[F, Array[Byte]]] = { + val onFailedTerminate: Throwable => F[Unit] = + e => Sync[F].delay(System.err.println(s"Cannot terminate ${e.getMessage}")) + val pubSubConfig = PubsubGoogleConsumerConfig(onFailedTerminate = onFailedTerminate) + val projectId = Model.ProjectId(input.project) + val subscriptionId = Model.Subscription(input.name) + val errorHandler: (PubsubMessage, Throwable, F[Unit], F[Unit]) => F[Unit] = // Should be useless + (message, error, _, _) => + Sync[F].delay(System.err.println(s"Cannot decode message ${message.getMessageId} into array of bytes. 
${error.getMessage}")) + PubsubGoogleConsumer + .subscribe[F, Array[Byte]](blocker, projectId, subscriptionId, errorHandler, pubSubConfig) + .map(record => Payload(record.value, record.ack)) + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/package.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/package.scala new file mode 100644 index 000000000..9ad3e5eca --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/io/package.scala @@ -0,0 +1,31 @@ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import cats.syntax.either._ + +import com.permutive.pubsub.consumer.decoder.MessageDecoder +import com.permutive.pubsub.producer.encoder.MessageEncoder + +import com.snowplowanalytics.snowplow.badrows.BadRow + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +package object io { + + implicit val badRowEncoder: MessageEncoder[BadRow] = + new MessageEncoder[BadRow] { + def encode(a: BadRow): Either[Throwable, Array[Byte]] = + a.compact.getBytes.asRight + } + + implicit val enrichedEventEncoder: MessageEncoder[EnrichedEvent] = + new MessageEncoder[EnrichedEvent] { + def encode(enrichedEvent: EnrichedEvent): Either[Throwable, Array[Byte]] = + Enrich.encodeEvent(enrichedEvent).getBytes.asRight + } + + implicit val byteArrayMessageDecoder: MessageDecoder[Array[Byte]] = + new MessageDecoder[Array[Byte]] { + def decode(message: Array[Byte]): Either[Throwable, Array[Byte]] = + message.asRight + } +} diff --git a/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/package.scala b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/package.scala new file mode 100644 index 000000000..57d85526b --- /dev/null +++ b/modules/fs2/src/main/scala/com/snowplowanalytics/snowplow/enrich/fs2/package.scala @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. 
+ * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich + +import cats.data.Validated + +import _root_.fs2.{Pipe, Stream} + +import com.snowplowanalytics.snowplow.badrows.BadRow + +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +package object fs2 { + + /** Raw Thrift payloads coming from a collector */ + type RawSource[F[_]] = Stream[F, Payload[F, Array[Byte]]] + + type BadSink[F[_]] = Pipe[F, Payload[F, BadRow], Unit] + type GoodSink[F[_]] = Pipe[F, Payload[F, EnrichedEvent], Unit] + + /** Enrichment result, containing list of (valid and invalid) results */ + type Result[F[_]] = Payload[F, List[Validated[BadRow, EnrichedEvent]]] + + /** Function to transform an origin raw payload into good and/or bad rows */ + type Enrich[F[_]] = Payload[F, Array[Byte]] => F[Result[F]] +} diff --git a/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-1.mmdb b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-1.mmdb new file mode 100644 index 0000000000000000000000000000000000000000..902c8d78dc88be6dde0c5633cf01caf60476c7b2 GIT binary patch literal 1462 zcmZ9JS#wlH7=~X$AjqZ&BAa9s7Zd>ro3ba9m{|x!G9asmo^v`g4Kv-tSppN;+;Cq} zQ9(%xFRbE5egef67ZjIXvv|Q3`~-U5p=06H)KgEtU-#F`0mcE(0xiH4U_3Aaw2~8m zNx(#U8)-}S|Ah0&PHZ?8mjDobAfp-oDVDj 
zUIw0X$wEpynIfMjUmzEeFOn~jcE?wM*MY?xzDmAEE^!l@r&34O|3z=^5ZFAPWouIVO2> zD`0>=KmgmAY$yB4cgP*&09hb+lDo)v$=&20axZWY*hk+_9w6T%J+k>|O%`3^QvyIz z2FVgxChdGhIRqT0RLH8UY9}=(M}W|I1jO{jCCvjI1&%SPlf&fuz$wac-~{C)Id<~X zzz0k|boob=kAY8Ia)$CL@EPTEmwy3#>D)H_+W9w>Z-MV9-;+O(XUQMQpU9udbL20i zZS^bVH}X7if$}@~2RU|rE06#F?edqeWGED!A4@_s^b&N4L79&Zf&km;7>~ zv%b->(o)BFx_uMKuFio}+chs-lA7I^*xeZ^(d(N;$6@`ht<1l2v!EhRX-_7hsY#)| zL@T4wmQj)M*do2Hxwt1?R$62##uu@ja(BD%{Uj8A-3vval4vx@RBRVp#UT@VLo#gi8g2Q1 zRq6k#Bl%*YDuE|!#xJ+Fc$FJd`vMV*qKKpy*K0E0D^0e)V`M~$Wfw9@%q$5+GawM)&~r{_rg5fwI7?vS!lL57 zvbbwfdO;O8`V**Jc|mc>H3Ao2=}(~N9Xb?DO+EGW`*nZ49AFyoD9{4T0v-UyfmU)l z@E|aQ-bUJz{XgVODyU9J|o8(?{AGsem3=Go`kO#>_q(?R%t;wQGd`bXF z$_QB^%cPyJC`W*|C>65ms@h4-$x$G59sw~uaY^$4$AIHZ>f{*tHgJOS4)8AJJ#y;g zCxKH;PP_bl$_K!QE;&Q_2>6)tiOW9)K67pxe&PH}%2&YGlyAsy$+P4+@;rHg{Eqyd zw5@)i{7C);{7m_U{FR(Kzm4mFOiyG5Y_Bouz#3q_!kXd=i|Y$Z+tS>Dn6|L@mk zZabQ-7+ty(d4-FY3xA)<#>!q`%xIZ!oMv4{ghi$Mqof$9Q5D(F(X`eZ_G}&V7o@^; zXJ>Qwd`?xAP7Rf%E?v6v-+(wipmea-H`RZK?RpW`y%7^uMeJ3@5fgf&GHmpkXt}?t z^xf)szL=;=;K`ct%dIV59((*4`k$rs@AxC<1zOZ zWE7{|^pqJ%Wd}LTsoK=`t=Wpur9_mZy_H>*)Ap;_ceP*O@_0U!z6k>_Qe$#r@oge2 BR%ZYJ literal 0 HcmV?d00001 diff --git a/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb b/modules/fs2/src/test/resources/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb new file mode 100644 index 0000000000000000000000000000000000000000..0a909864892c681b99ab8c51874f80c0e86b9c30 GIT binary patch literal 1457 zcmZ9JS#wlH7=~YBG=K|BkX6XYDj*HTWEKL^44a6@o^v`gjWgZDSt1ia+`x@p z!G*=7;DRb{^e0eUc|mdMH3Ao0!B4Q~9Xb?FO+EFT_v`-pJAHt0zzm=XcoujH7y+8e zr-AXnGxQeHmh69m^NCJu_#E&&&{0<5ou3c0=x>m!pEiLGIF_F(3olku#(9t@->xYIF0z~aliURS0rb+-z%d{L3;|gt zIdU^#fIdI~TbOJm`^opo56A&BPi`Z(lRLb}Se@*!YIO39{ly8CWDBrvM2jEBNw&8K-CnzU@Q8S*FcXYwq0j{JrE zm9(vXqx?>u2QE-9l9$M_`K>(pyVvcjxb|8apG+pH@+b&qa$O5MFFsU;cT z=vZl~<2&8H31nx-K&s`27cNgt=bDa4iC*6%Iu2`hZDr11xAH3Tl=fs2nyM7qOSCc? 
zEol`Qk1e{kG&c8im6R6gvhhW1m)zYdd_M_=U-Lo{s3aN<(q&tTlR%a>_B_1&&%_P; zGG(KSk0LLB^?LsAqnTLQ>4%M$x%xD#(jqJff`bg?HrA3zGG)=Yn_%do$c+7 z({ou>Rys9MlDc^9=7T=5zfb94rEeQ39p9WO3tdb^QQEh%o3h$|6??Dt3tS(`h0-@+;6-X!j?VlKR8mz} literal 0 HcmV?d00001 diff --git a/modules/fs2/src/test/resources/simplelogger.properties b/modules/fs2/src/test/resources/simplelogger.properties new file mode 100644 index 000000000..c4ed0bdd7 --- /dev/null +++ b/modules/fs2/src/test/resources/simplelogger.properties @@ -0,0 +1,13 @@ +org.slf4j.simpleLogger.showThreadName=false +org.slf4j.simpleLogger.showDateTime=true + +org.slf4j.simpleLogger.log.org.http4s.blaze.channel.ServerChannel=off +org.slf4j.simpleLogger.log.org.http4s.blaze.channel.nio1.SelectorLoop=off +org.slf4j.simpleLogger.log.org.http4s.blaze.channel.nio1.NIO1SocketServerGroup=off +org.slf4j.simpleLogger.log.org.http4s.client.PoolManager=off +org.slf4j.simpleLogger.log.org.http4s.server.blaze.BlazeServerBuilder=off + +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.Enrich=info +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.Assets=warn +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.TestEnvironmentinfo=info +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.test.HttpServer=info diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/AssetsSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/AssetsSpec.scala new file mode 100644 index 000000000..12c1e6317 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/AssetsSpec.scala @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. 
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.net.URI +import java.nio.file.Paths + +import scala.concurrent.duration._ + +import fs2.Stream +import fs2.io.file.{exists, readAll} + +import cats.effect.{Blocker, Concurrent, ContextShift, IO, Resource, Timer} + +import com.snowplowanalytics.snowplow.enrich.fs2.test._ +import com.snowplowanalytics.snowplow.enrich.fs2.Assets.Asset + +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class AssetsSpec extends Specification with CatsIO with ScalaCheck { + + sequential + + "updateStream" should { + "not set stop signal if no updates required" in + AssetsSpec.run(1.second) { (state, run) => + run(100.millis, List.empty) *> state.pauseEnrich.get.map { pause => + pause must beFalse + } + } + + "download an asset and leave pauseEnrich signal with false" in { + val path = Paths.get("asset") + val input = List( + (URI.create("http://localhost:8080/asset"), path.toString) + ) + AssetsSpec.run(1500.millis) { (state, run) => + for { + assetExistsBefore <- Blocker[IO].use(b => exists[IO](b, path)) + _ <- run(100.millis, input) + pauseEnrich <- state.pauseEnrich.get + assetExists <- Blocker[IO].use(b => exists[IO](b, path)) + } yield { + assetExistsBefore must beFalse // Otherwise previous execution left the file + pauseEnrich must beFalse + assetExists must beTrue + } + } + } + + "set stop signal to true when long downloads are performed" in { + val input = List( + 
(URI.create("http://localhost:8080/slow"), "asset1"), // First sets stop to true + (URI.create("http://localhost:8080/slow"), "asset2") // Second doesn't allow update to return prematurely + ) + AssetsSpec.run(3.seconds) { (state, run) => + for { + fiber <- (IO.sleep(2.seconds) *> state.pauseEnrich.get).start + _ <- run(500.milliseconds, input) + stop <- fiber.join + } yield stop must beTrue + } + } + + "attempt to re-download non-existing file" in { + val path = Paths.get("flaky-asset") + val input = List( + (URI.create("http://localhost:8080/flaky"), path.toString) + ) + AssetsSpec.run(5.seconds) { (state, run) => + for { + _ <- run(800.millis, input) + stop <- state.pauseEnrich.get + assetExists <- Blocker[IO].use { b => + readAll[IO](path, b, 8).compile.to(Array).map(arr => new String(arr)) + } + } yield { + stop must beFalse + assetExists must beEqualTo("3") + } + } + } + } + + "Hash.fromStream" should { + "always create a valid MD5 hash" in { + prop { (bytes: Array[Byte]) => + val input = Stream.emits(bytes).covary[IO] + Assets.Hash.fromStream(input).map { hash => + hash.s.matches("^[a-f0-9]{32}$") must beTrue + } + } + } + } +} + +object AssetsSpec { + + /** Run assets refresh function with specified refresh interval and list of assets */ + type Run = (FiniteDuration, List[Asset]) => IO[Unit] + + /** + * User-written function to test effects of [[Assets]] stream + * * First argument - state initialised to empty, can be inspected after + * * Second argument - [[Run]] function to specify custom refresh interval and list of assets + */ + type Test[A] = (Assets.State[IO], Run) => IO[A] + + /** + * Run a test with resources allocated specifically for it + * It will allocate thread pool, empty state, HTTP server and will + * automatically remove all files after the test is over + * + * @param time timeout after which the test will be forced to exit + * @param test the actual test suite function + */ + def run[A]( + time: FiniteDuration + )( + test: Test[A] + )( + 
implicit C: Concurrent[IO], + T: Timer[IO], + CS: ContextShift[IO] + ): IO[A] = { + val resources = for { + blocker <- Blocker[IO] + state <- SpecHelpers.refreshState(List(URI.create("http://localhost:8080") -> "index")) + enrichments <- Environment.Enrichments.make[IO](List()) + path <- Resource.liftF(Assets.getCurDir[IO]) + _ <- SpecHelpers.filesResource(blocker, TestFiles) + } yield (blocker, state, enrichments, path) + + resources.use { + case (blocker, state, enrichments, curDir) => + val testFunction: Run = Assets + .updateStream[IO](blocker, state, enrichments, curDir, _, _) + .withHttp + .haltAfter(time) + .compile + .drain + test(state, testFunction) + } + } + + /** List of local files that have to be deleted after every test */ + private val TestFiles = List( + Paths.get("asset"), + Paths.get("asset1"), + Paths.get("asset2"), + Paths.get("flaky-asset") + ) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/EnrichSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/EnrichSpec.scala new file mode 100644 index 000000000..da5b1fab8 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/EnrichSpec.scala @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.time.Instant +import java.util.UUID + +import scala.concurrent.duration._ + +import cats.Applicative +import cats.data.Validated +import cats.implicits._ + +import cats.effect.IO + +import fs2.Stream + +import _root_.io.circe.literal._ + +import org.apache.http.NameValuePair +import org.apache.http.message.BasicNameValuePair + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.Event +import com.snowplowanalytics.snowplow.badrows.{Processor, BadRow, Payload => BadRowPayload} + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.IpLookupsEnrichment +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent + +import com.snowplowanalytics.snowplow.enrich.fs2.EnrichSpec.{Expected, minimalEvent, normalizeResult} +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification + +import cats.effect.testing.specs2.CatsIO + +import org.specs2.scalacheck.Parameters + +class EnrichSpec extends Specification with CatsIO with ScalaCheck { + + sequential + + "enrichWith" should { + "enrich a minimal page_view CollectorPayload event without any enrichments enabled" in { + val expected = minimalEvent + .copy( + etl_tstamp = Some(Instant.ofEpochMilli(SpecHelpers.StaticTime)), + user_ipaddress = Some("175.16.199.0"), + event = Some("page_view"), + event_vendor = Some("com.snowplowanalytics.snowplow"), + event_name = Some("page_view"), + event_format = Some("jsonschema"), + event_version = Some("1-0-0"), + derived_tstamp = Some(Instant.ofEpochMilli(0L)) + ) + + TestEnvironment.ioBlocker.use { blocker => + Enrich + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], blocker, TestEnvironment.igluClient, None, _ => IO.unit)( + EnrichSpec.payload[IO] + ) + 
.map(normalizeResult) + .map { + case List(Validated.Valid(event)) => event must beEqualTo(expected) + case other => ko(s"Expected one valid event, got $other") + } + } + } + + "enrich a randomly generated page view event" in { + implicit val cpGen = PayloadGen.getPageViewArbitrary + prop { (collectorPayload: CollectorPayload) => + val payload = Payload(collectorPayload.toRaw, IO.unit) + TestEnvironment.ioBlocker.use { blocker => + Enrich + .enrichWith(TestEnvironment.enrichmentReg.pure[IO], blocker, TestEnvironment.igluClient, None, _ => IO.unit)(payload) + .map(normalizeResult) + .map { + case List(Validated.Valid(e)) => e.event must beSome("page_view") + case other => ko(s"Expected one valid event, got $other") + } + } + }.setParameters(Parameters(maxSize = 20, minTestsOk = 25)) + } + } + + "enrich" should { + "update metrics with raw, good and bad counters" in { + val input = Stream(Payload(Array.empty[Byte], IO.unit), EnrichSpec.payload[IO]) + TestEnvironment.make(input).use { test => + val enrichStream = Enrich.run[IO](test.env) + val rows = test.bad.dequeue + .either(test.good.dequeue) + .concurrently(enrichStream) + .haltAfter(1.second) + for { + _ <- test.env.pauseEnrich.set(false) + payloads <- rows.compile.toList + _ <- IO.sleep(100.millis) + counter <- test.counter.get + } yield { + counter mustEqual Counter(2L, 1L, 1L, None) + payloads must be like { + case List(Left(_), Right(_)) => ok + case List(Right(_), Left(_)) => ok + case other => ko(s"Expected one bad and one good row, got $other") + } + } + } + } + + "enrich event using refreshing MaxMind DB" in { + // 4 enrichments can update assets: MaxMind, IAB, referer-parser, ua-parser + val input = Stream(EnrichSpec.payload[IO]) ++ Stream.sleep_(2.seconds) ++ Stream(EnrichSpec.payload[IO]) + val ipLookupsConf = IpLookupsEnrichment + .parse( + json"""{ + "name": "ip_lookups", + "vendor": "com.snowplowanalytics.snowplow", + "enabled": true, + "parameters": { + "geo": { + "database": "GeoIP2-City.mmdb", + 
"uri": "http://localhost:8080/maxmind" + } + } + }""", + SchemaKey( + "com.snowplowanalytics.snowplow", + "ip_lookups", + "jsonschema", + SchemaVer.Full(2, 0, 0) + ), + false // Unlike in other tests we actually download it + ) + .getOrElse(throw new RuntimeException("Invalid test configuration")) + + val one = Expected + .copy( + geo_country = Some("CN"), + geo_region = Some("22"), + geo_city = Some("Changchun"), + geo_latitude = Some(43.88), + geo_longitude = Some(125.3228), + geo_region_name = Some("Jilin Sheng"), + geo_timezone = Some("Asia/Harbin") + ) + val two = one.copy(geo_city = Some("Baishan")) + // Third one is Fuyu + + val assetsServer = HttpServer.resource(6.seconds) + (assetsServer *> TestEnvironment.make(input, List(ipLookupsConf))).use { test => + test + .run(_.copy(assetsUpdatePeriod = Some(1800.millis))) + .map { events => + events must containTheSameElementsAs(List(Right(one), Right(two))) + } + } + } + } +} + +object EnrichSpec { + val eventId: UUID = UUID.fromString("deadbeef-dead-beef-dead-beefdead") + + val api: CollectorPayload.Api = + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp2") + val source: CollectorPayload.Source = + CollectorPayload.Source("ssc-0.0.0-test", "UTF-8", Some("collector.snplow.net")) + val context: CollectorPayload.Context = CollectorPayload.Context(None, Some("175.16.199.0"), None, None, List(), None) + val querystring: List[NameValuePair] = List( + new BasicNameValuePair("e", "pv"), + new BasicNameValuePair("eid", eventId.toString) + ) + val colllectorPayload: CollectorPayload = CollectorPayload(api, querystring, None, None, source, context) + def payload[F[_]: Applicative]: Payload[F, Array[Byte]] = + Payload(colllectorPayload.toRaw, Applicative[F].unit) + + def normalize(payload: Payload[IO, EnrichedEvent]) = + Event + .parse(Enrich.encodeEvent(payload.data)) + .map(_.copy(etl_tstamp = Some(Instant.ofEpochMilli(SpecHelpers.StaticTime)))) match { + case Validated.Valid(event) => + Validated.Valid(event) 
+ case Validated.Invalid(error) => + val rawPayload = BadRowPayload.RawPayload(Enrich.encodeEvent(payload.data)) + val badRow = BadRow.LoaderParsingError(Processor("fs2-enrich-test-suite", "x"), error, rawPayload) + Validated.Invalid(badRow) + } + + def normalizeResult(payload: Result[IO]) = + payload.data.map { + case Validated.Valid(a) => normalize(Payload(a, IO.unit)) + case Validated.Invalid(e) => e.invalid + } + + val minimalEvent = Event + .minimal( + EnrichSpec.eventId, + Instant.ofEpochMilli(0L), + "ssc-0.0.0-test", + s"fs2-enrich-${generated.BuildInfo.version}-common-${generated.BuildInfo.version}" + ) + + val Expected = minimalEvent + .copy( + etl_tstamp = Some(Instant.ofEpochMilli(SpecHelpers.StaticTime)), + user_ipaddress = Some("175.16.199.0"), + event = Some("page_view"), + event_vendor = Some("com.snowplowanalytics.snowplow"), + event_name = Some("page_view"), + event_format = Some("jsonschema"), + event_version = Some("1-0-0"), + derived_tstamp = Some(Instant.ofEpochMilli(0L)) + ) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadGen.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadGen.scala new file mode 100644 index 000000000..3e1bba9c9 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadGen.scala @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.nio.file.{Path, Paths} +import java.util.Base64 + +import cats.effect.{Blocker, IO} +import cats.effect.concurrent.Ref + +import _root_.io.circe.literal._ +import fs2.{Chunk, Stream} +import fs2.io.file.{createDirectory, writeAll} + +import org.apache.http.message.BasicNameValuePair + +import org.joda.time.{DateTimeZone, LocalDate} + +import org.scalacheck.{Arbitrary, Gen} +import cats.effect.testing.specs2.CatsIO + +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload + +object PayloadGen extends CatsIO { + + val api: CollectorPayload.Api = + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp2") + val source: CollectorPayload.Source = + CollectorPayload.Source("ssc-0.0.0-test", "UTF-8", Some("collector.snplow.net")) + + val userAgentGen: Gen[String] = for { + os <- Gen.oneOf("Windows NT 10.0; Win64; x64", + "Windows NT 5.1; rv:7.0.1", + "Macintosh; Intel Mac OS X 10_14_5", + "Macintosh; Intel Mac OS X 10_15_4" + ) + engine <- Gen.oneOf("AppleWebKit/603.3.8 (KHTML, like Gecko)", + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169", + "AppleWebKit/605.1.15 (KHTML, like Gecko)" + ) + version <- Gen.oneOf("Version/11.1.2 Safari/605.1.15", "Chrome/60.0.3112.113 Safari/537.36", "Gecko/20100101 Firefox/40.1") + } yield s"Mozilla/5.0 ($os) $engine $version" + + val geolocationGen = for { + latitude <- Gen.choose(-90.0, 90.0) + longitude <- Gen.choose(-180.0, 180.0) + payload = json"""{"latitude":$latitude,"longitude":$longitude}""" + schemaKey = "iglu:com.snowplowanalytics.snowplow/geolocation_context/jsonschema/1-1-0" + } yield json"""{"schema":$schemaKey, "data": $payload}""" + val contextsGen = for { + geo <- Gen.option(geolocationGen).map(_.toList) + schemaKey = "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1" + } yield 
json"""{"schema":$schemaKey, "data": $geo}""" + + val localDateGen: Gen[LocalDate] = Gen.calendar.map(LocalDate.fromCalendarFields).suchThat(_.year().get() < 3000) + val ipGen: Gen[String] = for { + part1 <- Gen.choose(2, 255) + part2 <- Gen.choose(0, 255) + part3 <- Gen.choose(0, 255) + part4 <- Gen.choose(0, 255) + } yield s"$part1.$part2.$part3.$part4" + val contextGen: Gen[CollectorPayload.Context] = for { + timestamp <- localDateGen.map(_.toDateTimeAtStartOfDay(DateTimeZone.UTC)).map(Option.apply) + ip <- Gen.option(ipGen) + userAgent <- userAgentGen.map(x => Some(x)) + userId <- Gen.option(Gen.uuid) + } yield CollectorPayload.Context(timestamp, ip, userAgent, None, List(), userId) + + val getPageView = for { + eventId <- Gen.uuid + aid <- Gen.oneOf("test-app", "scalacheck") + cx <- contextsGen.map(json => Base64.getEncoder.encodeToString(json.noSpaces.getBytes)) + querystring = List( + new BasicNameValuePair("aid", aid), + new BasicNameValuePair("e", "pv"), + new BasicNameValuePair("eid", eventId.toString), + new BasicNameValuePair("cx", cx) + ) + context <- contextGen + } yield CollectorPayload(api, querystring, None, None, source, context) + + val getPageViewArbitrary: Arbitrary[CollectorPayload] = Arbitrary.apply(getPageView) + + val payloadStream = Stream.repeatEval(IO(getPageView.sample)).collect { + case Some(x) => x + } + + def write(dir: Path, cardinality: Long): IO[Unit] = + for { + counter <- Ref.of[IO, Int](0) + dir <- Blocker[IO].use(b => createDirectory[IO](b, dir)) + filename = counter.updateAndGet(_ + 1).map(i => Paths.get(s"${dir.toAbsolutePath}/payload.$i.thrift")) + _ <- Blocker[IO].use { b => + val result = + for { + payload <- payloadStream.take(cardinality) + fileName <- Stream.eval(filename) + _ <- Stream.chunk(Chunk.bytes(payload.toRaw)).through(writeAll[IO](fileName, b)) + } yield () + result.compile.drain + } + } yield () +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadSpec.scala 
b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadSpec.scala new file mode 100644 index 000000000..d154db6c5 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/PayloadSpec.scala @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import cats.implicits._ + +import cats.effect.IO +import cats.effect.concurrent.Ref +import cats.effect.testing.specs2.CatsIO + +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification + +class PayloadSpec extends Specification with CatsIO with ScalaCheck { + "mapWithLast" should { + "always apply a lastF function to the last element" in { + prop { (list: List[String]) => + val lastF: String => String = _ => "unique" + val result = Payload.mapWithLast(identity[String], lastF)(list).toList + result.lastOption must (beSome("unique") or beNone) + } + } + + "always apply an f function to all elements except last" in { + prop { (list: List[String]) => + val f: String => String = _ => "unique" + val result = Payload.mapWithLast(f, identity[String])(list).toList + list match { + case Nil => ok + case _ => + val init = List.fill(list.length - 1)("unique") + result.mkString("-") must startWith(init.mkString("-")) + } + } + } + } + + "decompose" should { 
+ "preserve the order" in { + val input = List("error-1".invalid, 42.valid, "error-2".invalid) + val payload = Payload(input, IO.unit) + payload.decompose[String, Int].compile.toList.map { + case List(error1, valid, error2) => + error1 must beLeft.like { + case Payload(data, _) => data must be("error-1") + } + valid must beRight.like { + case Payload(data, _) => data must beEqualTo(42) + } + error2 must beLeft.like { + case Payload(data, _) => data must be("error-2") + } + case other => + ko(s"Expected list of 3, got $other") + } + } + + "execute finalize action only once" in { + val input = List("error-1".invalid, 42.valid, "error-2".invalid) + for { + ref <- Ref.of[IO, Int](0) + payload = Payload(input, ref.update(_ + 1)) + parsed <- payload.decompose[String, Int].compile.toList + _ <- parsed.traverse_(_.fold(_.finalise, _.finalise)) + result <- ref.get + } yield result must beEqualTo(1) + } + + "not execute finalize action until last element" in { + val input = List("error-1".invalid, 42.valid, "error-2".invalid) + for { + ref <- Ref.of[IO, Int](0) + payload = Payload(input, ref.update(_ + 1)) + parsed <- payload.decompose[String, Int].compile.toList + _ <- parsed.init.traverse_(_.fold(_.finalise, _.finalise)) + result <- ref.get + } yield result must beEqualTo(0) + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/SpecHelpers.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/SpecHelpers.scala new file mode 100644 index 000000000..2c6db6f88 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/SpecHelpers.scala @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. 
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import java.nio.file.{NoSuchFileException, Path} + +import scala.concurrent.duration.TimeUnit + +import cats.effect.{Blocker, Clock, IO, Resource} +import cats.effect.concurrent.Ref + +import cats.implicits._ + +import fs2.io.file.deleteIfExists + +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import cats.effect.testing.specs2.CatsIO + +object SpecHelpers extends CatsIO { + implicit val ioClock: Clock[IO] = + Clock.create[IO] + + val StaticTime = 1599750938180L + + val staticIoClock: Clock[IO] = + new Clock[IO] { + def realTime(unit: TimeUnit): IO[Long] = IO.pure(StaticTime) + def monotonic(unit: TimeUnit): IO[Long] = IO.pure(StaticTime) + } + + def refreshState(uris: List[Assets.Asset]): Resource[IO, Assets.State[IO]] = + for { + b <- TestEnvironment.ioBlocker + stop <- Resource.liftF(Ref.of[IO, Boolean](false)) + state <- Assets.State.make[IO](b, stop, uris) + } yield state + + /** Clean-up predefined list of files */ + def filesCleanup(blocker: Blocker, files: List[Path]): IO[Unit] = + files.traverse_ { path => + deleteIfExists[IO](blocker, path).recover { + case _: NoSuchFileException => false + } + } + + /** Make sure files don't exist before and after test starts */ + def filesResource(blocker: Blocker, files: List[Path]): Resource[IO, Unit] = + Resource.make(filesCleanup(blocker, files))(_ => filesCleanup(blocker, files)) +} diff --git 
a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64HoconSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64HoconSpec.scala new file mode 100644 index 000000000..10f065064 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/Base64HoconSpec.scala @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.util.Base64.getEncoder + +import com.monovore.decline.Argument + +import org.specs2.mutable.Specification + +class Base64HoconSpec extends Specification { + "Argument[Base64Hocon]" should { + "parse a base64-encoded HOCON" in { + val inputStr = """input = {}""" + val input = getEncoder.encodeToString(inputStr.getBytes()) + Argument[Base64Hocon].read(input).toEither must beRight + } + + "fail to parse plain string as HOCON" in { + val inputStr = "+" + val input = getEncoder.encodeToString(inputStr.getBytes()) + Argument[Base64Hocon].read(input).toEither must beLeft + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfigSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfigSpec.scala new file mode 100644 index 000000000..2083a7e37 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/CliConfigSpec.scala @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import cats.syntax.either._ + +import cats.effect.IO + +import org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class CliConfigSpec extends Specification with CatsIO { + "parseHocon" should { + "parse valid HOCON" in { + val string = """ + input = { + type = "PubSub" + subscription = "inputSub" + } + """.stripMargin + Base64Hocon.parseHocon(string) must beRight + } + } + + "ConfigFile.parse" should { + "parse valid HOCON" in { + val hocon = + Base64Hocon + .parseHocon(""" + auth = { + type = "Gcp" + } + input = { + type = "PubSub" + subscription = "projects/test-project/subscriptions/inputSub" + } + good = { + type = "PubSub" + topic = "projects/test-project/topics/good-topic" + } + bad = { + type = "PubSub" + topic = "projects/test-project/topics/bad-topic" + } + """) + .getOrElse(throw new RuntimeException("Cannot parse HOCON file")) + + val expected = ConfigFile( + io.Authentication.Gcp, + io.Input.PubSub("projects/test-project/subscriptions/inputSub"), + io.Output.PubSub("projects/test-project/topics/good-topic"), + io.Output.PubSub("projects/test-project/topics/bad-topic"), + None, + None, + None + ) + + ConfigFile.parse[IO](hocon.asLeft).value.map(result => result must beRight(expected)) + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFileSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFileSpec.scala new file mode 100644 index 000000000..9ebb50f82 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/config/ConfigFileSpec.scala @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. 
+ * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.config + +import java.net.URI +import java.nio.file.Paths + +import scala.concurrent.duration._ + +import cats.syntax.either._ + +import cats.effect.IO + +import _root_.io.circe.literal._ + +import org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class ConfigFileSpec extends Specification with CatsIO { + "parse" should { + "parse valid HOCON file with path provided" in { + val configPath = Paths.get(getClass.getResource("/config.fs2.hocon.sample").toURI) + val expected = ConfigFile( + io.Authentication.Gcp, + io.Input.PubSub("projects/test-project/subscriptions/inputSub"), + io.Output.PubSub("projects/test-project/topics/good-topic"), + io.Output.PubSub("projects/test-project/topics/bad-topic"), + Some(7.days), + Some(Sentry(URI.create("http://sentry.acme.com"))), + Some(1.second) + ) + ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beRight(expected)) + } + + "parse valid 0 minutes as None" in { + val input = + json"""{ + "auth": { + "type": "Gcp" + }, + "input": { + "type": "PubSub", + "subscription": "projects/test-project/subscriptions/inputSub" + }, + "good": { + "type": "PubSub", + "topic": "projects/test-project/topics/good-topic" + }, + "bad": { + "type": "PubSub", + "topic": "projects/test-project/topics/bad-topic" + }, + + "assetsUpdatePeriod": "0 minutes", + "metricsReportPeriod": "10 second" + }""" + + ConfigFile.parse[IO](Base64Hocon(input).asLeft).value.map { + case Left(message) => 
message must contain("assetsUpdatePeriod in config file cannot be less than 0") + case _ => ko("Decoding should have failed") + } + } + + "not throw an exception if file not found" in { + val configPath = Paths.get("does-not-exist") + ConfigFile.parse[IO](configPath.asRight).value.map(result => result must beLeft) + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/ApiRequestEnrichmentSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/ApiRequestEnrichmentSpec.scala new file mode 100644 index 000000000..8bc991c01 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/ApiRequestEnrichmentSpec.scala @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.enrichments + +import java.util.Base64 + +import scala.concurrent.duration._ + +import org.apache.http.message.BasicNameValuePair + +import cats.implicits._ + +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO + +import fs2.Stream + +import io.circe.Json +import io.circe.literal._ + +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.Contexts + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.ApiRequestConf +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest.{ + Authentication, + Cache, + HttpApi, + Input, + JsonOutput, + Output +} + +import com.snowplowanalytics.snowplow.enrich.fs2.enrichments.ApiRequestEnrichmentSpec.unstructEvent +import com.snowplowanalytics.snowplow.enrich.fs2.{EnrichSpec, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import org.specs2.mutable.Specification + +class ApiRequestEnrichmentSpec extends Specification with CatsIO { + + sequential + + "ApiRequestEnrichment" should { + "add a derived context" in { + val event = + json"""{ + "schema": "iglu:com.acme/test/jsonschema/1-0-1", + "data": {"path": {"id": 3}} + }""" + val payload = EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ue_px", unstructEvent(event)) :: EnrichSpec.colllectorPayload.querystring + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) + + /** Schemas defined at [[SchemaRegistry]] */ + val enrichment = ApiRequestConf( + SchemaKey("com.acme", "enrichment", "jsonschema", SchemaVer.Full(1, 0, 0)), + List(Input.Json("key1", "unstruct_event", SchemaCriterion("com.acme", "test", "jsonschema", 1), "$.path.id")), + HttpApi("GET", "http://localhost:8080/enrichment/api/{{key1}}", 2000, Authentication(None)), + List(Output("iglu:com.acme/output/jsonschema/1-0-0", 
Some(JsonOutput("$")))), + Cache(1, 1000) + ) + + val expected = Contexts( + List( + SelfDescribingData( + SchemaKey("com.acme", "output", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"output": "3"}""" + ) + ) + ) + + val testWithHttp = HttpServer.resource(4.seconds) *> TestEnvironment.make(input, List(enrichment)) + testWithHttp.use { test => + test.run().map { events => + events must beLike { + case List(Right(event)) => + event.derived_contexts must beEqualTo(expected) + case other => ko(s"Expected one enriched event, got $other") + } + } + } + } + } +} + +object ApiRequestEnrichmentSpec { + private val encoder = Base64.getEncoder + + def encode(json: Json): String = + new String(encoder.encode(json.noSpaces.getBytes)) + + def unstructEvent(json: Json): String = + encode(json"""{"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":$json}""") +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/IabEnrichmentSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/IabEnrichmentSpec.scala new file mode 100644 index 000000000..a0f7e5fba --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/IabEnrichmentSpec.scala @@ -0,0 +1,102 @@ +package com.snowplowanalytics.snowplow.enrich.fs2.enrichments + +import java.net.URI + +import scala.concurrent.duration._ + +import cats.syntax.apply._ +import cats.syntax.option._ + +import cats.effect.IO + +import io.circe.literal._ + +import fs2.Stream + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.Contexts +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf +import com.snowplowanalytics.snowplow.enrich.fs2.{EnrichSpec, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.test.{HttpServer, TestEnvironment} + +import 
org.specs2.mutable.Specification +import cats.effect.testing.specs2.CatsIO + +class IabEnrichmentSpec extends Specification with CatsIO { + + sequential + + "IabEnrichment" should { + "recognize a robot by IP address" in { + val payload = EnrichSpec.colllectorPayload.copy( + context = EnrichSpec.colllectorPayload.context.copy( + useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0".some + ) + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) + val expected = Contexts( + List( + SelfDescribingData( + SchemaKey("com.iab.snowplow", "spiders_and_robots", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"spiderOrRobot":true,"category":"SPIDER_OR_ROBOT","reason":"FAILED_IP_EXCLUDE","primaryImpact":"UNKNOWN"}""" + ) + ) + ) + val testWithHttp = HttpServer.resource(6.seconds) *> TestEnvironment.make(input, List(IabEnrichmentSpec.enrichmentConf)) + testWithHttp.use { test => + test.run().map { + case List(Right(event)) => + event.derived_contexts must beEqualTo(expected) + case other => + ko(s"Expected one valid event, got $other") + } + } + } + + "refresh assets" in { + val payload = EnrichSpec.colllectorPayload.copy( + context = EnrichSpec.colllectorPayload.context.copy( + useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0".some + ) + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) ++ Stream.sleep_(2.seconds) ++ Stream(Payload(payload.toRaw, IO.unit)) + + val expectedOne = Contexts( + List( + SelfDescribingData( + SchemaKey("com.iab.snowplow", "spiders_and_robots", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"spiderOrRobot":true,"category":"SPIDER_OR_ROBOT","reason":"FAILED_IP_EXCLUDE","primaryImpact":"UNKNOWN"}""" + ) + ) + ) + val expectedTwo = Contexts( + List( + SelfDescribingData( + SchemaKey("com.iab.snowplow", "spiders_and_robots", "jsonschema", SchemaVer.Full(1, 0, 0)), + json"""{"spiderOrRobot":false,"category":"BROWSER","reason":"PASSED_ALL","primaryImpact":"NONE"}""" + ) + 
) + ) + + val testWithHttp = HttpServer.resource(6.seconds) *> TestEnvironment.make(input, List(IabEnrichmentSpec.enrichmentConf)) + testWithHttp.use { test => + test.run(_.copy(assetsUpdatePeriod = Some(1800.millis))).map { + case List(Right(eventOne), Right(eventTwo)) => + List(eventOne.derived_contexts, eventTwo.derived_contexts) must containTheSameElementsAs(List(expectedOne, expectedTwo)) + case other => + ko(s"Expected two valid events, got $other") + } + } + } + } +} + +object IabEnrichmentSpec { + val enrichmentConf = EnrichmentConf.IabConf( + SchemaKey("com.acme", "enrichment", "jsonschema", SchemaVer.Full(1, 0, 0)), + (URI.create("http://localhost:8080/iab/ip"), "ip"), + (URI.create("http://localhost:8080/iab/exclude"), "exclude"), + (URI.create("http://localhost:8080/iab/include"), "include") + ) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/YauaaEnrichmentSpec.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/YauaaEnrichmentSpec.scala new file mode 100644 index 000000000..9acf00fa5 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/enrichments/YauaaEnrichmentSpec.scala @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.enrichments + +import cats.implicits._ + +import cats.effect.IO +import cats.effect.testing.specs2.CatsIO + +import fs2.Stream + +import io.circe.literal._ + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} + +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.Contexts + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf.YauaaConf +import com.snowplowanalytics.snowplow.enrich.fs2.{EnrichSpec, Payload} +import com.snowplowanalytics.snowplow.enrich.fs2.test._ + +import org.specs2.mutable.Specification + +class YauaaEnrichmentSpec extends Specification with CatsIO { + + sequential + + "YauaaEnrichment" should { + "add a derived context" in { + val payload = EnrichSpec.colllectorPayload.copy( + context = EnrichSpec.colllectorPayload.context.copy( + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:81.0) Gecko/20100101 Firefox/81.0".some + ) + ) + val input = Stream(Payload(payload.toRaw, IO.unit)) + + /** Schemas defined at [[SchemaRegistry]] */ + val enrichment = YauaaConf( + SchemaKey("com.acme", "enrichment", "jsonschema", SchemaVer.Full(1, 0, 0)), + Some(1) + ) + + val expected = Contexts( + List( + SelfDescribingData( + SchemaKey("nl.basjes", "yauaa_context", "jsonschema", SchemaVer.Full(1, 0, 1)), + json"""{ + "deviceBrand" : "Apple", + "deviceName" : "Apple Macintosh", + "operatingSystemVersionMajor" : "10", + "layoutEngineNameVersion" : "Gecko 81.0", + "operatingSystemNameVersion" : "Mac OS X 10.14", + "layoutEngineBuild" : "20100101", + "layoutEngineNameVersionMajor" : "Gecko 81", + "operatingSystemName" : "Mac OS X", + "agentVersionMajor" : "81", + "layoutEngineVersionMajor" : "81", + "deviceClass" : "Desktop", + "agentNameVersionMajor" : "Firefox 81", + "operatingSystemNameVersionMajor" : "Mac OS X 10", + "deviceCpuBits" : "32", + "operatingSystemClass" : "Desktop", + "layoutEngineName" : "Gecko", + 
"agentName" : "Firefox", + "agentVersion" : "81.0", + "layoutEngineClass" : "Browser", + "agentNameVersion" : "Firefox 81.0", + "operatingSystemVersion" : "10.14", + "deviceCpu" : "Intel", + "agentClass" : "Browser", + "layoutEngineVersion" : "81.0" + }""" + ) + ) + ) + + TestEnvironment.make(input, List(enrichment)).use { test => + test.run().map { events => + events must beLike { + case List(Right(event)) => + event.derived_contexts must beEqualTo(expected) + case other => ko(s"Expected one enriched event, got $other") + } + } + } + } + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/Counter.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/Counter.scala new file mode 100644 index 000000000..62522f9db --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/Counter.scala @@ -0,0 +1,47 @@ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import java.util.concurrent.TimeUnit + +import cats.Monad +import cats.syntax.flatMap._ + +import cats.effect.concurrent.Ref +import cats.effect.{Clock, Sync} + +import com.snowplowanalytics.snowplow.enrich.fs2.io.Metrics + +/** Metrics container for testing */ +case class Counter( + raw: Long, + good: Long, + bad: Long, + latency: Option[Long] +) + +object Counter { + val empty: Counter = Counter(0L, 0L, 0L, None) + + def make[F[_]: Sync]: F[Ref[F, Counter]] = + Ref.of[F, Counter](empty) + + /** Create a pure metrics with mutable state */ + def mkCounterMetrics[F[_]: Monad: Clock](ref: Ref[F, Counter]): Metrics[F] = + new Metrics[F] { + def report: F[Unit] = + Monad[F].unit + + def enrichLatency(collectorTstamp: Option[Long]): F[Unit] = + Clock[F].realTime(TimeUnit.MILLISECONDS).flatMap { now => + ref.update(_.copy(latency = collectorTstamp.map(ct => now - ct))) + } + + def rawCount: F[Unit] = + ref.update(cnt => cnt.copy(raw = cnt.raw + 1)) + + def goodCount: F[Unit] = + ref.update(cnt => cnt.copy(good = 
cnt.good + 1)) + + def badCount: F[Unit] = + ref.update(cnt => cnt.copy(bad = cnt.bad + 1)) + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/HttpServer.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/HttpServer.scala new file mode 100644 index 000000000..80bdfa38e --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/HttpServer.scala @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2012-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import scala.concurrent.duration._ + +import cats.implicits._ + +import cats.effect.{Blocker, Fiber, IO, Resource} +import cats.effect.concurrent.Ref + +import io.circe.literal._ + +import fs2.Stream +import fs2.io.readInputStream + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +import org.http4s.HttpRoutes +import org.http4s.Method.GET +import org.http4s.server.blaze.BlazeServerBuilder +import org.http4s.dsl.io._ +import org.http4s.syntax.all._ + +import cats.effect.testing.specs2.CatsIO + +/** + * Embedded HTTP Server for testing, mostly for assets refresh, + * but can serve + */ +object HttpServer extends CatsIO { + + private val logger: Logger[IO] = + Slf4jLogger.getLogger[IO] + + /** + * Set of testing routes: + * * Plain data + * * Imitating slow connection + * * Frequently updating resource + * * Sometimes non-working resource + * + * @param counter mutable variable with counter updated on every request + */ + def routes(counter: Ref[IO, Int]): HttpRoutes[IO] = + HttpRoutes + .of[IO] { + case r @ GET -> Root / "asset" => + logger.debug(r.pathInfo) *> Ok("data") + case r @ GET -> Root / "slow" => + val action = for { + i <- counter.updateAndGet(_ + 1) + _ <- if (i == 1) IO.sleep(100.milliseconds) else IO.sleep(10.seconds) + res <- Ok(s"slow data $i") + } yield res + logger.debug(r.pathInfo) *> action + case r @ GET -> Root / "counter" => + logger.debug(r.pathInfo) *> counter.updateAndGet(_ + 1).flatMap { i => + Ok(s"counter $i") + } + case r @ GET -> Root / "flaky" => + logger.debug(r.pathInfo) *> counter.update(_ + 1) *> + counter.get.flatMap { i => + val s = i.toString + if (i == 1 || i == 2) NotFound(s) + else if (i == 3) Ok(s) + else NotFound(s) + } + case GET -> Root / "maxmind" / "GeoIP2-City.mmdb" => + counter.updateAndGet(_ + 1).flatMap { i => + val is = readMaxMindDb(i) + Ok(Blocker[IO].use(b => readInputStream[IO](is, 256, 
b).compile.to(Array))) + } + case GET -> Root / "iab" / file => + counter.updateAndGet(_ + 1).flatMap { i => + file match { + case "include" => Ok("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0|1|1") + case "exclude" => Ok("") + case "ip" if i == 1 => Ok("175.16.199.0/32") + case "ip" => Ok("175.1.1.0/32") + case other => + println(s"Not Found ${other}") + NotFound(other) + } + } + case GET -> Root / "enrichment" / "api" / output => + counter.updateAndGet(_ + 1).flatMap { _ => + Ok(json"""{"output": $output}""".noSpaces) + } + } + + def run: Stream[IO, Unit] = + for { + counter <- Stream.eval(Ref.of[IO, Int](0)) + stream <- BlazeServerBuilder[IO](concurrent.ExecutionContext.global) + .bindHttp(8080) + .withHttpApp(routes(counter).orNotFound) + .withoutBanner + .withoutSsl + .serve + .void + } yield stream + + /** + * Run HTTP server for some time and destroy afterwards + * @param duration how long the server should be running + * recommended test stream duration + 1 second, + * especially if asset stream used + */ + def resource(duration: FiniteDuration): Resource[IO, Fiber[IO, Unit]] = + Resource.make { + run + .haltAfter(duration) + .compile + .drain + .start + .flatTap(_ => IO.sleep(500.millis) *> logger.info("Running test HttpServer")) + }(_.cancel *> logger.info("Destroyed test HttpServer")) + + private def readMaxMindDb(req: Int) = { + val path = + if (req < 4) s"/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-$req.mmdb" + else s"/com.snowplowanalytics.snowplow.enrich.fs2/assets-refresh/geoip2-city-3.mmdb" + IO(getClass.getResourceAsStream(path)) + } +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/SchemaRegistry.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/SchemaRegistry.scala new file mode 100644 index 000000000..6604575a9 --- /dev/null +++ 
b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/SchemaRegistry.scala @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import io.circe.Json +import io.circe.literal._ + +import com.snowplowanalytics.iglu.core.SelfDescribingSchema +import com.snowplowanalytics.iglu.core.circe.implicits._ + +/** + * In-memory test registry to avoid unnecessary HTTP and FS IO. 
All schemas used in [[TestEnvironment]] + * Iglu Client + */ +object SchemaRegistry { + val acmeTest: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "test", + "format": "jsonschema", + "version": "1-0-1" + }, + "properties": { + "path": { + "properties": { + "id": { + "type": "integer" + } + } + } + } + }""" + + val acmeOutput: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.acme", + "name": "output", + "format": "jsonschema", + "version": "1-0-0" + }, + "properties": { + "output": { + "type": "string" + } + } + }""" + + // Defined on Iglu Central + val unstructEvent: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "unstruct_event", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" + }, + "data": {} + }, + "required": ["schema", "data"], + "additionalProperties": false + }""" + + // Defined on Iglu Central + val contexts: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "contexts", + "format": "jsonschema", + "version": "1-0-1" + }, + "type": "array", + "items": { + "type": "object", + "properties": { + "schema": { + "type": "string", + "pattern": "^iglu:[a-zA-Z0-9-_.]+/[a-zA-Z0-9-_]+/[a-zA-Z0-9-_]+/[0-9]+-[0-9]+-[0-9]+$$" + }, + "data": {} + }, + "required": ["schema", "data"], + "additionalProperties": false + } + 
}""" + + // Defined on Iglu Central + val geolocationContext: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.snowplowanalytics.snowplow", + "name": "geolocation_context", + "format": "jsonschema", + "version": "1-1-0" + }, + "type": "object", + "properties": { + "latitude": { "type": "number", "minimum": -90, "maximum": 90 }, + "longitude": { "type": "number", "minimum": -180, "maximum": 180 }, + "latitudeLongitudeAccuracy": { "type": ["number", "null"] }, + "altitude": { "type": ["number", "null"] }, + "altitudeAccuracy": { "type": ["number", "null"] }, + "bearing": { "type": ["number", "null"] }, + "speed": { "type": ["number", "null"] }, + "timestamp": { "type": ["integer", "null"] } + }, + "required": ["latitude", "longitude"], + "additionalProperties": false + }""" + + // Defined on Iglu Central + val iabAbdRobots: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "com.iab.snowplow", + "name": "spiders_and_robots", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "spiderOrRobot": {"type": "boolean" }, + "category": {"enum": ["SPIDER_OR_ROBOT", "ACTIVE_SPIDER_OR_ROBOT", "INACTIVE_SPIDER_OR_ROBOT", "BROWSER"]}, + "reason": {"enum": ["FAILED_IP_EXCLUDE", "FAILED_UA_INCLUDE", "FAILED_UA_EXCLUDE", "PASSED_ALL"]}, + "primaryImpact": {"enum": ["PAGE_IMPRESSIONS", "AD_IMPRESSIONS", "PAGE_AND_AD_IMPRESSIONS", "UNKNOWN", "NONE"]} + }, + "required": ["spiderOrRobot", "category", "reason", "primaryImpact"], + "additionalProperties": false + }""" + + val yauaaContext: SelfDescribingSchema[Json] = + json"""{ + "$$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "self": { + "vendor": "nl.basjes", + "name": "yauaa_context", + "format": 
"jsonschema", + "version": "1-0-1" + }, + "type": "object", + "properties": { + "deviceClass": {"enum":["Desktop","Anonymized","Unknown","UNKNOWN","Mobile","Tablet","Phone","Watch","Virtual Reality","eReader","Set-top box","TV","Game Console","Handheld Game Console","Voice","Robot","Robot Mobile","Spy","Hacker"]}, + "deviceName": {"type":"string","maxLength": 100 }, + "deviceBrand": {"type":"string","maxLength": 50 }, + "deviceCpu": {"type":"string","maxLength": 50 }, + "deviceCpuBits": {"type":"string","maxLength": 20 }, + "deviceFirmwareVersion": {"type":"string","maxLength": 100 }, + "deviceVersion": {"type":"string","maxLength": 100 }, + "operatingSystemClass": {"enum":["Desktop","Mobile","Cloud","Embedded","Game Console","Hacker","Anonymized","Unknown"] }, + "operatingSystemName": {"type":"string","maxLength": 100 }, + "operatingSystemVersion": {"type":"string","maxLength": 50 }, + "operatingSystemNameVersion": {"type":"string","maxLength": 150 }, + "operatingSystemVersionBuild": {"type":"string","maxLength": 100 }, + "layoutEngineClass": {"enum":["Browser", "Mobile App", "Hacker", "Robot", "Unknown"] }, + "layoutEngineName": {"type":"string","maxLength": 100 }, + "layoutEngineVersion": {"type":"string","maxLength": 50 }, + "layoutEngineVersionMajor": {"type":"string","maxLength": 20 }, + "layoutEngineNameVersion": {"type":"string","maxLength": 150 }, + "layoutEngineNameVersionMajor": {"type":"string","maxLength": 120 }, + "layoutEngineBuild": {"type":"string","maxLength": 100 }, + "agentClass": {"enum":["Browser", "Browser Webview", "Mobile App", "Robot", "Robot Mobile", "Cloud Application", "Email Client", "Voice", "Special", "Testclient", "Hacker", "Unknown"] }, + "agentName": {"type":"string","maxLength": 100 }, + "agentVersion": {"type":"string","maxLength": 100 }, + "agentVersionMajor": {"type":"string","maxLength": 20 }, + "agentNameVersion": {"type":"string","maxLength": 200 }, + "agentNameVersionMajor": {"type":"string","maxLength": 120 }, + 
"agentBuild": {"type":"string","maxLength": 100 }, + "agentLanguage": {"type":"string","maxLength": 50 }, + "agentLanguageCode": {"type":"string","maxLength": 20 }, + "agentInformationEmail": {"type":"string","format": "email" }, + "agentInformationUrl": {"type":"string"}, + "agentSecurity": {"type":"string","enum":["Weak security", "Strong security", "Unknown", "Hacker"] }, + "agentUuid": {"type":"string"}, + "webviewAppName": {"type":"string"}, + "webviewAppVersion": {"type":"string"}, + "webviewAppVersionMajor": {"type":"string","maxLength":50}, + "webviewAppNameVersionMajor": {"type":"string","maxLength":50}, + "facebookCarrier": {"type":"string"}, + "facebookDeviceClass": {"type":"string","maxLength":1024}, + "facebookDeviceName": {"type":"string","maxLength":1024}, + "facebookDeviceVersion": {"type":"string"}, + "facebookFBOP": {"type":"string"}, + "facebookFBSS": {"type":"string"}, + "facebookOperatingSystemName": {"type":"string"}, + "facebookOperatingSystemVersion": {"type":"string"}, + "anonymized": {"type":"string"}, + "hackerAttackVector": {"type":"string"}, + "hackerToolkit": {"type":"string"}, + "koboAffiliate": {"type":"string"}, + "koboPlatformId": {"type":"string"}, + "iECompatibilityVersion": {"type":"string","maxLength":100}, + "iECompatibilityVersionMajor": {"type":"string","maxLength":50}, + "iECompatibilityNameVersion": {"type":"string","maxLength":50}, + "iECompatibilityNameVersionMajor": {"type":"string","maxLength":70}, + "carrier": {"type":"string"}, + "gSAInstallationID": {"type":"string"}, + "networkType": {"type":"string"}, + "operatingSystemNameVersionMajor": {"type":"string"}, + "operatingSystemVersionMajor": {"type":"string"} + }, + "required": ["deviceClass"], + "additionalProperties": false + }""" + + private[test] implicit def jsonToSchema(json: Json): SelfDescribingSchema[Json] = + SelfDescribingSchema.parse(json).getOrElse(throw new IllegalStateException("InMemory SchemaRegistry JSON cannot be parsed as schema")) +} diff --git 
a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/TestEnvironment.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/TestEnvironment.scala new file mode 100644 index 000000000..1430a1f86 --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/TestEnvironment.scala @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.fs2.test + +import java.nio.file.Paths + +import scala.concurrent.duration._ + +import cats.Monad +import cats.syntax.either._ + +import cats.effect.{Blocker, Concurrent, ContextShift, IO, Resource, Timer} +import cats.effect.concurrent.Ref + +import io.circe.Json + +import fs2.concurrent.Queue + +import com.snowplowanalytics.iglu.client.{CirceValidator, Client, Resolver} +import com.snowplowanalytics.iglu.client.resolver.registries.Registry + +import com.snowplowanalytics.snowplow.badrows.BadRow +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.EnrichmentConf +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.fs2.{Assets, Enrich, EnrichSpec, Environment, Payload, RawSource} +import com.snowplowanalytics.snowplow.enrich.fs2.Environment.Enrichments +import com.snowplowanalytics.snowplow.enrich.fs2.SpecHelpers.{filesResource, ioClock} +import cats.effect.testing.specs2.CatsIO + +import com.snowplowanalytics.snowplow.analytics.scalasdk.Event + +import io.chrisdavenport.log4cats.Logger +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +case class TestEnvironment( + env: Environment[IO], + counter: Ref[IO, Counter], + good: Queue[IO, Payload[IO, EnrichedEvent]], + bad: Queue[IO, Payload[IO, BadRow]] +) { + + /** + * Run all streams for 5 seconds and get produced events as a list + * Assets and metrics streams are empty by default, can be enabled + * by updating default [[Environment]] + * If assets stream is enabled, first events get enriched with assets + * downloaded by [[Assets.State.make]], not by [[Assets.run]] + * @param updateEnv function to update an environment created by + * [[TestEnvironment.make]] + */ + def run( + updateEnv: Environment[IO] => Environment[IO] = identity + )( + implicit C: Concurrent[IO], + CS: 
ContextShift[IO], + T: Timer[IO] + ): IO[List[Either[BadRow, Event]]] = { + val updatedEnv = updateEnv(env) + + val pauses = updatedEnv.pauseEnrich.discrete.evalMap(p => TestEnvironment.logger.info(s"Pause signal is $p")) + val stream = Enrich.run[IO](updatedEnv).merge(Assets.run[IO](updatedEnv)).merge(pauses) + bad.dequeue + .either(good.dequeue) + .concurrently(stream) + .haltAfter(5.seconds) + .compile + .toList + .map { rows => + rows.map(_.fold(_.data.asLeft, event => EnrichSpec.normalize(event).toEither)) + } + } +} + +object TestEnvironment extends CatsIO { + + val logger: Logger[IO] = + Slf4jLogger.getLogger[IO] + + val enrichmentReg: EnrichmentRegistry[IO] = + EnrichmentRegistry[IO]() + val enrichments: Environment.Enrichments[IO] = + Environment.Enrichments(enrichmentReg, Nil) + + val ioBlocker: Resource[IO, Blocker] = Blocker[IO] + + val embeddedRegistry = + Registry.InMemory( + Registry.Config("fs2-enrich embedded test registry", 1, List("com.acme")), + List( + SchemaRegistry.unstructEvent, + SchemaRegistry.contexts, + SchemaRegistry.geolocationContext, + SchemaRegistry.iabAbdRobots, + SchemaRegistry.yauaaContext, + SchemaRegistry.acmeTest, + SchemaRegistry.acmeOutput + ) + ) + val igluClient: Client[IO, Json] = + Client[IO, Json](Resolver(List(embeddedRegistry), None), CirceValidator) + + /** + * A dummy test environment without enrichmenta and with noop sinks and sources + * One can replace stream and sinks via `.copy` + */ + def make(source: RawSource[IO], enrichments: List[EnrichmentConf] = Nil): Resource[IO, TestEnvironment] = + for { + blocker <- ioBlocker + _ <- filesResource(blocker, enrichments.flatMap(_.filesToCache).map(p => Paths.get(p._2))) + counter <- Resource.liftF(Counter.make[IO]) + goodQueue <- Resource.liftF(Queue.unbounded[IO, Payload[IO, EnrichedEvent]]) + badQueue <- Resource.liftF(Queue.unbounded[IO, Payload[IO, BadRow]]) + metrics = Counter.mkCounterMetrics[IO](counter)(Monad[IO], ioClock) + pauseEnrich <- 
Environment.makePause[IO] + assets <- Assets.State.make(blocker, pauseEnrich, enrichments.flatMap(_.filesToCache)) + _ <- Resource.liftF(logger.info("AssetsState initialized")) + enrichmentsRef <- Enrichments.make[IO](enrichments) + environment = Environment[IO](igluClient, + enrichmentsRef, + pauseEnrich, + assets, + blocker, + source, + goodQueue.enqueue, + badQueue.enqueue, + None, + metrics, + None, + None + ) + _ <- Resource.liftF(pauseEnrich.set(false) *> logger.info("TestEnvironment initialized")) + } yield TestEnvironment(environment, counter, goodQueue, badQueue) +} diff --git a/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/package.scala b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/package.scala new file mode 100644 index 000000000..4ae834cdc --- /dev/null +++ b/modules/fs2/src/test/scala/com/snowplowanalytics/snowplow/enrich/fs2/test/package.scala @@ -0,0 +1,24 @@ +package com.snowplowanalytics.snowplow.enrich.fs2 + +import scala.concurrent.duration.FiniteDuration + +import cats.effect.{Concurrent, IO, Timer} + +import _root_.fs2.Stream + +package object test { + + implicit class StreamOps[F[_], A](s: Stream[F, A]) { + + /** Halting a stream after specified period of time */ + def haltAfter(after: FiniteDuration)(implicit T: Timer[F], C: Concurrent[F]): Stream[F, A] = + Stream.eval_(Timer[F].sleep(after)).mergeHaltL(s) + } + + implicit class StreamIoOps[A](s: Stream[IO, A]) { + + /** Run test [[HttpServer]] in parallel with the stream */ + def withHttp(implicit C: Concurrent[IO]): Stream[IO, A] = + s.concurrently(HttpServer.run) + } +} diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala index 6b73f77a0..93faea755 100644 --- a/project/BuildSettings.scala +++ b/project/BuildSettings.scala @@ -38,7 +38,8 @@ object BuildSettings { scalaVersion := "2.12.11", version := "1.3.2", javacOptions := Seq("-source", "11", "-target", "11"), - resolvers ++= Dependencies.resolutionRepos + 
resolvers ++= Dependencies.resolutionRepos, + licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), ) /** Custom sbt-buildinfo replacement, used by SCE only */ @@ -62,7 +63,6 @@ object BuildSettings { publishMavenStyle := true, publishArtifact := true, publishArtifact in Test := false, - licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), bintrayOrganization := Some("snowplow"), bintrayRepository := "snowplow-maven", pomIncludeRepository := { _ => false }, @@ -102,6 +102,9 @@ object BuildSettings { lazy val sbtAssemblySettings = Seq( assemblyJarName in assembly := { s"${moduleName.value}-${version.value}.jar" }, assemblyMergeStrategy in assembly := { + case x if x.endsWith("native-image.properties") => MergeStrategy.first + case x if x.endsWith("io.netty.versions.properties") => MergeStrategy.first + case x if x.endsWith("public-suffix-list.txt") => MergeStrategy.first case x if x.endsWith("ProjectSettings$.class") => MergeStrategy.first case x if x.endsWith("module-info.class") => MergeStrategy.first case x => diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 3f545aa97..782ca655b 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -47,6 +47,7 @@ object Dependencies { val maxmindIplookups = "0.7.1" val circe = "0.13.0" val circeOptics = "0.13.0" + val circeConfig = "0.7.0" val circeJackson = "0.13.0" val scalaForex = "1.0.0" val scalaWeather = "1.0.0" @@ -67,11 +68,23 @@ object Dependencies { val jackson = "2.10.5" val config = "1.3.4" + val decline = "1.0.0" + val fs2 = "2.4.4" + val catsEffect = "2.2.0" + val fs2PubSub = "0.16.1" + val fs2BlobStorage = "0.7.3" + val http4s = "0.21.7" + val log4cats = "1.1.1" + val catsRetry = "1.1.1" + val metrics = "4.1.12.1" + val scopt = "3.7.1" val pureconfig = "0.11.0" + val pureconfig013 = "0.13.0" val snowplowTracker = "0.6.1" val specs2 = "4.5.1" + val specs2CE = "0.4.1" val scalacheck = "1.14.0" val jinJava = 
"2.5.0" @@ -98,10 +111,12 @@ object Dependencies { val circeCore = "io.circe" %% "circe-core" % V.circe val circeGeneric = "io.circe" %% "circe-generic" % V.circe + val circeExtras = "io.circe" %% "circe-generic-extras" % V.circe val circeParser = "io.circe" %% "circe-parser" % V.circe val circeLiteral = "io.circe" %% "circe-literal" % V.circe val circeJava8 = "io.circe" %% "circe-java8" % V.circe val circeJawn = "io.circe" %% "circe-jawn" % V.circe + val circeConfig = "io.circe" %% "circe-config" % V.circeConfig val circeOptics = "io.circe" %% "circe-optics" % V.circeOptics val circeJackson = "io.circe" %% "circe-jackson210" % V.circeJackson val scalaUri = "io.lemonlabs" %% "scala-uri" % V.scalaUri @@ -121,6 +136,7 @@ object Dependencies { val specs2Cats = "org.specs2" %% "specs2-cats" % V.specs2 % Test val specs2Scalacheck = "org.specs2" %% "specs2-scalacheck" % V.specs2 % Test val specs2Mock = "org.specs2" %% "specs2-mock" % V.specs2 % Test + val specs2CE = "com.codecommit" %% "cats-effect-testing-specs2" % V.specs2CE % Test // Beam val sentry = "io.sentry" % "sentry" % V.sentry @@ -143,9 +159,26 @@ object Dependencies { val scopt = "com.github.scopt" %% "scopt" % V.scopt val pureconfig = "com.github.pureconfig" %% "pureconfig" % V.pureconfig val nsqClient = "com.snowplowanalytics" % "nsq-java-client" % V.nsqClient + val catsEffect = "org.typelevel" %% "cats-effect" % V.catsEffect val snowplowTracker = "com.snowplowanalytics" %% "snowplow-scala-tracker-emitter-id" % V.snowplowTracker val scalacheck = "org.scalacheck" %% "scalacheck" % V.scalacheck % Test val kafka = "org.apache.kafka" %% "kafka" % V.kafka % Test val jinJava = "com.hubspot.jinjava" % "jinjava" % V.jinJava % Test + + // FS2 + val decline = "com.monovore" %% "decline" % V.decline + val fs2PubSub = "com.permutive" %% "fs2-google-pubsub-grpc" % V.fs2PubSub + val fs2 = "co.fs2" %% "fs2-core" % V.fs2 + val fs2Io = "co.fs2" %% "fs2-io" % V.fs2 + val http4sClient = "org.http4s" %% "http4s-blaze-client" % 
V.http4s + val log4cats = "io.chrisdavenport" %% "log4cats-slf4j" % V.log4cats + val catsRetry = "com.github.cb372" %% "cats-retry" % V.catsRetry + val fs2BlobS3 = "com.github.fs2-blobstore" %% "s3" % V.fs2BlobStorage + val fs2BlobGcs = "com.github.fs2-blobstore" %% "gcs" % V.fs2BlobStorage + val pureconfigCats = "com.github.pureconfig" %% "pureconfig-cats-effect" % V.pureconfig + val pureconfigCirce = "com.github.pureconfig" %% "pureconfig-circe" % V.pureconfig + val metrics = "io.dropwizard.metrics" % "metrics-core" % V.metrics + val http4sDsl = "org.http4s" %% "http4s-dsl" % V.http4s % Test + val http4sServer = "org.http4s" %% "http4s-blaze-server" % V.http4s % Test } } From 0f250f94c0d128ce1ba0e700195faf59161891d6 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Sat, 10 Oct 2020 15:18:36 +0300 Subject: [PATCH 35/38] Common: add benchmarking module (close #370) --- build.sbt | 5 + modules/bench/build.sbt | 6 + .../test/resources/simplelogger.properties | 2 + .../EnrichBench.scala | 110 ++++++++++++++++++ .../EtlPipelineBench.scala | 85 ++++++++++++++ .../ThriftLoaderBench.scala | 55 +++++++++ project/plugins.sbt | 1 + 7 files changed, 264 insertions(+) create mode 100644 modules/bench/build.sbt create mode 100644 modules/bench/src/test/resources/simplelogger.properties create mode 100644 modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala create mode 100644 modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala create mode 100644 modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala diff --git a/build.sbt b/build.sbt index d2a5d921c..0dd1566b0 100644 --- a/build.sbt +++ b/build.sbt @@ -233,3 +233,8 @@ lazy val fs2 = project .enablePlugins(BuildInfoPlugin) .settings(BuildSettings.dockerSettings) .enablePlugins(BuildInfoPlugin, JavaAppPackaging, DockerPlugin) + +lazy val bench = project + .in(file("modules/bench")) + .dependsOn(fs2 % 
"test->test") + .enablePlugins(JmhPlugin) diff --git a/modules/bench/build.sbt b/modules/bench/build.sbt new file mode 100644 index 000000000..6e115c649 --- /dev/null +++ b/modules/bench/build.sbt @@ -0,0 +1,6 @@ +sourceDirectory in Jmh := (sourceDirectory in Test).value +classDirectory in Jmh := (classDirectory in Test).value +dependencyClasspath in Jmh := (dependencyClasspath in Test).value +// rewire tasks, so that 'jmh:run' automatically invokes 'jmh:compile' (otherwise a clean 'jmh:run' would fail) +compile in Jmh := (compile in Jmh).dependsOn(compile in Test).value +run in Jmh := (run in Jmh).dependsOn(Keys.compile in Jmh).evaluated \ No newline at end of file diff --git a/modules/bench/src/test/resources/simplelogger.properties b/modules/bench/src/test/resources/simplelogger.properties new file mode 100644 index 000000000..7c0551b2b --- /dev/null +++ b/modules/bench/src/test/resources/simplelogger.properties @@ -0,0 +1,2 @@ +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.Assets=off +org.slf4j.simpleLogger.log.com.snowplowanalytics.snowplow.enrich.fs2.test.TestEnvironment=off diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala new file mode 100644 index 000000000..ca12ab37d --- /dev/null +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EnrichBench.scala @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. 
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.enrich.bench + +import org.openjdk.jmh.annotations._ + +import java.util.concurrent.TimeUnit + +import cats.effect.{ContextShift, IO, Clock, Blocker} + +import fs2.Stream + +import com.snowplowanalytics.iglu.client.Client + +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry +import com.snowplowanalytics.snowplow.enrich.common.loaders.ThriftLoader +import com.snowplowanalytics.snowplow.enrich.fs2.test.TestEnvironment +import com.snowplowanalytics.snowplow.enrich.fs2.{Enrich, Environment, EnrichSpec, Payload} + +import org.apache.http.message.BasicNameValuePair + + +/** + * @example + * {{{ + * jmh:run -i 15 -wi 10 -f1 -t1 EnrichBench + * }}} + */ +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +class EnrichBench { + + implicit val ioClock: Clock[IO] = Clock.create[IO] + + @Benchmark + def measureEnrichWithMinimalPayload(state: EnrichBench.BenchState) = { + implicit val CS: ContextShift[IO] = state.contextShift + Enrich.enrichWith[IO](IO.pure(EnrichmentRegistry()), state.blocker, Client.IgluCentral, None, (_: Option[Long]) => IO.unit)(state.raw).unsafeRunSync() + } + + @Benchmark + def measureToCollectorPayload(state: EnrichBench.BenchState) = { + ThriftLoader.toCollectorPayload(state.raw.data, Enrich.processor) + } + + @Benchmark + @OperationsPerInvocation(50) // 5 events repetated 10 times + def measureRunWithNoEnrichments(state: EnrichBench.BenchState) = { + // We used this benchmark to check if running the whole `enrichWith` on a blocking + // thread-pool will give us increase in 
performance. Results haven't confirmed it: + // EnrichBench.measureRunWithNoEnrichments avgt 15 341.144 ± 18.884 us/op <- smaller blocker + // EnrichBench.measureRunWithNoEnrichments avgt 15 326.608 ± 16.714 us/op <- wrapping blocker + // EnrichBench.measureRunWithNoEnrichments avgt 15 292.907 ± 15.894 us/op <- no blocker at all + // However, I'm still leaving the "smaller blocker" in a hope that with actual IO enrichments + // it will give the expected increase in performance + implicit val CS: ContextShift[IO] = state.contextShift + state.useEnvironment(e => Enrich.run[IO](e).compile.drain).unsafeRunSync() + } +} + +object EnrichBench { + @State(Scope.Benchmark) + class BenchState { + var raw: Payload[IO, Array[Byte]] = _ + var useEnvironment: (Environment[IO] => IO[Unit]) => IO[Unit] = _ + var contextShift: ContextShift[IO] = _ + var blocker: Blocker = _ + + @Setup(Level.Trial) + def setup(): Unit = { + + raw = EnrichSpec.payload[IO] + + val input = Stream.emits(List( + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.40") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.41") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.42") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.43") :: EnrichSpec.querystring + ), + EnrichSpec.colllectorPayload.copy( + querystring = new BasicNameValuePair("ip", "125.12.2.44") :: EnrichSpec.querystring + ), + )).repeatN(10).map(cp => Payload(cp.toRaw, IO.unit)).covary[IO] + + useEnvironment = TestEnvironment.make(input).map(_.env).use(_: Environment[IO] => IO[Unit]) + + contextShift = IO.contextShift(scala.concurrent.ExecutionContext.global) + + blocker = Blocker[IO].use(IO.pure).unsafeRunSync() + } + } +} \ No newline at end of file diff --git 
a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala new file mode 100644 index 000000000..5b65e66de --- /dev/null +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/EtlPipelineBench.scala @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.bench + +import org.openjdk.jmh.annotations._ + +import java.util.concurrent.TimeUnit + +import cats.Id +import cats.data.Validated + +import cats.effect.{IO, Clock} + +import io.circe.Json + +import com.snowplowanalytics.iglu.client.{Resolver, Client, CirceValidator} + +import com.snowplowanalytics.snowplow.enrich.common.EtlPipeline +import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry +import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry + +import com.snowplowanalytics.snowplow.enrich.fs2.{Enrich, EnrichSpec} + +import org.joda.time.DateTime + +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.AverageTime, Mode.Throughput)) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +class EtlPipelineBench { + + private implicit val ioClock: Clock[IO] = Clock.create[IO] + + private implicit val idClock: Clock[Id] = new Clock[Id] { + final def realTime(unit: TimeUnit): Id[Long] = + unit.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS) + final def monotonic(unit: TimeUnit): Id[Long] = + unit.convert(System.nanoTime(), TimeUnit.NANOSECONDS) + } + + @Benchmark + def measureProcessEventsIO(state: EtlPipelineBench.BenchState) = { + val payload = EnrichSpec.colllectorPayload + EtlPipeline.processEvents[IO](state.adapterRegistry, state.enrichmentRegistryIo, Client.IgluCentral, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))).unsafeRunSync() + } + + @Benchmark + def measureProcessEventsId(state: EtlPipelineBench.BenchState) = { + val payload = EnrichSpec.colllectorPayload + EtlPipeline.processEvents[Id](state.adapterRegistry, state.enrichmentRegistryId, state.clientId, Enrich.processor, state.dateTime, Validated.Valid(Some(payload))) + } +} + +object EtlPipelineBench { + + + @State(Scope.Benchmark) + class BenchState { + var dateTime: DateTime = _ + var adapterRegistry: AdapterRegistry = _ + var enrichmentRegistryId: EnrichmentRegistry[Id] = _ + var 
enrichmentRegistryIo: EnrichmentRegistry[IO] = _ + var clientId: Client[Id, Json] = _ + var clientIO: Client[IO, Json] = _ + + @Setup(Level.Trial) + def setup(): Unit = { + dateTime = DateTime.parse("2010-06-30T01:20+02:00") + adapterRegistry = new AdapterRegistry() + enrichmentRegistryId = EnrichmentRegistry[Id]() + enrichmentRegistryIo = EnrichmentRegistry[IO]() + clientId = Client[Id, Json](Resolver(List(), None), CirceValidator) + clientIO = Client[IO, Json](Resolver(List(), None), CirceValidator) + } + } +} diff --git a/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala new file mode 100644 index 000000000..fb6d84979 --- /dev/null +++ b/modules/bench/src/test/scala/com.snowplowanalytics.snowplow.enrich.bench/ThriftLoaderBench.scala @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
+ */ +package com.snowplowanalytics.snowplow.enrich.bench + +import org.openjdk.jmh.annotations._ +import java.util.concurrent.TimeUnit + +import com.snowplowanalytics.snowplow.enrich.common.loaders.ThriftLoader +import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent +import com.snowplowanalytics.snowplow.enrich.fs2.{Enrich, EnrichSpec} + +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +class ThriftLoaderBench { + + @Benchmark + def measureToCollectorPayload(state: ThriftLoaderBench.BenchState) = + ThriftLoader.toCollectorPayload(state.data, Enrich.processor) + + @Benchmark + def measureNormalize(state: ThriftLoaderBench.BenchState) = { + Enrich.encodeEvent(state.event) + } +} + +object ThriftLoaderBench { + @State(Scope.Benchmark) + class BenchState { + var data: Array[Byte] = _ + var event: EnrichedEvent = _ + + @Setup(Level.Trial) + def setup(): Unit = { + data = EnrichSpec.colllectorPayload.toRaw + + event = new EnrichedEvent() + event.setApp_id("foo") + event.setEvent_id("deadbeef-dead-dead-dead-deaddeafbeef") + event.setUser_ipaddress("128.0.1.2") + event.setUnstruct_event("""{"some": "json"}""") + } + } +} + diff --git a/project/plugins.sbt b/project/plugins.sbt index 21136454b..180ed82ca 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -7,3 +7,4 @@ addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.2.7") addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.1") addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.10.0-RC1") +addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.0") \ No newline at end of file From af2278ba134af7968bd68437e617ea41da0fd222 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Sat, 10 Oct 2020 17:25:47 +0300 Subject: [PATCH 36/38] Common: fix NullPointerException on serializing invalid state (close #371) --- .../registry/apirequest/ApiRequestEnrichment.scala | 2 +- 
.../common/enrichments/registry/apirequest/Errors.scala | 3 +++ .../common/enrichments/registry/apirequest/Output.scala | 4 ++-- .../common/enrichments/registry/sqlquery/Errors.scala | 2 +- .../enrichments/registry/sqlquery/SqlQueryEnrichment.scala | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala index 2d2341897..9ae20a644 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/ApiRequestEnrichment.scala @@ -134,7 +134,7 @@ final case class ApiRequestEnrichment[F[_]: Monad: HttpClient]( contexts = jsons.parTraverse { json => SelfDescribingData .parse(json) - .leftMap(e => NonEmptyList.one(s"${json.noSpaces} is not self-describing, ${e.code}")) + .leftMap(e => NonEmptyList.one(s"${json.noSpaces} is not self-describing JSON, ${e.code}")) } outputs <- EitherT.fromEither[F](contexts) } yield outputs diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala index 3ecca8e88..1492f0bd9 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Errors.scala @@ -13,13 +13,16 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.apirequest final case class ValueNotFoundException(message: 
String) extends Throwable { + override def getMessage: String = "API Request enrichment:" ++ toString override def toString = s"Value not found $message" } final case class JsonPathException(message: String) extends Throwable { + override def getMessage: String = "API Request enrichment:" ++ toString override def toString = s"JSONPath error $message" } final case class InvalidStateException(message: String) extends Throwable { + override def getMessage: String = "API Request enrichment:" ++ toString override def toString = message } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala index ae0408edb..253db43d4 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/apirequest/Output.scala @@ -35,8 +35,8 @@ final case class Output(schema: String, json: Option[JsonOutput]) { def parseResponse(apiResponse: String): Either[Throwable, Json] = json match { case Some(jsonOutput) => jsonOutput.parseResponse(apiResponse) - case output => - new InvalidStateException(s"Error: Unknown output [$output]").asLeft // Cannot happen now + case None => + new InvalidStateException(s"Error: output key is missing").asLeft // Cannot happen now } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala index a320ebf26..a9aa5760a 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala +++ 
b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/Errors.scala @@ -15,7 +15,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlque sealed trait SqlQueryEnrichmentError extends Throwable { val message: String override def toString = message - override def getMessage = message + override def getMessage = "SQL Query enrichment: " ++ message } final case class ValueNotFoundException(message: String) extends SqlQueryEnrichmentError final case class JsonPathException(message: String) extends SqlQueryEnrichmentError diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala index 4aa3a63f0..a625d8a82 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/sqlquery/SqlQueryEnrichment.scala @@ -99,7 +99,7 @@ object SqlQueryEnrichment extends ParseableEnrichment { * @param db source DB configuration * @param query string representation of prepared SQL statement * @param output configuration of output context - * @param ttl cache TTL + * @param ttl cache TTL in milliseconds * @param cache actual mutable LRU cache * @param connection initialized DB connection (a mutable single-value cache) */ From c61fa015841ac47787f355c985b3797794b533ea Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Sun, 11 Oct 2020 19:27:18 +0300 Subject: [PATCH 37/38] Common: make assets publishing independent of each other (close #373) --- .github/workflows/test.yml | 137 ++++++++++++++++++++++++++++++++----- 1 file changed, 119 insertions(+), 18 deletions(-) diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 266d70251..07fd960c4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,28 +26,16 @@ jobs: java-version: 11 - name: Prepare Mock server for SCE ApiRequestEnrichmentIntegrationTest (launch in background) run: python integration-tests/sce-api-lookup-test.py 8001 & - - name: Prepare Postgres for SCE SqlLookupEnrichmentIntegrationTest (create entities) - run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/sce-sql-enrichment-test.sql - env: - PGPASSWORD: supersecret1 - name: Prepare Mock server for BE ApiRequestEnrichmentIntegrationTest (launch in background) run: python integration-tests/beam-api-lookup-test.py & - - name: Prepare Postgres for BE SqlLookupEnrichmentIntegrationTest (create entities) - run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/beam-sql-enrichment-test.sql + - name: Prepare Postgres for SCE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/sce-sql-enrichment-test.sql env: PGPASSWORD: supersecret1 - name: Run tests - run: sbt coverage +test coverageReport + run: sbt "project common" +test env: OER_KEY: ${{ secrets.OER_KEY }} - - name: Aggregate coverage data - if: ${{ always() }} - run: sbt coverageAggregate - - name: Submit coveralls data - if: ${{ always() }} - run: sbt coveralls - env: - COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} - name: Check Scala formatting if: ${{ always() }} run: sbt scalafmtCheck @@ -55,7 +43,7 @@ jobs: if: ${{ always() }} run: sbt publishLocal - deploy: + deploy_stream: needs: test if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest @@ -67,22 +55,86 @@ jobs: java-version: 11 - name: Compare SBT version with git tag run: .github/check_tag.sh ${GITHUB_REF##*/} + - name: Test stream enrich + run: sbt "project stream" test - name: Docker login run: docker login -u 
$DOCKER_USERNAME -p $DOCKER_PASSWORD env: DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} - - name: Build and publish Beam Docker images - run: sbt "project beam" docker:publish - name: Build and publish Stream Kinesis Docker image + if: ${{ always() }} run: sbt "project kinesis" docker:publish - name: Build and publish Stream Kafka Docker image + if: ${{ always() }} run: sbt "project kafka" docker:publish - name: Build and publish Stream NSQ Docker image + if: ${{ always() }} run: sbt "project nsq" docker:publish + + deploy_fs2: + needs: test + if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Compare SBT version with git tag + run: .github/check_tag.sh ${GITHUB_REF##*/} + - name: Test FS2 enrich + run: sbt "project fs2" test + - name: Docker login + run: docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD + env: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} - name: Build and publish Stream NH Docker image run: sbt "project fs2" docker:publish + deploy_beam: + needs: test + if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + + services: + postgres: + image: postgres + ports: + - 5432:5432 + env: + POSTGRES_USER: enricher + POSTGRES_PASSWORD: supersecret1 + POSTGRES_DB: sql_enrichment_test + POSTGRES_PORT: 5432 + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Compare SBT version with git tag + run: .github/check_tag.sh ${GITHUB_REF##*/} + - name: Docker login + run: docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD + env: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + - 
name: Prepare Mock server for BE ApiRequestEnrichmentIntegrationTest (launch in background) + run: python integration-tests/beam-api-lookup-test.py & + - name: Prepare Postgres for BE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/beam-sql-enrichment-test.sql + env: + PGPASSWORD: supersecret1 + - name: Test Beam enrich + run: sbt "project beam" test + - name: Build and publish Beam Docker images + run: sbt "project beam" docker:publish + deploy_sce: needs: test if: startsWith(github.ref, 'refs/tags/') @@ -102,3 +154,52 @@ jobs: SONA_PASS: ${{ secrets.SONA_PASS }} BINTRAY_SNOWPLOW_MAVEN_USER: ${{ secrets.BINTRAY_SNOWPLOW_MAVEN_USER }} BINTRAY_SNOWPLOW_MAVEN_API_KEY: ${{ secrets.BINTRAY_SNOWPLOW_MAVEN_API_KEY }} + + coverage: + needs: test + runs-on: ubuntu-latest + + services: + postgres: + image: postgres + ports: + - 5432:5432 + env: + POSTGRES_USER: enricher + POSTGRES_PASSWORD: supersecret1 + POSTGRES_DB: sql_enrichment_test + POSTGRES_PORT: 5432 + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + + steps: + - uses: actions/checkout@v2 + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + + - name: Prepare Mock server for BE ApiRequestEnrichmentIntegrationTest (launch in background) + run: python integration-tests/beam-api-lookup-test.py & + - name: Prepare Postgres for BE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < integration-tests/beam-sql-enrichment-test.sql + env: + PGPASSWORD: supersecret1 + - name: Prepare Mock server for SCE ApiRequestEnrichmentIntegrationTest (launch in background) + run: python integration-tests/sce-api-lookup-test.py 8001 & + - name: Prepare Postgres for SCE SqlLookupEnrichmentIntegrationTest (create entities) + run: psql -h localhost -p 5432 -U enricher -d sql_enrichment_test < 
integration-tests/sce-sql-enrichment-test.sql + env: + PGPASSWORD: supersecret1 + + - name: Run tests + run: sbt coverage +test coverageReport + env: + OER_KEY: ${{ secrets.OER_KEY }} + - name: Aggregate coverage data + if: ${{ always() }} + run: sbt coverageAggregate + - name: Submit coveralls data + if: ${{ always() }} + run: sbt coveralls + env: + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} From 9d4cfaa781f0ca76590fecb4b73223517635ba21 Mon Sep 17 00:00:00 2001 From: Anton Parkhomenko Date: Wed, 21 Oct 2020 14:54:21 +0300 Subject: [PATCH 38/38] Prepare for release --- CHANGELOG | 40 +++++++++++++++++++++++++++++++++++++ project/BuildSettings.scala | 2 +- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 7bd550f43..d9388dbae 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,43 @@ +Version 1.4.0 (2020-10-21) +-------------------------- +Stream FS2: add (#346) +Stream: bump log4j-core to 2.13.3 (#368) +Stream: bump base-debian to 0.2.1 (#359) +Stream: remove unused Maxmind database (#352) +Beam: bump Scio to 0.9.3 (#308) +Beam: use test Maxmind databases (#269) +Common: add benchmarking module (#370) +Common: bump scala-forex to 1.0.0 (#349) +Common: bump scala-referer-parser to 1.1.0 (#348) +Common: bump scala-weather to 1.0.0 (#347) +Common: bump iglu-scala-client to 1.0.2 (#52) +Common: bump scala-maxmind-iplookups to 0.7.1 (#323) +Common: bump snowplow-badrows to 2.1.0 (#325) +Common: bump YAUAA to 5.19 (#314) +Common: bump postgresql to 42.2.16 (#369) +Common: bump jackson-databind to 2.10.5 (#367) +Common: bump to JDK 11 (#362) +Common: switch to HostName.asInetAddress to validate IP addresses (#355) +Common: fix NullPointerException on serializing invalid state (#371) +Common: fix API Request Enrichment output deserialization (#374) +Common: fix PiiPseudonymizerEnrichment for arrays and improve unit tests coverage (#334) +Common: fix PII enrichment adding empty objects instead of missing properties (#351) 
+Common: fix PathNotFoundException in PII enrichment (#339) +Common: fix pattern matching against null in ScrambleMapFunction (#338) +Common: fix flaky ThriftLoader test (#306) +Common: handle empty query string parameters in adapters (#341) +Common: make assets publishing independent of each other (#373) +Common: disable formatting on compile (#358) +Common: add sbt publishLocal operation to test action (#357) +Common: add toThrift and toRaw methods to CollectorPayload (#345) +Common: replace deprecated constructors in EnrichedEventSpec (#354) +Common: improve unit tests coverage (#335) +Common: use test Maxmind databases (#350) +Common: get rid of placeholder schema in enrichment configurations (#302) +Common: move EnrichmentConf into its own module (#303) +Common: get rid of Eval instances (#300) +Common: add tests for Input for SQL enrichment (#316) + Version 1.3.2 (2020-09-06) -------------------------- Common: convert null fields in EnrichedEvent to None in PartiallyEnrichedEvent (#331) diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala index 93faea755..78a3ee7b9 100644 --- a/project/BuildSettings.scala +++ b/project/BuildSettings.scala @@ -36,7 +36,7 @@ object BuildSettings { lazy val basicSettings = Seq( organization := "com.snowplowanalytics", scalaVersion := "2.12.11", - version := "1.3.2", + version := "1.4.0", javacOptions := Seq("-source", "11", "-target", "11"), resolvers ++= Dependencies.resolutionRepos, licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")),