From 6d827c1a29f520e38ae4f4a56300d17d3648e5f7 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Tue, 20 Oct 2020 23:54:06 +0200 Subject: [PATCH] Add toTSV method (close #97) --- ...lyticsSDK.scala => OrderedBenchmark.scala} | 4 +- .../benchmark/ToTsvBenchmark.scala | 35 ++ build.sbt | 4 +- project/Dependencies.scala | 3 + .../Event.scala | 7 +- .../SnowplowEvent.scala | 2 +- .../encode/TsvEncoder.scala | 211 +++++++++++ .../EventGen.scala | 351 ++++++++++++++++++ .../EventSpec.scala | 267 ++++++------- .../ParsingErrorSpec.scala | 4 +- 10 files changed, 754 insertions(+), 134 deletions(-) rename benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/{BenchmarkAnalyticsSDK.scala => OrderedBenchmark.scala} (98%) create mode 100644 benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/ToTsvBenchmark.scala create mode 100644 src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/encode/TsvEncoder.scala create mode 100644 src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala diff --git a/benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/BenchmarkAnalyticsSDK.scala b/benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/OrderedBenchmark.scala similarity index 98% rename from benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/BenchmarkAnalyticsSDK.scala rename to benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/OrderedBenchmark.scala index 5772ac0..a335063 100644 --- a/benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/BenchmarkAnalyticsSDK.scala +++ b/benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/OrderedBenchmark.scala @@ -23,7 +23,7 @@ import com.snowplowanalytics.snowplow.analytics.scalasdk.Event @State(Scope.Thread) @BenchmarkMode(Array(Mode.AverageTime, Mode.Throughput)) @OutputTimeUnit(TimeUnit.MICROSECONDS) -class BenchmarkAnalyticsSDK { +class OrderedBenchmark { @Benchmark def ordered(state : States.AtomicEventState): Unit = { state.event.ordered @@ -44,4 +44,4 @@ object States { event = Event.minimal(uuid, timestamp, vCollector, vTracker) } } -} \ No newline at end of file +} diff --git a/benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/ToTsvBenchmark.scala b/benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/ToTsvBenchmark.scala new file mode 100644 index 0000000..d74eecb --- /dev/null +++ b/benchmark/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/benchmark/ToTsvBenchmark.scala @@ -0,0 +1,35 @@ +package com.snowplowanalytics.snowplow.analytics.scalasdk.benchmark + +import org.openjdk.jmh.annotations._ + +import java.util.concurrent.TimeUnit +import java.util.UUID +import java.time.Instant + +import com.snowplowanalytics.snowplow.analytics.scalasdk.Event + +@State(Scope.Thread) +@BenchmarkMode(Array(Mode.AverageTime, Mode.Throughput)) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +class ToTsvBenchmark { + @Benchmark + def toTsv(state : ToTsvBenchmark.AtomicEventState): Unit = { + state.event.toTsv + } +} + +object ToTsvBenchmark { + @State(Scope.Benchmark) + class AtomicEventState { + var event: Event = _ + + @Setup(Level.Trial) + def init(): Unit = { + val uuid = UUID.randomUUID() + val timestamp = Instant.now() + val vCollector = "2.0.0" + val vTracker = "scala_0.7.0" + event = Event.minimal(uuid, timestamp, vCollector, vTracker) + } + } +} diff --git a/build.sbt b/build.sbt index 83bbc17..d260646 100644 --- a/build.sbt +++ b/build.sbt @@ -39,7 +39,9 @@ lazy val root = project.in(file(".")) Dependencies.circeParser, Dependencies.circeGeneric, // Scala (test only) - Dependencies.specs2 + Dependencies.specs2, + Dependencies.specs2Scalacheck, + Dependencies.scalacheck ) ) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 7d19fc0..87210b9 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -20,6 +20,7 @@ object Dependencies { val circe = "0.13.0" // Scala (test only) val specs2 = "4.8.0" + val scalaCheck = "1.15.0" } val igluCore = "com.snowplowanalytics" %% "iglu-core-circe" % V.igluCore @@ -28,4 +29,6 @@ object Dependencies { val circeGeneric = "io.circe" %% "circe-generic" % V.circe // Scala (test only) val specs2 = "org.specs2" %% "specs2-core" % V.specs2 % Test + val specs2Scalacheck = "org.specs2" %% "specs2-scalacheck" % V.specs2 % Test + val scalacheck = "org.scalacheck" %% "scalacheck" % V.scalaCheck % Test } diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala index 2319b3d..5262d3e 100644 --- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/Event.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -15,6 +15,7 @@ package com.snowplowanalytics.snowplow.analytics.scalasdk // java import java.time.Instant import java.util.UUID +import java.time.format.DateTimeFormatter // circe import io.circe.{Encoder, Json, JsonObject, Decoder} @@ -30,6 +31,7 @@ import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.analytics.scalasdk.decode.{Parser, DecodeResult} import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent.{Contexts, UnstructEvent} import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent._ +import com.snowplowanalytics.snowplow.analytics.scalasdk.encode.TsvEncoder /** * Case class representing a canonical Snowplow event. @@ -228,6 +230,9 @@ case class Event(app_id: Option[String], this.asJson } + /** Create the TSV representation of this event. */ + def toTsv: String = TsvEncoder.encode(this) + /** * This event as a map of keys to Circe JSON values */ diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala index 599c5a1..3839969 100644 --- a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/SnowplowEvent.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/encode/TsvEncoder.scala b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/encode/TsvEncoder.scala new file mode 100644 index 0000000..5724be1 --- /dev/null +++ b/src/main/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/encode/TsvEncoder.scala @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2020-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.analytics.scalasdk.encode + +import java.time.format.DateTimeFormatter +import java.time.Instant +import java.util.UUID + +import io.circe.syntax._ + +import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent._ +import com.snowplowanalytics.snowplow.analytics.scalasdk.Event + +object TsvEncoder { + sealed trait FieldEncoder[T] { + def encodeField(t: T): String + } + + implicit object StringEncoder extends FieldEncoder[String] { + def encodeField(str: String) = str + } + + implicit object InstantEncoder extends FieldEncoder[Instant] { + def encodeField(inst: Instant): String = + DateTimeFormatter.ISO_INSTANT + .format(inst) + .replace("T", " ") + .dropRight(1) // remove trailing 'Z' + } + + implicit object UuidEncoder extends FieldEncoder[UUID] { + def encodeField(uuid: UUID): String = uuid.toString + } + + implicit object IntEncoder extends FieldEncoder[Int] { + def encodeField(int: Int): String = int.toString + } + + implicit object DoubleEncoder extends FieldEncoder[Double] { + def encodeField(doub: Double): String = doub.toString + } + + implicit object BooleanEncoder extends FieldEncoder[Boolean] { + def encodeField(bool: Boolean): String = if(bool) "1" else "0" + } + + implicit object ContextsEncoder extends FieldEncoder[Contexts] { + def encodeField(ctxts: Contexts): String = + if (ctxts.data.isEmpty) + "" + else + ctxts.asJson.noSpaces + } + + implicit object UnstructEncoder extends FieldEncoder[UnstructEvent] { + def encodeField(unstruct: UnstructEvent): String = + if (unstruct.data.isDefined) + unstruct.asJson.noSpaces + else + "" + } + + def encode[A](a: A)(implicit ev: FieldEncoder[A]): String = + ev.encodeField(a) + + def encode[A](optA: Option[A])(implicit ev: FieldEncoder[A]): String = + optA.map(a => ev.encodeField(a)).getOrElse("") + + def encode(event: Event): String = + encode(event.app_id) + "\t" + + encode(event.platform) + "\t" + + encode(event.etl_tstamp) + "\t" + + encode(event.collector_tstamp) + "\t" + + encode(event.dvce_created_tstamp) + "\t" + + encode(event.event) + "\t" + + encode(event.event_id) + "\t" + + encode(event.txn_id) + "\t" + + encode(event.name_tracker) + "\t" + + encode(event.v_tracker) + "\t" + + encode(event.v_collector) + "\t" + + encode(event.v_etl) + "\t" + + encode(event.user_id) + "\t" + + encode(event.user_ipaddress) + "\t" + + encode(event.user_fingerprint) + "\t" + + encode(event.domain_userid) + "\t" + + encode(event.domain_sessionidx) + "\t" + + encode(event.network_userid) + "\t" + + encode(event.geo_country) + "\t" + + encode(event.geo_region) + "\t" + + encode(event.geo_city) + "\t" + + encode(event.geo_zipcode) + "\t" + + encode(event.geo_latitude) + "\t" + + encode(event.geo_longitude) + "\t" + + encode(event.geo_region_name) + "\t" + + encode(event.ip_isp) + "\t" + + encode(event.ip_organization) + "\t" + + encode(event.ip_domain) + "\t" + + encode(event.ip_netspeed) + "\t" + + encode(event.page_url) + "\t" + + encode(event.page_title) + "\t" + + encode(event.page_referrer) + "\t" + + encode(event.page_urlscheme) + "\t" + + encode(event.page_urlhost) + "\t" + + encode(event.page_urlport) + "\t" + + encode(event.page_urlpath) + "\t" + + encode(event.page_urlquery) + "\t" + + encode(event.page_urlfragment) + "\t" + + encode(event.refr_urlscheme) + "\t" + + encode(event.refr_urlhost) + "\t" + + encode(event.refr_urlport) + "\t" + + encode(event.refr_urlpath) + "\t" + + encode(event.refr_urlquery) + "\t" + + encode(event.refr_urlfragment) + "\t" + + encode(event.refr_medium) + "\t" + + encode(event.refr_source) + "\t" + + encode(event.refr_term) + "\t" + + encode(event.mkt_medium) + "\t" + + encode(event.mkt_source) + "\t" + + encode(event.mkt_term) + "\t" + + encode(event.mkt_content) + "\t" + + encode(event.mkt_campaign) + "\t" + + encode(event.contexts) + "\t" + + encode(event.se_category) + "\t" + + encode(event.se_action) + "\t" + + encode(event.se_label) + "\t" + + encode(event.se_property) + "\t" + + encode(event.se_value) + "\t" + + encode(event.unstruct_event) + "\t" + + encode(event.tr_orderid) + "\t" + + encode(event.tr_affiliation) + "\t" + + encode(event.tr_total) + "\t" + + encode(event.tr_tax) + "\t" + + encode(event.tr_shipping) + "\t" + + encode(event.tr_city) + "\t" + + encode(event.tr_state) + "\t" + + encode(event.tr_country) + "\t" + + encode(event.ti_orderid) + "\t" + + encode(event.ti_sku) + "\t" + + encode(event.ti_name) + "\t" + + encode(event.ti_category) + "\t" + + encode(event.ti_price) + "\t" + + encode(event.ti_quantity) + "\t" + + encode(event.pp_xoffset_min) + "\t" + + encode(event.pp_xoffset_max) + "\t" + + encode(event.pp_yoffset_min) + "\t" + + encode(event.pp_yoffset_max) + "\t" + + encode(event.useragent) + "\t" + + encode(event.br_name) + "\t" + + encode(event.br_family) + "\t" + + encode(event.br_version) + "\t" + + encode(event.br_type) + "\t" + + encode(event.br_renderengine) + "\t" + + encode(event.br_lang) + "\t" + + encode(event.br_features_pdf) + "\t" + + encode(event.br_features_flash) + "\t" + + encode(event.br_features_java) + "\t" + + encode(event.br_features_director) + "\t" + + encode(event.br_features_quicktime) + "\t" + + encode(event.br_features_realplayer) + "\t" + + encode(event.br_features_windowsmedia) + "\t" + + encode(event.br_features_gears) + "\t" + + encode(event.br_features_silverlight) + "\t" + + encode(event.br_cookies) + "\t" + + encode(event.br_colordepth) + "\t" + + encode(event.br_viewwidth) + "\t" + + encode(event.br_viewheight) + "\t" + + encode(event.os_name) + "\t" + + encode(event.os_family) + "\t" + + encode(event.os_manufacturer) + "\t" + + encode(event.os_timezone) + "\t" + + encode(event.dvce_type) + "\t" + + encode(event.dvce_ismobile) + "\t" + + encode(event.dvce_screenwidth) + "\t" + + encode(event.dvce_screenheight) + "\t" + + encode(event.doc_charset) + "\t" + + encode(event.doc_width) + "\t" + + encode(event.doc_height) + "\t" + + encode(event.tr_currency) + "\t" + + encode(event.tr_total_base) + "\t" + + encode(event.tr_tax_base) + "\t" + + encode(event.tr_shipping_base) + "\t" + + encode(event.ti_currency) + "\t" + + encode(event.ti_price_base) + "\t" + + encode(event.base_currency) + "\t" + + encode(event.geo_timezone) + "\t" + + encode(event.mkt_clickid) + "\t" + + encode(event.mkt_network) + "\t" + + encode(event.etl_tags) + "\t" + + encode(event.dvce_sent_tstamp) + "\t" + + encode(event.refr_domain_userid) + "\t" + + encode(event.refr_dvce_tstamp) + "\t" + + encode(event.derived_contexts) + "\t" + + encode(event.domain_sessionid) + "\t" + + encode(event.derived_tstamp) + "\t" + + encode(event.event_vendor) + "\t" + + encode(event.event_name) + "\t" + + encode(event.event_format) + "\t" + + encode(event.event_version) + "\t" + + encode(event.event_fingerprint) + "\t" + + encode(event.true_tstamp) +} diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala new file mode 100644 index 0000000..d52960b --- /dev/null +++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventGen.scala @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ +package com.snowplowanalytics.snowplow.analytics.scalasdk + +import org.scalacheck.{Arbitrary, Gen} + +import io.circe._ +import io.circe.syntax._ +import io.circe.{Encoder, Decoder, HCursor, Json} +import io.circe.parser._ + +import java.time.Instant + +object EventGen { + import SnowplowEvent._ + + def strGen(n: Int, gen: Gen[Char]): Gen[String] = + Gen.chooseNum(1, n).flatMap(len => Gen.listOfN(len, gen).map(_.mkString)) + + private val MaxTimestamp = 2871824840360L + + implicit val instantArbitrary: Arbitrary[Instant] = + Arbitrary { + for { + seconds <- Gen.chooseNum(0L, MaxTimestamp) + nanos <- Gen.chooseNum(Instant.MIN.getNano, Instant.MAX.getNano) + } yield Instant.ofEpochMilli(seconds).plusNanos(nanos.toLong) + } + + val instantGen: Gen[Instant] = + Arbitrary.arbitrary[Instant] + + val ipv4Address: Gen[String] = + for { + a <- Gen.chooseNum(0, 255) + b <- Gen.chooseNum(0, 255) + c <- Gen.chooseNum(0, 255) + d <- Gen.chooseNum(0, 255) + } yield s"$a.$b.$c.$d" + + val ipv6Address: Gen[String] = + for { + a <- Arbitrary.arbitrary[Short] + b <- Arbitrary.arbitrary[Short] + c <- Arbitrary.arbitrary[Short] + d <- Arbitrary.arbitrary[Short] + e <- Arbitrary.arbitrary[Short] + f <- Arbitrary.arbitrary[Short] + g <- Arbitrary.arbitrary[Short] + h <- Arbitrary.arbitrary[Short] + } yield f"$a%x:$b%x:$c%x:$d%x:$e%x:$f%x:$g%x:$h%x" + + val ipAddress: Gen[String] = + Gen.oneOf(ipv4Address, ipv6Address) + + val platform: Gen[String] = Gen.oneOf("web", "mob", "app") + + val eventType: Gen[String] = Gen.oneOf("page_view", "page_ping", "transaction", "unstruct") + + val contexts: Contexts = parse(EventSpec.contextsJson) + .flatMap(_.as[Contexts]) + .getOrElse(throw new UnsupportedOperationException("can't decode contexts")) + + val unstruct: UnstructEvent = parse(EventSpec.unstructJson) + .flatMap(_.as[UnstructEvent]) + .getOrElse(throw new UnsupportedOperationException("can't decode unstructured event")) + + val derived_contexts: Contexts = parse(EventSpec.derivedContextsJson) + .flatMap(_.as[Contexts]) + .getOrElse(throw new UnsupportedOperationException("can't decode derived contexts")) + + val event: Gen[Event] = + for { + app_id <- Gen.option(strGen(512, Gen.alphaNumChar)) + platform <- Gen.option(platform) + etl_tstamp <- Gen.option(instantGen) + collector_tstamp <- instantGen + dvce_created_tstamp <- Gen.option(instantGen) + event <- Gen.option(eventType) + event_id <- Gen.uuid + txn_id <- Gen.option(Gen.chooseNum(1, 10000)) + name_tracker <- Gen.option(strGen(256, Gen.alphaNumChar)) + v_tracker <- Gen.option(strGen(256, Gen.alphaNumChar)) + v_collector <- strGen(512, Gen.alphaNumChar) + v_etl <- strGen(512, Gen.alphaNumChar) + user_id <- Gen.option(Gen.uuid).map(_.map(_.toString())) + user_ipaddress <- Gen.option(ipAddress) + user_fingerprint <- Gen.option(strGen(512, Gen.alphaNumChar)) + domain_userid <- Gen.option(Gen.uuid).map(_.map(_.toString())) + domain_sessionidx <- Gen.option(Gen.chooseNum(1, 10000)) + network_userid <- Gen.option(Gen.uuid).map(_.map(_.toString())) + geo_country <- Gen.option(strGen(3, Gen.alphaUpperChar)) + geo_region <- Gen.option(strGen(100, Gen.alphaNumChar)) + geo_city <- Gen.option(strGen(512, Gen.alphaChar)) + geo_zipcode <- Gen.option(strGen(6, Gen.alphaNumChar)) + geo_latitude <- Gen.option(Arbitrary.arbitrary[Double]) + geo_longitude <- Gen.option(Arbitrary.arbitrary[Double]) + geo_region_name <- Gen.option(strGen(512, Gen.alphaChar)) + ip_isp <- Gen.option(strGen(512, Gen.alphaNumChar)) + ip_organization <- Gen.option(strGen(512, Gen.alphaNumChar)) + ip_domain <- Gen.option(strGen(512, Gen.alphaNumChar)) + ip_netspeed <- Gen.option(strGen(50, Gen.alphaNumChar)) + page_url <- Gen.option(strGen(512, Gen.alphaNumChar)) + page_title <- Gen.option(strGen(512, Gen.alphaNumChar)) + page_referrer <- Gen.option(strGen(512, Gen.alphaNumChar)) + page_urlscheme <- Gen.option(strGen(10, Gen.alphaNumChar)) + page_urlhost <- Gen.option(strGen(512, Gen.alphaNumChar)) + page_urlport <- Gen.option(Gen.chooseNum(1, 65000)) + page_urlpath <- Gen.option(strGen(512, Gen.alphaNumChar)) + page_urlquery <- Gen.option(strGen(512, Gen.alphaNumChar)) + page_urlfragment <- Gen.option(strGen(512, Gen.alphaNumChar)) + refr_urlscheme <- Gen.option(strGen(10, Gen.alphaNumChar)) + refr_urlhost <- Gen.option(strGen(512, Gen.alphaNumChar)) + refr_urlport <- Gen.option(Gen.chooseNum(1, 65000)) + refr_urlpath <- Gen.option(strGen(512, Gen.alphaNumChar)) + refr_urlquery <- Gen.option(strGen(512, Gen.alphaNumChar)) + refr_urlfragment <- Gen.option(strGen(512, Gen.alphaNumChar)) + refr_medium <- Gen.option(strGen(512, Gen.alphaNumChar)) + refr_source <- Gen.option(strGen(512, Gen.alphaNumChar)) + refr_term <- Gen.option(strGen(512, Gen.alphaNumChar)) + mkt_medium <- Gen.option(strGen(512, Gen.alphaNumChar)) + mkt_source <- Gen.option(strGen(512, Gen.alphaNumChar)) + mkt_term <- Gen.option(strGen(512, Gen.alphaNumChar)) + mkt_content <- Gen.option(strGen(512, Gen.alphaNumChar)) + mkt_campaign <- Gen.option(strGen(512, Gen.alphaNumChar)) + contexts <- Gen.oneOf(contexts, Contexts(Nil)) + se_category <- Gen.option(strGen(512, Gen.alphaNumChar)) + se_action <- Gen.option(strGen(512, Gen.alphaNumChar)) + se_label <- Gen.option(strGen(512, Gen.alphaNumChar)) + se_property <- Gen.option(strGen(512, Gen.alphaNumChar)) + se_value <- Gen.option(Arbitrary.arbitrary[Double]) + unstruct_event = event match { + case Some("unstruct") => unstruct + case _ => UnstructEvent(None) + } + tr_orderid <- Gen.option(Gen.uuid).map(_.map(_.toString())) + tr_affiliation <- Gen.option(strGen(512, Gen.alphaNumChar)) + tr_total <- Gen.option(Arbitrary.arbitrary[Double]) + tr_tax <- Gen.option(Arbitrary.arbitrary[Double]) + tr_shipping <- Gen.option(Arbitrary.arbitrary[Double]) + tr_city <- Gen.option(strGen(512, Gen.alphaNumChar)) + tr_state <- Gen.option(strGen(512, Gen.alphaNumChar)) + tr_country <- Gen.option(strGen(512, Gen.alphaNumChar)) + ti_orderid <- Gen.option(Gen.uuid).map(_.map(_.toString())) + ti_sku <- Gen.option(strGen(512, Gen.alphaNumChar)) + ti_name <- Gen.option(strGen(512, Gen.alphaNumChar)) + ti_category <- Gen.option(strGen(512, Gen.alphaNumChar)) + ti_price <- Gen.option(Arbitrary.arbitrary[Double]) + ti_quantity <- Gen.option(Gen.chooseNum(1, 100)) + pp_xoffset_min <- Gen.option(Gen.chooseNum(1, 10000)) + pp_xoffset_max <- Gen.option(Gen.chooseNum(1, 10000)) + pp_yoffset_min <- Gen.option(Gen.chooseNum(1, 10000)) + pp_yoffset_max <- Gen.option(Gen.chooseNum(1, 10000)) + useragent <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_name <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_family <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_version <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_type <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_renderengine <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_lang <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_features_pdf <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_flash <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_java <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_director <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_quicktime <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_realplayer <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_windowsmedia <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_gears <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_features_silverlight <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_cookies <- Gen.option(Arbitrary.arbitrary[Boolean]) + br_colordepth <- Gen.option(strGen(512, Gen.alphaNumChar)) + br_viewwidth <- Gen.option(Gen.chooseNum(1, 10000)) + br_viewheight <- Gen.option(Gen.chooseNum(1, 10000)) + os_name <- Gen.option(strGen(512, Gen.alphaNumChar)) + os_family <- Gen.option(strGen(512, Gen.alphaNumChar)) + os_manufacturer <- Gen.option(strGen(512, Gen.alphaNumChar)) + os_timezone <- Gen.option(strGen(512, Gen.alphaNumChar)) + dvce_type <- Gen.option(strGen(512, Gen.alphaNumChar)) + dvce_ismobile <- Gen.option(Arbitrary.arbitrary[Boolean]) + dvce_screenwidth <- Gen.option(Gen.chooseNum(1, 10000)) + dvce_screenheight <- Gen.option(Gen.chooseNum(1, 10000)) + doc_charset <- Gen.option(strGen(512, Gen.alphaNumChar)) + doc_width <- Gen.option(Gen.chooseNum(1, 10000)) + doc_height <- Gen.option(Gen.chooseNum(1, 10000)) + tr_currency <- Gen.option(strGen(512, Gen.alphaNumChar)) + tr_total_base <- Gen.option(Arbitrary.arbitrary[Double]) + tr_tax_base <- Gen.option(Arbitrary.arbitrary[Double]) + tr_shipping_base <- Gen.option(Arbitrary.arbitrary[Double]) + ti_currency <- Gen.option(strGen(512, Gen.alphaNumChar)) + ti_price_base <- Gen.option(Arbitrary.arbitrary[Double]) + base_currency <- Gen.option(strGen(512, Gen.alphaNumChar)) + geo_timezone <- Gen.option(strGen(512, Gen.alphaNumChar)) + mkt_clickid <- Gen.option(Gen.uuid).map(_.map(_.toString())) + mkt_network <- Gen.option(strGen(512, Gen.alphaNumChar)) + etl_tags <- Gen.option(strGen(512, Gen.alphaNumChar)) + dvce_sent_tstamp <- Gen.option(instantGen) + refr_domain_userid <- Gen.option(Gen.uuid).map(_.map(_.toString())) + refr_dvce_tstamp <- Gen.option(instantGen) + derived_contexts <- Gen.oneOf(derived_contexts, Contexts(Nil)) + domain_sessionid <- Gen.option(Gen.uuid).map(_.map(_.toString())) + derived_tstamp <- Gen.option(instantGen) + event_vendor <- Gen.option(Gen.identifier) + event_name <- Gen.option(Gen.identifier) + event_format <- Gen.option("jsonschema") + event_version <- Gen.option(strGen(10, Gen.alphaNumChar)) + event_fingerprint <- Gen.option(strGen(512, Gen.alphaNumChar)) + true_tstamp <- Gen.option(instantGen) + } yield Event( + app_id, + platform, + etl_tstamp, + collector_tstamp, + dvce_created_tstamp, + event, + event_id, + txn_id, + name_tracker, + v_tracker, + v_collector, + v_etl, + user_id, + user_ipaddress, + user_fingerprint, + domain_userid, + domain_sessionidx, + network_userid, + geo_country, + geo_region, + geo_city, + geo_zipcode, + geo_latitude, + geo_longitude, + geo_region_name, + ip_isp, + ip_organization, + ip_domain, + ip_netspeed, + page_url, + page_title, + page_referrer, + page_urlscheme, + page_urlhost, + page_urlport, + page_urlpath, + page_urlquery, + page_urlfragment, + refr_urlscheme, + refr_urlhost, + refr_urlport, + refr_urlpath, + refr_urlquery, + refr_urlfragment, + refr_medium, + refr_source, + refr_term, + mkt_medium, + mkt_source, + mkt_term, + mkt_content, + mkt_campaign, + contexts, + se_category, + se_action, + se_label, + se_property, + se_value, + unstruct_event, + tr_orderid, + tr_affiliation, + tr_total, + tr_tax, + tr_shipping, + tr_city, + tr_state, + tr_country, + ti_orderid, + ti_sku, + ti_name, + ti_category, + ti_price, + ti_quantity, + pp_xoffset_min, + pp_xoffset_max, + pp_yoffset_min, + pp_yoffset_max, + useragent, + br_name, + br_family, + br_version, + br_type, + br_renderengine, + br_lang, + br_features_pdf, + br_features_flash, + br_features_java, + br_features_director, + br_features_quicktime, + br_features_realplayer, + br_features_windowsmedia, + br_features_gears, + br_features_silverlight, + br_cookies, + br_colordepth, + br_viewwidth, + br_viewheight, + os_name, + os_family, + os_manufacturer, + os_timezone, + dvce_type, + dvce_ismobile, + dvce_screenwidth, + dvce_screenheight, + doc_charset, + doc_width, + doc_height, + tr_currency, + tr_total_base, + tr_tax_base, + tr_shipping_base, + ti_currency, + ti_price_base, + base_currency, + geo_timezone, + mkt_clickid, + mkt_network, + etl_tags, + dvce_sent_tstamp, + refr_domain_userid, + refr_dvce_tstamp, + derived_contexts, + domain_sessionid, + derived_tstamp, + event_vendor, + event_name, + event_format, + event_version, + event_fingerprint, + true_tstamp + ) +} diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala index e1457f9..8b3048c 100644 --- a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala +++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/EventSpec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2020 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -34,6 +34,10 @@ import org.specs2.mutable.Specification // Iglu import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} +// ScalaCheck +import org.specs2.ScalaCheck +import org.scalacheck.Prop.forAll + // This library import com.snowplowanalytics.snowplow.analytics.scalasdk.SnowplowEvent._ import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError._ @@ -42,132 +46,8 @@ import com.snowplowanalytics.snowplow.analytics.scalasdk.ParsingError.RowDecodin /** * Tests Event case class */ -class EventSpec extends Specification { - - val unstructJson = - """{ - "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", - "data": { - "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", - "data": { - "targetUrl": "http://www.example.com", - "elementClasses": ["foreground"], - "elementId": "exampleLink" - } - } - }""" - - val contextsJson = - """{ - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", - "data": [ - { - "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", - "data": { - "genre": "blog", - "inLanguage": "en-US", - "datePublished": "2014-11-06T00:00:00Z", - "author": "Fred Blundun", - "breadcrumb": [ - "blog", - "releases" - ], - "keywords": [ - "snowplow", - "javascript", - "tracker", - "event" - ] - } - }, - { - "schema": "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0", - "data": { - "navigationStart": 1415358089861, - "unloadEventStart": 1415358090270, - "unloadEventEnd": 1415358090287, - "redirectStart": 0, - "redirectEnd": 0, - "fetchStart": 1415358089870, - "domainLookupStart": 1415358090102, - "domainLookupEnd": 1415358090102, - "connectStart": 1415358090103, - "connectEnd": 1415358090183, - "requestStart": 1415358090183, - "responseStart": 1415358090265, - "responseEnd": 1415358090265, - "domLoading": 1415358090270, - "domInteractive": 1415358090886, - "domContentLoadedEventStart": 1415358090968, - "domContentLoadedEventEnd": 1415358091309, - "domComplete": 0, - "loadEventStart": 0, - "loadEventEnd": 0 - } - } - ] - }""" - - val contextsWithDuplicate = """{ - "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", - "data": [ - { - "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", - "data": { - "genre": "blog", - "inLanguage": "en-US", - "datePublished": "2014-11-06T00:00:00Z", - "author": "Fred Blundun", - "breadcrumb": [ - "blog", - "releases" - ], - "keywords": [ - "snowplow", - "javascript", - "tracker", - "event" - ] - } - }, - { - "schema": "iglu:org.acme/context_one/jsonschema/1-0-0", - "data": { - "item": 1 - } - }, - { - "schema": "iglu:org.acme/context_one/jsonschema/1-0-1", - "data": { - "item": 2 - } - } - ] - }""" - - val derivedContextsJson = - """{ - "schema": "iglu:com.snowplowanalytics.snowplow\/contexts\/jsonschema\/1-0-1", - "data": [ - { - "schema": "iglu:com.snowplowanalytics.snowplow\/ua_parser_context\/jsonschema\/1-0-0", - "data": { - "useragentFamily": "IE", - "useragentMajor": "7", - "useragentMinor": "0", - "useragentPatch": null, - "useragentVersion": "IE 7.0", - "osFamily": "Windows XP", - "osMajor": null, - "osMinor": null, - "osPatch": null, - "osPatchMinor": null, - "osVersion": "Windows XP", - "deviceFamily": "Other" - } - } - ] - }""" +class EventSpec extends Specification with ScalaCheck { + import EventSpec._ "The Event parser" should { "successfully convert a tab-separated pageview event string to an Event instance and JSON" in { @@ -3059,4 +2939,137 @@ class EventSpec extends Specification { SnowplowEvent.transformSchema(Data.UnstructEvent, "com.snowplowanalytics.self-desc", "schema", 1) mustEqual "unstruct_event_com_snowplowanalytics_self_desc_schema_1" } } + + "Parsing the result of toTSV should produce the same event" in { + forAll(EventGen.event) { e => + Event.parse(e.toTsv) mustEqual(Valid(e)) + } + } +} + +object EventSpec { + val unstructJson = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data": { + "schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1", + "data": { + "targetUrl": "http://www.example.com", + "elementClasses": ["foreground"], + "elementId": "exampleLink" + } + } + }""" + + val contextsJson = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", + "data": { + "genre": "blog", + "inLanguage": "en-US", + "datePublished": "2014-11-06T00:00:00Z", + "author": "Fred Blundun", + "breadcrumb": [ + "blog", + "releases" + ], + "keywords": [ + "snowplow", + "javascript", + "tracker", + "event" + ] + } + }, + { + "schema": "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0", + "data": { + "navigationStart": 1415358089861, + "unloadEventStart": 1415358090270, + "unloadEventEnd": 1415358090287, + "redirectStart": 0, + "redirectEnd": 0, + "fetchStart": 1415358089870, + "domainLookupStart": 1415358090102, + "domainLookupEnd": 1415358090102, + "connectStart": 1415358090103, + "connectEnd": 1415358090183, + "requestStart": 1415358090183, + "responseStart": 1415358090265, + "responseEnd": 1415358090265, + "domLoading": 1415358090270, + "domInteractive": 1415358090886, + "domContentLoadedEventStart": 1415358090968, + "domContentLoadedEventEnd": 1415358091309, + "domComplete": 0, + "loadEventStart": 0, + "loadEventEnd": 0 + } + } + ] + }""" + + val contextsWithDuplicate = """{ + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema": "iglu:org.schema/WebPage/jsonschema/1-0-0", + "data": { + "genre": "blog", + "inLanguage": "en-US", + "datePublished": "2014-11-06T00:00:00Z", + "author": "Fred Blundun", + "breadcrumb": [ + "blog", + "releases" + ], + "keywords": [ + "snowplow", + "javascript", + "tracker", + "event" + ] + } + }, + { + "schema": "iglu:org.acme/context_one/jsonschema/1-0-0", + "data": { + "item": 1 + } + }, + { + "schema": "iglu:org.acme/context_one/jsonschema/1-0-1", + "data": { + "item": 2 + } + } + ] + }""" + + val derivedContextsJson = + """{ + "schema": "iglu:com.snowplowanalytics.snowplow\/contexts\/jsonschema\/1-0-1", + "data": [ + { + "schema": "iglu:com.snowplowanalytics.snowplow\/ua_parser_context\/jsonschema\/1-0-0", + "data": { + "useragentFamily": "IE", + "useragentMajor": "7", + "useragentMinor": "0", + "useragentPatch": null, + "useragentVersion": "IE 7.0", + "osFamily": "Windows XP", + "osMajor": null, + "osMinor": null, + "osPatch": null, + "osPatchMinor": null, + "osVersion": "Windows XP", + "deviceFamily": "Other" + } + } + ] + }""" } diff --git a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala index e47d11b..505e344 100644 --- a/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala +++ b/src/test/scala/com.snowplowanalytics.snowplow.analytics.scalasdk/ParsingErrorSpec.scala @@ -95,9 +95,9 @@ class ParsingErrorSpec extends Specification { def is = s2""" } private def parseJson(jsonStr: String): Json = - parse(jsonStr).right.getOrElse(throw new RuntimeException("Failed to parse expected JSON.")) + parse(jsonStr).getOrElse(throw new RuntimeException("Failed to parse expected JSON.")) private def decodeJson[A: Decoder](json: Json): A = { - json.as[A].right.getOrElse(throw new RuntimeException("Failed to decode to ParsingError.")) + json.as[A].getOrElse(throw new RuntimeException("Failed to decode to ParsingError.")) } }