diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala index 6133d4095..b8b06dfe5 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/AdapterRegistry.scala @@ -97,10 +97,9 @@ class AdapterRegistry(remoteAdapters: Map[(String, String), RemoteAdapter] = Map processor: Processor ): F[Validated[BadRow, NonEmptyList[RawEvent]]] = (adapters.get((payload.api.vendor, payload.api.version)) match { - case Some(adapter) => - adapter.toRawEvents(payload, client) - case _ => - val f: FailureDetails.AdapterFailureOrTrackerProtocolViolation = FailureDetails.AdapterFailure.InputData( + case Some(adapter) => adapter.toRawEvents(payload, client) + case None => + val f = FailureDetails.AdapterFailure.InputData( "vendor/version", Some(s"${payload.api.vendor}/${payload.api.version}"), "vendor/version combination is not supported" diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala index a18d48fd8..3b47f5c13 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/Adapter.scala @@ -10,9 +10,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package adapters -package registry +package com.snowplowanalytics.snowplow.enrich.common.adapters.registry import cats.Monad import cats.data.{NonEmptyList, ValidatedNel} @@ -29,23 +27,23 @@ import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.CirceIgluCodecs._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails - import io.circe._ import io.circe.syntax._ import org.apache.http.NameValuePair import org.joda.time.{DateTime, DateTimeZone} -import org.joda.time.format.DateTimeFormat - -import loaders.CollectorPayload -import utils.{HttpClient, JsonUtils => JU} +import org.joda.time.format.{DateTimeFormat, DateTimeFormatter} +import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.snowplow.enrich.common.RawEventParameters +import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.loaders.CollectorPayload +import com.snowplowanalytics.snowplow.enrich.common.utils.{HttpClient, JsonUtils => JU} trait Adapter { // Signature for a Formatter function - type FormatterFunc = (RawEventParameters) => Json + type FormatterFunc = RawEventParameters => Json // The encoding type to be used val EventEncType = "UTF-8" @@ -53,7 +51,7 @@ trait Adapter { private val AcceptedQueryParameters = Set("nuid", "aid", "cv", "eid", "ttm", "url") // Datetime format we need to convert timestamps to - val JsonSchemaDateTimeFormat = + val JsonSchemaDateTimeFormat: DateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZone(DateTimeZone.UTC) private def toStringField(seconds: Long): String = { @@ -112,7 +110,7 @@ trait Adapter { /** * Converts a CollectorPayload instance into raw events. 
- * @param payload The CollectorPaylod containing one or more raw events as collected by a + * @param payload The [[CollectorPayload]] containing one or more raw events as collected by a * Snowplow collector * @param client The Iglu client used for schema lookup and validation * @return a Validation boxing either a NEL of RawEvents on Success, or a NEL of Failure Strings @@ -168,6 +166,15 @@ trait Adapter { ): RawEventParameters = { val params = formatter(parameters - ("nuid", "aid", "cv", "p")) val json = toUnstructEvent(SelfDescribingData(schema, params)).noSpaces + buildUnstructEventParams(tracker, platform, parameters, json) + } + + def buildUnstructEventParams( + tracker: String, + platform: String, + parameters: RawEventParameters, + json: String + ): Map[String, Option[String]] = Map( "tv" -> Option(tracker), "e" -> Some("ue"), @@ -175,7 +182,6 @@ trait Adapter { "ue_pr" -> Option(json) ) ++ parameters.filterKeys(AcceptedQueryParameters) - } /** * Creates a Snowplow unstructured event by nesting the provided JValue in a self-describing @@ -223,13 +229,7 @@ trait Adapter { platform: String ): RawEventParameters = { val json = toUnstructEvent(SelfDescribingData(schema, eventJson.asJson)).noSpaces - Map( - "tv" -> Option(tracker), - "e" -> Some("ue"), - "p" -> qsParams.getOrElse("p", Option(platform)), // Required field - "ue_pr" -> Option(json) - ) ++ - qsParams.filterKeys(AcceptedQueryParameters) + buildUnstructEventParams(tracker, platform, qsParams, json) } /** @@ -251,14 +251,7 @@ trait Adapter { platform: String ): RawEventParameters = { val json = toUnstructEvent(SelfDescribingData(schema, eventJson)).noSpaces - - Map( - "tv" -> Option(tracker), - "e" -> Some("ue"), - "p" -> qsParams.getOrElse("p", Option(platform)), // Required field - "ue_pr" -> Option(json) - ) ++ - qsParams.filterKeys(AcceptedQueryParameters) + buildUnstructEventParams(tracker, platform, qsParams, json) } /** @@ -411,7 +404,7 @@ trait Adapter { object Adapter { /** The Iglu schema 
URI for a Snowplow unstructured event */ - val UnstructEvent = SchemaKey( + val UnstructEvent: SchemaKey = SchemaKey( "com.snowplowanalytics.snowplow", "unstruct_event", "jsonschema", @@ -419,7 +412,7 @@ object Adapter { ) /** The Iglu schema URI for a Snowplow custom contexts */ - val Contexts = SchemaKey( + val Contexts: SchemaKey = SchemaKey( "com.snowplowanalytics.snowplow", "contexts", "jsonschema", diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala index eeeaa6f12..5ba21985b 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/adapters/registry/IgluAdapter.scala @@ -77,9 +77,7 @@ object IgluAdapter extends Adapter { ) case (None, Some(body), Some(contentType)) => Monad[F].pure(payloadSdJsonToEvent(payload, body, contentType, params)) - case (Some(schemaUri), Some(_), Some(_)) => - Monad[F].pure(payloadToEventWithSchema(payload, schemaUri, params)) - case (Some(schemaUri), None, _) => + case (Some(schemaUri), _, _) => // Ignore body Monad[F].pure(payloadToEventWithSchema(payload, schemaUri, params)) case (None, None, _) => val nel = NonEmptyList.of( diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala index f6408ebc4..9c5e28363 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/CollectorPayload.scala @@ -10,37 +10,55 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. */ -package com.snowplowanalytics.snowplow.enrich.common -package loaders +package com.snowplowanalytics.snowplow.enrich.common.loaders import java.util.UUID +import scala.collection.JavaConverters._ + import cats.syntax.either._ import cats.syntax.option._ -import com.snowplowanalytics.snowplow.badrows -import com.snowplowanalytics.snowplow.badrows.{FailureDetails, NVP} - import org.apache.http.NameValuePair +import org.apache.http.client.utils.URIBuilder +import org.apache.thrift.TSerializer + import org.joda.time.DateTime +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + +import com.snowplowanalytics.snowplow.CollectorPayload.thrift.model1.{CollectorPayload => CollectorPayload1} + +import com.snowplowanalytics.snowplow.badrows.{FailureDetails, NVP, Payload} + /** * The canonical input format for the ETL process: it should be possible to convert any collector * input format to this format, ready for the main, collector-agnostic stage of the ETL. 
* * Unlike `RawEvent`, where `parameters` contain a single event, - * [[CollectorPayload]]'s `body` can contain a POST payload with multiple events + * [[CollectorPayload]]'s `body` can contain a POST payload with multiple events, + * hence [[CollectorPayload]] with `body` is potentially identical to `List[RawEvent]` + * or [[CollectorPayload]] with `querystring` is identical to single `RawEvent` + * + * @param api collector's endpoint + * @param querystring GET parameters, would be empty for buffered events and most webhooks, + * an actual payload if `body` is empty + * @param contentType derived from HTTP header (should be in `Context`) + * @param body POST body, for buffered events and most webhooks, + * an actual payload if `querystring` is empty + * @param source information to identify the collector + * @param context event's meta-information, some properties can be used to augment payload */ final case class CollectorPayload( api: CollectorPayload.Api, - querystring: List[NameValuePair], // Could be empty in future trackers - contentType: Option[String], // Not always set - body: Option[String], // Not set for GETs + querystring: List[NameValuePair], + contentType: Option[String], + body: Option[String], source: CollectorPayload.Source, context: CollectorPayload.Context ) { - def toBadRowPayload: badrows.Payload.CollectorPayload = - badrows.Payload.CollectorPayload( + def toBadRowPayload: Payload.CollectorPayload = + Payload.CollectorPayload( api.vendor, api.version, querystring.map(nvp => NVP(nvp.getName, Option(nvp.getValue))), @@ -56,12 +74,46 @@ final case class CollectorPayload( context.headers, context.userId ) + + /** + * Cast back to Thrift-generated `CollectorPayload` class, coming from collector + * Reverse of [[ThriftLoader.toCollectorPayload]] + * Used for tests and debugging + */ + def toThrift: CollectorPayload1 = { + // Timestamp must always be set, otherwise the long value falls back to 1970-01-01 + val timestamp: Long = 
context.timestamp.map(_.getMillis.asInstanceOf[java.lang.Long]).orNull + + new CollectorPayload1(CollectorPayload.IgluUri.toSchemaUri, context.ipAddress.orNull, timestamp, source.encoding, source.name) + .setQuerystring((new URIBuilder).setParameters(querystring.asJava).build().getQuery) + .setHostname(source.hostname.orNull) + .setRefererUri(context.refererUri.orNull) + .setContentType(contentType.orNull) + .setUserAgent(context.useragent.orNull) + .setBody(body.orNull) + .setNetworkUserId(context.userId.map(_.toString).orNull) + .setHeaders(context.headers.asJava) + .setPath(api.toRaw) + } + + /** + * Transform back to array of bytes coming from collector topic + * Used for tests and debugging + */ + def toRaw: Array[Byte] = + CollectorPayload.serializer.serialize(toThrift) } object CollectorPayload { + /** Latest payload SchemaKey */ + val IgluUri: SchemaKey = SchemaKey("com.snowplowanalytics.snowplow", "CollectorPayload", "thrift", SchemaVer.Full(1, 0, 0)) + /** * Unambiguously identifies the collector source of this input line. + * @param name kind and version of the collector (e.g. ssc-1.0.1-kafka) + * @param encoding usually "UTF-8" + * @param hostname the actual host the collector was running on */ final case class Source( name: String, @@ -69,26 +121,41 @@ object CollectorPayload { hostname: Option[String] ) - /** Context derived by the collector. 
*/ + /** + * Information *derived* by the collector to be used as meta-data (meta-payload) + * Everything else in [[CollectorPayload]] is directly payload (body and queryparams) + * @param timestamp collector_tstamp (not optional in fact) + * @param ipAddress client's IP address, can be later overwritten by `ip` param in + * `enrichments.Transform` + * @param useragent UA header, can be later overwritten by `ua` param in `enrichments.Transform` + * @param refererUri extracted from corresponding HTTP header + * @param headers all headers, including UA and referer URI + * @param userId generated by collector-set third-party cookie + */ final case class Context( - timestamp: Option[DateTime], // Must have a timestamp + timestamp: Option[DateTime], ipAddress: Option[String], useragent: Option[String], refererUri: Option[String], - headers: List[String], // Could be empty - userId: Option[UUID] // User ID generated by collector-set third-party cookie + headers: List[String], + userId: Option[UUID] ) - /** Define the vendor and version of the payload. */ - final case class Api(vendor: String, version: String) + /** + * Define the vendor and version of the payload, defined by collector endpoint + * Coming from [[com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry]] + */ + final case class Api(vendor: String, version: String) { - // Defaults for the tracker vendor and version before we implemented this into Snowplow. - // TODO: make private once the ThriftLoader is updated - val SnowplowTp1 = Api("com.snowplowanalytics.snowplow", "tp1") + /** Reverse back to collector's endpoint */ + def toRaw: String = if (this == SnowplowTp1) "/i" else s"$vendor/$version" + } + + /** Defaults for the tracker vendor and version before we implemented this into Snowplow */ + val SnowplowTp1: Api = Api("com.snowplowanalytics.snowplow", "tp1") // To extract the API vendor and version from the the path to the requested object. 
- // TODO: move this to somewhere not specific to this collector - private val ApiPathRegex = """^[\/]?([^\/]+)\/([^\/]+)[\/]?$""".r + private val ApiPathRegex = """^[/]?([^/]+)/([^/]+)[/]?$""".r /** * Parses the requested URI path to determine the specific API version this payload follows. @@ -115,4 +182,7 @@ object CollectorPayload { path.startsWith("/ice.png") || // Legacy name for /i path.equals("/i") || // Legacy name for /com.snowplowanalytics.snowplow/tp1 path.startsWith("/i?") + + /** Thrift serializer, used for tests and debugging with `toThrift` */ + private[loaders] lazy val serializer = new TSerializer() } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala index c3456b7d5..71479efec 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/loaders/ThriftLoader.scala @@ -41,7 +41,7 @@ import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, ParseError = object ThriftLoader extends Loader[Array[Byte]] { private val thriftDeserializer = new TDeserializer - private val ExpectedSchema = + private[loaders] val ExpectedSchema = SchemaCriterion("com.snowplowanalytics.snowplow", "CollectorPayload", "thrift", 1, 0) /** Parse Error -> Collector Payload violation */ @@ -133,7 +133,7 @@ object ThriftLoader extends Loader[Array[Byte]] { val headers = Option(collectorPayload.headers).map(_.asScala.toList).getOrElse(Nil) - val ip = IpAddressExtractor.extractIpAddress(headers, collectorPayload.ipAddress).some // Required + val ip = Option(IpAddressExtractor.extractIpAddress(headers, collectorPayload.ipAddress)) // Required val api = Option(collectorPayload.path) match { case None => @@ -196,7 +196,7 @@ object ThriftLoader extends Loader[Array[Byte]] { val 
headers = Option(snowplowRawEvent.headers).map(_.asScala.toList).getOrElse(Nil) - val ip = IpAddressExtractor.extractIpAddress(headers, snowplowRawEvent.ipAddress).some // Required + val ip = Option(IpAddressExtractor.extractIpAddress(headers, snowplowRawEvent.ipAddress)) // Required (querystring.toValidatedNel, networkUserId).mapN { (q, nuid) => val timestamp = Some(new DateTime(snowplowRawEvent.timestamp, DateTimeZone.UTC)) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala index 0b6da214f..253f0494a 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/CollectorPayloadSpec.scala @@ -12,10 +12,23 @@ */ package com.snowplowanalytics.snowplow.enrich.common.loaders +import cats.syntax.option._ + +import org.apache.http.NameValuePair +import org.apache.http.message.BasicNameValuePair +import org.apache.thrift.TSerializer + +import org.joda.time.{DateTimeZone, LocalDate} + +import org.scalacheck.{Arbitrary, Gen} + +import org.specs2.ScalaCheck import org.specs2.mutable.Specification -import org.specs2.matcher.DataTables +import org.specs2.matcher.{DataTables, ValidatedMatchers} + +import com.snowplowanalytics.snowplow.badrows.Processor -class CollectorPayloadSpec extends Specification with DataTables { +class CollectorPayloadSpec extends Specification with DataTables with ScalaCheck with ValidatedMatchers { // TODO: let's abstract this up to a CollectorApi.parse test // (then we can make isIceRequest private again). 
@@ -31,4 +44,74 @@ class CollectorPayloadSpec extends Specification with DataTables { } } } + + "toThrift" should { + implicit val arbitraryPayload: Arbitrary[CollectorPayload] = + Arbitrary(CollectorPayloadSpec.collectorPayloadGen) + + "be isomorphic to ThriftLoader.toCollectorPayload" >> { + prop { payload: CollectorPayload => + val bytes = CollectorPayloadSpec.thriftSerializer.serialize(payload.toThrift) + val result = ThriftLoader.toCollectorPayload(bytes, Processor("test", "0.0.1")) + result must beValid(Some(payload)) + } + } + } +} + +object CollectorPayloadSpec { + + val thriftSerializer = new TSerializer() + + val apiGen = Gen.oneOf( + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1"), + CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp2"), + CollectorPayload.Api("r", "tp2"), + CollectorPayload.Api("com.snowplowanalytics.iglu", "v1"), + CollectorPayload.Api("com.mailchimp", "v1") + ) + + val nameValuePair = for { + k <- Gen.oneOf("qkey", "key2", "key_3", "key-4", "key 5") + v <- Gen.option(Gen.oneOf("iglu:com.acme/under_score/jsonschema/1-0-3", "foo", "1", "null")) + } yield new BasicNameValuePair(k, v.orNull) + val queryParametersGen: Gen[List[NameValuePair]] = + for { + n <- Gen.chooseNum(0, 4) + list <- Gen.listOfN[NameValuePair](n, nameValuePair) + } yield list + + val contentTypeGen: Gen[String] = Gen.oneOf("text/plain", "application/json", "application/json; encoding=utf-8") + + val source: CollectorPayload.Source = CollectorPayload.Source("host", "UTF-8", "localhost".some) + + val localDateGen: Gen[LocalDate] = Gen.calendar.map(LocalDate.fromCalendarFields) + val ipGen: Gen[String] = for { + part1 <- Gen.choose(2, 255) + part2 <- Gen.choose(0, 255) + part3 <- Gen.choose(0, 255) + part4 <- Gen.choose(0, 255) + } yield s"$part1.$part2.$part3.$part4" + val headerGen: Gen[String] = for { + first <- Gen.asciiPrintableStr.map(_.capitalize) + second <- Gen.option(Gen.asciiPrintableStr.map(_.capitalize)) + key = second.fold(first)(s => 
s"$first-$s") + value <- Gen.identifier + } yield s"$key: $value" + val contextGen: Gen[CollectorPayload.Context] = for { + timestamp <- localDateGen.map(_.toDateTimeAtStartOfDay(DateTimeZone.UTC)).map(Option.apply) + ip <- Gen.option(ipGen) + userAgent <- Gen.option(Gen.identifier) + headersN <- Gen.chooseNum(0, 8) + headers <- Gen.listOfN(headersN, headerGen) + userId <- Gen.option(Gen.uuid) + } yield CollectorPayload.Context(timestamp, ip, userAgent, None, headers, userId) + + val collectorPayloadGen: Gen[CollectorPayload] = for { + api <- apiGen + kvlist <- queryParametersGen + contentType <- Gen.option(contentTypeGen) + body <- Gen.option(Gen.asciiPrintableStr.suchThat(_.nonEmpty)) + context <- contextGen + } yield CollectorPayload(api, kvlist, contentType, body, source, context) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala index 4a8c0e81c..13e8817dd 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/loaders/ThriftLoaderSpec.scala @@ -10,171 +10,255 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. 
*/ -package com.snowplowanalytics.snowplow.enrich.common -package loaders +package com.snowplowanalytics.snowplow.enrich.common.loaders import java.util.UUID import cats.data.NonEmptyList import cats.syntax.option._ -import com.snowplowanalytics.snowplow.badrows._ - import org.apache.commons.codec.binary.Base64 - import org.joda.time.DateTime -import org.specs2.{ScalaCheck, Specification} -import org.specs2.matcher.{DataTables, ValidatedMatchers} +import com.snowplowanalytics.snowplow.badrows.{BadRow, Failure, FailureDetails, Payload, Processor} -import SpecHelpers._ +import org.specs2.ScalaCheck +import org.specs2.mutable.Specification +import org.specs2.matcher.ValidatedMatchers -class ThriftLoaderSpec extends Specification with DataTables with ValidatedMatchers with ScalaCheck { - val Process = Processor("ThriftLoaderSpec", "v1") +import com.snowplowanalytics.snowplow.enrich.common.SpecHelpers.toNameValuePairs +import com.snowplowanalytics.snowplow.enrich.common.loaders.ThriftLoaderSpec._ - def is = s2""" - toCollectorPayload should return a CollectorPayload for a valid Thrift CollectorPayload (even if parameterless) $e1 - toCollectorPayload should return a Validation Failure for an invalid or corrupted Thrift CollectorPayload $e2 - """ +class ThriftLoaderSpec extends Specification with ValidatedMatchers with ScalaCheck { + "toCollectorPayload" should { + "tolerate fake tracker protocol GET parameters" >> { + val raw = + 
"CgABAAABQ5iGqAYLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAkxMjcuMC4wLjEMACkIAAEAAAABCAACAAAAAQsAAwAAABh0ZXN0UGFyYW09MyZ0ZXN0UGFyYW0yPTQACwAtAAAACTEyNy4wLjAuMQsAMgAAAGhNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNg8ARgsAAAAIAAAAL0Nvb2tpZTogc3A9YzVmM2EwOWYtNzVmOC00MzA5LWJlYzUtZmVhNTYwZjc4NDU1AAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAJEFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZSwgc2RjaAAAAHRVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNgAAAFZBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksIGltYWdlL3dlYnAsICovKjtxPTAuOAAAABhDYWNoZS1Db250cm9sOiBtYXgtYWdlPTAAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAABRIb3N0OiAxMjcuMC4wLjE6ODA4MAsAUAAAACRjNWYzYTA5Zi03NWY4LTQzMDktYmVjNS1mZWE1NjBmNzg0NTUA" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) - object Expected { - val encoding = "UTF-8" - val collector = "ssc-0.0.1-Stdout" // Note we have since fixed -stdout to be lowercase - val api = CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1") - } + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-16T00:49:58.278+00:00").some, + ipAddress = "127.0.0.1".some, + useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36".some, + refererUri = None, + headers = List( + "Cookie: sp=c5f3a09f-75f8-4309-bec5-fea560f78455", + "Accept-Language: en-US, en", + "Accept-Encoding: gzip, deflate, sdch", + "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36", + "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, */*;q=0.8", + "Cache-Control: max-age=0", + "Connection: keep-alive", + "Host: 127.0.0.1:8080" + ), + userId = 
UUID.fromString("c5f3a09f-75f8-4309-bec5-fea560f78455").some + ) + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs("testParam" -> "3", "testParam2" -> "4"), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "127.0.0.1".some), + context = context + ) - def e1 = - "SPEC NAME" || "RAW" | "EXP. TIMESTAMP" | "EXP. PAYLOAD" | "EXP. HOSTNAME" | "EXP. IP ADDRESS" | "EXP. USER AGENT" | "EXP. REFERER URI" | "EXP. HEADERS" | "EXP. USER ID" | - "Fake params" !! "CgABAAABQ5iGqAYLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAkxMjcuMC4wLjEMACkIAAEAAAABCAACAAAAAQsAAwAAABh0ZXN0UGFyYW09MyZ0ZXN0UGFyYW0yPTQACwAtAAAACTEyNy4wLjAuMQsAMgAAAGhNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNg8ARgsAAAAIAAAAL0Nvb2tpZTogc3A9YzVmM2EwOWYtNzVmOC00MzA5LWJlYzUtZmVhNTYwZjc4NDU1AAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAJEFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZSwgc2RjaAAAAHRVc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoWDExOyBMaW51eCB4ODZfNjQpIEFwcGxlV2ViS2l0LzUzNy4zNiAoS0hUTUwsIGxpa2UgR2Vja28pIENocm9tZS8zMS4wLjE2NTAuNjMgU2FmYXJpLzUzNy4zNgAAAFZBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksIGltYWdlL3dlYnAsICovKjtxPTAuOAAAABhDYWNoZS1Db250cm9sOiBtYXgtYWdlPTAAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAABRIb3N0OiAxMjcuMC4wLjE6ODA4MAsAUAAAACRjNWYzYTA5Zi03NWY4LTQzMDktYmVjNS1mZWE1NjBmNzg0NTUA" ! - DateTime.parse("2014-01-16T00:49:58.278+00:00") ! toNameValuePairs( - "testParam" -> "3", - "testParam2" -> "4" - ) ! "127.0.0.1".some ! "127.0.0.1".some ! "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36".some ! None ! 
List( - "Cookie: sp=c5f3a09f-75f8-4309-bec5-fea560f78455", - "Accept-Language: en-US, en", - "Accept-Encoding: gzip, deflate, sdch", - "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36", - "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, image/webp, */*;q=0.8", - "Cache-Control: max-age=0", - "Connection: keep-alive", - "Host: 127.0.0.1:8080" - ) ! UUID.fromString("c5f3a09f-75f8-4309-bec5-fea560f78455").some | - "Page ping" !! "CgABAAABQ9pNXggLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACZmU9cHAmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJnBwX21peD0wJnBwX21heD0wJnBwX21peT0wJnBwX21heT0wJmNvPSU3QiUyMnBhZ2UlMjI6JTdCJTIycGFnZV90eXBlJTIyOiUyMnRlc3QlMjIsJTIybGFzdF91cGRhdGVkJHRtcyUyMjoxMzkzMzcyODAwMDAwJTdELCUyMnVzZXIlMjI6JTdCJTIydXNlcl90eXBlJTIyOiUyMnRlc3RlciUyMiU3RCU3RCZkdG09MTM5MDkzNjkzODg1NSZ0aWQ9Nzk3NzQzJnZwPTI1NjB4OTYxJmRzPTI1NjB4OTYxJnZpZD03JmR1aWQ9M2MxNzU3NTQ0ZTM5YmNhNCZwPW1vYiZ0dj1qcy0wLjEzLjEmZnA9MjY5NTkzMDgwMyZhaWQ9Q0ZlMjNhJmxhbmc9ZW4tVVMmY3M9VVRGLTgmdHo9RXVyb3BlL0xvbmRvbiZ1aWQ9YWxleCsxMjMmZl9wZGY9MCZmX3F0PTEmZl9yZWFscD0wJmZfd21hPTAmZl9kaXI9MCZmX2ZsYT0xJmZfamF2YT0wJmZfZ2VhcnM9MCZmX2FnPTAmcmVzPTI1NjB4MTQ0MCZjZD0yNCZjb29raWU9MSZ1cmw9ZmlsZTovL2ZpbGU6Ly8vVXNlcnMvYWxleC9EZXZlbG9wbWVudC9kZXYtZW52aXJvbm1lbnQvZGVtby8xLXRyYWNrZXIvZXZlbnRzLmh0bWwvb3ZlcnJpZGRlbi11cmwvAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAcAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAAnBDb29raWU6IF9fdXRtYT0xMTE4NzIyODEuODc4MDg0NDg3LjEzOTAyMzcxMDcuMTM5MDg0ODQ4Ny4xMzkwOTMxNTIxLjY7IF9fdXRtej0xMTE4NzIyODEuMTM5MDIzNzEwNy4xLjEudXRtY3NyPShkaXJlY3QpfHV0bWNjbj0oZGlyZWN0KXx1dG1jbWQ9KG5vbmUpOyBfc3BfaWQuMWZmZj1iODlhNmZhNjMxZWVmYWMyLjEzOTAyMzcxMDcuNi4xMzkwOTMxNTQ1LjEzOTA4NDg2NDE7IGhibGlkPUNQamp1aHZGMDV6a3RQN0o3TTVWbzNOSUdQTEp5MVNGOyBvbGZzaz1vbGZzazU2Mjk
yMzYzNTYxNzU1NDsgX191dG1jPTExMTg3MjI4MTsgd2NzaWQ9dU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9va2x2PTEzOTA5MzE1ODU0NDUlMkN1TWxvZzFRSlZEN2p1aEZaN001Vm9CQ3lQUHlpQnlTUzsgX29rPTk3NTItNTAzLTEwLTUyMjc7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MDkzMTUyMTEyMyUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQAAAAeQWNjZXB0LUVuY29kaW5nOiBnemlwLCBkZWZsYXRlAAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAK0FjY2VwdDogaW1hZ2UvcG5nLCBpbWFnZS8qO3E9MC44LCAqLyo7cT0wLjUAAABdVXNlci1BZ2VudDogTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wAAAAFEhvc3Q6IGxvY2FsaG9zdDo0MDAxCwBQAAAAJDc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NAA=" ! - DateTime.parse("2014-01-28T19:22:20.040+00:00") ! toNameValuePairs( - "e" -> "pp", - "page" -> "Asynchronous website/webapp examples for snowplow.js", - "pp_mix" -> "0", - "pp_max" -> "0", - "pp_miy" -> "0", - "pp_may" -> "0", - "co" -> """{"page":{"page_type":"test","last_updated$tms":1393372800000},"user":{"user_type":"tester"}}""", - "dtm" -> "1390936938855", - "tid" -> "797743", - "vp" -> "2560x961", - "ds" -> "2560x961", - "vid" -> "7", - "duid" -> "3c1757544e39bca4", - "p" -> "mob", - "tv" -> "js-0.13.1", - "fp" -> "2695930803", - "aid" -> "CFe23a", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "Europe/London", - "uid" -> "alex 123", - "f_pdf" -> "0", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "0", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "2560x1440", - "cd" -> "24", - "cookie" -> "1", - "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" - ) ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! 
List( - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: image/png, image/*;q=0.8, */*;q=0.5", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some | - "Unstructured event" !! "CgABAAABQ9qNGa4LABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACeWU9dWUmdWVfbmE9Vmlld2VkK1Byb2R1Y3QmdWVfcHI9JTdCJTIycHJvZHVjdF9pZCUyMjolMjJBU08wMTA0MyUyMiwlMjJjYXRlZ29yeSUyMjolMjJEcmVzc2VzJTIyLCUyMmJyYW5kJTIyOiUyMkFDTUUlMjIsJTIycmV0dXJuaW5nJTIyOnRydWUsJTIycHJpY2UlMjI6NDkuOTUsJTIyc2l6ZXMlMjI6JTVCJTIyeHMlMjIsJTIycyUyMiwlMjJsJTIyLCUyMnhsJTIyLCUyMnh4bCUyMiU1RCwlMjJhdmFpbGFibGVfc2luY2UkZHQlMjI6MTU4MDElN0QmZHRtPTEzOTA5NDExMTUyNjMmdGlkPTY0NzYxNSZ2cD0yNTYweDk2MSZkcz0yNTYweDk2MSZ2aWQ9OCZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD1tb2ImdHY9anMtMC4xMy4xJmZwPTI2OTU5MzA4MDMmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0yNTYweDE0NDAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGE
vNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" ! - DateTime.parse("2014-01-28T20:31:56.846+00:00") ! 
toNameValuePairs( - "e" -> "ue", - "ue_na" -> "Viewed Product", - "ue_pr" -> """{"product_id":"ASO01043","category":"Dresses","brand":"ACME","returning":true,"price":49.95,"sizes":["xs","s","l","xl","xxl"],"available_since$dt":15801}""", - "dtm" -> "1390941115263", - "tid" -> "647615", - "vp" -> "2560x961", - "ds" -> "2560x961", - "vid" -> "8", - "duid" -> "3c1757544e39bca4", - "p" -> "mob", - "tv" -> "js-0.13.1", - "fp" -> "2695930803", - "aid" -> "CFe23a", - "lang" -> "en-US", - "cs" -> "UTF-8", - "tz" -> "Europe/London", - "uid" -> "alex 123", - "f_pdf" -> "0", - "f_qt" -> "1", - "f_realp" -> "0", - "f_wma" -> "0", - "f_dir" -> "0", - "f_fla" -> "1", - "f_java" -> "0", - "f_gears" -> "0", - "f_ag" -> "0", - "res" -> "2560x1440", - "cd" -> "24", - "cookie" -> "1", - "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" - ) ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! List( - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: image/png, image/*;q=0.8, */*;q=0.5", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! 
UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some | - "Parameterless" !! "CgABAAABQ9o8zYULABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAgAAAAYQ2FjaGUtQ29udHJvbDogbWF4LWFnZT0wAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAAEpBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksICovKjtxPTAuOAAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" ! - DateTime.parse( - "2014-01-28T19:04:14.469+00:00" - ) ! toNameValuePairs() ! "localhost".some ! "10.0.2.2".some ! "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some ! None ! 
List( - "Cache-Control: max-age=0", - "Connection: keep-alive", - "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", - "Accept-Encoding: gzip, deflate", - "Accept-Language: en-US, en", - "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", - "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", - "Host: localhost:4001" - ) ! UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some |> { - - (_, raw, timestamp, payload, hostname, ipAddress, userAgent, refererUri, headers, userId) => - val canonicalEvent = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) - - val expected = CollectorPayload( - api = Expected.api, - querystring = payload, - body = None, - contentType = None, - source = CollectorPayload.Source(Expected.collector, Expected.encoding, hostname), - context = CollectorPayload - .Context(timestamp.some, ipAddress, userAgent, refererUri, headers, userId) - ) - - canonicalEvent must beValid(expected.some) + result must beValid(expected.some) } + "parse valid page ping GET payload" >> { + val raw = + 
"CgABAAABQ9pNXggLABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACZmU9cHAmcGFnZT1Bc3luY2hyb25vdXMrd2Vic2l0ZS93ZWJhcHArZXhhbXBsZXMrZm9yK3Nub3dwbG93LmpzJnBwX21peD0wJnBwX21heD0wJnBwX21peT0wJnBwX21heT0wJmNvPSU3QiUyMnBhZ2UlMjI6JTdCJTIycGFnZV90eXBlJTIyOiUyMnRlc3QlMjIsJTIybGFzdF91cGRhdGVkJHRtcyUyMjoxMzkzMzcyODAwMDAwJTdELCUyMnVzZXIlMjI6JTdCJTIydXNlcl90eXBlJTIyOiUyMnRlc3RlciUyMiU3RCU3RCZkdG09MTM5MDkzNjkzODg1NSZ0aWQ9Nzk3NzQzJnZwPTI1NjB4OTYxJmRzPTI1NjB4OTYxJnZpZD03JmR1aWQ9M2MxNzU3NTQ0ZTM5YmNhNCZwPW1vYiZ0dj1qcy0wLjEzLjEmZnA9MjY5NTkzMDgwMyZhaWQ9Q0ZlMjNhJmxhbmc9ZW4tVVMmY3M9VVRGLTgmdHo9RXVyb3BlL0xvbmRvbiZ1aWQ9YWxleCsxMjMmZl9wZGY9MCZmX3F0PTEmZl9yZWFscD0wJmZfd21hPTAmZl9kaXI9MCZmX2ZsYT0xJmZfamF2YT0wJmZfZ2VhcnM9MCZmX2FnPTAmcmVzPTI1NjB4MTQ0MCZjZD0yNCZjb29raWU9MSZ1cmw9ZmlsZTovL2ZpbGU6Ly8vVXNlcnMvYWxleC9EZXZlbG9wbWVudC9kZXYtZW52aXJvbm1lbnQvZGVtby8xLXRyYWNrZXIvZXZlbnRzLmh0bWwvb3ZlcnJpZGRlbi11cmwvAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAcAAAAWQ29ubmVjdGlvbjoga2VlcC1hbGl2ZQAAAnBDb29raWU6IF9fdXRtYT0xMTE4NzIyODEuODc4MDg0NDg3LjEzOTAyMzcxMDcuMTM5MDg0ODQ4Ny4xMzkwOTMxNTIxLjY7IF9fdXRtej0xMTE4NzIyODEuMTM5MDIzNzEwNy4xLjEudXRtY3NyPShkaXJlY3QpfHV0bWNjbj0oZGlyZWN0KXx1dG1jbWQ9KG5vbmUpOyBfc3BfaWQuMWZmZj1iODlhNmZhNjMxZWVmYWMyLjEzOTAyMzcxMDcuNi4xMzkwOTMxNTQ1LjEzOTA4NDg2NDE7IGhibGlkPUNQamp1aHZGMDV6a3RQN0o3TTVWbzNOSUdQTEp5MVNGOyBvbGZzaz1vbGZzazU2MjkyMzYzNTYxNzU1NDsgX191dG1jPTExMTg3MjI4MTsgd2NzaWQ9dU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9va2x2PTEzOTA5MzE1ODU0NDUlMkN1TWxvZzFRSlZEN2p1aEZaN001Vm9CQ3lQUHlpQnlTUzsgX29rPTk3NTItNTAzLTEwLTUyMjc7IF9va2JrPWNkNCUzRHRydWUlMkN2aTUlM0QwJTJDdmk0JTNEMTM5MDkzMTUyMTEyMyUyQ3ZpMyUzRGFjdGl2ZSUyQ3ZpMiUzRGZhbHNlJTJDdmkxJTNEZmFsc2UlMkNjZDglM0RjaGF0JTJDY2Q2JTNEMCUyQ2NkNSUzRGF3YXklMkNjZDMlM0RmYWxzZSUyQ2NkMiUzRDAlMkNjZDElM0QwJTJDOyBzcD03NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQAAAAeQWNjZXB0LUVuY29kaW5nOiBnemlwLCBkZWZsYXR
lAAAAGkFjY2VwdC1MYW5ndWFnZTogZW4tVVMsIGVuAAAAK0FjY2VwdDogaW1hZ2UvcG5nLCBpbWFnZS8qO3E9MC44LCAqLyo7cT0wLjUAAABdVXNlci1BZ2VudDogTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wAAAAFEhvc3Q6IGxvY2FsaG9zdDo0MDAxCwBQAAAAJDc1YTEzNTgzLTVjOTktNDBlMy04MWZjLTU0MTA4NGRmYzc4NAA=" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T19:22:20.040+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: image/png, image/*;q=0.8, */*;q=0.5", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs( + "e" -> "pp", + "page" -> "Asynchronous website/webapp examples for snowplow.js", + "pp_mix" -> "0", + "pp_max" -> "0", + "pp_miy" -> "0", + "pp_may" -> "0", + "co" -> 
"""{"page":{"page_type":"test","last_updated$tms":1393372800000},"user":{"user_type":"tester"}}""", + "dtm" -> "1390936938855", + "tid" -> "797743", + "vp" -> "2560x961", + "ds" -> "2560x961", + "vid" -> "7", + "duid" -> "3c1757544e39bca4", + "p" -> "mob", + "tv" -> "js-0.13.1", + "fp" -> "2695930803", + "aid" -> "CFe23a", + "lang" -> "en-US", + "cs" -> "UTF-8", + "tz" -> "Europe/London", + "uid" -> "alex 123", + "f_pdf" -> "0", + "f_qt" -> "1", + "f_realp" -> "0", + "f_wma" -> "0", + "f_dir" -> "0", + "f_fla" -> "1", + "f_java" -> "0", + "f_gears" -> "0", + "f_ag" -> "0", + "res" -> "2560x1440", + "cd" -> "24", + "cookie" -> "1", + "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" + ), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "parse valid unstructured event GET payload" >> { + val raw = + 
"CgABAAABQ9qNGa4LABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABCwADAAACeWU9dWUmdWVfbmE9Vmlld2VkK1Byb2R1Y3QmdWVfcHI9JTdCJTIycHJvZHVjdF9pZCUyMjolMjJBU08wMTA0MyUyMiwlMjJjYXRlZ29yeSUyMjolMjJEcmVzc2VzJTIyLCUyMmJyYW5kJTIyOiUyMkFDTUUlMjIsJTIycmV0dXJuaW5nJTIyOnRydWUsJTIycHJpY2UlMjI6NDkuOTUsJTIyc2l6ZXMlMjI6JTVCJTIyeHMlMjIsJTIycyUyMiwlMjJsJTIyLCUyMnhsJTIyLCUyMnh4bCUyMiU1RCwlMjJhdmFpbGFibGVfc2luY2UkZHQlMjI6MTU4MDElN0QmZHRtPTEzOTA5NDExMTUyNjMmdGlkPTY0NzYxNSZ2cD0yNTYweDk2MSZkcz0yNTYweDk2MSZ2aWQ9OCZkdWlkPTNjMTc1NzU0NGUzOWJjYTQmcD1tb2ImdHY9anMtMC4xMy4xJmZwPTI2OTU5MzA4MDMmYWlkPUNGZTIzYSZsYW5nPWVuLVVTJmNzPVVURi04JnR6PUV1cm9wZS9Mb25kb24mdWlkPWFsZXgrMTIzJmZfcGRmPTAmZl9xdD0xJmZfcmVhbHA9MCZmX3dtYT0wJmZfZGlyPTAmZl9mbGE9MSZmX2phdmE9MCZmX2dlYXJzPTAmZl9hZz0wJnJlcz0yNTYweDE0NDAmY2Q9MjQmY29va2llPTEmdXJsPWZpbGU6Ly9maWxlOi8vL1VzZXJzL2FsZXgvRGV2ZWxvcG1lbnQvZGV2LWVudmlyb25tZW50L2RlbW8vMS10cmFja2VyL2V2ZW50cy5odG1sL292ZXJyaWRkZW4tdXJsLwALAC0AAAAJbG9jYWxob3N0CwAyAAAAUU1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMA8ARgsAAAAHAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmN
vZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAACtBY2NlcHQ6IGltYWdlL3BuZywgaW1hZ2UvKjtxPTAuOCwgKi8qO3E9MC41AAAAXVVzZXItQWdlbnQ6IE1vemlsbGEvNS4wIChNYWNpbnRvc2g7IEludGVsIE1hYyBPUyBYIDEwLjk7IHJ2OjI2LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvMjYuMAAAABRIb3N0OiBsb2NhbGhvc3Q6NDAwMQsAUAAAACQ3NWExMzU4My01Yzk5LTQwZTMtODFmYy01NDEwODRkZmM3ODQA" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T20:31:56.846+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: image/png, image/*;q=0.8, */*;q=0.5", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs( + "e" -> "ue", + "ue_na" -> "Viewed Product", + "ue_pr" -> 
"""{"product_id":"ASO01043","category":"Dresses","brand":"ACME","returning":true,"price":49.95,"sizes":["xs","s","l","xl","xxl"],"available_since$dt":15801}""", + "dtm" -> "1390941115263", + "tid" -> "647615", + "vp" -> "2560x961", + "ds" -> "2560x961", + "vid" -> "8", + "duid" -> "3c1757544e39bca4", + "p" -> "mob", + "tv" -> "js-0.13.1", + "fp" -> "2695930803", + "aid" -> "CFe23a", + "lang" -> "en-US", + "cs" -> "UTF-8", + "tz" -> "Europe/London", + "uid" -> "alex 123", + "f_pdf" -> "0", + "f_qt" -> "1", + "f_realp" -> "0", + "f_wma" -> "0", + "f_dir" -> "0", + "f_fla" -> "1", + "f_java" -> "0", + "f_gears" -> "0", + "f_ag" -> "0", + "res" -> "2560x1440", + "cd" -> "24", + "cookie" -> "1", + "url" -> "file://file:///Users/alex/Development/dev-environment/demo/1-tracker/events.html/overridden-url/" + ), + body = None, + contentType = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "parse valid parameterless payload" >> { + val raw = + 
"CgABAAABQ9o8zYULABQAAAAQc3NjLTAuMC4xLVN0ZG91dAsAHgAAAAVVVEYtOAsAKAAAAAgxMC4wLjIuMgwAKQgAAQAAAAEIAAIAAAABAAsALQAAAAlsb2NhbGhvc3QLADIAAABRTW96aWxsYS81LjAgKE1hY2ludG9zaDsgSW50ZWwgTWFjIE9TIFggMTAuOTsgcnY6MjYuMCkgR2Vja28vMjAxMDAxMDEgRmlyZWZveC8yNi4wDwBGCwAAAAgAAAAYQ2FjaGUtQ29udHJvbDogbWF4LWFnZT0wAAAAFkNvbm5lY3Rpb246IGtlZXAtYWxpdmUAAAJwQ29va2llOiBfX3V0bWE9MTExODcyMjgxLjg3ODA4NDQ4Ny4xMzkwMjM3MTA3LjEzOTA4NDg0ODcuMTM5MDkzMTUyMS42OyBfX3V0bXo9MTExODcyMjgxLjEzOTAyMzcxMDcuMS4xLnV0bWNzcj0oZGlyZWN0KXx1dG1jY249KGRpcmVjdCl8dXRtY21kPShub25lKTsgX3NwX2lkLjFmZmY9Yjg5YTZmYTYzMWVlZmFjMi4xMzkwMjM3MTA3LjYuMTM5MDkzMTU0NS4xMzkwODQ4NjQxOyBoYmxpZD1DUGpqdWh2RjA1emt0UDdKN001Vm8zTklHUExKeTFTRjsgb2xmc2s9b2xmc2s1NjI5MjM2MzU2MTc1NTQ7IF9fdXRtYz0xMTE4NzIyODE7IHdjc2lkPXVNbG9nMVFKVkQ3anVoRlo3TTVWb0JDeVBQeWlCeVNTOyBfb2tsdj0xMzkwOTMxNTg1NDQ1JTJDdU1sb2cxUUpWRDdqdWhGWjdNNVZvQkN5UFB5aUJ5U1M7IF9vaz05NzUyLTUwMy0xMC01MjI3OyBfb2tiaz1jZDQlM0R0cnVlJTJDdmk1JTNEMCUyQ3ZpNCUzRDEzOTA5MzE1MjExMjMlMkN2aTMlM0RhY3RpdmUlMkN2aTIlM0RmYWxzZSUyQ3ZpMSUzRGZhbHNlJTJDY2Q4JTNEY2hhdCUyQ2NkNiUzRDAlMkNjZDUlM0Rhd2F5JTJDY2QzJTNEZmFsc2UlMkNjZDIlM0QwJTJDY2QxJTNEMCUyQzsgc3A9NzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AAAAHkFjY2VwdC1FbmNvZGluZzogZ3ppcCwgZGVmbGF0ZQAAABpBY2NlcHQtTGFuZ3VhZ2U6IGVuLVVTLCBlbgAAAEpBY2NlcHQ6IHRleHQvaHRtbCwgYXBwbGljYXRpb24veGh0bWwreG1sLCBhcHBsaWNhdGlvbi94bWw7cT0wLjksICovKjtxPTAuOAAAAF1Vc2VyLUFnZW50OiBNb3ppbGxhLzUuMCAoTWFjaW50b3NoOyBJbnRlbCBNYWMgT1MgWCAxMC45OyBydjoyNi4wKSBHZWNrby8yMDEwMDEwMSBGaXJlZm94LzI2LjAAAAAUSG9zdDogbG9jYWxob3N0OjQwMDELAFAAAAAkNzVhMTM1ODMtNWM5OS00MGUzLTgxZmMtNTQxMDg0ZGZjNzg0AA==" + val result = ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), ThriftLoaderSpec.Process) + + val context = CollectorPayload.Context( + timestamp = DateTime.parse("2014-01-28T19:04:14.469+00:00").some, + ipAddress = "10.0.2.2".some, + useragent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0".some, + refererUri = None, + headers = List( + "Cache-Control: max-age=0", + 
"Connection: keep-alive", + "Cookie: __utma=111872281.878084487.1390237107.1390848487.1390931521.6; __utmz=111872281.1390237107.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _sp_id.1fff=b89a6fa631eefac2.1390237107.6.1390931545.1390848641; hblid=CPjjuhvF05zktP7J7M5Vo3NIGPLJy1SF; olfsk=olfsk562923635617554; __utmc=111872281; wcsid=uMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _oklv=1390931585445%2CuMlog1QJVD7juhFZ7M5VoBCyPPyiBySS; _ok=9752-503-10-5227; _okbk=cd4%3Dtrue%2Cvi5%3D0%2Cvi4%3D1390931521123%2Cvi3%3Dactive%2Cvi2%3Dfalse%2Cvi1%3Dfalse%2Ccd8%3Dchat%2Ccd6%3D0%2Ccd5%3Daway%2Ccd3%3Dfalse%2Ccd2%3D0%2Ccd1%3D0%2C; sp=75a13583-5c99-40e3-81fc-541084dfc784", + "Accept-Encoding: gzip, deflate", + "Accept-Language: en-US, en", + "Accept: text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0", + "Host: localhost:4001" + ), + userId = UUID.fromString("75a13583-5c99-40e3-81fc-541084dfc784").some + ) + + val expected = CollectorPayload( + api = ThriftLoaderSpec.Api, + querystring = toNameValuePairs(), + contentType = None, + body = None, + source = CollectorPayload.Source(ThriftLoaderSpec.Collector, ThriftLoaderSpec.Encoding, "localhost".some), + context = context + ) + + result must beValid(expected.some) + } + + "fail to parse random bytes" >> { + prop { (raw: String) => + ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) must beInvalid.like { + case NonEmptyList( + BadRow.CPFormatViolation( + Process, + Failure.CPFormatViolation(_, "thrift", f), + Payload.RawPayload(_) + ), + List() + ) => + (f must beEqualTo(violation1byte)) or (f must beEqualTo(violation2bytes)) + } + } + } + } +} + +object ThriftLoaderSpec { + val Encoding = "UTF-8" + val Collector = "ssc-0.0.1-Stdout" // Note we have since fixed -stdout to be lowercase + val Api = CollectorPayload.Api("com.snowplowanalytics.snowplow", "tp1") + val Process = Processor("ThriftLoaderSpec", "v1") + val 
DeserializeMessage = + "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" + val violation1byte: FailureDetails.CPFormatViolationMessage = FailureDetails.CPFormatViolationMessage.Fallback( "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" @@ -183,22 +267,4 @@ class ThriftLoaderSpec extends Specification with DataTables with ValidatedMatch FailureDetails.CPFormatViolationMessage.Fallback( "error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)" ) - - // A bit of fun: the chances of generating a valid Thrift CollectorPayload at random are - // so low that we can just use ScalaCheck here - def e2 = - prop { (raw: String) => - ThriftLoader.toCollectorPayload(Base64.decodeBase64(raw), Process) must beInvalid.like { - case NonEmptyList( - BadRow.CPFormatViolation( - Process, - Failure.CPFormatViolation(_, "thrift", f), - Payload.RawPayload(_) - ), - List() - ) => - (f must beEqualTo(violation1byte)) or (f must beEqualTo(violation2bytes)) - } - } - }