diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala index 9c5aaa2e6..1282281e8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/pii/PiiPseudonymizerEnrichment.scala @@ -23,7 +23,8 @@ import io.circe.jackson._ import io.circe.syntax._ import com.fasterxml.jackson.databind.JsonNode -import com.fasterxml.jackson.databind.node.{ArrayNode, ObjectNode, TextNode} +import com.fasterxml.jackson.databind.node.{ArrayNode, NullNode, ObjectNode, TextNode} +import com.fasterxml.jackson.databind.ObjectMapper import com.jayway.jsonpath.{Configuration, JsonPath => JJsonPath} import com.jayway.jsonpath.MapFunction @@ -132,6 +133,25 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment { .get(fieldName) .map(_.asRight) .getOrElse(s"The specified json field $fieldName is not supported".asLeft) + + /** Helper to remove fields that were wrongly added and are not in the original JSON. See #351. 
*/ + private[pii] def removeAddedFields(hashed: Json, original: Json): Json = { + val fixedObject = for { + hashedFields <- hashed.asObject + originalFields <- original.asObject + newFields = hashedFields.toList.flatMap { + case (k, v) => originalFields(k).map(origV => (k, removeAddedFields(v, origV))) + } + } yield Json.fromFields(newFields) + + lazy val fixedArray = for { + hashedArr <- hashed.asArray + originalArr <- original.asArray + newArr = hashedArr.zip(originalArr).map { case (hashed, orig) => removeAddedFields(hashed, orig) } + } yield Json.fromValues(newArr) + + fixedObject.orElse(fixedArray).getOrElse(hashed) + } } /** @@ -203,7 +223,8 @@ final case class PiiJson( ) } .getOrElse((parsed, List.empty[JsonModifiedField])) - } yield (substituted.noSpaces, modifiedFields.toList)).getOrElse((null, List.empty)) + } yield (PiiPseudonymizerEnrichment.removeAddedFields(substituted, parsed).noSpaces, modifiedFields.toList)) + .getOrElse((null, List.empty)) /** Map context top fields with strategy if they match. 
*/ private def mapContextTopFields(tuple: (String, Json), strategy: PiiStrategy): (String, (Json, List[JsonModifiedField])) = @@ -264,15 +285,15 @@ final case class PiiJson( val objectNode = io.circe.jackson.mapper.valueToTree[ObjectNode](json) val documentContext = JJsonPath.using(JsonPathConf).parse(objectNode) val modifiedFields = MutableList[JsonModifiedField]() - val documentContext2 = documentContext.map( - jsonPath, - new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema) - ) - // make sure it is a structure preserving method, see #3636 - //val transformedJValue = JsonMethods.fromJsonNode(documentContext.json[JsonNode]()) - //val Diff(_, erroneouslyAdded, _) = jValue diff transformedJValue - //val Diff(_, withoutCruft, _) = erroneouslyAdded diff transformedJValue - (jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList) + Option(documentContext.read[AnyRef](jsonPath)) match { // check that json object not null + case None => (jacksonToCirce(documentContext.json[JsonNode]()), modifiedFields.toList) + case _ => + val documentContext2 = documentContext.map( + jsonPath, + new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema) + ) + (jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList) + } } } @@ -290,7 +311,9 @@ private final case class ScrambleMapFunction( val _ = modifiedFields += JsonModifiedField(fieldName, s, newValue, jsonPath, schema) newValue case a: ArrayNode => - a.elements.asScala.map { + val mapper = new ObjectMapper() + val arr = mapper.createArrayNode() + a.elements.asScala.foreach { case t: TextNode => val originalValue = t.asText() val newValue = strategy.scramble(originalValue) @@ -301,9 +324,11 @@ private final case class ScrambleMapFunction( jsonPath, schema ) - newValue - case default: AnyRef => default + arr.add(newValue) + case default: AnyRef => arr.add(default) + case null => arr.add(NullNode.getInstance()) } - case default: 
AnyRef => default + arr + case _ => currentValue } } diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 index 087d4e6cd..18dd216f5 100644 --- a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 @@ -14,6 +14,9 @@ }, "emailAddress2": { "type": "string" + }, + "emailAddress3": { + "type": "string" } }, "required": ["emailAddress", "emailAddress2"], diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 new file mode 100644 index 000000000..eca4ca19d --- /dev/null +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 @@ -0,0 +1,24 @@ +{ + "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema for acme stuff", + "self": { + "vendor": "com.acme", + "name": "email_sent", + "format": "jsonschema", + "version": "2-0-0" + }, + "type": "object", + "properties": { + "emailAddress": { + "type": "string" + }, + "emailAddress2": { + "type": "string" + }, + "emailAddress3": { + "type": ["string", "null"] + } + }, + "required": ["emailAddress", "emailAddress2"], + "additionalProperties": false +} diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 new file mode 100644 index 000000000..b2310754d --- /dev/null +++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.test/array/jsonschema/1-0-0 @@ -0,0 +1,39 @@ +{ + "$schema": 
"http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#", + "description": "Schema to test scrambling of array in PII enrichment", + "self": { + "vendor": "com.test", + "name": "array", + "format": "jsonschema", + "version": "1-0-0" + }, + "type": "object", + "properties": { + "field": { + "type": "array", + "items": { + "type": ["string", "null" ] + } + }, + "field2": { + "type": ["string", "null"] + }, + "field3": { + "type": ["object", "null"], + "properties": { + "a": { + "type": "string" + }, + "b": { + "type": "string" + } + } + }, + "field4": { + "type": "string", + "maxLength": 64 + } + }, + "required": ["field"], + "additionalProperties": false +} diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala index 2d8527e2f..7e71e3b46 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/EtlPipelineSpec.scala @@ -22,6 +22,7 @@ import com.snowplowanalytics.iglu.client.resolver.registries.Registry import com.snowplowanalytics.iglu.client.validator.CirceValidator import com.snowplowanalytics.snowplow.badrows.Processor +import com.snowplowanalytics.snowplow.badrows.BadRow import org.apache.thrift.TSerializer @@ -44,6 +45,8 @@ class EtlPipelineSpec extends Specification with ValidatedMatchers { def is = s2""" EtlPipeline should always produce either bad or good row for each event of the payload $e1 Processing of events with malformed query string should be supported $e2 + Processing of invalid CollectorPayload (CPFormatViolation bad row) should be supported $e3 + Absence of CollectorPayload (None) should be supported $e4 """ val adapterRegistry = new AdapterRegistry() @@ -88,6 +91,33 @@ class EtlPipelineSpec extends Specification with 
ValidatedMatchers { case res => ko(s"[$res] doesn't contain one enriched event") } } + + def e3 = { + val invalidCollectorPayload = ThriftLoader.toCollectorPayload(Array(1.toByte), processor) + EtlPipeline.processEvents[Id]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + invalidCollectorPayload + ) must be like { + case Validated.Invalid(_: BadRow.CPFormatViolation) :: Nil => ok + case other => ko(s"One invalid CPFormatViolation expected, got ${other}") + } + } + + def e4 = { + val collectorPayload: Option[CollectorPayload] = None + EtlPipeline.processEvents[Id]( + adapterRegistry, + enrichmentReg, + client, + processor, + dateTime, + collectorPayload.validNel[BadRow] + ) must beEqualTo(Nil) + } } object EtlPipelineSpec { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index bbf15578c..7fd1a4289 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -17,21 +17,23 @@ package enrichments import cats.Id import cats.implicits._ import cats.data.NonEmptyList - import io.circe.literal._ - import org.joda.time.DateTime - import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} - +import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer} import loaders._ import adapters.RawEvent +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.{ + JsonMutators, + PiiJson, + PiiPseudonymizerEnrichment, + PiiStrategyPseudonymize +} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import utils.Clock._ import utils.ConversionUtils import 
enrichments.registry.{IabEnrichment, JavascriptScriptEnrichment, YauaaEnrichment} - +import org.apache.commons.codec.digest.DigestUtils import org.specs2.mutable.Specification import org.specs2.matcher.EitherMatchers @@ -87,7 +89,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { "data": { "emailAddress": "hello@world.com", "emailAddress2": "foo@bar.org", - "emailAddress3": "foo@bar.org" + "unallowedAdditionalField": "foo@bar.org" } } }""" @@ -267,6 +269,314 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { enriched.value must beRight } + "emit an EnrichedEvent if a PII value that needs to be hashed is an empty string" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": "" + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beRight + } + + "emit an EnrichedEvent if a PII 
value that needs to be hashed is null" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/2-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": null + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 2, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beRight + } + + "fail to emit an EnrichedEvent if a PII value that needs to be hashed is an empty object" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + 
"emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + val enriched = EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + + "fail to emit an EnrichedEvent if a context PII value that needs to be hashed is an empty object" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org" + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + def enriched = + EnrichmentManager.enrichEvent( + 
enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + + "fail to emit an EnrichedEvent if a PII value needs to be hashed in both co and ue and is invalid in one of them" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "co" -> """ + { + "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data": [ + { + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": {} + } + } + ] + } + """, + "ue_pr" -> """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "emailAddress3": "" + } + } + }""" + ) + val rawEvent = RawEvent(api, parameters, None, source, context) + val enrichmentReg = EnrichmentRegistry[Id]( + piiPseudonymizer = PiiPseudonymizerEnrichment( + List( + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ), + PiiJson( + fieldMutator = JsonMutators("unstruct_event"), + schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0), + jsonPath = "$.emailAddress3" + ) + ), + false, + PiiStrategyPseudonymize( + "MD5", + hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), + "pepper123" + ) + ).some + ) + def enriched = + EnrichmentManager.enrichEvent( + enrichmentReg, + client, + processor, + timestamp, + rawEvent + ) + enriched.value must beLeft + } + "have a preference of 'ua' query string parameter over user agent of HTTP header" >> { val qs_ua = "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0" val parameters = Map( @@ -353,6 +663,27 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers { 
EnrichmentManager.getIabContext(input, iabEnrichment) must beRight.like { case ctx => ctx must beSome } } } + + "getCollectorVersionSet" should { + "return an enrichment failure if v_collector is null or empty" >> { + val input = new EnrichedEvent() + EnrichmentManager.getCollectorVersionSet(input) must beLeft.like { + case _: FailureDetails.EnrichmentFailure => ok + case other => ko(s"expected EnrichmentFailure but got $other") + } + input.v_collector = "" + EnrichmentManager.getCollectorVersionSet(input) must beLeft.like { + case _: FailureDetails.EnrichmentFailure => ok + case other => ko(s"expected EnrichmentFailure but got $other") + } + } + + "return Unit if v_collector is set" >> { + val input = new EnrichedEvent() + input.v_collector = "v42" + EnrichmentManager.getCollectorVersionSet(input) must beRight(()) + } + } } object EnrichmentManagerSpec { diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala index 60d14e364..32afa53a5 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/pii/PiiPseudonymizerEnrichmentSpec.scala @@ -37,6 +37,7 @@ import com.snowplowanalytics.snowplow.badrows.{BadRow, Processor} import com.snowplowanalytics.snowplow.enrich.common.{EtlPipeline, SpecHelpers} import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentRegistry import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.IpLookupsEnrichment +import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.CampaignAttributionEnrichment import com.snowplowanalytics.snowplow.enrich.common.adapters.AdapterRegistry 
import com.snowplowanalytics.snowplow.enrich.common.loaders._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -55,6 +56,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher Hashing configured JSON fields in POJO should silently ignore unsupported types $e6 Hashing configured JSON and scalar fields in POJO emits a correct pii_transformation event $e7 Hashing configured JSON fields in POJO should not create new fields $e8 + removeAddedFields should remove fields added by PII enrichment $e9 """ def commonSetup(enrichmentReg: EnrichmentRegistry[Id]): List[Validated[BadRow, EnrichedEvent]] = { @@ -76,6 +78,17 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher "uid" -> "john@acme.com", "ip" -> "70.46.123.145", "fp" -> "its_you_again!", + "url" -> "http://foo.bar?utm_term=hello&utm_content=world&msclkid=500&_sp=duid", + "dnuid" -> "gfhdgjfgndf", + "nuid" -> "kuykyfkfykukfuy", + "tr_id" -> "t5465463", + "ti_id" -> "6546b56356b354bbv", + "se_ca" -> "super category", + "se_ac" -> "great action", + "se_la" -> "awesome label", + "se_pr" -> "good property", + "duid" -> "786d1b69-a603-4eb8-9178-fed2a195a1ed", + "sid" -> "87857856-a603-4eb8-9178-fed2a195a1ed", "co" -> """ |{ @@ -100,6 +113,15 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher | "someInt": 1 | }, | "schema": "iglu:com.acme/email_sent/jsonschema/1-1-0" + | }, + | { + | "schema": "iglu:com.test/array/jsonschema/1-0-0", + | "data": { + | "field" : ["hello", "world"], + | "field2" : null, + | "field3": null, + | "field4": "" + | } | } | ] |} @@ -178,19 +200,75 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher IpLookupsEnrichment.parse(js, schemaKey, true).toOption.get.enrichment[Id] } + private val campaignAttributionEnrichment = { + val js = json"""{ + "enabled": true, + "parameters": { + "mapping": "static", + "fields": { + "mktMedium": ["utm_medium"], + 
"mktSource": ["utm_source"], + "mktTerm": ["utm_term"], + "mktContent": ["utm_content"], + "mktCampaign": ["utm_campaign"] + } + } + }""" + val schemaKey = SchemaKey( + "com.snowplowanalytics.snowplow", + "campaign_attribution", + "jsonschema", + SchemaVer.Full(1, 0, 1) + ) + CampaignAttributionEnrichment.parse(js, schemaKey).toOption.get.enrichment + } + def e1 = { val enrichmentReg = EnrichmentRegistry[Id]( ipLookups = ipEnrichment.some, + campaignAttribution = campaignAttributionEnrichment.some, piiPseudonymizer = PiiPseudonymizerEnrichment( List( PiiScalar(fieldMutator = ScalarMutators("user_id")), PiiScalar( fieldMutator = ScalarMutators("user_ipaddress") ), - PiiScalar(fieldMutator = ScalarMutators("ip_domain")), PiiScalar( fieldMutator = ScalarMutators("user_fingerprint") - ) + ), + PiiScalar( + fieldMutator = ScalarMutators("domain_userid") + ), + PiiScalar( + fieldMutator = ScalarMutators("network_userid") + ), + PiiScalar( + fieldMutator = ScalarMutators("ip_organization") + ), + PiiScalar( + fieldMutator = ScalarMutators("ip_domain") + ), + PiiScalar( + fieldMutator = ScalarMutators("tr_orderid") + ), + PiiScalar( + fieldMutator = ScalarMutators("ti_orderid") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_term") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_clickid") + ), + PiiScalar( + fieldMutator = ScalarMutators("mkt_content") + ), + PiiScalar(fieldMutator = ScalarMutators("se_category")), + PiiScalar(fieldMutator = ScalarMutators("se_action")), + PiiScalar(fieldMutator = ScalarMutators("se_label")), + PiiScalar(fieldMutator = ScalarMutators("se_property")), + PiiScalar(fieldMutator = ScalarMutators("refr_domain_userid")), + PiiScalar(fieldMutator = ScalarMutators("domain_sessionid")) ), false, PiiStrategyPseudonymize( @@ -203,24 +281,52 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher val output = commonSetup(enrichmentReg) val expected = new EnrichedEvent() expected.app_id = "ads" - 
expected.user_id = "7d8a4beae5bc9d314600667d2f410918f9af265017a6ade99f60a9c8f3aac6e9" - expected.user_ipaddress = "dd9720903c89ae891ed5c74bb7a9f2f90f6487927ac99afe73b096ad0287f3f5" - expected.ip_domain = null - expected.user_fingerprint = "27abac60dff12792c6088b8d00ce7f25c86b396b8c3740480cd18e21068ecff4" expected.geo_city = null expected.etl_tstamp = "1970-01-18 08:40:00.000" expected.collector_tstamp = "2017-07-14 03:39:39.000" + expected.user_id = "7d8a4beae5bc9d314600667d2f410918f9af265017a6ade99f60a9c8f3aac6e9" + expected.user_ipaddress = "dd9720903c89ae891ed5c74bb7a9f2f90f6487927ac99afe73b096ad0287f3f5" + expected.user_fingerprint = "27abac60dff12792c6088b8d00ce7f25c86b396b8c3740480cd18e21068ecff4" + expected.domain_userid = "e97d86d49b16397e8fd654b32a0ed03cfe3a4d8d867d913620ce08e3ca855d6d" + expected.network_userid = "47453d3c4428207d22005463bb3d945b137f9342d445b7114776e88311bbe648" + expected.ip_organization = "4d5dd7eebeb9d47f9ebff5993502c0380a110c34711ef5062fdb84a563759f3b" + expected.ip_domain = null + expected.tr_orderid = "5139219b15f3d1ab0c5056296cf5246eeb0b934ee5d1c96cb2027e694005bbce" + expected.ti_orderid = "326c0bfc5857f21695406ebd93068341c9f2d975cf00d117479e01e9012e196c" + expected.mkt_term = "b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb" + expected.mkt_clickid = "fae3733fa03cdf57d82e89ac63026afd8782d07ba3c918acb415a4343457785f" + expected.mkt_content = "8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" + expected.se_category = "f33daec1ed4cb688f4f1762390735fd78f6a06083f855422a7303ed63707c962" + expected.se_action = "53f3e1ca4a0dccce4a1b2900a6bcfd21b22a0f444253067e2fe022948a0b3be7" + expected.se_label = "b243defc0d3b86333a104fb2b3a2f43371b8d73359c429b9177dfc5bb3840efd" + expected.se_property = "eb19004c52cd4557aacfa0b30035160c417c3a6a5fad44b96f03c9e2bebaf0b3" + expected.refr_domain_userid = "f3e68fd96eaef0cafc1257ec7132b4b3dbae20b1073155531f909999e5da9b2c" + expected.domain_sessionid = 
"7378a72b0183f456df98453b2ff9ed5685206a67f312edb099dc74aed76e1b34" val size = output.size must_== 1 val validOut = output.head must beValid.like { case enrichedEvent => (enrichedEvent.app_id must_== expected.app_id) and + (enrichedEvent.geo_city must_== expected.geo_city) and + (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and + (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) and (enrichedEvent.user_id must_== expected.user_id) and (enrichedEvent.user_ipaddress must_== expected.user_ipaddress) and - (enrichedEvent.ip_domain must_== expected.ip_domain) and (enrichedEvent.user_fingerprint must_== expected.user_fingerprint) and - (enrichedEvent.geo_city must_== expected.geo_city) and - (enrichedEvent.etl_tstamp must_== expected.etl_tstamp) and - (enrichedEvent.collector_tstamp must_== expected.collector_tstamp) + (enrichedEvent.domain_userid must_== expected.domain_userid) and + (enrichedEvent.network_userid must_== expected.network_userid) and + (enrichedEvent.ip_organization must_== expected.ip_organization) and + (enrichedEvent.ip_domain must_== expected.ip_domain) and + (enrichedEvent.tr_orderid must_== expected.tr_orderid) and + (enrichedEvent.ti_orderid must_== expected.ti_orderid) and + (enrichedEvent.mkt_term must_== expected.mkt_term) and + (enrichedEvent.mkt_clickid must_== expected.mkt_clickid) and + (enrichedEvent.mkt_content must_== expected.mkt_content) and + (enrichedEvent.se_category must_== expected.se_category) and + (enrichedEvent.se_action must_== expected.se_action) and + (enrichedEvent.se_label must_== expected.se_label) and + (enrichedEvent.se_property must_== expected.se_property) and + (enrichedEvent.refr_domain_userid must_== expected.refr_domain_userid) and + (enrichedEvent.domain_sessionid must_== expected.domain_sessionid) } size and validOut } @@ -240,15 +346,35 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher schemaCriterion = SchemaCriterion("com.acme", "email_sent", 
"jsonschema", 1, 1, 0), jsonPath = "$.data.emailAddress2" ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field" + ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field2" + ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field3.a" + ), PiiJson( fieldMutator = JsonMutators("unstruct_event"), schemaCriterion = SchemaCriterion("com.mailgun", "message_clicked", "jsonschema", 1, 0, 0), jsonPath = "$.ip" + ), + PiiJson( + fieldMutator = JsonMutators("contexts"), + schemaCriterion = SchemaCriterion("com.test", "array", "jsonschema", 1, 0, 0), + jsonPath = "$.field4" ) ), false, PiiStrategyPseudonymize( - "SHA-256", + "MD5", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -271,10 +397,10 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor val contextJFirstElement = contextJ.downField("data").downArray val contextJSecondElement = contextJFirstElement.right + val contextJThirdElement = contextJSecondElement.right val unstructEventJ = parse(enrichedEvent.unstruct_event).toOption.get.hcursor .downField("data") .downField("data") - val first = (contextJFirstElement .downField("data") .get[String]("emailAddress") must beRight( @@ -311,7 +437,29 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher )) and (unstructEventJ.get[String]("myVar2") must beRight("awesome")) - first and second + val third = (contextJThirdElement + .downField("data") + .get[List[String]]("field") must + beRight( + List[String]("b62f3a2475ac957009088f9b8ab77ceb7b4ed7c5a6fd920daa204a1953334acb", + 
"8ad32723b7435cbf535025e519cc94dbf1568e17ced2aeb4b9e7941f6346d7d0" + ) + )) and + (contextJThirdElement + .downField("data") + .downField("field2") + .focus must beSome.like { case json => json.isNull }) and + (contextJThirdElement + .downField("data") + .downField("field3") + .focus must beSome.like { case json => json.isNull }) + + // Test that empty string in Pii field gets hashed + val fourth = contextJThirdElement + .downField("data") + .get[String]("field4") must beRight("7a3477dad66e666bd203b834c54b6dfe8b546bdbc5283462ad14052abfb06600") + + first and second and third and fourth } size and validOut @@ -330,7 +478,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( - "SHA-256", + "SHA-384", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -375,7 +523,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( - "SHA-256", + "SHA-512", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -423,7 +571,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ), false, PiiStrategyPseudonymize( - "SHA-256", + "MD-2", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123" ) @@ -593,30 +741,68 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidatedMatcher ).some ) val output = commonSetup(enrichmentReg) - val expected = new EnrichedEvent() - expected.app_id = "ads" - expected.user_id = "john@acme.com" - expected.user_ipaddress = "70.46.123.145" - expected.ip_domain = null - expected.user_fingerprint = "its_you_again!" 
- expected.geo_city = "Delray Beach" - expected.etl_tstamp = "1970-01-18 08:40:00.000" - expected.collector_tstamp = "2017-07-14 03:39:39.000" val size = output.size must_== 1 val validOut = output.head must beValid.like { case enrichedEvent => - val contextJ = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data") - val firstElem = contextJ.downArray.downField("data") - val secondElem = contextJ.downArray.right.downField("data") + val context = parse(enrichedEvent.contexts).toOption.get.hcursor.downField("data").downArray + val data = context.downField("data") - (firstElem.get[String]("emailAddress") must beRight( - "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6" - )) and - (firstElem.downField("data").get[String]("nonExistentEmailAddress") must beLeft) and - (firstElem.get[String]("emailAddress2") must beRight("bob@acme.com")) and - (secondElem.get[String]("emailAddress") must beRight("tim@acme.com")) and - (secondElem.get[String]("emailAddress2") must beRight("tom@acme.com")) + val one = data.get[String]("emailAddress") must beRight("72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6") + val two = data.get[String]("emailAddress2") must beRight("bob@acme.com") + val three = data.downField("nonExistentEmailAddress").focus must beNone + + one and two and three } size and validOut } + + def e9 = { + val orig = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "foo@bar.com", + "emailAddress2" : "bob@acme.com" + } + } + ] + } + """ + + val hashed = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6", + "emailAddress2" : "bob@acme.com", + "nonExistentEmailAddress" : {} + 
} + } + ] + } + """ + + val expected = json""" + { + "schema" : "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0", + "data" : [ + { + "schema" : "iglu:com.acme/email_sent/jsonschema/1-0-0", + "data" : { + "emailAddress" : "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6", + "emailAddress2" : "bob@acme.com" + } + } + ] + } + """ + + PiiPseudonymizerEnrichment.removeAddedFields(hashed, orig) must beEqualTo(expected) + } } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala index 5c118448c..e36a02b43 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/outputs/EnrichedEventSpec.scala @@ -56,14 +56,14 @@ class EnrichedEventSpec extends Specification { testField(_.user_ipaddress = "user_ipaddress", _.user_ipaddress) testField(_.user_fingerprint = "user_fingerprint", _.user_fingerprint) testField(_.domain_userid = "domain_userid", _.domain_userid) - testField(_.domain_sessionidx = new JInteger(0), _.domain_sessionidx) + testField(_.domain_sessionidx = JInteger.valueOf(0), _.domain_sessionidx) testField(_.network_userid = "network_userid", _.network_userid) testField(_.geo_country = "geo_country", _.geo_country) testField(_.geo_region = "geo_region", _.geo_region) testField(_.geo_city = "geo_city", _.geo_city) testField(_.geo_zipcode = "geo_zipcode", _.geo_zipcode) - testField(_.geo_latitude = new JFloat(0.0), _.geo_latitude) - testField(_.geo_longitude = new JFloat(0.0), _.geo_longitude) + testField(_.geo_latitude = JFloat.valueOf("0.0"), _.geo_latitude) + testField(_.geo_longitude = JFloat.valueOf("0.0"), _.geo_longitude) testField(_.geo_region_name = "geo_region_name", _.geo_region_name) testField(_.ip_isp = "ip_isp", 
_.ip_isp) testField(_.ip_organization = "ip_organization", _.ip_organization) @@ -74,13 +74,13 @@ class EnrichedEventSpec extends Specification { testField(_.page_referrer = "page_referrer", _.page_referrer) testField(_.page_urlscheme = "page_urlscheme", _.page_urlscheme) testField(_.page_urlhost = "page_urlhost", _.page_urlhost) - testField(_.page_urlport = new JInteger(0), _.page_urlport) + testField(_.page_urlport = JInteger.valueOf(0), _.page_urlport) testField(_.page_urlpath = "page_urlpath", _.page_urlpath) testField(_.page_urlquery = "page_urlquery", _.page_urlquery) testField(_.page_urlfragment = "page_urlfragment", _.page_urlfragment) testField(_.refr_urlscheme = "refr_urlscheme", _.refr_urlscheme) testField(_.refr_urlhost = "refr_urlhost", _.refr_urlhost) - testField(_.refr_urlport = new JInteger(0), _.refr_urlport) + testField(_.refr_urlport = JInteger.valueOf(0), _.refr_urlport) testField(_.refr_urlpath = "refr_urlpath", _.refr_urlpath) testField(_.refr_urlquery = "refr_urlquery", _.refr_urlquery) testField(_.refr_urlfragment = "refr_urlfragment", _.refr_urlfragment) @@ -112,11 +112,11 @@ class EnrichedEventSpec extends Specification { testField(_.ti_name = "ti_name", _.ti_name) testField(_.ti_category = "ti_category", _.ti_category) testField(_.ti_price = "ti_price", _.ti_price) - testField(_.ti_quantity = new JInteger(0), _.ti_quantity) - testField(_.pp_xoffset_min = new JInteger(0), _.pp_xoffset_min) - testField(_.pp_xoffset_max = new JInteger(0), _.pp_xoffset_max) - testField(_.pp_yoffset_min = new JInteger(0), _.pp_yoffset_min) - testField(_.pp_yoffset_max = new JInteger(0), _.pp_yoffset_max) + testField(_.ti_quantity = JInteger.valueOf(0), _.ti_quantity) + testField(_.pp_xoffset_min = JInteger.valueOf(0), _.pp_xoffset_min) + testField(_.pp_xoffset_max = JInteger.valueOf(0), _.pp_xoffset_max) + testField(_.pp_yoffset_min = JInteger.valueOf(0), _.pp_yoffset_min) + testField(_.pp_yoffset_max = JInteger.valueOf(0), _.pp_yoffset_max) 
testField(_.useragent = "useragent", _.useragent) testField(_.br_name = "br_name", _.br_name) testField(_.br_family = "br_family", _.br_family) @@ -124,30 +124,30 @@ class EnrichedEventSpec extends Specification { testField(_.br_type = "br_type", _.br_type) testField(_.br_renderengine = "br_renderengine", _.br_renderengine) testField(_.br_lang = "br_lang", _.br_lang) - testField(_.br_features_pdf = new JByte(Byte.MinValue), _.br_features_pdf) - testField(_.br_features_flash = new JByte(Byte.MinValue), _.br_features_flash) - testField(_.br_features_java = new JByte(Byte.MinValue), _.br_features_java) - testField(_.br_features_director = new JByte(Byte.MinValue), _.br_features_director) - testField(_.br_features_quicktime = new JByte(Byte.MinValue), _.br_features_quicktime) - testField(_.br_features_realplayer = new JByte(Byte.MinValue), _.br_features_realplayer) - testField(_.br_features_windowsmedia = new JByte(Byte.MinValue), _.br_features_windowsmedia) - testField(_.br_features_gears = new JByte(Byte.MinValue), _.br_features_gears) - testField(_.br_features_silverlight = new JByte(Byte.MinValue), _.br_features_silverlight) - testField(_.br_cookies = new JByte(Byte.MinValue), _.br_cookies) + testField(_.br_features_pdf = JByte.valueOf(Byte.MinValue), _.br_features_pdf) + testField(_.br_features_flash = JByte.valueOf(Byte.MinValue), _.br_features_flash) + testField(_.br_features_java = JByte.valueOf(Byte.MinValue), _.br_features_java) + testField(_.br_features_director = JByte.valueOf(Byte.MinValue), _.br_features_director) + testField(_.br_features_quicktime = JByte.valueOf(Byte.MinValue), _.br_features_quicktime) + testField(_.br_features_realplayer = JByte.valueOf(Byte.MinValue), _.br_features_realplayer) + testField(_.br_features_windowsmedia = JByte.valueOf(Byte.MinValue), _.br_features_windowsmedia) + testField(_.br_features_gears = JByte.valueOf(Byte.MinValue), _.br_features_gears) + testField(_.br_features_silverlight = JByte.valueOf(Byte.MinValue), 
_.br_features_silverlight) + testField(_.br_cookies = JByte.valueOf(Byte.MinValue), _.br_cookies) testField(_.br_colordepth = "br_colordepth", _.br_colordepth) - testField(_.br_viewwidth = new JInteger(0), _.br_viewwidth) - testField(_.br_viewheight = new JInteger(0), _.br_viewheight) + testField(_.br_viewwidth = JInteger.valueOf(0), _.br_viewwidth) + testField(_.br_viewheight = JInteger.valueOf(0), _.br_viewheight) testField(_.os_name = "os_name", _.os_name) testField(_.os_family = "os_family", _.os_family) testField(_.os_manufacturer = "os_manufacturer", _.os_manufacturer) testField(_.os_timezone = "os_timezone", _.os_timezone) testField(_.dvce_type = "dvce_type", _.dvce_type) - testField(_.dvce_ismobile = new JByte(Byte.MinValue), _.dvce_ismobile) - testField(_.dvce_screenwidth = new JInteger(0), _.dvce_screenwidth) - testField(_.dvce_screenheight = new JInteger(0), _.dvce_screenheight) + testField(_.dvce_ismobile = JByte.valueOf(Byte.MinValue), _.dvce_ismobile) + testField(_.dvce_screenwidth = JInteger.valueOf(0), _.dvce_screenwidth) + testField(_.dvce_screenheight = JInteger.valueOf(0), _.dvce_screenheight) testField(_.doc_charset = "doc_charset", _.doc_charset) - testField(_.doc_width = new JInteger(0), _.doc_width) - testField(_.doc_height = new JInteger(0), _.doc_height) + testField(_.doc_width = JInteger.valueOf(0), _.doc_width) + testField(_.doc_height = JInteger.valueOf(0), _.doc_height) testField(_.tr_currency = "tr_currency", _.tr_currency) testField(_.tr_total_base = "tr_total_base", _.tr_total_base) testField(_.tr_tax_base = "tr_tax_base", _.tr_tax_base) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala index 5586b55b8..a3e020e18 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala +++ 
b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/JsonUtilsSpec.scala @@ -15,12 +15,19 @@ package utils import org.specs2.Specification +import org.joda.time.format.DateTimeFormat + import io.circe.Json +import cats.data.NonEmptyList + class JsonUtilsSpec extends Specification { def is = s2""" toJson can deal with non-null String $e1 toJson can deal with null String $e2 + toJson can deal with booleans $e3 + toJson can deal with integers $e4 + toJson can deal with dates $e5 """ def e1 = { @@ -36,4 +43,52 @@ class JsonUtilsSpec extends Specification { JsonUtils.toJson(key, value, Nil, Nil, None) must beEqualTo((key, Json.Null)) } + + def e3 = { + val key = "field" + + val truE = "true" + val exp1 = JsonUtils.toJson(key, truE, List(key), Nil, None) must + beEqualTo(key -> Json.True) + + val falsE = "false" + val exp2 = JsonUtils.toJson(key, falsE, List(key), Nil, None) must + beEqualTo(key -> Json.False) + + val foo = "foo" + val exp3 = JsonUtils.toJson(key, foo, List(key), Nil, None) must + beEqualTo(key -> Json.fromString(foo)) + + exp1 and exp2 and exp3 + } + + def e4 = { + val key = "field" + + val number = 123 + val exp1 = JsonUtils.toJson(key, number.toString(), Nil, List(key), None) must + beEqualTo(key -> Json.fromBigInt(number)) + + val notNumber = "abc" + val exp2 = JsonUtils.toJson(key, notNumber, Nil, List(key), None) must + beEqualTo(key -> Json.fromString(notNumber)) + + exp1 and exp2 + } + + def e5 = { + val key = "field" + + val formatter = DateTimeFormat.forPattern("yyyy-MM-dd") + val malformedDate = "2020-09-02" + val correctDate = "2020-09-02T22:00:00.000Z" + + val exp1 = JsonUtils.toJson(key, malformedDate, Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + be !== (key -> Json.fromString(malformedDate)) + + val exp2 = JsonUtils.toJson(key, correctDate, Nil, Nil, Some(NonEmptyList.one(key) -> formatter)) must + beEqualTo(key -> Json.fromString(correctDate)) + + exp1 and exp2 + } } diff --git 
a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala index be3ea91db..5cf98ed60 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala @@ -15,6 +15,7 @@ package utils import java.net.{Inet6Address, InetAddress, URI} import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets import cats.syntax.either._ import cats.syntax.option._ @@ -275,6 +276,49 @@ class ValidateUuidSpec extends Specification with DataTables with ScalaCheck { } } +class ValidateIntegerSpec extends Specification { + def is = s2""" + validateInteger should return the original string if it contains an integer $e1 + validateInteger should return an enrichment failure for a string not containing a valid integer $e2 + """ + + val FieldName = "integer" + + def e1 = ConversionUtils.validateInteger(FieldName, "123") must beRight("123") + + def e2 = { + val str = "abc" + ConversionUtils.validateInteger(FieldName, str) must beLeft( + FailureDetails.EnrichmentFailure( + None, + FailureDetails.EnrichmentFailureMessage.InputData( + FieldName, + Some(str), + "not a valid integer" + ) + ) + ) + } +} + +class DecodeStringSpec extends Specification { + def is = s2""" + decodeString should decode a correctly URL-encoded string $e1 + decodeString should fail decoding a string not correctly URL-encoded $e2 + """ + + val utf8 = StandardCharsets.UTF_8 + + def e1 = { + val clear = "12 ++---=&&3abc%%%34%2234%$#@%^PLLPbgfxbf$#%$@#@^" + val encoded = ConversionUtils.encodeString(utf8.toString(), clear) + ConversionUtils.decodeString(utf8, encoded) must beRight(clear) + } + + def e2 = + ConversionUtils.decodeString(utf8, "%%23") must beLeft +} + class StringToDoubleLikeSpec 
extends Specification with DataTables { def is = s2""" stringToDoublelike should fail if the supplied String is not parseable as a number $e1