From 6d5e838afa06ceba86db50ee7ad909c4a2ed0f6c Mon Sep 17 00:00:00 2001
From: Dilyan Damyanov
Date: Tue, 15 Sep 2020 20:09:57 +0100
Subject: [PATCH] Common: fix PiiPseudonymizerEnrichment for arrays and improve unit tests coverage (close #334)

---
 .../com.acme/email_sent/jsonschema/1-0-0      |  2 +-
 .../com.acme/email_sent/jsonschema/2-0-0      | 24 +++++++
 .../enrichments/EnrichmentManagerSpec.scala   | 65 ++++++++++++++++++-
 3 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0

diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0
index eca4ca19d..18dd216f5 100644
--- a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0
+++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/1-0-0
@@ -16,7 +16,7 @@
       "type": "string"
     },
     "emailAddress3": {
-      "type": ["string", "null"]
+      "type": "string"
     }
   },
   "required": ["emailAddress", "emailAddress2"],
diff --git a/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0 b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0
new file mode 100644
index 000000000..eca4ca19d
--- /dev/null
+++ b/modules/common/src/test/resources/iglu-schemas/schemas/com.acme/email_sent/jsonschema/2-0-0
@@ -0,0 +1,24 @@
+{
+  "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
+  "description": "Schema for acme stuff",
+  "self": {
+    "vendor": "com.acme",
+    "name": "email_sent",
+    "format": "jsonschema",
+    "version": "2-0-0"
+  },
+  "type": "object",
+  "properties": {
+    "emailAddress": {
+      "type": "string"
+    },
+    "emailAddress2": {
+      "type": "string"
+    },
+    "emailAddress3": {
+      "type": ["string", "null"]
+    }
+  },
+  "required": ["emailAddress", "emailAddress2"],
+  "additionalProperties": false
+}
diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala
index e483d0fa6..6a819ebde 100644
--- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala
+++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala
@@ -352,7 +352,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers {
           {
             "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
             "data":{
-              "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
+              "schema":"iglu:com.acme/email_sent/jsonschema/2-0-0",
               "data": {
                 "emailAddress": "hello@world.com",
                 "emailAddress2": "foo@bar.org",
@@ -389,6 +389,69 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers {
       enriched.value must beRight
     }
 
+    "fail to emit an EnrichedEvent if a PII value that needs to be hashed is an empty object" >> {
+      // Schema 1-0-0 of com.acme/email_sent declares emailAddress3 as a plain string, so the
+      // empty-object value below is expected to make enrichment return a Left instead of an
+      // enriched event (presumably rejected at schema validation - confirm).
+      val parameters = Map(
+        "e" -> "ue",
+        "tv" -> "js-0.13.1",
+        "p" -> "web",
+        "co" -> """
+          {
+            "schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
+            "data": [
+              {
+                "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
+                "data": {
+                  "emailAddress": "hello@world.com",
+                  "emailAddress2": "foo@bar.org"
+                }
+              }
+            ]
+          }
+        """,
+        "ue_pr" -> """
+          {
+            "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
+            "data":{
+              "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
+              "data": {
+                "emailAddress": "hello@world.com",
+                "emailAddress2": "foo@bar.org",
+                "emailAddress3": {}
+              }
+            }
+          }"""
+      )
+      val rawEvent = RawEvent(api, parameters, None, source, context)
+      val enrichmentReg = EnrichmentRegistry[Id](
+        piiPseudonymizer = PiiPseudonymizerEnrichment(
+          List(
+            PiiJson(
+              fieldMutator = JsonMutators("unstruct_event"),
+              schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0),
+              jsonPath = "$.emailAddress3"
+            )
+          ),
+          false, // NOTE(review): presumably the emit-identification-event flag - confirm
+          PiiStrategyPseudonymize(
+            "SHA-256", // label kept in line with the sha256Hex hash function passed below
+            hashFunction = DigestUtils.sha256Hex(_: Array[Byte]),
+            "pepper123"
+          )
+        ).some
+      )
+      val enriched = EnrichmentManager.enrichEvent(
+        enrichmentReg,
+        client,
+        processor,
+        timestamp,
+        rawEvent
+      )
+      enriched.value must beLeft
+    }
+
     "have a preference of 'ua' query string parameter over user agent of HTTP header" >> {
       val qs_ua = "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0"
       val parameters = Map(