Skip to content

Commit

Permalink
Add Pii Enrichment tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dilyand committed Sep 16, 2020
1 parent efac498 commit 087c2c5
Show file tree
Hide file tree
Showing 5 changed files with 361 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
},
"emailAddress2": {
"type": "string"
},
"emailAddress3": {
"type": "string"
}
},
"required": ["emailAddress", "emailAddress2"],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Schema for acme stuff",
"self": {
"vendor": "com.acme",
"name": "email_sent",
"format": "jsonschema",
"version": "1-1-0"
},
"type": "object",
"properties": {
"emailAddress": {
"type": "string"
},
"emailAddress2": {
"type": "string"
},
"emailAddress3": {
"type": ["string", "null"]
}
},
"required": ["emailAddress", "emailAddress2"],
"additionalProperties": false
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
"type": "string"
}
}
},
"field4": {
"type": "string",
"maxLength": 64
}
},
"required": ["field"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,23 @@ package enrichments
import cats.Id
import cats.implicits._
import cats.data.NonEmptyList

import io.circe.literal._

import org.joda.time.DateTime

import com.snowplowanalytics.snowplow.badrows._
import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer}

import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer}
import loaders._
import adapters.RawEvent
import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.pii.{
JsonMutators,
PiiJson,
PiiPseudonymizerEnrichment,
PiiStrategyPseudonymize
}
import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
import utils.Clock._
import utils.ConversionUtils
import enrichments.registry.{IabEnrichment, JavascriptScriptEnrichment, YauaaEnrichment}

import org.apache.commons.codec.digest.DigestUtils
import org.specs2.mutable.Specification
import org.specs2.matcher.EitherMatchers

Expand Down Expand Up @@ -87,7 +89,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers {
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org",
"emailAddress3": "foo@bar.org"
"unallowedAdditionalField": "foo@bar.org"
}
}
}"""
Expand Down Expand Up @@ -267,6 +269,314 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers {
enriched.value must beRight
}

// The unstruct event carries an empty-string emailAddress3, which is the field targeted by
// the PII pseudonymizer; enrichment is still expected to succeed (Right).
"emit an EnrichedEvent if a PII value that needs to be hashed is an empty string" >> {
  // Contexts payload: a single email_sent 1-0-0 entity without emailAddress3.
  val contextsJson = """
{
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org"
}
}
]
}
"""
  // Unstruct event payload: email_sent 1-0-0 whose emailAddress3 is "".
  val unstructEventJson = """
{
"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
"data":{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org",
"emailAddress3": ""
}
}
}"""
  val parameters = Map(
    "e" -> "ue",
    "tv" -> "js-0.13.1",
    "p" -> "web",
    "co" -> contextsJson,
    "ue_pr" -> unstructEventJson
  )
  val rawEvent = RawEvent(api, parameters, None, source, context)

  // Pseudonymize $.emailAddress3 of email_sent 1-0-0 found in the unstruct event.
  val piiField = PiiJson(
    fieldMutator = JsonMutators("unstruct_event"),
    schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0),
    jsonPath = "$.emailAddress3"
  )
  // The label matches the digest actually applied by hashFunction (SHA-256, not MD5).
  val strategy = PiiStrategyPseudonymize(
    "SHA-256",
    hashFunction = DigestUtils.sha256Hex(_: Array[Byte]),
    "pepper123"
  )
  val enrichmentReg = EnrichmentRegistry[Id](
    piiPseudonymizer = PiiPseudonymizerEnrichment(List(piiField), false, strategy).some
  )

  val enriched = EnrichmentManager.enrichEvent(
    enrichmentReg,
    client,
    processor,
    timestamp,
    rawEvent
  )
  enriched.value must beRight
}

// The unstruct event carries a null emailAddress3, the field named by the PII JSON path;
// enrichment is expected to succeed (Right).
//
// NOTE(review): the unstruct event below references email_sent 2-0-0 while the PII criterion
// targets 1-0-0. If SchemaCriterion requires an exact version match, the pseudonymizer never
// touches the null emailAddress3 and this example passes without exercising the null path —
// confirm, and align the criterion with the event's schema version if so.
"emit an EnrichedEvent if a PII value that needs to be hashed is null" >> {
  // Contexts payload: a single email_sent 1-0-0 entity without emailAddress3.
  val contextsJson = """
{
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org"
}
}
]
}
"""
  // Unstruct event payload: email_sent 2-0-0 whose emailAddress3 is null.
  val unstructEventJson = """
{
"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
"data":{
"schema":"iglu:com.acme/email_sent/jsonschema/2-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org",
"emailAddress3": null
}
}
}"""
  val parameters = Map(
    "e" -> "ue",
    "tv" -> "js-0.13.1",
    "p" -> "web",
    "co" -> contextsJson,
    "ue_pr" -> unstructEventJson
  )
  val rawEvent = RawEvent(api, parameters, None, source, context)

  val piiField = PiiJson(
    fieldMutator = JsonMutators("unstruct_event"),
    schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0),
    jsonPath = "$.emailAddress3"
  )
  // The label matches the digest actually applied by hashFunction (SHA-256, not MD5).
  val strategy = PiiStrategyPseudonymize(
    "SHA-256",
    hashFunction = DigestUtils.sha256Hex(_: Array[Byte]),
    "pepper123"
  )
  val enrichmentReg = EnrichmentRegistry[Id](
    piiPseudonymizer = PiiPseudonymizerEnrichment(List(piiField), false, strategy).some
  )

  val enriched = EnrichmentManager.enrichEvent(
    enrichmentReg,
    client,
    processor,
    timestamp,
    rawEvent
  )
  enriched.value must beRight
}

// The unstruct event carries an empty JSON object in emailAddress3, the field targeted by the
// PII pseudonymizer; enrichment is expected to fail (Left).
// NOTE(review): presumably the failure comes from {} not satisfying the type declared for
// emailAddress3 in the email_sent schema — confirm against the schema file.
"fail to emit an EnrichedEvent if a PII value that needs to be hashed is an empty object" >> {
  // Contexts payload: a single email_sent 1-0-0 entity without emailAddress3.
  val contextsJson = """
{
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org"
}
}
]
}
"""
  // Unstruct event payload: email_sent 1-0-0 whose emailAddress3 is {}.
  val unstructEventJson = """
{
"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
"data":{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org",
"emailAddress3": {}
}
}
}"""
  val parameters = Map(
    "e" -> "ue",
    "tv" -> "js-0.13.1",
    "p" -> "web",
    "co" -> contextsJson,
    "ue_pr" -> unstructEventJson
  )
  val rawEvent = RawEvent(api, parameters, None, source, context)

  // Pseudonymize $.emailAddress3 of email_sent 1-0-0 found in the unstruct event.
  val piiField = PiiJson(
    fieldMutator = JsonMutators("unstruct_event"),
    schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0),
    jsonPath = "$.emailAddress3"
  )
  // The label matches the digest actually applied by hashFunction (SHA-256, not MD5).
  val strategy = PiiStrategyPseudonymize(
    "SHA-256",
    hashFunction = DigestUtils.sha256Hex(_: Array[Byte]),
    "pepper123"
  )
  val enrichmentReg = EnrichmentRegistry[Id](
    piiPseudonymizer = PiiPseudonymizerEnrichment(List(piiField), false, strategy).some
  )

  val enriched = EnrichmentManager.enrichEvent(
    enrichmentReg,
    client,
    processor,
    timestamp,
    rawEvent
  )
  enriched.value must beLeft
}

// The contexts payload carries an empty JSON object in emailAddress3 and the PII pseudonymizer
// is configured for the "contexts" field; enrichment is expected to fail (Left).
// NOTE(review): presumably the failure comes from {} not satisfying the type declared for
// emailAddress3 in the email_sent schema — confirm against the schema file.
"fail to emit an EnrichedEvent if a context PII value that needs to be hashed is an empty object" >> {
  // Contexts payload: a single email_sent 1-0-0 entity whose emailAddress3 is {}.
  val contextsJson = """
{
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org",
"emailAddress3": {}
}
}
]
}
"""
  // Unstruct event payload: email_sent 1-0-0 without emailAddress3.
  val unstructEventJson = """
{
"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
"data":{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org"
}
}
}"""
  val parameters = Map(
    "e" -> "ue",
    "tv" -> "js-0.13.1",
    "p" -> "web",
    "co" -> contextsJson,
    "ue_pr" -> unstructEventJson
  )
  val rawEvent = RawEvent(api, parameters, None, source, context)

  // Pseudonymize $.emailAddress3 of email_sent 1-0-0 found in the contexts.
  val piiField = PiiJson(
    fieldMutator = JsonMutators("contexts"),
    schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0),
    jsonPath = "$.emailAddress3"
  )
  // The label matches the digest actually applied by hashFunction (SHA-256, not MD5).
  val strategy = PiiStrategyPseudonymize(
    "SHA-256",
    hashFunction = DigestUtils.sha256Hex(_: Array[Byte]),
    "pepper123"
  )
  val enrichmentReg = EnrichmentRegistry[Id](
    piiPseudonymizer = PiiPseudonymizerEnrichment(List(piiField), false, strategy).some
  )

  // A val, like the sibling examples: the result is read exactly once.
  val enriched = EnrichmentManager.enrichEvent(
    enrichmentReg,
    client,
    processor,
    timestamp,
    rawEvent
  )
  enriched.value must beLeft
}

// emailAddress3 is targeted by the PII pseudonymizer in both the contexts and the unstruct
// event. It is {} in the contexts and "" in the unstruct event; enrichment is expected to
// fail (Left).
// NOTE(review): presumably the failure comes from {} not satisfying the type declared for
// emailAddress3 in the email_sent schema — confirm against the schema file.
"fail to emit an EnrichedEvent if a PII value needs to be hashed in both co and ue and is invalid in one of them" >> {
  // Contexts payload: a single email_sent 1-0-0 entity whose emailAddress3 is {}.
  val contextsJson = """
{
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org",
"emailAddress3": {}
}
}
]
}
"""
  // Unstruct event payload: email_sent 1-0-0 whose emailAddress3 is "".
  val unstructEventJson = """
{
"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
"data":{
"schema":"iglu:com.acme/email_sent/jsonschema/1-0-0",
"data": {
"emailAddress": "hello@world.com",
"emailAddress2": "foo@bar.org",
"emailAddress3": ""
}
}
}"""
  val parameters = Map(
    "e" -> "ue",
    "tv" -> "js-0.13.1",
    "p" -> "web",
    "co" -> contextsJson,
    "ue_pr" -> unstructEventJson
  )
  val rawEvent = RawEvent(api, parameters, None, source, context)

  // Same criterion and JSON path for both locations; only the mutated field differs.
  val contextsPii = PiiJson(
    fieldMutator = JsonMutators("contexts"),
    schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0),
    jsonPath = "$.emailAddress3"
  )
  val unstructEventPii = PiiJson(
    fieldMutator = JsonMutators("unstruct_event"),
    schemaCriterion = SchemaCriterion("com.acme", "email_sent", "jsonschema", 1, 0, 0),
    jsonPath = "$.emailAddress3"
  )
  // The label matches the digest actually applied by hashFunction (SHA-256, not MD5).
  val strategy = PiiStrategyPseudonymize(
    "SHA-256",
    hashFunction = DigestUtils.sha256Hex(_: Array[Byte]),
    "pepper123"
  )
  val enrichmentReg = EnrichmentRegistry[Id](
    piiPseudonymizer = PiiPseudonymizerEnrichment(
      List(contextsPii, unstructEventPii),
      false,
      strategy
    ).some
  )

  // A val, like the sibling examples: the result is read exactly once.
  val enriched = EnrichmentManager.enrichEvent(
    enrichmentReg,
    client,
    processor,
    timestamp,
    rawEvent
  )
  enriched.value must beLeft
}

"have a preference of 'ua' query string parameter over user agent of HTTP header" >> {
val qs_ua = "Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0"
val parameters = Map(
Expand Down
Loading

0 comments on commit 087c2c5

Please sign in to comment.