Skip to content
This repository has been archived by the owner on Jun 4, 2021. It is now read-only.

Commit

Permalink
Fix unnecessarily-created JSON object as a result of the PII Enrichment (closes snowplow/snowplow#3636)
Browse files (browse the repository at this point in the history)
  • Loading branch information
knservis authored and oguzhanunlu committed May 29, 2020
1 parent 5669f67 commit 8169d82
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import scala.collection.mutable.MutableList

// Scala libraries
import org.json4s
import org.json4s.{DefaultFormats, JValue}
import org.json4s.{DefaultFormats, Diff, JValue}
import org.json4s.JsonAST._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods
Expand Down Expand Up @@ -275,7 +275,11 @@ final case class PiiJson(fieldMutator: Mutator, schemaCriterion: SchemaCriterion
val documentContext2 = documentContext.map(
jsonPath,
new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema))
(JsonMethods.fromJsonNode(documentContext2.json[JsonNode]), modifiedFields.toList)
// make sure it is a structure preserving method, see #3636
val transformedJValue = JsonMethods.fromJsonNode(documentContext.json[JsonNode]())
val Diff(_, erroneouslyAdded, _) = jValue diff transformedJValue
val Diff(_, withoutCruft, _) = erroneouslyAdded diff transformedJValue
(withoutCruft, modifiedFields.toList)
}
}

Expand All @@ -295,7 +299,7 @@ private final class ScrambleMapFunction(strategy: PiiStrategy,
case t: TextNode =>
val originalValue = t.asText()
val newValue = strategy.scramble(originalValue)
val _ = modifiedFields += JsonModifiedField(fieldName, originalValue, newValue, jsonPath, schema)
modifiedFields += JsonModifiedField(fieldName, originalValue, newValue, jsonPath, schema)
newValue
case default: AnyRef => default
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import common.loaders.{CollectorApi, CollectorContext, CollectorPayload, Collect
import common.outputs.EnrichedEvent
import utils.TestResourcesRepositoryRef
import common.SpecHelpers.toNameValuePairs
import common.utils.TestResourcesRepositoryRef
import utils.ScalazJson4sUtils

// Iglu
import iglu.client.SchemaCriterion
Expand All @@ -54,6 +56,7 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidationMatche
Hashing configured JSON fields in POJO should work when multiple fields are matched through schemacriterion $e5
Hashing configured JSON fields in POJO should silently ignore unsupported types $e6
Hashing configured JSON and scalar fields in POJO emits a correct pii_transformation event $e7
Hashing configured JSON fields in POJO should not create new fields $e8
"""

def commonSetup(enrichmentMap: EnrichmentMap): List[ValidatedEnrichedEvent] = {
Expand Down Expand Up @@ -471,4 +474,47 @@ class PiiPseudonymizerEnrichmentSpec extends Specification with ValidationMatche
(((unstructEventJ \ "data") \ "data" \ "myVar2").extract[String] must_== "awesome")
}
}

/**
 * Regression test for snowplow/snowplow#3636: pseudonymizing JSON fields must not
 * create fields that were absent from the original document.
 *
 * The PII enrichment is configured with a JSON path that names one field asserted
 * below to be present (`emailAddress`) and one asserted to be absent
 * (`nonExistentEmailAddress`). The expectations check that:
 *  - `emailAddress` in the first context carries the expected SHA-256 digest,
 *  - `nonExistentEmailAddress` does not exist in the first context's data,
 *  - every other asserted address is still in clear text.
 */
def e8 = {
  val enrichmentMap = Map(
    ("ip_lookups" -> ipEnrichment),
    ("pii_enrichment_config" -> PiiPseudonymizerEnrichment(
      List(
        PiiJson(
          fieldMutator    = JsonMutators.get("contexts").get,
          schemaCriterion = SchemaCriterion.parse("iglu:com.acme/email_sent/jsonschema/1-0-0").toOption.get,
          // Second path element deliberately targets a field that this test expects
          // NOT to be present in the enriched output.
          jsonPath = "$.['emailAddress', 'nonExistentEmailAddress']"
        )
      ),
      true,
      PiiStrategyPseudonymize("SHA-256", hashFunction = DigestUtils.sha256Hex(_: Array[Byte]), "pepper123")
    ))
  )

  val output = commonSetup(enrichmentMap = enrichmentMap)
  output.size must_== 1

  val out = output(0)
  out must beSuccessful.like {
    case enrichedEvent => {
      implicit val formats = org.json4s.DefaultFormats

      // Navigate once to the `data` payload of each of the first two context entries.
      val contexts          = parse(enrichedEvent.contexts) \ "data"
      val firstContextData  = contexts(0) \ "data"
      val secondContextData = contexts(1) \ "data"

      ((firstContextData \ "emailAddress")
        .extract[String] must_== "72f323d5359eabefc69836369e4cabc6257c43ab6419b05dfb2211d0e44284c6") and
        // The core of the regression: the unmatched path must not materialise a new field.
        (ScalazJson4sUtils.fieldExists(firstContextData, "nonExistentEmailAddress") must_== false) and
        ((firstContextData \ "emailAddress2").extract[String] must_== "bob@acme.com") and
        ((secondContextData \ "emailAddress").extract[String] must_== "tim@acme.com") and
        ((secondContextData \ "emailAddress2").extract[String] must_== "tom@acme.com")
    }
  }
}
}

0 comments on commit 8169d82

Please sign in to comment.