Skip to content

Commit

Permalink
Common: fix PII enrichment adding empty objects instead of missing pr…
Browse files Browse the repository at this point in the history
…operties (close #351)
  • Loading branch information
dilyand authored and benjben committed Sep 21, 2020
1 parent 58f5a35 commit cb3b570
Show file tree
Hide file tree
Showing 6 changed files with 452 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,37 @@ object PiiPseudonymizerEnrichment extends ParseableEnrichment {
.get(fieldName)
.map(_.asRight)
.getOrElse(s"The specified json field $fieldName is not supported".asLeft)

/** Helper to remove fields that were wrongly added and are not in the original JSON. See #351. */
private[pii] def removeAddedFields(hashed: Json, original: Json): Json =
hashed.asObject.map(_.toMap) match {
case Some(hashed_fields) => // hashed is JSON object
original.asObject.map(_.toMap) match {
case Some(orig_fields) =>
val newMap =
hashed_fields
.collect {
case (k, v) if orig_fields.isDefinedAt(k) =>
(k, removeAddedFields(v, orig_fields.get(k).get))
}
Json.fromFields(newMap)
case None =>
hashed // should never happen. Would mean change of type of one field
}
case None =>
hashed.asArray match {
case Some(hashed_arr) => // hashed is array (can contain JSON objects)
original.asArray match {
case Some(orig_arr) =>
// we can use zip because there should never be new fields in an array, only in an object
val values = hashed_arr.zip(orig_arr).map { case (hashed, orig) => removeAddedFields(hashed, orig) }
Json.fromValues(values)
case None =>
Json.fromValues(hashed_arr) // should never happen. Would mean change of type of one field
}
case None => hashed // hashed is neither JSON object nor array
}
}
}

/**
Expand Down Expand Up @@ -204,7 +235,8 @@ final case class PiiJson(
)
}
.getOrElse((parsed, List.empty[JsonModifiedField]))
} yield (substituted.noSpaces, modifiedFields.toList)).getOrElse((null, List.empty))
} yield (PiiPseudonymizerEnrichment.removeAddedFields(substituted, parsed).noSpaces, modifiedFields.toList))
.getOrElse((null, List.empty))

/** Map context top fields with strategy if they match. */
private def mapContextTopFields(tuple: (String, Json), strategy: PiiStrategy): (String, (Json, List[JsonModifiedField])) =
Expand Down Expand Up @@ -272,10 +304,6 @@ final case class PiiJson(
jsonPath,
new ScrambleMapFunction(strategy, modifiedFields, fieldMutator.fieldName, jsonPath, schema)
)
// make sure it is a structure preserving method, see #3636
//val transformedJValue = JsonMethods.fromJsonNode(documentContext.json[JsonNode]())
//val Diff(_, erroneouslyAdded, _) = jValue diff transformedJValue
//val Diff(_, withoutCruft, _) = erroneouslyAdded diff transformedJValue
(jacksonToCirce(documentContext2.json[JsonNode]()), modifiedFields.toList)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
},
"emailAddress2": {
"type": "string"
},
"emailAddress3": {
"type": "string"
}
},
"required": ["emailAddress", "emailAddress2"],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"description": "Schema for acme stuff",
"self": {
"vendor": "com.acme",
"name": "email_sent",
"format": "jsonschema",
"version": "1-1-0"
},
"type": "object",
"properties": {
"emailAddress": {
"type": "string"
},
"emailAddress2": {
"type": "string"
},
"emailAddress3": {
"type": ["string", "null"]
}
},
"required": ["emailAddress", "emailAddress2"],
"additionalProperties": false
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
"type": "string"
}
}
},
"field4": {
"type": "string",
"maxLength": 64
}
},
"required": ["field"],
Expand Down
Loading

0 comments on commit cb3b570

Please sign in to comment.