From 135742dd3c588fefbeb7496b8480554504280b06 Mon Sep 17 00:00:00 2001 From: Murray Steele Date: Tue, 13 Feb 2018 09:59:10 +0000 Subject: [PATCH 1/2] Provide `PIISafe` wrapper object This wrapper object can be used to wrap arguments we want to send to analytics that we know are not PII because we generated them, but we also know may look like PII. Wrapping this kind of value in a `PIISafe` object tells the analytics PII stripping code that this value is already safe and instead of attempting to detect and remove PII from it, instead we should extract the raw value and pass that on. We realised we needed this because when testing smart-answers on GOV.UK we noticed that the value of a custom dimension containing the content_ids of the taxons for the smart-answer had been stripped. The value of the custom dimension was `eed5b92e-8279-4ca9-a141-5c35ed22fcf1`, but it was transformed into `eed5b92e-8279-4ca9-a141-5[postcode]22fcf1` because the substring `c35ed` in the final portion looks like a postcode, `C3 5ED`. To avoid this we'd like to be able to tell analytics that some values are definitely safe to send as they don't contain PII. My first thought was to provide a list of tracker methods and values that should be ignored. Something like: GOVUK.Analytics.safe_for_PII = [['setdimension', 1]] This would tell GOVUK.Analytics that for any arguments that start with 'setdimension' and 1 we should assume the other values are safe for PII. However, it felt like the PII code would need to know too much about the values it is given in order to work on this. We'd also need to consider how to say that the first value argument for a function was safe, but we should still detect PII in any of the other arguments. Our second thought, and the version in this commit, was to allow individual values in the arguments to be wrapped in a "safe" object that the analytics code knows to ignore. This is inspired by the rails `html_safe` functionality used in ERB views. Rails automatically pushes all output in an ERB template through an html escaping routine. If you know that the output is already safe, because it's a string of html you've generated in a helper for example, then you can flag it as html_safe to tell rails not to escape it. --- CHANGELOG.md | 3 ++ docs/analytics.md | 19 +++++++++++ javascripts/govuk/analytics/analytics.js | 17 +++++++--- spec/unit/analytics/analytics.spec.js | 43 ++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed408fef..d710d582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# Unreleased + +- Allow wrapping arguments to analytics as PII safe to tell the analytics code not to attempt to strip PII from the values: ([PR #448](https://github.com/alphagov/govuk_frontend_toolkit/pull/448)) # 7.3.0 - Strip PII from all arguments passed to GA. Emails are stripped by default, postcodes can also be stripped if configured to do so: ([PR #435](https://github.com/alphagov/govuk_frontend_toolkit/pull/435)). diff --git a/docs/analytics.md b/docs/analytics.md index eda74c78..e218a6d5 100644 --- a/docs/analytics.md +++ b/docs/analytics.md @@ -249,3 +249,22 @@ initialize time as follows: Any value other than the JS literal `true` for `stripPostcodePII` will leave the analytics module configured not to strip postcodes. + +#### Avoding false positives + +Sometimes you will have data you want to send to analytics that looks like PII +and would be stripped out. For example on GOV.UK the content_ids that belong +to every document can sometimes contain a string of characters that look like a +UK postcode: in `eed5b92e-8279-4ca9-a141-5c35ed22fcf1` the substring `c35ed` in +the final portion looks like a postcode, `C3 5ED`, and will be transformed into +`eed5b92e-8279-4ca9-a141-5[postcode]22fcf1` which breaks the `content_id`. To +send data that you know is not PII, but it looks like an email address or a UK +postcode you can provide your arguments wrapped in a `GOVUK.Analytics.PIISafe` +object. If any argument to an analytics function is an instance of one of these +objects the value contained within will be extracted and sent directly to the +analytics tracker without attempting to strip PII from it. For example: + +```js + GOVUK.analytics.setDimension(1, new GOVUK.Analytics.PIISafe('this-is-not-an@email-address-but-it-looks-like-one')); + GOVUK.analytics.trackEvent('report title clicked', new GOVUK.Analytics.PIISafe('this report title looks like it contains a P0 5TC ode but it does not really')); +```` diff --git a/javascripts/govuk/analytics/analytics.js b/javascripts/govuk/analytics/analytics.js index 1daca5cc..93abe9fb 100644 --- a/javascripts/govuk/analytics/analytics.js +++ b/javascripts/govuk/analytics/analytics.js @@ -29,6 +29,11 @@ } } + var PIISafe = function (value) { + this.value = value + } + Analytics.PIISafe = PIISafe + Analytics.prototype.stripPII = function (value) { if (typeof value === 'string') { return this.stripPIIFromString(value) @@ -51,12 +56,16 @@ } Analytics.prototype.stripPIIFromObject = function (object) { - for (var property in object) { - var value = object[property] + if (object instanceof Analytics.PIISafe) { + return object.value + } else { + for (var property in object) { + var value = object[property] - object[property] = this.stripPII(value) + object[property] = this.stripPII(value) + } + return object } - return object } Analytics.prototype.stripPIIFromArray = function (array) { diff --git a/spec/unit/analytics/analytics.spec.js b/spec/unit/analytics/analytics.spec.js index ecf63782..efdc1f4f 100644 --- a/spec/unit/analytics/analytics.spec.js +++ b/spec/unit/analytics/analytics.spec.js @@ -150,6 +150,24 @@ describe('GOVUK.Analytics', function () { analytics.setDimension(1, 'SW1+1AA-value', { label: 'RG209NJ', value: ['data', 'data', 'someone has added their personalIV63 6TU postcode'] }) expect(window.ga.calls.mostRecent().args).toEqual(['set', 'dimension1', '[postcode]-value']) // set dimension ignores extra options }) + + it('ignores any PIISafe arguments even if they look like emails or postcodes', function () { + analytics = new GOVUK.Analytics({ + universalId: 'universal-id', + cookieDomain: '.www.gov.uk', + siteSpeedSampleRate: 100, + stripPostcodePII: true + }) + + analytics.trackPageview(new GOVUK.Analytics.PIISafe('/path/to/an/embedded/SW1+1AA/postcode/?with=an&postcode=SP4%207DE'), new GOVUK.Analytics.PIISafe('an.email@example.com'), { label: new GOVUK.Analytics.PIISafe('another.email@example.com'), value: ['data', 'data', new GOVUK.Analytics.PIISafe('someone has added their personalIV63 6TU postcode')] }) + expect(window.ga.calls.mostRecent().args).toEqual(['send', 'pageview', { page: '/path/to/an/embedded/SW1+1AA/postcode/?with=an&postcode=SP4%207DE', title: 'an.email@example.com', label: 'another.email@example.com', value: ['data', 'data', 'someone has added their personalIV63 6TU postcode'] }]) + + analytics.trackEvent(new GOVUK.Analytics.PIISafe('SW1+1AA-category'), new GOVUK.Analytics.PIISafe('an.email@example.com-action'), { label: new GOVUK.Analytics.PIISafe('RG209NJ'), value: ['data', 'data', 'someone has added their personalIV63 6TU postcode'] }) + expect(window.ga.calls.mostRecent().args).toEqual(['send', { hitType: 'event', eventCategory: 'SW1+1AA-category', eventAction: 'an.email@example.com-action', eventLabel: 'RG209NJ' }]) // trackEvent ignores options other than label or integer values for value + + analytics.setDimension(1, new GOVUK.Analytics.PIISafe('an.email@SW1+1AA-value.com'), { label: new GOVUK.Analytics.PIISafe('RG209NJ'), value: ['data', 'data', new GOVUK.Analytics.PIISafe('someone has added their personalIV63 6TU postcode')] }) + expect(window.ga.calls.mostRecent().args).toEqual(['set', 'dimension1', 'an.email@SW1+1AA-value.com']) // set dimension ignores extra options + }) }) describe('when tracking social media shares', function () { @@ -224,6 +242,31 @@ describe('GOVUK.Analytics', function () { value: ['data', 'data', 'someone has added their personal[postcode] postcode'] }]) }) + + it('ignores any PIISafe arguments even if they look like emails or postcodes', function () { + analytics = new GOVUK.Analytics({ + universalId: 'universal-id', + cookieDomain: '.www.gov.uk', + siteSpeedSampleRate: 100, + stripPostcodePII: true + }) + + analytics.trackShare('email', { + to: new GOVUK.Analytics.PIISafe('IV63 6TU'), + label: new GOVUK.Analytics.PIISafe('an.email@example.com'), + value: new GOVUK.Analytics.PIISafe(['data', 'another.email@example.com', 'someone has added their personalTD15 2SE postcode']) + }) + + expect(window.ga.calls.mostRecent().args).toEqual(['send', { + hitType: 'social', + socialNetwork: 'email', + socialAction: 'share', + socialTarget: jasmine.any(String), + to: 'IV63 6TU', + label: 'an.email@example.com', + value: ['data', 'another.email@example.com', 'someone has added their personalTD15 2SE postcode'] + }]) + }) }) describe('when adding a linked domain', function () { From b1ce5b45ed694b19b11d5bea4a8a1dfaf7bd5856 Mon Sep 17 00:00:00 2001 From: Murray Steele Date: Wed, 14 Feb 2018 12:13:16 +0000 Subject: [PATCH 2/2] Treat `Arguments` like `Array` instances when stripping PII Differences in JS runtimes means that we can't always iterate over an `Arguments` instance's properties with a `for (var prop in object) { ... }` loop like we can for other object types and we have to iterate over the contents like we do with an array with a `for (var i = 0, l = array.length; i < l; i++) { ... }` loop. If we don't iterate this way then for those runtimes that don't allow it we don't run the `stripPII` function on the contents of the `Arguments` instance and all is for naught! It seems that most current browsers do let you iterate over an `Argument` object using properties, but the phantomjs runtime that GOV.UK uses in the static application to run tests doesn't let you do this. It's likely this is an old implementation, but it's probably based on some browser behaviour, so it means that some older browsers may also not allow this. Iterating by numerical index works in all browsers and runtimes so it's safer to do this. --- javascripts/govuk/analytics/analytics.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascripts/govuk/analytics/analytics.js b/javascripts/govuk/analytics/analytics.js index 93abe9fb..c405abe2 100644 --- a/javascripts/govuk/analytics/analytics.js +++ b/javascripts/govuk/analytics/analytics.js @@ -37,7 +37,7 @@ Analytics.prototype.stripPII = function (value) { if (typeof value === 'string') { return this.stripPIIFromString(value) - } else if (Object.prototype.toString.call(value) === '[object Array]') { + } else if (Object.prototype.toString.call(value) === '[object Array]' || Object.prototype.toString.call(value) === '[object Arguments]') { return this.stripPIIFromArray(value) } else if (typeof value === 'object') { return this.stripPIIFromObject(value)