From 7cd629ff8f96d7bdbd79f6f8108866f2ac24ff08 Mon Sep 17 00:00:00 2001 From: bren Date: Thu, 14 Feb 2019 12:32:33 +0000 Subject: [PATCH 1/8] =?UTF-8?q?remove=20check=20for=20"next."=20in=20metri?= =?UTF-8?q?c=20name=20=20=F0=9F=90=BF=20v2.12.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/checks/graphiteSpike.check.js | 4 ---- src/checks/graphiteThreshold.check.js | 6 ------ 2 files changed, 10 deletions(-) diff --git a/src/checks/graphiteSpike.check.js b/src/checks/graphiteSpike.check.js index 5bfd24a..2006759 100644 --- a/src/checks/graphiteSpike.check.js +++ b/src/checks/graphiteSpike.check.js @@ -31,10 +31,6 @@ class GraphiteSpikeCheck extends Check { if (!options.numerator) { throw new Error(`You must pass in a numerator for the "${options.name}" check - e.g., "next.heroku.article.*.express.start"`); } - - if (!/next\./.test(options.numerator)) { - throw new Error(`You must prepend the numerator (${options.numerator}) with "next." for the "${options.name}" check - e.g., "heroku.article.*.express.start" needs to be "next.heroku.article.*.express.start"`); - } this.sampleUrl = this.generateUrl(options.numerator, options.divisor, this.samplePeriod); this.baselineUrl = this.generateUrl(options.numerator, options.divisor, this.baselinePeriod); diff --git a/src/checks/graphiteThreshold.check.js b/src/checks/graphiteThreshold.check.js index 893618b..e5d6fc7 100644 --- a/src/checks/graphiteThreshold.check.js +++ b/src/checks/graphiteThreshold.check.js @@ -30,13 +30,8 @@ class GraphiteThresholdCheck extends Check { throw new Error(`You must pass in a metric for the "${options.name}" check - e.g., "next.heroku.article.*.express.start"`); } - if (!/next\./.test(options.metric)) { - throw new Error(`You must prepend the metric (${options.metric}) with "next." 
for the "${options.name}" check - e.g., "heroku.article.*.express.start" needs to be "next.heroku.article.*.express.start"`); - } this.metric = options.metric; - this.sampleUrl = this.generateUrl(options.metric, this.samplePeriod); - this.checkOutput = 'Graphite threshold check has not yet run'; } @@ -45,7 +40,6 @@ class GraphiteThresholdCheck extends Check { } tick(){ - return fetch(this.sampleUrl, { headers: { key: this.ftGraphiteKey } }) .then(fetchres.json) .then(results => { From 9bbe60ec008c2a980b0ebb23eeaa66dd077f1574 Mon Sep 17 00:00:00 2001 From: bren Date: Thu, 14 Feb 2019 12:35:01 +0000 Subject: [PATCH 2/8] =?UTF-8?q?delete=20memory=20test=20that=20hasn't=20be?= =?UTF-8?q?en=20exported=20since=202016=20and=20code=20that=20only=20it=20?= =?UTF-8?q?depends=20on=20=20=F0=9F=90=BF=20v2.12.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 3 - int-tests/heroku.int.test.js | 25 -------- loadvars.js | 5 -- src/checks/memory.check.js | 70 -------------------- src/lib/herokuAdaptor.js | 52 --------------- src/lib/serviceRegistryAdaptor.js | 69 -------------------- test/herokuAdaptor.spec.js | 26 -------- test/memcheck.spec.js | 99 ----------------------------- test/serviceRegistryAdaptor.spec.js | 51 --------------- 9 files changed, 400 deletions(-) delete mode 100644 int-tests/heroku.int.test.js delete mode 100644 loadvars.js delete mode 100644 src/checks/memory.check.js delete mode 100644 src/lib/herokuAdaptor.js delete mode 100644 src/lib/serviceRegistryAdaptor.js delete mode 100644 test/herokuAdaptor.spec.js delete mode 100644 test/memcheck.spec.js delete mode 100644 test/serviceRegistryAdaptor.spec.js diff --git a/Makefile b/Makefile index 167d541..6528614 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,4 @@ IGNORE_A11Y = true test-unit: FT_GRAPHITE_KEY=123 HEROKU_AUTH_TOKEN=token mocha -test-int: - mocha int-tests/ -r loadvars.js - test: verify test-unit diff --git a/int-tests/heroku.int.test.js 
b/int-tests/heroku.int.test.js deleted file mode 100644 index d6f89d5..0000000 --- a/int-tests/heroku.int.test.js +++ /dev/null @@ -1,25 +0,0 @@ -'use strict'; -const expect = require('chai').expect; -const sinon = require('sinon'); -const proxyquire = require('proxyquire').noCallThru().noPreserveCache(); - -describe('Heroku Adaptor', function(){ - - const wait = ms => new Promise(r => setTimeout(r, ms)); - let adaptor; - - before(() => { - adaptor = require('../src/lib/herokuAdaptor'); - }); - - it('Should be able to get data from the metrics api', () => { - return adaptor.getErrorMetrics('ft-next-article-eu', '10m') - .then(metrics => { - expect(metrics).to.exist; - expect(metrics).to.have.property('start_time'); - expect(metrics).to.have.property('end_time'); - expect(metrics).to.have.property('step'); - expect(metrics).to.have.property('data'); - }) - }); -}); diff --git a/loadvars.js b/loadvars.js deleted file mode 100644 index 28a6879..0000000 --- a/loadvars.js +++ /dev/null @@ -1,5 +0,0 @@ -'use strict'; - -if(process.env.NODE_ENV !== 'production' && process.env.NODE_ENV !== 'branch' && !process.env.CI && !process.env.JENKINS_URL){ - require('dotenv').load(); -} diff --git a/src/checks/memory.check.js b/src/checks/memory.check.js deleted file mode 100644 index 4da3e74..0000000 --- a/src/checks/memory.check.js +++ /dev/null @@ -1,70 +0,0 @@ -'use strict'; -const Check = require('./check'); -const status = require('./status'); -const serviceRegistryAdaptor = require('../lib/serviceRegistryAdaptor'); -const herokuAdaptor = require('../lib/herokuAdaptor'); -const ms = require('ms'); - -class Memcheck extends Check { - - constructor(config){ - super(config); - this.appsToCheck = config.apps || 'all'; - this.window = config.window || '10m'; - this.threshold = config.threshold || 2; - } - - init () { - return serviceRegistryAdaptor.start() - .then(() => { - let apps = serviceRegistryAdaptor.getData(); - if (this.appsToCheck === 'all') { - this.apps = apps; - } else 
{ - this.apps = new Map(); - for(let app of apps){ - if(this.appsToCheck.indexOf(app[0] > -1)){ - this.apps.set(app[0], app[1]); - } - } - } - }); - } - - tick(){ - let failures = new Map(); - let promises = Array.from(this.apps.keys()).map(app => { - return herokuAdaptor.getR14Count(app, this.window) - .then(count => { - if(count > this.threshold){ - failures.set(app, count); - } - - return count; - }); - }); - - Promise.all(promises).then(() => { - if(failures.size === 0){ - this.status = status.PASSED; - this.checkOutput = "All apps are ok"; - }else{ - let problemApps = Array.from(failures.keys()); - this.status = status.FAILED; - this.checkOutput = `The following ${failures.size > 1 ? 'apps are' : 'app is'} using too much memory: ${problemApps.join(', ')}` - let hasPlatinum = problemApps.some(a => { - return this.apps.get(a) === 'platinum'; - }); - this.severity = hasPlatinum ? 2 : 3; - } - }).catch(e => { - console.error(e); - }) - } - - -} - -module.exports = Memcheck; - - diff --git a/src/lib/herokuAdaptor.js b/src/lib/herokuAdaptor.js deleted file mode 100644 index 77f4c73..0000000 --- a/src/lib/herokuAdaptor.js +++ /dev/null @@ -1,52 +0,0 @@ -'use strict'; -const ms = require('ms'); -const fetch = require('node-fetch'); - -let url = '/dyno/errors?process_type=web&start_time=2016-04-18T09%3A30%3A00.000Z&end_time=2016-04-19T09%3A20%3A00.000Z&step=10m'; - -function getErrorMetrics(app, duration){ - if(!process.env.HEROKU_AUTH_TOKEN){ - let err = new Error('HEROKU_AUTH_TOKEN env var required'); - return Promise.reject(err); - } - - let durationMs = ms(duration); - let endTime = new Date(); - let startTime = new Date(endTime - durationMs); - let step = ms(durationMs / 10); - let url = `https://api.metrics.herokai.com/metrics/${app}/dyno/errors?process_type=web&start_time=${startTime.toISOString()}&end_time=${endTime.toISOString()}&step=${step}`; - return fetch(url, - { - headers:{ - 'Authorization': 'Bearer ' + process.env.HEROKU_AUTH_TOKEN - } - } - ) - 
.then(response => { - if(!response.ok){ - let err = new Error(`Failed to fetch ${url}. Response was ${response.status}`); - err.type = 'HEROKU_API_ERROR'; - } - - return response.json(); - }); -} - -function getTotalErrorCount(data, code){ - let errors = data.data[code]; - if(!errors || !errors.length || !errors.reduce){ - return 0; - } - - return errors.reduce((total, count) => { - return count ? total + count : total; - }, 0); -} - -module.exports = { - getErrorMetrics: getErrorMetrics, - getR14Count: (app, duration) => { - return getErrorMetrics(app, duration) - .then(data => getTotalErrorCount(data, 'R14')) - } -}; diff --git a/src/lib/serviceRegistryAdaptor.js b/src/lib/serviceRegistryAdaptor.js deleted file mode 100644 index 5210cda..0000000 --- a/src/lib/serviceRegistryAdaptor.js +++ /dev/null @@ -1,69 +0,0 @@ -'use strict'; -const fetch = require('node-fetch'); -const log = require('@financial-times/n-logger').default; - -function getServiceRegistryData(){ - return fetch('http://next-registry.ft.com/') - .then(response => { - if(!response.ok){ - let err = new Error('Failed to fetch service registry'); - err.type = 'SERVICE_REGISTRY_FETCH_FAILURE'; - err.data = {status:response.status, statusText:response.statusText}; - throw err; - } - - return response.json(); - }) -} - -function getHighestVersion(versions){ - let highestVersionNumber = Object.keys(versions).map(v => parseInt(v, 10)).sort().reverse()[0]; - return versions[highestVersionNumber.toString()]; -} - -function parseRegistryData(data){ - let map = new Map(); - let regex = /http:\/\/([a-z-]+)\.herokuapp.com/; - try{ - for(let item of data){ - let serviceTier = item.tier; - let activeVersion = getHighestVersion(item.versions); - let nodes = activeVersion.nodes.forEach(n => { - let nodeUrl = typeof n === 'string' ? 
n : n.url; - let matches = regex.exec(nodeUrl); - if(matches && matches.length){ - let appName = matches[1]; - map.set(appName, serviceTier); - } - - }); - } - }catch(e){ - e.type = 'REGISTY_DATA_PARSE_ERROR'; - throw e; - } - - return map; -} - -let data; - -function tick(){ - return getServiceRegistryData() - .then(registry => { - data = parseRegistryData(registry); - }); -} - -function start(interval){ - setInterval(tick, interval); - return tick(); -} - -module.exports = { - start: start, - getData: () => data -}; - - - diff --git a/test/herokuAdaptor.spec.js b/test/herokuAdaptor.spec.js deleted file mode 100644 index 95be9c0..0000000 --- a/test/herokuAdaptor.spec.js +++ /dev/null @@ -1,26 +0,0 @@ -'use strict'; -const expect = require('chai').expect; -const sinon = require('sinon'); -const proxyquire = require('proxyquire').noCallThru().noPreserveCache(); - -describe('Heroku Adaptor', function(){ - - let adaptor; - let fixture = require('./fixtures/herokuMetricsApiResponse.json'); - let mockResponse = {status:200, ok:true, json:() => fixture}; - let mockFetch = sinon.stub().returns(Promise.resolve(mockResponse)); - - before(() => { - adaptor = proxyquire('../src/lib/herokuAdaptor', {'node-fetch':mockFetch}); - }); - - - it('Should return the number of R14 errors within the given timeframe', () => { - return adaptor.getR14Count('ft-next-article-eu', '10m') - .then(count => { - expect(count).to.exist; - expect(count).to.equal(5953); - }) - }); - -}); diff --git a/test/memcheck.spec.js b/test/memcheck.spec.js deleted file mode 100644 index d1f4652..0000000 --- a/test/memcheck.spec.js +++ /dev/null @@ -1,99 +0,0 @@ -'use strict'; -const expect = require('chai').expect; -const sinon = require('sinon'); -const proxyquire = require('proxyquire').noCallThru().noPreserveCache(); - -describe('Memory Usage Check', function(){ - - let config = require('./fixtures/config/memcheckFixture').checks[0]; - - const wait = ms => new Promise(r => setTimeout(r, ms)); - - let 
bronzeAppsListFixture = new Map([ - ['ft-next-test-app', 'bronze'], - ['ft-next-platinum-app', 'bronze'] - ]); - - let platinumAppsListFixture = new Map([ - ['ft-next-test-app', 'bronze'], - ['ft-next-platinum-app', 'platinum'] - ]); - - - let mockServiceRegistryAdaptor; - - let mockHerokuAdaptor; - - function setup(appsListFixture, count){ - mockServiceRegistryAdaptor = { - start : sinon.stub().returns(Promise.resolve(null)), - getData : sinon.stub().returns(appsListFixture) - }; - mockHerokuAdaptor = { - getR14Count: sinon.stub().returns(Promise.resolve(count || 0)) - }; - - let MemCheck = proxyquire( - '../src/checks/memory.check', - { - '../lib/serviceRegistryAdaptor':mockServiceRegistryAdaptor, - '../lib/herokuAdaptor':mockHerokuAdaptor - } - ); - - return new MemCheck(config); - } - - it('Should get a list of apps from the service registry', () => { - let memcheck = setup(bronzeAppsListFixture, 0); - memcheck.start(); - return wait(500) - .then(() => { - sinon.assert.called(mockServiceRegistryAdaptor.start); - sinon.assert.called(mockServiceRegistryAdaptor.getData); - }); - }); - - it('Should make an api call for each app on the given interval', () => { - let memcheck = setup(bronzeAppsListFixture, 0); - memcheck.start(); - return wait(500) - .then(() => { - expect(mockHerokuAdaptor.getR14Count.callCount).to.equal(bronzeAppsListFixture.size); - expect(bronzeAppsListFixture.has(mockHerokuAdaptor.getR14Count.firstCall.args[0])).to.be.true; - }); - }); - - it('Should fail if an app had has R14 errors in the time period', () => { - let memcheck = setup(bronzeAppsListFixture, 5); - memcheck.start(); - return wait(500) - .then(() => { - let status = memcheck.getStatus(); - expect(status.ok).to.be.false; - for(let app of bronzeAppsListFixture.keys()){ - expect(status.checkOutput).to.contain(app); - } - }); - }); - - it('Should fail with a severity of 3 for bronze apps', () => { - let memcheck = setup(bronzeAppsListFixture, 5); - memcheck.start(); - return wait(500) 
- .then(() => { - let status = memcheck.getStatus(); - expect(status.severity).to.equal(3); - }); - }); - - it('Should fail with a severity of 2 for platinum apps', () => { - let memcheck = setup(platinumAppsListFixture, 5); - memcheck.start(); - return wait(500) - .then(() => { - let status = memcheck.getStatus(); - expect(status.severity).to.equal(2); - }); - }); -}); diff --git a/test/serviceRegistryAdaptor.spec.js b/test/serviceRegistryAdaptor.spec.js deleted file mode 100644 index 99e2020..0000000 --- a/test/serviceRegistryAdaptor.spec.js +++ /dev/null @@ -1,51 +0,0 @@ -'use strict'; -const expect = require('chai').expect; -const sinon = require('sinon'); -const proxyquire = require('proxyquire').noCallThru().noPreserveCache(); - - -describe('Service Registry Adaptor', function(){ - - function wait(ms){ - return new Promise(r => setInterval(r, ms)); - } - - let adaptor; - let serviceRegistryFixture = require('./fixtures/serviceRegistryFixture.json'); - let mockResponse = {status:200, ok:true, json:() => Promise.resolve(serviceRegistryFixture), text:() => Promise.resolve('')}; - let mockFetch; - - beforeEach(() => { - mockFetch = sinon.stub().returns(Promise.resolve(mockResponse)); - adaptor = proxyquire('../src/lib/serviceRegistryAdaptor', {'node-fetch':mockFetch}); - }); - - afterEach(() => { - //mockFetch.restore(); - }); - - it('Should call the service registry', () => { - return adaptor.start(1000) - .then(() => { - sinon.assert.called(mockFetch); - sinon.assert.calledWith(mockFetch, 'http://next-registry.ft.com/'); - }) - }); - - it('Should be able to parse out the nodes and service tiers', () => { - return adaptor.start(1000) - .then(() => { - let data = adaptor.getData(); - expect(data.has('ft-next-article-eu')).to.be.true; - }) - }); - - it('Should poll the registry using the given interval', () => { - return adaptor.start(500) - .then(() => wait(1000)) - .then(() => { - expect(mockFetch.callCount).to.be.greaterThan(1); - }) - }); -}); - From 
73854532ff03680dac81756ac6bd9acdd88105dd Mon Sep 17 00:00:00 2001 From: bren Date: Thu, 14 Feb 2019 13:59:47 +0000 Subject: [PATCH 3/8] =?UTF-8?q?use=20the=20newfangled=20language=20feature?= =?UTF-8?q?=20"arrays"=20for=20that=20check=20=20=F0=9F=90=BF=20v2.12.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/checks/check.js | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/checks/check.js b/src/checks/check.js index ab494ac..10f5a8c 100644 --- a/src/checks/check.js +++ b/src/checks/check.js @@ -14,13 +14,17 @@ const isOfficeHoursNow = () => { class Check { constructor (opts) { - 'name,severity,businessImpact,panicGuide,technicalSummary' - .split(',') - .forEach(prop => { - if (!opts[prop]) { - throw new Error(`${prop} is required for every healthcheck`); - } - }) + [ + 'name', + 'severity', + 'businessImpact', + 'panicGuide', + 'technicalSummary' + ].forEach(prop => { + if (!opts[prop]) { + throw new Error(`${prop} is required for every healthcheck`); + } + }) if (this.start !== Check.prototype.start || this._tick !== Check.prototype._tick) { throw new Error(`Do no override native start and _tick methods of n-health checks. 
From 42772ac007bd9461540e5dbde9375f060af35658 Mon Sep 17 00:00:00 2001 From: bren Date: Thu, 14 Feb 2019 16:23:14 +0000 Subject: [PATCH 4/8] do async/await to it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🐿 v2.12.0 --- src/checks/aggregate.check.js | 2 - src/checks/check.js | 55 ++++++------- src/checks/cloudWatchThreshold.check.js | 44 +++++----- src/checks/graphiteSpike.check.js | 74 +++++++++-------- src/checks/graphiteThreshold.check.js | 63 ++++++++------- src/checks/graphiteWorking.check.js | 102 +++++++++++------------- src/checks/index.js | 10 +-- src/checks/json.check.js | 30 +++---- src/checks/pingdom.check.js | 36 ++++----- src/checks/responseCompare.check.js | 33 ++++---- src/checks/string.check.js | 22 +++-- 11 files changed, 221 insertions(+), 250 deletions(-) diff --git a/src/checks/aggregate.check.js b/src/checks/aggregate.check.js index bfacce0..180b1bb 100644 --- a/src/checks/aggregate.check.js +++ b/src/checks/aggregate.check.js @@ -35,7 +35,6 @@ class AggregateCheck extends Check { init () { let watchRegex = new RegExp(`(${this.watch.join('|')})`, 'i'); this.obserables = this.parent.checks.filter(check => watchRegex.test(check.name)); - return Promise.resolve() } tick(){ @@ -43,7 +42,6 @@ class AggregateCheck extends Check { if(this.mode === AggregateCheck.modes.AT_LEAST_ONE){ this.status = results.length && results.some(r => r) ? 
status.PASSED : status.FAILED; } - return Promise.resolve(); } } diff --git a/src/checks/check.js b/src/checks/check.js index 10f5a8c..4873076 100644 --- a/src/checks/check.js +++ b/src/checks/check.js @@ -12,8 +12,7 @@ const isOfficeHoursNow = () => { }; class Check { - - constructor (opts) { + constructor(opts) { [ 'name', 'severity', @@ -21,12 +20,12 @@ class Check { 'panicGuide', 'technicalSummary' ].forEach(prop => { - if (!opts[prop]) { + if(!opts[prop]) { throw new Error(`${prop} is required for every healthcheck`); } }) - if (this.start !== Check.prototype.start || this._tick !== Check.prototype._tick) { + if(this.start !== Check.prototype.start || this._tick !== Check.prototype._tick) { throw new Error(`Do no override native start and _tick methods of n-health checks. They provide essential error handlers. If complex setup is required, define an init method returning a Promise`) @@ -42,37 +41,34 @@ an init method returning a Promise`) this.status = status.PENDING; this.lastUpdated = null; } - init () { - return Promise.resolve(); - } - start () { - this.init() - .then(() => { - this.int = setInterval(this._tick.bind(this), this.interval); - this._tick(); - }) + + async init() {} + + async start() { + await this.init(); + + this.int = setInterval(this._tick.bind(this), this.interval); + this._tick(); } - _tick () { + async _tick() { + try { + await this.tick() + } catch(err){ + logger.error({ event: 'FAILED_HEALTHCHECK_TICK', name: this.name }, err) + raven.captureError(err); + this.status = status.ERRORED; + this.checkOutput = 'Healthcheck failed to execute'; + } - return Promise.resolve() - .then(() => this.tick()) - .catch(err => { - logger.error({ event: 'FAILED_HEALTHCHECK_TICK', name: this.name }, err) - raven.captureError(err); - this.status = status.ERRORED; - this.checkOutput = 'Healthcheck failed to execute'; - }) - .then(() => { - this.lastUpdated = new Date(); - }); + this.lastUpdated = new Date(); } - stop () { + stop() { 
clearInterval(this.int); } - getStatus () { + getStatus() { const output = { name: this.name, ok: this.status === status.PASSED, @@ -85,10 +81,10 @@ an init method returning a Promise`) checkOutput: this.status === status.ERRORED ? 'Healthcheck failed to execute' : this.checkOutput }; - if (this.officeHoursOnly && !isOfficeHoursNow()) { + if(this.officeHoursOnly && !isOfficeHoursNow()) { output.ok = true; output.checkOutput = 'This check is not set to run outside of office hours'; - } else if (this.lastUpdated) { + } else if(this.lastUpdated) { output.lastUpdated = this.lastUpdated.toISOString(); let shouldHaveRun = Date.now() - (this.interval + 1000); if(this.lastUpdated.getTime() < shouldHaveRun){ @@ -96,6 +92,7 @@ an init method returning a Promise`) output.checkOutput = 'Check has not run recently'; } } + return output; } } diff --git a/src/checks/cloudWatchThreshold.check.js b/src/checks/cloudWatchThreshold.check.js index c7b83b8..ba53d0f 100644 --- a/src/checks/cloudWatchThreshold.check.js +++ b/src/checks/cloudWatchThreshold.check.js @@ -31,9 +31,10 @@ class CloudWatchThresholdCheck extends Check { // use a larger window when gathering stats, because CloudWatch // can take its sweet time with populating new datapoints. 
let timeWindow = this.samplePeriod * 1.5; - return { - EndTime: moment().toISOString(), - StartTime: moment().subtract(timeWindow, 'seconds').toISOString(), + const now = moment(); + return { + EndTime: now.toISOString(), + StartTime: now.subtract(timeWindow, 'seconds').toISOString(), MetricName: this.cloudWatchMetricName, Namespace: this.cloudWatchNamespace, Period: this.samplePeriod, @@ -42,31 +43,28 @@ class CloudWatchThresholdCheck extends Check { }; } - tick() { + async tick() { const params = this.generateParams(); - return this.cloudWatch + try { + const res = await this.cloudWatch .getMetricStatistics(params) - .promise() - .then(res => { - res.Datapoints.sort((a, b) => b['Timestamp'] - a['Timestamp']); - const value = res.Datapoints[0][this.cloudWatchStatistic]; - let ok; + .promise(); - if (this.direction === 'above') { - ok = value <= this.threshold; - } else { - ok = value >= this.threshold; - } + res.Datapoints.sort((a, b) => b['Timestamp'] - a['Timestamp']); + const value = res.Datapoints[0][this.cloudWatchStatistic]; - this.status = ok ? status.PASSED : status.FAILED; - this.checkOutput = ok ? `No threshold change detected in CloudWatch data. Current value: ${value}` : `CloudWatch data ${this.direction} required threshold. Current value: ${value}`; - }) - .catch(err => { - log.error('Failed to get CloudWatch data', err); - this.status = status.FAILED; - this.checkOutput = `Cloudwatch threshold check failed to fetch data: ${err.message}`; - }); + const ok = this.direction === 'above' + ? value <= this.threshold + : value >= this.threshold; + + this.status = ok ? status.PASSED : status.FAILED; + this.checkOutput = ok ? `No threshold change detected in CloudWatch data. Current value: ${value}` : `CloudWatch data ${this.direction} required threshold. 
Current value: ${value}`; + } catch(err) { + log.error('Failed to get CloudWatch data', err); + this.status = status.FAILED; + this.checkOutput = `Cloudwatch threshold check failed to fetch data: ${err.message}`; + } } } diff --git a/src/checks/graphiteSpike.check.js b/src/checks/graphiteSpike.check.js index 2006759..b134005 100644 --- a/src/checks/graphiteSpike.check.js +++ b/src/checks/graphiteSpike.check.js @@ -25,10 +25,12 @@ class GraphiteSpikeCheck extends Check { this.ftGraphiteBaseUrl = 'https://graphitev2-api.ft.com/render/?'; this.ftGraphiteKey = process.env.FT_GRAPHITE_KEY; - if (!this.ftGraphiteKey) { + + if(!this.ftGraphiteKey) { throw new Error('You must set FT_GRAPHITE_KEY environment variable'); } - if (!options.numerator) { + + if(!options.numerator) { throw new Error(`You must pass in a numerator for the "${options.name}" check - e.g., "next.heroku.article.*.express.start"`); } @@ -37,24 +39,26 @@ class GraphiteSpikeCheck extends Check { // If there's no divisor specified we probably need to normalize sample and baseline to account for the difference in size between their time ranges this.shouldNormalize = typeof options.normalize !== 'undefined' ? 
options.normalize : !options.divisor; - if (this.shouldNormalize) { + + if(this.shouldNormalize) { this.sampleMs = ms(this.samplePeriod); this.baselineMs = ms(this.baselinePeriod); } + this.checkOutput = 'Graphite spike check has not yet run'; } generateUrl(numerator, divisor, period) { const urlBase = this.ftGraphiteBaseUrl + `from=-${period}&format=json&target=`; - if (divisor) { + if(divisor) { return urlBase + `divideSeries(summarize(${this.seriesFunction}(${numerator}),"${period}","${this.summarizeFunction}",true),summarize(${this.seriesFunction}(${divisor}),"${period}","${this.summarizeFunction}",true))`; } else { return urlBase + `summarize(${this.seriesFunction}(${numerator}),"${period}","${this.summarizeFunction}",true)`; } } - normalize (data) { - if (this.shouldNormalize) { + normalize(data) { + if(this.shouldNormalize) { data.sample = data.sample / this.sampleMs; data.baseline = data.baseline / this.baselineMs; } @@ -62,40 +66,34 @@ class GraphiteSpikeCheck extends Check { return data; } - tick(){ - - return Promise.all([ - fetch(this.sampleUrl, { headers: { key: this.ftGraphiteKey } }) - .then(fetchres.json), - fetch(this.baselineUrl, { headers: { key: this.ftGraphiteKey } }) - .then(fetchres.json) - ]) - .then(jsons => { - - return this.normalize({ - sample: jsons[0][0] ? jsons[0][0].datapoints[0][0] : 0, - // baseline should not be allowed to be smaller than one as it is use as a divisor - baseline: jsons[1][0] ? jsons[1][0].datapoints[0][0] : 1 - }); - }) - .then(data => { - let ok; - if (this.direction === 'up') { - ok = data.sample / data.baseline < this.threshold; - } else { - ok = data.sample / data.baseline > 1 / this.threshold; - } - this.status = ok ? status.PASSED : status.FAILED; - - this.checkOutput = ok ? 
'No spike detected in graphite data' : 'Spike detected in graphite data'; - }) - .catch(err => { - logger.error({ event: `${logEventPrefix}_ERROR`, url: this.sampleUrl }, err); - this.status = status.FAILED; - this.checkOutput = 'Graphite spike check failed to fetch data: ' + err.message; + async tick() { + try { + const [sample, baseline] = await Promise.all([ + fetch(this.sampleUrl, { headers: { key: this.ftGraphiteKey } }) + .then(fetchres.json), + fetch(this.baselineUrl, { headers: { key: this.ftGraphiteKey } }) + .then(fetchres.json) + ]) + + const data = this.normalize({ + sample: sample[0] ? sample[0].datapoints[0][0] : 0, + // baseline should not be allowed to be smaller than one as it is use as a divisor + baseline: baseline[0] ? baseline[0].datapoints[0][0] : 1 }); - } + const ok = this.direction === 'up' + ? data.sample / data.baseline < this.threshold + : data.sample / data.baseline > 1 / this.threshold; + + this.status = ok ? status.PASSED : status.FAILED; + this.checkOutput = ok ? 
'No spike detected in graphite data' : 'Spike detected in graphite data'; + + } catch(err) { + logger.error({ event: `${logEventPrefix}_ERROR`, url: this.sampleUrl }, err); + this.status = status.FAILED; + this.checkOutput = 'Graphite spike check failed to fetch data: ' + err.message; + } + } } module.exports = GraphiteSpikeCheck; diff --git a/src/checks/graphiteThreshold.check.js b/src/checks/graphiteThreshold.check.js index e5d6fc7..c480915 100644 --- a/src/checks/graphiteThreshold.check.js +++ b/src/checks/graphiteThreshold.check.js @@ -39,42 +39,43 @@ class GraphiteThresholdCheck extends Check { return this.ftGraphiteBaseUrl + `format=json&from=-${period}&target=` + metric; } - tick(){ - return fetch(this.sampleUrl, { headers: { key: this.ftGraphiteKey } }) - .then(fetchres.json) - .then(results => { - const simplifiedResults = results.map(result => { - const isFailing = result.datapoints.some(value => { - if (value[0] === null) { - // metric data is unavailable, we don't fail this threshold check if metric data is unavailable - // if you want a failing check for when metric data is unavailable, use graphiteWorking - return false; - } else { - return this.direction === 'above' ? - Number(value[0]) > this.threshold : - Number(value[0]) < this.threshold; - } - }); - return { target: result.target, isFailing }; + async tick() { + try { + const results = await fetch(this.sampleUrl, { + headers: { key: this.ftGraphiteKey } + }).then(fetchres.json); + + const simplifiedResults = results.map(result => { + const isFailing = result.datapoints.some(value => { + if (value[0] === null) { + // metric data is unavailable, we don't fail this threshold check if metric data is unavailable + // if you want a failing check for when metric data is unavailable, use graphiteWorking + return false; + } else { + return this.direction === 'above' ? 
+ Number(value[0]) > this.threshold : + Number(value[0]) < this.threshold; + } }); + return { target: result.target, isFailing }; + }); - const failed = simplifiedResults.some(result => result.isFailing); - const failingMetrics = simplifiedResults.filter(result => result.isFailing).map(result => result.target); + const failed = simplifiedResults.some(result => result.isFailing); + const failingMetrics = simplifiedResults.filter(result => result.isFailing).map(result => result.target); - this.status = failed ? status.FAILED : status.PASSED; + this.status = failed ? status.FAILED : status.PASSED; - // The metric crossed a threshold - this.checkOutput = failed ? - `In the last ${this.samplePeriod}, the following metric(s) have moved ${this.direction} the threshold value of ${this.threshold}: ${failingMetrics.join(' ')}` : - `No threshold error detected in graphite data for ${this.metric}.`; - }) - .catch(err => { - logger.error({ event: `${logEventPrefix}_ERROR`, url: this.sampleUrl }, err); - this.status = status.FAILED; - this.checkOutput = 'Graphite threshold check failed to fetch data: ' + err.message; - }); - } + // The metric crossed a threshold + this.checkOutput = failed ? 
+ `In the last ${this.samplePeriod}, the following metric(s) have moved ${this.direction} the threshold value of ${this.threshold}: ${failingMetrics.join(' ')}` : + `No threshold error detected in graphite data for ${this.metric}.`; + } catch(err) { + logger.error({ event: `${logEventPrefix}_ERROR`, url: this.sampleUrl }, err); + this.status = status.FAILED; + this.checkOutput = 'Graphite threshold check failed to fetch data: ' + err.message; + } + } } module.exports = GraphiteThresholdCheck; diff --git a/src/checks/graphiteWorking.check.js b/src/checks/graphiteWorking.check.js index 0e24a06..06b3454 100644 --- a/src/checks/graphiteWorking.check.js +++ b/src/checks/graphiteWorking.check.js @@ -2,6 +2,7 @@ const status = require('./status'); const Check = require('./check'); const fetch = require('node-fetch'); const log = require('@financial-times/n-logger').default; +const fetchres = require('fetchres'); const logEventPrefix = 'GRAPHITE_WORKING_CHECK'; @@ -12,7 +13,6 @@ function badJSON(message, json) { } class GraphiteWorkingCheck extends Check { - constructor (options) { options.technicalSummary = options.technicalSummary || 'There has been no metric data for a sustained period of time'; options.panicGuide = options.panicGuide || 'Check this is up and running. Check this has been able to send metrics to Graphite (see Heroku and Splunk logs). 
Check Graphite has not been dropping metric data.'; @@ -35,61 +35,55 @@ class GraphiteWorkingCheck extends Check { this.checkOutput = "This check has not yet run"; } - tick () { - return fetch(this.url, { headers: { key: this.ftGraphiteKey } }) - .then(response => { - if(!response.ok){ - throw new Error('Bad Response: ' + response.status); - } - - return response.json(); - }) - .then(json => { - if(!json.length){ - badJSON('returned JSON should be an array', json); - } - - if(!json[0].datapoints){ - badJSON('No datapoints property', json); - } - - if(json[0].datapoints.length < 1){ - badJSON('Expected at least one datapoint', json); - } - - const simplifiedResults = json.map(result => { - - let nullsForHowManySeconds; - - if (result.datapoints.every(datapoint => datapoint[0] === null)) { - nullsForHowManySeconds = Infinity; - } else { - // This sums the number of seconds since the last non-null result at the tail of the list of metrics. - nullsForHowManySeconds = result.datapoints - .map((datapoint, index, array) => [datapoint[0], index > 0 ? datapoint[1] - array[index - 1][1] : 0]) - .reduce((xs, datapoint) => datapoint[0] === null ? 
xs + datapoint[1] : 0, 0); - } - - const simplifiedResult = { target: result.target, nullsForHowManySeconds }; - log.info({ event: `${logEventPrefix}_NULLS_FOR_HOW_LONG` }, simplifiedResult); - return simplifiedResult; - }); - - const failedResults = simplifiedResults.filter(r => r.nullsForHowManySeconds >= 180); - - if (failedResults.length === 0) { - this.status = status.PASSED; - this.checkOutput =`${this.metric} has data`; + async tick() { + try { + const json = await fetch(this.url, { + headers: { key: this.ftGraphiteKey } + }).then(fetchres.json); + + if(!json.length) { + badJSON('returned JSON should be an array', json); + } + + if(!json[0].datapoints) { + badJSON('No datapoints property', json); + } + + if(json[0].datapoints.length < 1) { + badJSON('Expected at least one datapoint', json); + } + + const simplifiedResults = json.map(result => { + let nullsForHowManySeconds; + + if (result.datapoints.every(datapoint => datapoint[0] === null)) { + nullsForHowManySeconds = Infinity; } else { - this.status = status.FAILED; - this.checkOutput = failedResults.map(r => `${r.target} has been null for ${Math.round(r.nullsForHowManySeconds / 60)} minutes.`).join(' '); - } - }) - .catch(err => { - log.error({ event: `${logEventPrefix}_ERROR`, url: this.url }, err); - this.status = status.FAILED; - this.checkOutput = err.toString(); + // This sums the number of seconds since the last non-null result at the tail of the list of metrics. + nullsForHowManySeconds = result.datapoints + .map((datapoint, index, array) => [datapoint[0], index > 0 ? datapoint[1] - array[index - 1][1] : 0]) + .reduce((xs, datapoint) => datapoint[0] === null ? 
xs + datapoint[1] : 0, 0); + } + + const simplifiedResult = { target: result.target, nullsForHowManySeconds }; + log.info({ event: `${logEventPrefix}_NULLS_FOR_HOW_LONG` }, simplifiedResult); + return simplifiedResult; }); + + const failedResults = simplifiedResults.filter(r => r.nullsForHowManySeconds >= 180); + + if (failedResults.length === 0) { + this.status = status.PASSED; + this.checkOutput =`${this.metric} has data`; + } else { + this.status = status.FAILED; + this.checkOutput = failedResults.map(r => `${r.target} has been null for ${Math.round(r.nullsForHowManySeconds / 60)} minutes.`).join(' '); + } + } catch(err) { + log.error({ event: `${logEventPrefix}_ERROR`, url: this.url }, err); + this.status = status.FAILED; + this.checkOutput = err.toString(); + } } } diff --git a/src/checks/index.js b/src/checks/index.js index 2356d53..be26061 100644 --- a/src/checks/index.js +++ b/src/checks/index.js @@ -1,11 +1,11 @@ 'use strict'; module.exports = { - aggregate : require('./aggregate.check'), - responseCompare : require('./responseCompare.check'), - json : require('./json.check'), - string : require('./string.check'), - pingdom : require('./pingdom.check'), + aggregate: require('./aggregate.check'), + responseCompare: require('./responseCompare.check'), + json: require('./json.check'), + string: require('./string.check'), + pingdom: require('./pingdom.check'), graphiteSpike: require('./graphiteSpike.check'), graphiteThreshold: require('./graphiteThreshold.check'), graphiteWorking: require('./graphiteWorking.check'), diff --git a/src/checks/json.check.js b/src/checks/json.check.js index a12eb7d..607ddab 100644 --- a/src/checks/json.check.js +++ b/src/checks/json.check.js @@ -2,10 +2,11 @@ const status = require('./status'); const Check = require('./check'); const fetch = require('node-fetch'); +const fetchres = require('fetchres'); class JsonCheck extends Check{ - constructor(options){ + constructor(options) { super(options); this.callback = options.callback; 
this.url = options.url; @@ -13,27 +14,20 @@ class JsonCheck extends Check{ this.fetchOptions = options.fetchOptions; } - get checkOutput(){ + get checkOutput() { return this.checkResultInternal[this.status]; } - tick(){ - return fetch(this.url, this.fetchOptions) - .then(response => { - if(!response.ok){ - throw new Error('BadResponse ' + response.status); - } + async tick() { + try { + const json = await fetch(this.url, this.fetchOptions).then(fetchres.json) - return response.json(); - }) - .then(json => { - let result = this.callback(json); - this.status = result ? status.PASSED : status.FAILED; - }) - .catch(err => { - console.error('Failed to get JSON', err); - this.status = status.FAILED; - }) + let result = this.callback(json); + this.status = result ? status.PASSED : status.FAILED; + } catch(err) { + console.error('Failed to get JSON', err); + this.status = status.FAILED; + } } } diff --git a/src/checks/pingdom.check.js b/src/checks/pingdom.check.js index 134aeab..8b91002 100644 --- a/src/checks/pingdom.check.js +++ b/src/checks/pingdom.check.js @@ -17,29 +17,21 @@ class PingdomCheck extends Check{ this.checkOutput = `Pingdom check ${this.checkId} has not yet run`; } - tick(){ - return fetch(this.url, { - headers : this.headers - }) - .then(response => { - this.rawResponse = response; - return response.json(); - }) - .then(response => { - if(!this.rawResponse.ok){ - throw new Error(`Pingdom API returned ${response.error.statuscode}: ${response.error.errormessage}`); - } + async tick(){ + try { + const response = await fetch(this.url, { headers : this.headers }); + const json = await response.json(); - return response; - }) - .then(json => { - this.status = (json.check.status === 'up') ? 
status.PASSED : status.FAILED; - this.checkOutput = `Pingdom status: ${json.check.status}`; - }) - .catch(err => { - this.status = status.FAILED; - this.checkOutput = `Failed to get status: ${err.message}`; - }) + if(!response.ok){ + throw new Error(`Pingdom API returned ${json.error.statuscode}: ${json.error.errormessage}`); + } + + this.status = (json.check.status === 'up') ? status.PASSED : status.FAILED; + this.checkOutput = `Pingdom status: ${json.check.status}`; + } catch(err) { + this.status = status.FAILED; + this.checkOutput = `Failed to get status: ${err.message}`; + } } } diff --git a/src/checks/responseCompare.check.js b/src/checks/responseCompare.check.js index fbe2064..5a87ba2 100644 --- a/src/checks/responseCompare.check.js +++ b/src/checks/responseCompare.check.js @@ -34,25 +34,28 @@ class ResponseCompareCheck extends Check { } } - tick(){ - return Promise.all(this.urls.map(url => fetch(url, { headers: this.headers }))) - .then(responses => { - return Promise.all(responses.map(r => r.text().then(this.normalizeResponse))); - }) - .then(responses => { - if(this.comparison === ResponseCompareCheck.comparisons.EQUAL){ - this.status = allEqual(responses) ? status.PASSED : status.FAILED; - } - }) - .catch(err => { - console.error(err.stack); - setTimeout(() => {throw err; }, 0); - }); + async tick(){ + try { + const responses = await Promise.all( + this.urls.map( + url => fetch(url, { headers: this.headers }).then( + r => r.text().then(this.normalizeResponse) + ) + ) + ) + + if(this.comparison === ResponseCompareCheck.comparisons.EQUAL){ + this.status = allEqual(responses) ? 
status.PASSED : status.FAILED; + } + } catch(err) { + console.error('Response was not OK', err); + this.status = status.FAILED; + } } } ResponseCompareCheck.comparisons = { - EQUAL : 'equal' + EQUAL: 'equal' }; module.exports = ResponseCompareCheck; diff --git a/src/checks/string.check.js b/src/checks/string.check.js index ba8df5b..b5616b8 100644 --- a/src/checks/string.check.js +++ b/src/checks/string.check.js @@ -2,6 +2,7 @@ const status = require('./status'); const Check = require('./check'); const fetch = require('node-fetch'); +const fetchres = require('fetchres'); class StringCheck extends Check { @@ -20,19 +21,14 @@ class StringCheck extends Check { return `${this.url} is ${this.status === status.PASSED ? '' : 'not'} equal to ${this.expected}`; } - tick(){ - return fetch(this.url, this.fetchOptions) - .then(response => { - if(!response.ok){ - throw new Error('BadResponse ' + response.status); - } - return response.text() - }) - .then(body => this.status = body === this.expected ? status.PASSED : status.FAILED) - .catch(err => { - console.error('Response was not OK', err); - this.status = status.FAILED; - }); + async tick(){ + try { + const body = await fetch(this.url, this.fetchOptions).then(fetchres.text); + this.status = body === this.expected ? 
status.PASSED : status.FAILED; + } catch(err) { + console.error('Response was not OK', err); + this.status = status.FAILED; + } } } From 66a212237a42f8d44b892745c0ae992c9199ebd8 Mon Sep 17 00:00:00 2001 From: bren Date: Thu, 14 Feb 2019 16:56:45 +0000 Subject: [PATCH 5/8] =?UTF-8?q?doesn't=20need=20to=20be=20async=20=20?= =?UTF-8?q?=F0=9F=90=BF=20v2.12.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/checks/check.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/checks/check.js b/src/checks/check.js index 4873076..400154e 100644 --- a/src/checks/check.js +++ b/src/checks/check.js @@ -42,7 +42,7 @@ an init method returning a Promise`) this.lastUpdated = null; } - async init() {} + init() {} async start() { await this.init(); From d7c5fdb774970ba3346bf56dde0e04f447273aad Mon Sep 17 00:00:00 2001 From: bren Date: Thu, 14 Feb 2019 17:43:23 +0000 Subject: [PATCH 6/8] =?UTF-8?q?documentation=20that=20actually=20tells=20y?= =?UTF-8?q?ou=20how=20to=20use=20it=20=20=F0=9F=90=BF=20v2.12.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 174 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 115 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index ac61ae7..540c417 100644 --- a/README.md +++ b/README.md @@ -1,97 +1,153 @@ # n-health [![CircleCI](https://circleci.com/gh/Financial-Times/n-health.svg?style=svg)](https://circleci.com/gh/Financial-Times/n-health) -Makes it easy to add a variety of healthchecks to an app. +Collection of healthcheck classes to use in your nodejs application -## Adding Health Checks -To Add more health checks create a new file in the `config` directory. It should be a .js file which exports an object. 
The object must have the following properties: +## Usage -* name: A name for the healthcheck - is supposed to match to a name in the CMDB, ideally -* description: Test description for the checks - for reference only -* checks: Array of checks - see below for check config +`n-health` exports a function that loads [healthcheck configuration](#healthcheck-configuration) files from a folder: -## Standard check options +```js +const nHealth = require('n-health'); -* name, severity, businessImpact, technicalSummary and panicGuide are all required. See the [specification](https://docs.google.com/document/edit?id=1ftlkDj1SUXvKvKJGvoMoF1GnSUInCNPnNGomqTpJaFk) for details -* interval: time between checks in milliseconds or any string compatible with [ms](https://www.npmjs.com/package/ms) [default: 1minute] -* type: The type of check (see below) -* officeHoursOnly: [default: false] For queries that will probably fail out of hours (e.g. Internet Explorer usage, B2B stuff), set this to true and the check will pass on weekends and outside office hours. Use sparingly. +const healthChecks = nHealth( + 'path/to/healthchecks' // by default, `/healthchecks` or `/config` in the root of your application +) +``` -## Healthcheck types and options +It returns an object with an `asArray` method. If you're using `n-express`, pass this array as the `healthChecks` option: -### pingdom +```js +const nExpress = require('@financial-times/n-express') + +nExpress({ + healthChecks +}) +``` + +If you're not using n-express, you should create a `/__health` endpoint which returns the following JSON structure (see the [specification](https://docs.google.com/document/edit?id=1ftlkDj1SUXvKvKJGvoMoF1GnSUInCNPnNGomqTpJaFk) for details): + +```json +{ + "schemaVersion": 1, + "name": "app name", + "systemCode": "biz-ops system code", + "description": "human readable description", + "checks": [] +} +``` + +`checks` should be an array of check status objects. 
You can get this by calling `getStatus` on each item in the array, for example with `healthChecks.asArray().map(check => check.getStatus())`. + +### Custom healthchecks + +If you require a healthcheck not provided by n-health, you can pass a second argument to `nHealth`, which should be a path to a folder of files exporting custom healthcheck classes. These modules should export a class that extends `n-health`'s `Check` class and implements the `tick` method, which is periodically called to update the check's `status`. It can also implement the `init` method to do something when the check is first run. Both of these methods can be `async` if you need to do something like make a request. + +```js +const {Check, status} = require('n-health'); + +class RandomCheck extends Check { + tick() { + this.status = Math.random() < 0.5 ? status.PASSED : status.FAILED; + } +} + +module.exports = RandomCheck; +``` + +See the [src/checks](src/checks) folder for some examples. + +## Healthcheck configuration + +A healthcheck config is a Javascript file that exports an object with these properties. + +* `name`: A name for the healthcheck - is supposed to match to a name in biz-ops, ideally +* `description`: Test description for the checks - for reference only +* `checks`: Array of [check objects](#check-objects) + +### Check objects + +#### Common options + +* `type`: The type of check, which should be one of the types below. That check type's options should also be included in the object as required. +* `name`, `severity`, `businessImpact`, `technicalSummary` and `panicGuide` are all required. See the [specification](https://docs.google.com/document/edit?id=1ftlkDj1SUXvKvKJGvoMoF1GnSUInCNPnNGomqTpJaFk) for details +* `interval`: time between checks in milliseconds or any string compatible with [ms](https://www.npmjs.com/package/ms) [default: 1minute] +* `officeHoursOnly`: [default: `false`] For queries that will probably fail out of hours (e.g.
Internet Explorer usage, B2B stuff), set this to true and the check will pass on weekends and outside office hours. Use sparingly. + +#### `pingdom` Will poll the pingdom API to get the status of a specific check -* checkId: The id of the check in pingdom +* `checkId`: The id of the check in pingdom -### responseCompare +#### `responseCompare` Fetches from multiple urls and compares the responses. Useful to check that replication is working -* urls: An array of urls to call -* comparison: Type of comparison to apply to the responses (Only "equal" so far +* `urls`: An array of urls to call +* `comparison`: Type of comparison to apply to the responses: + - `'equal'` the check succeeds if all the responses have the same body -### json +#### `json` Calls a url, gets some json and runs a callback to check its form -* url: url to call and get the json -* fetchOptions: Object to pass to fetch, see https://www.npmjs.com/package/node-fetch#options for more information. -* callback: A function to run on the response. Accepts the parsed json as an argument and should return true or false +* `url`: url to call and get the json +* `fetchOptions`: Object to pass to fetch, see https://www.npmjs.com/package/node-fetch#options for more information. +* `callback`: A function to run on the response. Accepts the parsed json as an argument and should return true or false -### aggregate -Reports on the status of other checks. Useful if you have a multi-region service and, if one check fails it is not as bad as if ALL the checks fail. +#### `aggregate` +Reports on the status of other checks. Useful if you have a multi-region service and, if one check fails it is not as bad as if ALL the checks fail. -* watch: Array of names of checks to aggregate -* mode: Aggregate mode.
I think "atLeastOne" is the only valid option so far +* `watch`: Array of names of checks to aggregate +* `mode`: Aggregate mode: + - `'atLeastOne'` the check succeeds if at least one of its subchecks succeeds -### graphiteSpike +#### `graphiteSpike` Compares current and historical graphite metrics to see if there is a spike -* numerator: [required] Name of main graphite metric to count (may contain wildcards) -* divisor: [optional] Name of graphite metric to divide by (may contain wildcards) -* normalize: [optional] Boolean indicating whether to normalize to adjust for difference in size between sample and baseline timescales. Default is `true` if no divisor specified, `false` otherwise. -* samplePeriod: [default: '10min'] Length of time to count metrics for a sample of current behaviour -* baselinePeriod: [default: '7d'] Length of time to count metrics for to establish baseline behaviour -* direction: [default: 'up'] Direction in which to look for spikes; 'up' = sharp increase in activity, 'down' = sharp decrease in activity -* threshold: [default: 3] Amount of difference between current and baseline activity which registers as a spike e.g. 5 means current activity must be 5 times greater/less than the baseline activity +* `numerator`: [required] Name of main graphite metric to count (may contain wildcards) +* `divisor`: [optional] Name of graphite metric to divide by (may contain wildcards) +* `normalize`: [optional] Boolean indicating whether to normalize to adjust for difference in size between sample and baseline timescales. Default is `true` if no divisor specified, `false` otherwise. 
+* `samplePeriod`: [default: `'10min'`] Length of time to count metrics for a sample of current behaviour +* `baselinePeriod`: [default: `'7d'`] Length of time to count metrics for to establish baseline behaviour +* `direction`: [default: `'up'`] Direction in which to look for spikes; 'up' = sharp increase in activity, 'down' = sharp decrease in activity +* `threshold`: [default: `3`] Amount of difference between current and baseline activity which registers as a spike e.g. 5 means current activity must be 5 times greater/less than the baseline activity -### graphiteThreshold +#### `graphiteThreshold` Checks whether the value of a graphite metric has crossed a threshold -* metric: [required] Name of graphite metric to count (may contain wildcards) -* threshold: [required] Value to check the metrics against -* samplePeriod: [default: '10min'] Length of time to count metrics for a sample of current behaviour -* direction: [default: 'above'] Direction on which to trigger the healthcheck; - - 'above' = alert if value goes above the threshold - - 'below' = alert if value goes below the threshold +* `metric`: [required] Name of graphite metric to count (may contain wildcards) +* `threshold`: [required] Value to check the metrics against +* `samplePeriod`: [default: `'10min'`] Length of time to count metrics for a sample of current behaviour +* `direction`: [default: `'above'`] Direction on which to trigger the healthcheck: + - `'above'` = alert if value goes above the threshold + - `'below'` = alert if value goes below the threshold -### graphiteWorking +#### `graphiteWorking` Checks if the value of a graphite metric has received data recently. -* metric: [required] Name of graphite metric to count (may contain wildcards) +* `metric`: [required] Name of graphite metric to count (may contain wildcards) - Use `summarize` if the metric receives data infrequently, e.g. 
`summarize(next.heroku.next-article.some-infrequent-periodic-metric, '30mins', 'sum', true)` -* time: [default: '-5minutes'] Length of time to count metrics +* `time`: [default: `'-5minutes'`] Length of time to count metrics -### cloudWatchThreshold +#### `cloudWatchThreshold` Checks whether the value of a CloudWatch metric has crossed a threshold _Note: this assumes that `AWS_ACCESS_KEY` & `AWS_SECRET_ACCESS_KEY` are implictly available as environment variables on process.env_ - -* cloudWatchRegion = [default 'eu-west-1'] AWS region the metrics are stored -* cloudWatchMetricName = [required] Name of the CloudWatch metric to count -* cloudWatchNamespace = [required] Namespace the metric resides in -* cloudWatchStatistic = [default 'Sum'] Data aggregation type to return -* cloudWatchDimensions = Optional array of metric data to query -* samplePeriod: [default: 300] Length of time in seconds to count metrics for a sample of current behaviour -* threshold: [required] Value to check the metrics against -* direction: [default: 'above'] Direction on which to trigger the healthcheck; - - 'above' = alert if value goes above the threshold - - 'below' = alert if value goes below the threshold - -### cloudWatchAlarm +* `cloudWatchRegion` = [default `'eu-west-1'`] AWS region the metrics are stored +* `cloudWatchMetricName` = [required] Name of the CloudWatch metric to count +* `cloudWatchNamespace` = [required] Namespace the metric resides in +* `cloudWatchStatistic` = [default `'Sum'`] Data aggregation type to return +* `cloudWatchDimensions` = Optional array of metric data to query +* `samplePeriod`: [default: `300`] Length of time in seconds to count metrics for a sample of current behaviour +* `threshold`: [required] Value to check the metrics against +* `direction`: [default: `'above'`] Direction on which to trigger the healthcheck: + - `'above'` = alert if value goes above the threshold + - `'below'` = alert if value goes below the threshold + +#### `cloudWatchAlarm` 
Checks whether the state of a CloudWatch alarm is health _Note: this assumes that `AWS_ACCESS_KEY` & `AWS_SECRET_ACCESS_KEY` are implictly available as environment variables on process.env_ -* cloudWatchRegion = [default 'eu-west-1'] AWS region the metrics are stored -* cloudWatchAlarmName = [required] Name of the CloudWatch alarm to check +* `cloudWatchRegion` = [default `'eu-west-1'`] AWS region the metrics are stored +* `cloudWatchAlarmName` = [required] Name of the CloudWatch alarm to check From 9155deca4065264bcab549056e4a8b3d7dd9861f Mon Sep 17 00:00:00 2001 From: bren Date: Thu, 14 Feb 2019 17:48:05 +0000 Subject: [PATCH 7/8] =?UTF-8?q?fix=20n-express=20example=20=20=F0=9F=90=BF?= =?UTF-8?q?=20v2.12.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 540c417..f02749b 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ It returns an object with an `asArray` method. If you're using `n-express`, pass const nExpress = require('@financial-times/n-express') nExpress({ - healthChecks + healthChecks: healthChecks.asArray() }) ``` From 97dacdb17f5bce843c49b09e167ab1fe39bb0b30 Mon Sep 17 00:00:00 2001 From: Adam Braimbridge Date: Mon, 25 Feb 2019 12:06:50 +0100 Subject: [PATCH 8/8] clarify office hours Co-Authored-By: quarterto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f02749b..103c823 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ A healthcheck config is a Javascript file that exports an object with these prop * `type`: The type of check, which should be one of the types below. That check type's options should also be included in the object as required. * `name`, `severity`, `businessImpact`, `technicalSummary` and `panicGuide` are all required. 
See the [specification](https://docs.google.com/document/edit?id=1ftlkDj1SUXvKvKJGvoMoF1GnSUInCNPnNGomqTpJaFk) for details * `interval`: time between checks in milliseconds or any string compatible with [ms](https://www.npmjs.com/package/ms) [default: 1minute] -* `officeHoursOnly`: [default: `false`] For queries that will probably fail out of hours (e.g. Internet Explorer usage, B2B stuff), set this to true and the check will pass on weekends and outside office hours. Use sparingly. +* `officeHoursOnly`: [default: `false`] For queries that will probably fail out of hours (e.g. Internet Explorer usage, B2B stuff), set this to true and the check will pass on weekends and outside office hours (defined as 8am-6pm UTC). Use sparingly. #### `pingdom` Will poll the pingdom API to get the status of a specific check