-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NOPS-1034 added threshold handling for asPercent and divideSeries #165
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -173,7 +173,7 @@ describe('Graphite Threshold Check', function(){ | |
|
||
context('It handles summarize(sumSeries)', function () { | ||
|
||
it('Should be healthy if sum above lower threshold', function (done) { | ||
it('Should be healthy if sum above threshold', function (done) { | ||
mockGraphite([ | ||
{ | ||
datapoints: [ | ||
|
@@ -192,7 +192,7 @@ describe('Graphite Threshold Check', function(){ | |
}); | ||
}); | ||
|
||
it('Should be healthy if sum below upper threshold', function (done) { | ||
it('Should be healthy if sum below threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ null, 1635125640],[null,1635129240],[1,1635132840],[1,1635136440],[null,1635140040],[3,1635143640]], | ||
|
@@ -209,7 +209,7 @@ describe('Graphite Threshold Check', function(){ | |
}); | ||
}); | ||
|
||
it('Should be unhealthy if sum above lower threshold', function (done) { | ||
it('Should be unhealthy if sum below threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ null, 1635125640],[null,1635129240],[1,1635132840],[1,1635136440],[null,1635140040],[3,1635143640]], | ||
|
@@ -228,7 +228,7 @@ describe('Graphite Threshold Check', function(){ | |
}); | ||
}); | ||
|
||
it('Should be unhealthy if sum below upper threshold', function (done) { | ||
it('Should be unhealthy if sum above threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ null, 1635125640],[null,1635129240],[1,1635132840],[1,1635136440],[null,1635140040],[3,1635143640]], | ||
|
@@ -246,10 +246,158 @@ describe('Graphite Threshold Check', function(){ | |
done(); | ||
}); | ||
}); | ||
|
||
}); | ||
|
||
context('It handles asPercent', function () { | ||
|
||
it('Should be healthy if above threshold', function (done) { | ||
mockGraphite([ | ||
{ | ||
datapoints: [ | ||
[ 8, 1635125640]], | ||
target: `asPercent(summarize(sumSeries(failure.count), '10min', 'sum', true), summarize(sumSeries(success.count), '10min', 'sum', true))` | ||
} | ||
]); | ||
|
||
check = new Check(getCheckConfig({ | ||
threshold: 1, | ||
direction: 'below' | ||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.true; | ||
done(); | ||
}); | ||
}); | ||
|
||
it('Should be healthy if below threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ null, 1635125640]], | ||
target: `asPercent(summarize(sumSeries(failure.count), '10min', 'sum', true), summarize(sumSeries(success.count), '10min', 'sum', true))` | ||
} | ||
]); | ||
check = new Check(getCheckConfig({ | ||
threshold: 1 | ||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.true; | ||
done(); | ||
}); | ||
}); | ||
|
||
it('Should be unhealthy if above threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ 8, 1635125640]], | ||
target: `asPercent(summarize(sumSeries(failure.count), '10min', 'sum', true),summarize(sumSeries(success.count), '10min', 'sum', true))` | ||
} | ||
]); | ||
check = new Check(getCheckConfig({ | ||
threshold: 5, | ||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.false; | ||
done(); | ||
}); | ||
}); | ||
|
||
it('Should be unhealthy if below threshold', function (done) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: would it make sense for it to be healthy (instead of unhealthy) below the threshold? In my mind, if we get the % of errors against the successes, a low value suggests that there's a low failure rate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This tests checks that the |
||
mockGraphite([ | ||
{ datapoints: [ | ||
[ null, 1635125640]], | ||
target: `asPercent(summarize(sumSeries(failure.count), '10min', 'sum', true), summarize(sumSeries(success.count), '10min', 'sum', true))` | ||
}, | ||
|
||
]); | ||
check = new Check(getCheckConfig({ | ||
threshold: 4, | ||
direction: 'below' | ||
|
||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.false; | ||
done(); | ||
}); | ||
}); | ||
}); | ||
|
||
context('It handles divideSeries', function () { | ||
|
||
it('Should be healthy if above threshold', function (done) { | ||
mockGraphite([ | ||
{ | ||
datapoints: [ | ||
[ 8, 1635125640]], | ||
target: `divideSeries(sumSeries(error.count), sumSeries(status.*.count))` | ||
} | ||
]); | ||
|
||
check = new Check(getCheckConfig({ | ||
threshold: 1, | ||
direction: 'below' | ||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.true; | ||
done(); | ||
}); | ||
}); | ||
|
||
it('Should be healthy if below threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ null, 1635125640]], | ||
target: `divideSeries(sumSeries(error.count), sumSeries(status.*.count))` | ||
} | ||
]); | ||
check = new Check(getCheckConfig({ | ||
threshold: 1 | ||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.true; | ||
done(); | ||
}); | ||
}); | ||
|
||
it('Should be unhealthy if above threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ 8, 1635125640]], | ||
target: `divideSeries(sumSeries(error.count),sumSeries(status.*.count))` | ||
} | ||
]); | ||
check = new Check(getCheckConfig({ | ||
threshold: 5, | ||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.false; | ||
done(); | ||
}); | ||
}); | ||
|
||
it('Should be unhealthy if below threshold', function (done) { | ||
mockGraphite([ | ||
{ datapoints: [ | ||
[ null, 1635125640]], | ||
target: `divideSeries(sumSeries(error.count), sumSeries(status.*.count))` | ||
}, | ||
]); | ||
check = new Check(getCheckConfig({ | ||
threshold: 4, | ||
direction: 'below' | ||
})); | ||
check.start(); | ||
setTimeout(() => { | ||
expect(check.getStatus().ok).to.be.false; | ||
done(); | ||
}); | ||
}); | ||
}); | ||
|
||
it('Should be possible to configure sample period', function(done){ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
question: I think I need some more context here in understanding why we're only using the first element of fetchCountPerTimeUnit to determine whether it is failing. Is it because we can be sure that there is only 1 datapoint in the array for divideSeries or asPercent, and that datapoint is the result of the query? From what I understand, if there is more than 1 datapoint in the array, the api does not return the result of the query so the datapoints look like
so I would have thought that we would need to do the calculations ourselves here, similar to how summarize(sumSeries) is done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oops, I just saw your tests and it answered my question - it is indeed 1 item in the datapoints array
suggestion: as there's only 1 item in the datapoints array, perhaps we can just change line 54 to
const ratioOfErrorsToSuccesses = Number(result.datapoints[0][0])
to make it more readable
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I took a slightly different approach. All the health checks I've seen have only one datapoint, but you are correct that this may not always be the case - I added a log to let us know if that's not the case (so we can iterate as needed).