From fad809cd51006bae298bd985b8759d364b6afe81 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Wed, 24 Mar 2021 08:16:51 +0100 Subject: [PATCH] pkg/rules: fix deduplication of equal alerts with different labels Currently, if an alerting rule having the same name with different severity labels is being returned from different replicas then they are being treated as separate alerts. Given the following alerts a1,a2 with severities s1,s2 returned from replicas r1,2: a1[s1,r1] a1[s2,r1] a1[s1,r2] a1[s2,r2] Then, currently, the algorithm deduplicates to: a1[s1] a1[s2] a1[s1] a1[s2] Instead of the intendet result: a1[s1] a1[s2] This fixes it by removing replica labels before sorting labels for deduplication. Signed-off-by: Sergiusz Urbaniak --- CHANGELOG.md | 1 + pkg/rules/rules.go | 9 ++-- pkg/rules/rules_test.go | 108 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 105 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4285cf009..6d6d86a91e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re - [#3204](https://github.com/thanos-io/thanos/pull/3204) Mixin: Use sidecar's metric timestamp for healthcheck. - [#3922](https://github.com/thanos-io/thanos/pull/3922) Fix panic in http logging middleware. +- [#3960](https://github.com/thanos-io/thanos/pull/3960) fix deduplication of equal alerts with different labels ### Changed - [#3948](https://github.com/thanos-io/thanos/pull/3948) Receiver: Adjust `http_request_duration_seconds` buckets for low latency requests. diff --git a/pkg/rules/rules.go b/pkg/rules/rules.go index 9d40ee2403..422d03297c 100644 --- a/pkg/rules/rules.go +++ b/pkg/rules/rules.go @@ -68,23 +68,22 @@ func dedupRules(rules []*rulespb.Rule, replicaLabels map[string]struct{}) []*rul return rules } - // Sort each rule's label names such that they are comparable. + // Remove replica labels and sort each rule's label names such that they are comparable. for _, r := range rules { + removeReplicaLabels(r, replicaLabels) sort.Slice(r.GetLabels(), func(i, j int) bool { return r.GetLabels()[i].Name < r.GetLabels()[j].Name }) } - // Sort rules globally based on synthesized deduplication labels, also considering replica labels and their values. + // Sort rules globally. sort.Slice(rules, func(i, j int) bool { return rules[i].Compare(rules[j]) < 0 }) - // Remove rules based on synthesized deduplication labels, this time ignoring replica labels and last evaluation. + // Remove rules based on synthesized deduplication labels. i := 0 - removeReplicaLabels(rules[i], replicaLabels) for j := 1; j < len(rules); j++ { - removeReplicaLabels(rules[j], replicaLabels) if rules[i].Compare(rules[j]) != 0 { // Effectively retain rules[j] in the resulting slice. i++ diff --git a/pkg/rules/rules_test.go b/pkg/rules/rules_test.go index 0f0c99499a..bae891b614 100644 --- a/pkg/rules/rules_test.go +++ b/pkg/rules/rules_test.go @@ -448,14 +448,14 @@ func TestDedupRules(t *testing.T) { rulespb.NewAlertingRule(&rulespb.Alert{ Name: "a1", Query: "up", - DurationSeconds: 2.0, + DurationSeconds: 1.0, Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "1"}, }}}), rulespb.NewAlertingRule(&rulespb.Alert{ Name: "a1", Query: "up", - DurationSeconds: 1.0, + DurationSeconds: 2.0, Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "1"}, }}}), @@ -505,13 +505,13 @@ func TestDedupRules(t *testing.T) { }}}), }, want: []*rulespb.Rule{ + rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}), rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1", Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "1"}, }}}), rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1", Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ {Name: "a", Value: "2"}, }}}), - rulespb.NewRecordingRule(&rulespb.RecordingRule{Name: "a1"}), }, replicaLabels: []string{"replica"}, }, @@ -613,6 +613,11 @@ func TestDedupRules(t *testing.T) { }), }, want: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + State: rulespb.AlertState_FIRING, + Name: "a1", + LastEvaluation: time.Unix(2, 0), + }), rulespb.NewAlertingRule(&rulespb.Alert{ State: rulespb.AlertState_FIRING, Name: "a1", @@ -621,11 +626,6 @@ func TestDedupRules(t *testing.T) { }}, LastEvaluation: time.Unix(1, 0), }), - rulespb.NewAlertingRule(&rulespb.Alert{ - State: rulespb.AlertState_FIRING, - Name: "a1", - LastEvaluation: time.Unix(2, 0), - }), rulespb.NewAlertingRule(&rulespb.Alert{ State: rulespb.AlertState_PENDING, Name: "a2", @@ -634,6 +634,98 @@ func TestDedupRules(t *testing.T) { }, replicaLabels: []string{"replica"}, }, + { + name: "alerts with different severity", + rules: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "1"}, + {Name: "severity", Value: "warning"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "1"}, + {Name: "severity", Value: "critical"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "2"}, + {Name: "severity", Value: "warning"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "2"}, + {Name: "severity", Value: "critical"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + want: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "severity", Value: "critical"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "severity", Value: "warning"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + replicaLabels: []string{"replica"}, + }, + { + name: "alerts with missing replica labels", + rules: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "1"}, + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "replica", Value: "2"}, + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + want: []*rulespb.Rule{ + rulespb.NewAlertingRule(&rulespb.Alert{ + Name: "a1", + Labels: labelpb.ZLabelSet{Labels: []labelpb.ZLabel{ + {Name: "label", Value: "foo"}, + }}, + LastEvaluation: time.Unix(1, 0), + }), + }, + replicaLabels: []string{"replica"}, + }, } { t.Run(tc.name, func(t *testing.T) { replicaLabels := make(map[string]struct{})