From d65d2b211f908dafc8ee0da2d0a4b2bd658a125c Mon Sep 17 00:00:00 2001 From: Kevin Gillette Date: Sat, 15 Jul 2023 16:58:29 -0600 Subject: [PATCH] SortTags deduplicates, uses slices.SortStable golang.org/x/exp/slices has comparable allocation-free sorting, which we can use while reducing our code. Neither Datadog nor Prometheus appear to retain or handle multiple tags with the same name on the same metric, so this package deduplicates at time of sort (latest tag wins). If any metric systems we use do tolerate and provide useful behavior when provided duplicate tag/label names, and the practice is justified, that would be a reason to revert or adjust this deduplication behavior. --- engine.go | 6 +-- tag.go | 45 ++++++++++++++++++++--- tag_test.go | 103 +++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 121 insertions(+), 33 deletions(-) diff --git a/engine.go b/engine.go index 292b504..15f049c 100644 --- a/engine.go +++ b/engine.go @@ -68,7 +68,7 @@ func (eng *Engine) WithPrefix(prefix string, tags ...Tag) *Engine { return &Engine{ Handler: eng.Handler, Prefix: eng.makeName(prefix), - Tags: eng.makeTags(tags), + Tags: mergeTags(eng.Tags, tags), } } @@ -170,10 +170,6 @@ func (eng *Engine) makeName(name string) string { return concat(eng.Prefix, name) } -func (eng *Engine) makeTags(tags []Tag) []Tag { - return SortTags(concatTags(eng.Tags, tags)) -} - var measureArrayPool = sync.Pool{ New: func() interface{} { return new([1]Measure) }, } diff --git a/tag.go b/tag.go index c823724..f014290 100644 --- a/tag.go +++ b/tag.go @@ -47,21 +47,54 @@ func SortTags(tags []Tag) []Tag { // For 20 or fewer tags, this is as fast as an unstable sort. slices.SortStableFunc(tags, tagIsLess) - return tags + return deduplicateTags(tags) } func tagIsLess(a, b Tag) bool { return a.Name < b.Name } +func deduplicateTags(tags []Tag) []Tag { + var prev string + out := tags[:0] + + for _, tag := range tags { + switch { + case tag.Name == "": + // Ignore unnamed tags. + continue + + case tag.Name != prev: + // Non-duplicate tag: keep. + prev = tag.Name + out = append(out, tag) + + default: + // Duplicate tag: replace previous, same-named tag. + i := len(out) - 1 + out[i] = tag + } + } + + if len(out) == 0 { + // No input tags had non-empty names: + // return nil to be consistent for ease of testing. + return nil + } -func concatTags(t1 []Tag, t2 []Tag) []Tag { + return out +} + +// mergeTags returns the sorted, deduplicated-by-name union of t1 and t2. +func mergeTags(t1, t2 []Tag) []Tag { n := len(t1) + len(t2) if n == 0 { return nil } - t3 := make([]Tag, 0, n) - t3 = append(t3, t1...) - t3 = append(t3, t2...) - return t3 + + out := make([]Tag, 0, n) + out = append(out, t1...) + out = append(out, t2...) + + return SortTags(out) } func copyTags(tags []Tag) []Tag { diff --git a/tag_test.go b/tag_test.go index 0ccc053..22a4f77 100644 --- a/tag_test.go +++ b/tag_test.go @@ -32,7 +32,8 @@ func Test_copyTags(t *testing.T) { for _, test := range tests { t.Run("", func(t *testing.T) { - if tags := copyTags(test.t1); !reflect.DeepEqual(tags, test.t2) { + tags := copyTags(test.t1) + if !reflect.DeepEqual(tags, test.t2) { t.Errorf("copyTags => %#v != %#v", tags, test.t2) } }) @@ -43,41 +44,58 @@ func Test_mergeTags(t *testing.T) { t.Parallel() tests := []struct { - t1 []Tag - t2 []Tag - t3 []Tag + name string + t1, t2, t3 []Tag }{ { - t1: nil, - t2: nil, - t3: nil, + name: "nil_inputs", + t1: nil, + t2: nil, + t3: nil, }, { - t1: []Tag{}, - t2: []Tag{}, - t3: nil, + name: "empty_inputs", + t1: []Tag{}, + t2: []Tag{}, + t3: nil, }, { - t1: []Tag{{"A", "1"}}, - t2: nil, - t3: []Tag{{"A", "1"}}, + name: "second_empty_input", + t1: []Tag{{"A", "1"}}, + t2: nil, + t3: []Tag{{"A", "1"}}, }, { - t1: nil, - t2: []Tag{{"B", "2"}}, - t3: []Tag{{"B", "2"}}, + name: "first_empty_input", + t1: nil, + t2: []Tag{{"B", "2"}}, + t3: []Tag{{"B", "2"}}, + }, + { + name: "non_duplicated_inputs", + t1: []Tag{{"A", "1"}}, + t2: []Tag{{"B", "2"}}, + t3: []Tag{{"A", "1"}, {"B", "2"}}, }, { - t1: []Tag{{"A", "1"}}, - t2: []Tag{{"B", "2"}}, - t3: []Tag{{"A", "1"}, {"B", "2"}}, + name: "cross_duplicated_inputs", + t1: []Tag{{"A", "1"}}, + t2: []Tag{{"A", "2"}}, + t3: []Tag{{"A", "2"}}, + }, + { + name: "self_duplicated_input", + t1: []Tag{{"A", "2"}, {"A", "1"}}, + t2: nil, + t3: []Tag{{"A", "1"}}, }, } for _, test := range tests { - t.Run("", func(t *testing.T) { - if tags := concatTags(test.t1, test.t2); !reflect.DeepEqual(tags, test.t3) { - t.Errorf("concatTags => %#v != %#v", tags, test.t3) + t.Run(test.name, func(t *testing.T) { + tags := mergeTags(test.t1, test.t2) + if !reflect.DeepEqual(tags, test.t3) { + t.Errorf("mergeTags => %v != %v", tags, test.t3) } }) } @@ -329,3 +347,44 @@ func Benchmark_tagsBuffer_sort_unsorted(b *testing.B) { buf.sort() } } + +func Benchmark_mergeTags(b *testing.B) { + b.ReportAllocs() + + origT1 := []Tag{ + {"hello", "world"}, + {"answer", "42"}, + {"some long tag name", "!"}, + {"some longer tag name", "1234"}, + {"A", ""}, + {"B", ""}, + {"C", ""}, + {"hello", "world"}, + {"answer", "42"}, + {"some long tag name", "!"}, + } + + origT2 := []Tag{ + {"some longer tag name", "1234"}, + {"A", ""}, + {"B", ""}, + {"C", ""}, + {"hello", "world"}, + {"answer", "42"}, + {"some long tag name", "!"}, + {"some longer tag name", "1234"}, + {"A", ""}, + {"B", ""}, + {"C", ""}, + } + + t1 := make([]Tag, len(origT1)) + t2 := make([]Tag, len(origT2)) + + for i := 0; i < b.N; i++ { + copy(t1, origT1) + copy(t2, origT2) + + _ = mergeTags(t1, t2) + } +}