Skip to content

Commit

Permalink
SortTags deduplicates, uses slices.SortStable
Browse files Browse the repository at this point in the history
golang.org/x/exp/slices has comparable allocation-free sorting,
which we can use while reducing our code.

Neither Datadog nor Prometheus appear to retain or handle
multiple tags with the same name on the same metric,
so this package deduplicates at time of sort (latest tag wins).

If any metric systems we use do tolerate and provide useful behavior
when provided duplicate tag/label names, and the practice is justified,
that would be a reason to revert or adjust this deduplication behavior.
  • Loading branch information
extemporalgenome committed Jul 16, 2023
1 parent d11a18e commit d65d2b2
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 33 deletions.
6 changes: 1 addition & 5 deletions engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (eng *Engine) WithPrefix(prefix string, tags ...Tag) *Engine {
return &Engine{
Handler: eng.Handler,
Prefix: eng.makeName(prefix),
Tags: eng.makeTags(tags),
Tags: mergeTags(eng.Tags, tags),
}
}

Expand Down Expand Up @@ -170,10 +170,6 @@ func (eng *Engine) makeName(name string) string {
return concat(eng.Prefix, name)
}

func (eng *Engine) makeTags(tags []Tag) []Tag {
return SortTags(concatTags(eng.Tags, tags))
}

var measureArrayPool = sync.Pool{
New: func() interface{} { return new([1]Measure) },
}
Expand Down
45 changes: 39 additions & 6 deletions tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,54 @@ func SortTags(tags []Tag) []Tag {
// For 20 or fewer tags, this is as fast as an unstable sort.
slices.SortStableFunc(tags, tagIsLess)

return tags
return deduplicateTags(tags)
}

func tagIsLess(a, b Tag) bool { return a.Name < b.Name }

func deduplicateTags(tags []Tag) []Tag {
var prev string
out := tags[:0]

for _, tag := range tags {
switch {
case tag.Name == "":
// Ignore unnamed tags.
continue

case tag.Name != prev:
// Non-duplicate tag: keep.
prev = tag.Name
out = append(out, tag)

default:
// Duplicate tag: replace previous, same-named tag.
i := len(out) - 1
out[i] = tag
}
}

if len(out) == 0 {
// No input tags had non-empty names:
// return nil to be consistent for ease of testing.
return nil
}

func concatTags(t1 []Tag, t2 []Tag) []Tag {
return out
}

// mergeTags returns the sorted, deduplicated-by-name union of t1 and t2.
func mergeTags(t1, t2 []Tag) []Tag {
n := len(t1) + len(t2)
if n == 0 {
return nil
}
t3 := make([]Tag, 0, n)
t3 = append(t3, t1...)
t3 = append(t3, t2...)
return t3

out := make([]Tag, 0, n)
out = append(out, t1...)
out = append(out, t2...)

return SortTags(out)
}

func copyTags(tags []Tag) []Tag {
Expand Down
103 changes: 81 additions & 22 deletions tag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ func Test_copyTags(t *testing.T) {

for _, test := range tests {
t.Run("", func(t *testing.T) {
if tags := copyTags(test.t1); !reflect.DeepEqual(tags, test.t2) {
tags := copyTags(test.t1)
if !reflect.DeepEqual(tags, test.t2) {
t.Errorf("copyTags => %#v != %#v", tags, test.t2)
}
})
Expand All @@ -43,41 +44,58 @@ func Test_mergeTags(t *testing.T) {
t.Parallel()

tests := []struct {
t1 []Tag
t2 []Tag
t3 []Tag
name string
t1, t2, t3 []Tag
}{
{
t1: nil,
t2: nil,
t3: nil,
name: "nil_inputs",
t1: nil,
t2: nil,
t3: nil,
},
{
t1: []Tag{},
t2: []Tag{},
t3: nil,
name: "empty_inputs",
t1: []Tag{},
t2: []Tag{},
t3: nil,
},
{
t1: []Tag{{"A", "1"}},
t2: nil,
t3: []Tag{{"A", "1"}},
name: "second_empty_input",
t1: []Tag{{"A", "1"}},
t2: nil,
t3: []Tag{{"A", "1"}},
},
{
t1: nil,
t2: []Tag{{"B", "2"}},
t3: []Tag{{"B", "2"}},
name: "first_empty_input",
t1: nil,
t2: []Tag{{"B", "2"}},
t3: []Tag{{"B", "2"}},
},
{
name: "non_duplicated_inputs",
t1: []Tag{{"A", "1"}},
t2: []Tag{{"B", "2"}},
t3: []Tag{{"A", "1"}, {"B", "2"}},
},
{
t1: []Tag{{"A", "1"}},
t2: []Tag{{"B", "2"}},
t3: []Tag{{"A", "1"}, {"B", "2"}},
name: "cross_duplicated_inputs",
t1: []Tag{{"A", "1"}},
t2: []Tag{{"A", "2"}},
t3: []Tag{{"A", "2"}},
},
{
name: "self_duplicated_input",
t1: []Tag{{"A", "2"}, {"A", "1"}},
t2: nil,
t3: []Tag{{"A", "1"}},
},
}

for _, test := range tests {
t.Run("", func(t *testing.T) {
if tags := concatTags(test.t1, test.t2); !reflect.DeepEqual(tags, test.t3) {
t.Errorf("concatTags => %#v != %#v", tags, test.t3)
t.Run(test.name, func(t *testing.T) {
tags := mergeTags(test.t1, test.t2)
if !reflect.DeepEqual(tags, test.t3) {
t.Errorf("mergeTags => %v != %v", tags, test.t3)
}
})
}
Expand Down Expand Up @@ -329,3 +347,44 @@ func Benchmark_tagsBuffer_sort_unsorted(b *testing.B) {
buf.sort()
}
}

func Benchmark_mergeTags(b *testing.B) {
b.ReportAllocs()

origT1 := []Tag{
{"hello", "world"},
{"answer", "42"},
{"some long tag name", "!"},
{"some longer tag name", "1234"},
{"A", ""},
{"B", ""},
{"C", ""},
{"hello", "world"},
{"answer", "42"},
{"some long tag name", "!"},
}

origT2 := []Tag{
{"some longer tag name", "1234"},
{"A", ""},
{"B", ""},
{"C", ""},
{"hello", "world"},
{"answer", "42"},
{"some long tag name", "!"},
{"some longer tag name", "1234"},
{"A", ""},
{"B", ""},
{"C", ""},
}

t1 := make([]Tag, len(origT1))
t2 := make([]Tag, len(origT2))

for i := 0; i < b.N; i++ {
copy(t1, origT1)
copy(t2, origT2)

_ = mergeTags(t1, t2)
}
}

0 comments on commit d65d2b2

Please sign in to comment.