From 40bb6e7d3844b1ca10479d876093907436325ea2 Mon Sep 17 00:00:00 2001 From: Janos Date: Thu, 28 Jul 2022 09:55:20 +0200 Subject: [PATCH 01/14] Changing active series matches from []bool to map[int]bool for better memory performance --- pkg/ingester/activeseries/active_series.go | 2 +- pkg/ingester/activeseries/matchers.go | 9 +++-- pkg/ingester/activeseries/matchers_test.go | 47 ++++++++-------------- 3 files changed, 23 insertions(+), 35 deletions(-) diff --git a/pkg/ingester/activeseries/active_series.go b/pkg/ingester/activeseries/active_series.go index 27219d58900..582a6bd0fd0 100644 --- a/pkg/ingester/activeseries/active_series.go +++ b/pkg/ingester/activeseries/active_series.go @@ -49,7 +49,7 @@ type seriesStripe struct { type seriesEntry struct { lbs labels.Labels nanos *atomic.Int64 // Unix timestamp in nanoseconds. Needs to be a pointer because we don't store pointers to entries in the stripe. - matches []bool // Which matchers of Matchers does this series match + matches map[int]bool // Index of the matcher matching -> true map showing only matching matchers } func NewActiveSeries(asm *Matchers, timeout time.Duration) *ActiveSeries { diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index a276f077eb2..5b0bb6a9c13 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -35,13 +35,16 @@ func (m *Matchers) Config() CustomTrackersConfig { return m.cfg } -func (m *Matchers) Matches(series labels.Labels) []bool { +// Matches returns a map[int]bool containing only matching matchers with ordinal number of matcher -> true if matches +func (m *Matchers) Matches(series labels.Labels) map[int]bool { if len(m.matchers) == 0 { return nil } - matches := make([]bool, len(m.matchers)) + matches := map[int]bool{} for i, sm := range m.matchers { - matches[i] = sm.Matches(series) + if sm.Matches(series) { + matches[i] = true + } } return matches } diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index b693a4b02f6..877d0fe366c 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -22,60 +22,45 @@ func TestMatcher_MatchesSeries(t *testing.T) { for _, tc := range []struct { series labels.Labels - expected []bool + expected map[int]bool }{ { series: labels.Labels{{Name: "foo", Value: "true"}, {Name: "baz", Value: "unrelated"}}, - expected: []bool{ - false, // bar_starts_with_1 - false, // does_not_have_foo_label - false, // has_foo_and_bar_starts_with_1 - true, // has_foo_label + expected: map[int]bool{ + 3: true, // has_foo_label }, }, { series: labels.Labels{{Name: "foo", Value: "true"}, {Name: "bar", Value: "100"}, {Name: "baz", Value: "unrelated"}}, - expected: []bool{ - true, // bar_starts_with_1 - false, // does_not_have_foo_label - true, // has_foo_and_bar_starts_with_1 - true, // has_foo_label + expected: map[int]bool{ + 0: true, // bar_starts_with_1 + 2: true, // has_foo_and_bar_starts_with_1 + 3: true, // has_foo_label }, }, { series: labels.Labels{{Name: "foo", Value: "true"}, {Name: "bar", Value: "200"}, {Name: "baz", Value: "unrelated"}}, - expected: []bool{ - false, // bar_starts_with_1 - false, // does_not_have_foo_label - false, // has_foo_and_bar_starts_with_1 - true, // has_foo_label + expected: map[int]bool{ + 3: true, // has_foo_label }, }, { series: labels.Labels{{Name: "bar", Value: "200"}, {Name: "baz", Value: "unrelated"}}, - expected: []bool{ - false, // bar_starts_with_1 - true, // does_not_have_foo_label - false, // has_foo_and_bar_starts_with_1 - false, // has_foo_label + expected: map[int]bool{ + 1: true, // does_not_have_foo_label }, }, { series: labels.Labels{{Name: "bar", Value: "100"}, {Name: "baz", Value: "unrelated"}}, - expected: []bool{ - true, // bar_starts_with_1 - true, // does_not_have_foo_label - false, // has_foo_and_bar_starts_with_1 - false, // has_foo_label + expected: map[int]bool{ + 0: true, // bar_starts_with_1 + 1: true, // does_not_have_foo_label }, }, { series: labels.Labels{{Name: "baz", Value: "unrelated"}}, - expected: []bool{ - false, // bar_starts_with_1 - true, // does_not_have_foo_label - false, // has_foo_and_bar_starts_with_1 - false, // has_foo_label + expected: map[int]bool{ + 1: true, // does_not_have_foo_label }, }, } { From 3067b747886a2bb52814e97fbac5acee946b8218 Mon Sep 17 00:00:00 2001 From: Janos Date: Thu, 28 Jul 2022 16:29:04 +0200 Subject: [PATCH 02/14] Changing map[int]bool to int slice --- pkg/ingester/activeseries/active_series.go | 20 +++++--------- pkg/ingester/activeseries/matchers.go | 8 +++--- pkg/ingester/activeseries/matchers_test.go | 32 +++++++++++----------- 3 files changed, 27 insertions(+), 33 deletions(-) diff --git a/pkg/ingester/activeseries/active_series.go b/pkg/ingester/activeseries/active_series.go index 582a6bd0fd0..a2966ddc7c2 100644 --- a/pkg/ingester/activeseries/active_series.go +++ b/pkg/ingester/activeseries/active_series.go @@ -49,7 +49,7 @@ type seriesStripe struct { type seriesEntry struct { lbs labels.Labels nanos *atomic.Int64 // Unix timestamp in nanoseconds. Needs to be a pointer because we don't store pointers to entries in the stripe. - matches map[int]bool // Index of the matcher matching -> true map showing only matching matchers + matches []int // Index of the matcher matching } func NewActiveSeries(asm *Matchers, timeout time.Duration) *ActiveSeries { @@ -193,10 +193,8 @@ func (s *seriesStripe) findOrCreateEntryForSeries(fingerprint uint64, series lab matches := s.matchers.Matches(series) s.active++ - for i, ok := range matches { - if ok { - s.activeMatching[i]++ - } + for _, matcherIdx := range matches { + s.activeMatching[matcherIdx]++ } e := seriesEntry{ @@ -260,10 +258,8 @@ func (s *seriesStripe) purge(keepUntil time.Time) { } s.active++ - for i, ok := range entries[0].matches { - if ok { - s.activeMatching[i]++ - } + for _, matcherIdx := range entries[0].matches { + s.activeMatching[matcherIdx]++ } if ts < oldest { oldest = ts @@ -292,10 +288,8 @@ func (s *seriesStripe) purge(keepUntil time.Time) { } else { s.active += cnt for i := range entries { - for i, ok := range entries[i].matches { - if ok { - s.activeMatching[i]++ - } + for _, matcherIdx := range entries[i].matches { + s.activeMatching[matcherIdx]++ } } diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index 5b0bb6a9c13..e3cfc52343e 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -35,15 +35,15 @@ func (m *Matchers) Config() CustomTrackersConfig { return m.cfg } -// Matches returns a map[int]bool containing only matching matchers with ordinal number of matcher -> true if matches -func (m *Matchers) Matches(series labels.Labels) map[int]bool { +// Matches returns a []int containing only matcher indexes which are matching +func (m *Matchers) Matches(series labels.Labels) []int { if len(m.matchers) == 0 { return nil } - matches := map[int]bool{} + matches := []int{} for i, sm := range m.matchers { if sm.Matches(series) { - matches[i] = true + matches = append(matches, i) } } return matches diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index 877d0fe366c..b855284e5a5 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -22,45 +22,45 @@ func TestMatcher_MatchesSeries(t *testing.T) { for _, tc := range []struct { series labels.Labels - expected map[int]bool + expected []int }{ { series: labels.Labels{{Name: "foo", Value: "true"}, {Name: "baz", Value: "unrelated"}}, - expected: map[int]bool{ - 3: true, // has_foo_label + expected: []int{ + 3, // has_foo_label }, }, { series: labels.Labels{{Name: "foo", Value: "true"}, {Name: "bar", Value: "100"}, {Name: "baz", Value: "unrelated"}}, - expected: map[int]bool{ - 0: true, // bar_starts_with_1 - 2: true, // has_foo_and_bar_starts_with_1 - 3: true, // has_foo_label + expected: []int{ + 0, // bar_starts_with_1 + 2, // has_foo_and_bar_starts_with_1 + 3, // has_foo_label }, }, { series: labels.Labels{{Name: "foo", Value: "true"}, {Name: "bar", Value: "200"}, {Name: "baz", Value: "unrelated"}}, - expected: map[int]bool{ - 3: true, // has_foo_label + expected: []int{ + 3, // has_foo_label }, }, { series: labels.Labels{{Name: "bar", Value: "200"}, {Name: "baz", Value: "unrelated"}}, - expected: map[int]bool{ - 1: true, // does_not_have_foo_label + expected: []int{ + 1, // does_not_have_foo_label }, }, { series: labels.Labels{{Name: "bar", Value: "100"}, {Name: "baz", Value: "unrelated"}}, - expected: map[int]bool{ - 0: true, // bar_starts_with_1 - 1: true, // does_not_have_foo_label + expected: []int{ + 0, // bar_starts_with_1 + 1, // does_not_have_foo_label }, }, { series: labels.Labels{{Name: "baz", Value: "unrelated"}}, - expected: map[int]bool{ - 1: true, // does_not_have_foo_label + expected: []int{ + 1, // does_not_have_foo_label }, }, } { From 757f2282e7c31d93bac03512d65792b85daaefe2 Mon Sep 17 00:00:00 2001 From: Janos Date: Mon, 1 Aug 2022 15:07:32 +0200 Subject: [PATCH 03/14] Changing slice allocation based on Oleg's suggestion --- pkg/ingester/activeseries/matchers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index e3cfc52343e..94d8e9c341b 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -40,7 +40,7 @@ func (m *Matchers) Matches(series labels.Labels) []int { if len(m.matchers) == 0 { return nil } - matches := []int{} + var matches []int for i, sm := range m.matchers { if sm.Matches(series) { matches = append(matches, i) From 72ad043e17b8e64bbc0c57d61f69312e247c53e9 Mon Sep 17 00:00:00 2001 From: Janos Date: Fri, 29 Jul 2022 12:28:44 +0200 Subject: [PATCH 04/14] Benchmark --- pkg/ingester/activeseries/matchers_test.go | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index b855284e5a5..3e766240550 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -3,6 +3,8 @@ package activeseries import ( + "fmt" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -71,6 +73,48 @@ func TestMatcher_MatchesSeries(t *testing.T) { } } +func BenchmarkMatchesSeries(b *testing.B) { + + trackerCounts := []int{10, 100, 1000, 10000} + asms := make([]*Matchers, len(trackerCounts)) + + for i, matcherCount := range trackerCounts { + configMap := map[string]string{} + for j := 0; j < matcherCount; j++ { + configMap[strconv.Itoa(j)] = fmt.Sprintf("{grafanacloud_usage_group=~%d.*}", j) + } + config, _ := NewCustomTrackersConfig(configMap) + asms[i] = NewMatchers(config) + + } + + labelCounts := []int{1, 10, 100} + series := make([]labels.Labels, len(labelCounts)) + for i, labelCount := range labelCounts { + l := labels.Labels{ + {Name: "grafanacloud_usage_group", Value: "1"}, // going to match exactly to one matcher + } + for j := 1; j < labelCount; j++ { + labelEntry := labels.Label{Name: fmt.Sprintf("foo%d", j), Value: "true"} + l = append(l, labelEntry) + } + series[i] = l + } + + for i, trackerCount := range trackerCounts { + for j, labelCount := range labelCounts { + b.Run(fmt.Sprintf("TrackerCount: %d, LabelCount: %d", trackerCount, labelCount), func(b *testing.B) { + for x := 0; x < b.N; x++ { + got := asms[i].Matches(series[j]) + if len(got) > 2 { + b.FailNow() + } + } + }) + } + } +} + func TestCustomTrackersConfigs_MalformedMatcher(t *testing.T) { for _, matcher := range []string{ `{foo}`, From 10c353cc9751a0d53cbc3c496217813f35897717 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Tue, 2 Aug 2022 10:43:19 +0200 Subject: [PATCH 05/14] PoC: fixed size backed array slice for matchers Signed-off-by: Oleg Zaytsev --- pkg/ingester/activeseries/active_series.go | 17 ++++++---- pkg/ingester/activeseries/matchers.go | 38 +++++++++++++++++++--- pkg/ingester/activeseries/matchers_test.go | 12 +++++-- 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/pkg/ingester/activeseries/active_series.go b/pkg/ingester/activeseries/active_series.go index a2966ddc7c2..dfe91b478c9 100644 --- a/pkg/ingester/activeseries/active_series.go +++ b/pkg/ingester/activeseries/active_series.go @@ -49,7 +49,7 @@ type seriesStripe struct { type seriesEntry struct { lbs labels.Labels nanos *atomic.Int64 // Unix timestamp in nanoseconds. Needs to be a pointer because we don't store pointers to entries in the stripe. - matches []int // Index of the matcher matching + matches fixedSlice // Index of the matcher matching } func NewActiveSeries(asm *Matchers, timeout time.Duration) *ActiveSeries { @@ -191,10 +191,11 @@ func (s *seriesStripe) findOrCreateEntryForSeries(fingerprint uint64, series lab } matches := s.matchers.Matches(series) + matchesLen := matches.len() s.active++ - for _, matcherIdx := range matches { - s.activeMatching[matcherIdx]++ + for i := 0; i < matchesLen; i++ { + s.activeMatching[matches.get(i)]++ } e := seriesEntry{ @@ -258,8 +259,9 @@ func (s *seriesStripe) purge(keepUntil time.Time) { } s.active++ - for _, matcherIdx := range entries[0].matches { - s.activeMatching[matcherIdx]++ + ml := entries[0].matches.len() + for i := 0; i < ml; i++ { + s.activeMatching[entries[0].matches.get(i)]++ } if ts < oldest { oldest = ts @@ -288,8 +290,9 @@ func (s *seriesStripe) purge(keepUntil time.Time) { } else { s.active += cnt for i := range entries { - for _, matcherIdx := range entries[i].matches { - s.activeMatching[matcherIdx]++ + ml := entries[i].matches.len() + for i := 0; i < ml; i++ { + s.activeMatching[entries[i].matches.get(i)]++ } } diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index 94d8e9c341b..039f3f2212a 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -35,15 +35,15 @@ func (m *Matchers) Config() CustomTrackersConfig { return m.cfg } -// Matches returns a []int containing only matcher indexes which are matching -func (m *Matchers) Matches(series labels.Labels) []int { +// Matches returns a fixedSlice containing only matcher indexes which are matching +func (m *Matchers) Matches(series labels.Labels) fixedSlice { if len(m.matchers) == 0 { - return nil + return fixedSlice{} } - var matches []int + var matches fixedSlice for i, sm := range m.matchers { if sm.Matches(series) { - matches = append(matches, i) + matches.append(i) } } return matches @@ -80,3 +80,31 @@ func amlabelMatcherToProm(m *amlabels.Matcher) *labels.Matcher { // labels.MatchType(m.Type) is a risky conversion because it depends on the iota order, but we have a test for it return labels.MustNewMatcher(labels.MatchType(m.Type), m.Name, m.Value) } + +const fixedSliceSize = 4 + +type fixedSlice struct { + arr [fixedSliceSize]int + arrl int + rest []int +} + +func (fs *fixedSlice) append(val int) { + if fs.arrl < fixedSliceSize { + fs.arr[fs.arrl] = val + fs.arrl++ + return + } + fs.rest = append(fs.rest, val) +} + +func (fs *fixedSlice) get(idx int) int { + if idx < fixedSliceSize { + return fs.arr[idx] + } + return fs.rest[idx-fixedSliceSize] +} + +func (fs *fixedSlice) len() int { + return fs.arrl + len(fs.rest) +} diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index 3e766240550..94bb2c7c3e0 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -68,11 +68,19 @@ func TestMatcher_MatchesSeries(t *testing.T) { } { t.Run(tc.series.String(), func(t *testing.T) { got := asm.Matches(tc.series) - assert.Equal(t, tc.expected, got) + assert.Equal(t, tc.expected, fixedSliceToSlice(got)) }) } } +func fixedSliceToSlice(fixed fixedSlice) []int { + slice := make([]int, fixed.len()) + for i := 0; i < fixed.len(); i++ { + slice[i] = fixed.get(i) + } + return slice +} + func BenchmarkMatchesSeries(b *testing.B) { trackerCounts := []int{10, 100, 1000, 10000} @@ -106,7 +114,7 @@ func BenchmarkMatchesSeries(b *testing.B) { b.Run(fmt.Sprintf("TrackerCount: %d, LabelCount: %d", trackerCount, labelCount), func(b *testing.B) { for x := 0; x < b.N; x++ { got := asms[i].Matches(series[j]) - if len(got) > 2 { + if got.len() > 2 { b.FailNow() } } From b152483b4faf43a13a7ec7431585556c8641f427 Mon Sep 17 00:00:00 2001 From: Janos Date: Tue, 18 Oct 2022 15:08:30 +0200 Subject: [PATCH 06/14] Adding Oleg's improvements as well --- pkg/ingester/activeseries/active_series.go | 6 ++-- pkg/ingester/activeseries/matchers.go | 34 +++++++++++----------- pkg/ingester/activeseries/matchers_test.go | 10 +++---- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pkg/ingester/activeseries/active_series.go b/pkg/ingester/activeseries/active_series.go index dfe91b478c9..c8bbb3d8794 100644 --- a/pkg/ingester/activeseries/active_series.go +++ b/pkg/ingester/activeseries/active_series.go @@ -48,8 +48,8 @@ type seriesStripe struct { // seriesEntry holds a timestamp for single series. type seriesEntry struct { lbs labels.Labels - nanos *atomic.Int64 // Unix timestamp in nanoseconds. Needs to be a pointer because we don't store pointers to entries in the stripe. - matches fixedSlice // Index of the matcher matching + nanos *atomic.Int64 // Unix timestamp in nanoseconds. Needs to be a pointer because we don't store pointers to entries in the stripe. + matches preAllocDynamicSlice // Index of the matcher matching } func NewActiveSeries(asm *Matchers, timeout time.Duration) *ActiveSeries { @@ -209,7 +209,7 @@ func (s *seriesStripe) findOrCreateEntryForSeries(fingerprint uint64, series lab return e.nanos, true } -//nolint // Linter reports that this method is unused, but it is. +// nolint // Linter reports that this method is unused, but it is. func (s *seriesStripe) clear() { s.mu.Lock() defer s.mu.Unlock() diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index 039f3f2212a..4fa7fe19bb5 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -36,11 +36,11 @@ func (m *Matchers) Config() CustomTrackersConfig { } // Matches returns a fixedSlice containing only matcher indexes which are matching -func (m *Matchers) Matches(series labels.Labels) fixedSlice { +func (m *Matchers) Matches(series labels.Labels) preAllocDynamicSlice { if len(m.matchers) == 0 { - return fixedSlice{} + return preAllocDynamicSlice{} } - var matches fixedSlice + var matches preAllocDynamicSlice for i, sm := range m.matchers { if sm.Matches(series) { matches.append(i) @@ -81,30 +81,30 @@ func amlabelMatcherToProm(m *amlabels.Matcher) *labels.Matcher { return labels.MustNewMatcher(labels.MatchType(m.Type), m.Name, m.Value) } -const fixedSliceSize = 4 +const preAllocatedSize = 3 -type fixedSlice struct { - arr [fixedSliceSize]int - arrl int - rest []int +type preAllocDynamicSlice struct { + arr [preAllocatedSize]uint16 + arrl byte + rest []uint16 } -func (fs *fixedSlice) append(val int) { - if fs.arrl < fixedSliceSize { - fs.arr[fs.arrl] = val +func (fs *preAllocDynamicSlice) append(val int) { + if fs.arrl < preAllocatedSize { + fs.arr[fs.arrl] = uint16(val) fs.arrl++ return } - fs.rest = append(fs.rest, val) + fs.rest = append(fs.rest, uint16(val)) } -func (fs *fixedSlice) get(idx int) int { - if idx < fixedSliceSize { +func (fs *preAllocDynamicSlice) get(idx int) uint16 { + if idx < preAllocatedSize { return fs.arr[idx] } - return fs.rest[idx-fixedSliceSize] + return fs.rest[idx-preAllocatedSize] } -func (fs *fixedSlice) len() int { - return fs.arrl + len(fs.rest) +func (fs *preAllocDynamicSlice) len() int { + return int(fs.arrl) + len(fs.rest) } diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index 94bb2c7c3e0..7d1d33796d7 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -68,15 +68,15 @@ func TestMatcher_MatchesSeries(t *testing.T) { } { t.Run(tc.series.String(), func(t *testing.T) { got := asm.Matches(tc.series) - assert.Equal(t, tc.expected, fixedSliceToSlice(got)) + assert.Equal(t, tc.expected, preAllocDynamicSliceToSlice(got)) }) } } -func fixedSliceToSlice(fixed fixedSlice) []int { - slice := make([]int, fixed.len()) - for i := 0; i < fixed.len(); i++ { - slice[i] = fixed.get(i) +func preAllocDynamicSliceToSlice(prealloc preAllocDynamicSlice) []int { + slice := make([]int, prealloc.len()) + for i := 0; i < prealloc.len(); i++ { + slice[i] = int(prealloc.get(i)) } return slice } From 7cf417b83d82731f4f49da178ba4507f95777a8b Mon Sep 17 00:00:00 2001 From: Janos Date: Tue, 18 Oct 2022 17:13:44 +0200 Subject: [PATCH 07/14] Limiting the maximum number of trackers as 64k --- .../activeseries/custom_trackers_config.go | 5 +++ .../custom_trackers_config_test.go | 34 +++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pkg/ingester/activeseries/custom_trackers_config.go b/pkg/ingester/activeseries/custom_trackers_config.go index 1ec78979f6d..6e749a3bccd 100644 --- a/pkg/ingester/activeseries/custom_trackers_config.go +++ b/pkg/ingester/activeseries/custom_trackers_config.go @@ -11,6 +11,8 @@ import ( "gopkg.in/yaml.v3" ) +const maxNumberOfTrackers = 64000 + // CustomTrackersConfig configures active series custom trackers. // It can be set using a flag, or parsed from yaml. type CustomTrackersConfig struct { @@ -142,6 +144,9 @@ func (c CustomTrackersConfig) MarshalYAML() (interface{}, error) { func NewCustomTrackersConfig(m map[string]string) (c CustomTrackersConfig, err error) { c.source = m c.config = map[string]labelsMatchers{} + if len(m) > maxNumberOfTrackers { + return c, fmt.Errorf("the number of trackers set [%d] exceeds the maximum number of trackers [%d]", len(m), maxNumberOfTrackers) + } for name, matcher := range m { sm, err := amlabels.ParseMatchers(matcher) if err != nil { diff --git a/pkg/ingester/activeseries/custom_trackers_config_test.go b/pkg/ingester/activeseries/custom_trackers_config_test.go index 134c86806ef..e09c9c20bf0 100644 --- a/pkg/ingester/activeseries/custom_trackers_config_test.go +++ b/pkg/ingester/activeseries/custom_trackers_config_test.go @@ -3,7 +3,9 @@ package activeseries import ( + "bytes" "flag" + "fmt" "testing" "github.com/pkg/errors" @@ -75,8 +77,8 @@ func TestCustomTrackersConfigs(t *testing.T) { expected: mustNewCustomTrackersConfigFromMap(t, map[string]string{`foo`: `{foo="bar"}`}), }, { - name: "whitespaces are trimmed from name and matcher", - flags: []string{`-ingester.active-series-custom-trackers= foo : {foo="bar"}` + "\n "}, + name: "whitespaces are trimmed from name and matcher", + flags: []string{`-ingester.active-series-custom-trackers= foo : {foo="bar"}` + "\n "}, expected: mustNewCustomTrackersConfigFromMap(t, map[string]string{`foo`: `{foo="bar"}`}), }, { @@ -125,6 +127,34 @@ func TestCustomTrackersConfigs(t *testing.T) { } } +func TestMaximumNumberOfTrackers(t *testing.T) { + t.Run("Flag based setup", func(t *testing.T) { + var flagToSet bytes.Buffer + numberOfTrackers := maxNumberOfTrackers + 1 + for i := 0; i < numberOfTrackers; i++ { + flagToSet.WriteString(fmt.Sprintf("name%d:{__name__=%d}", i, i)) + if i < numberOfTrackers-1 { + flagToSet.WriteString(";") + } + } + + c := CustomTrackersConfig{} + err := c.Set(flagToSet.String()) + require.Error(t, err, "custom tracker config should not accept more than %d trackers", maxNumberOfTrackers) + }) + + t.Run("Map based setup", func(t *testing.T) { + configMap := map[string]string{} + numberOfTrackers := maxNumberOfTrackers + 1 + for i := 0; i < numberOfTrackers; i++ { + configMap[fmt.Sprintf("name%d", i)] = fmt.Sprintf("{__name__=%d}", i) + } + + _, err := NewCustomTrackersConfig(configMap) + require.Error(t, err, "custom tracker config should not accept more than %d trackers", maxNumberOfTrackers) + }) +} + func TestCustomTrackerConfig_Equality(t *testing.T) { configSets := [][]CustomTrackersConfig{ { From ccccfd5eb1992eca0cbf76e463aaa890ec851001 Mon Sep 17 00:00:00 2001 From: Janos Date: Tue, 18 Oct 2022 17:25:50 +0200 Subject: [PATCH 08/14] Making linter happy --- pkg/ingester/activeseries/active_series.go | 2 +- .../activeseries/custom_trackers_config_test.go | 2 +- pkg/ingester/activeseries/matchers.go | 16 ++++++++-------- pkg/ingester/activeseries/matchers_test.go | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/ingester/activeseries/active_series.go b/pkg/ingester/activeseries/active_series.go index c8bbb3d8794..94472dfe9de 100644 --- a/pkg/ingester/activeseries/active_series.go +++ b/pkg/ingester/activeseries/active_series.go @@ -49,7 +49,7 @@ type seriesStripe struct { type seriesEntry struct { lbs labels.Labels nanos *atomic.Int64 // Unix timestamp in nanoseconds. Needs to be a pointer because we don't store pointers to entries in the stripe. - matches preAllocDynamicSlice // Index of the matcher matching + matches PreAllocDynamicSlice // Index of the matcher matching } func NewActiveSeries(asm *Matchers, timeout time.Duration) *ActiveSeries { diff --git a/pkg/ingester/activeseries/custom_trackers_config_test.go b/pkg/ingester/activeseries/custom_trackers_config_test.go index e09c9c20bf0..410523944a2 100644 --- a/pkg/ingester/activeseries/custom_trackers_config_test.go +++ b/pkg/ingester/activeseries/custom_trackers_config_test.go @@ -78,7 +78,7 @@ func TestCustomTrackersConfigs(t *testing.T) { }, { name: "whitespaces are trimmed from name and matcher", - flags: []string{`-ingester.active-series-custom-trackers= foo : {foo="bar"}` + "\n "}, + flags: []string{`-ingester.active-series-custom-trackers= foo : {foo="bar"}` + "\n "}, expected: mustNewCustomTrackersConfigFromMap(t, map[string]string{`foo`: `{foo="bar"}`}), }, { diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index 4fa7fe19bb5..5b63d6230f2 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -35,12 +35,12 @@ func (m *Matchers) Config() CustomTrackersConfig { return m.cfg } -// Matches returns a fixedSlice containing only matcher indexes which are matching -func (m *Matchers) Matches(series labels.Labels) preAllocDynamicSlice { +// Matches returns a PreAllocDynamicSlice containing only matcher indexes which are matching +func (m *Matchers) Matches(series labels.Labels) PreAllocDynamicSlice { if len(m.matchers) == 0 { - return preAllocDynamicSlice{} + return PreAllocDynamicSlice{} } - var matches preAllocDynamicSlice + var matches PreAllocDynamicSlice for i, sm := range m.matchers { if sm.Matches(series) { matches.append(i) @@ -83,13 +83,13 @@ func amlabelMatcherToProm(m *amlabels.Matcher) *labels.Matcher { const preAllocatedSize = 3 -type preAllocDynamicSlice struct { +type PreAllocDynamicSlice struct { arr [preAllocatedSize]uint16 arrl byte rest []uint16 } -func (fs *preAllocDynamicSlice) append(val int) { +func (fs *PreAllocDynamicSlice) append(val int) { if fs.arrl < preAllocatedSize { fs.arr[fs.arrl] = uint16(val) fs.arrl++ @@ -98,13 +98,13 @@ func (fs *preAllocDynamicSlice) append(val int) { fs.rest = append(fs.rest, uint16(val)) } -func (fs *preAllocDynamicSlice) get(idx int) uint16 { +func (fs *PreAllocDynamicSlice) get(idx int) uint16 { if idx < preAllocatedSize { return fs.arr[idx] } return fs.rest[idx-preAllocatedSize] } -func (fs *preAllocDynamicSlice) len() int { +func (fs *PreAllocDynamicSlice) len() int { return int(fs.arrl) + len(fs.rest) } diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index 7d1d33796d7..3126758bb38 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -73,7 +73,7 @@ func TestMatcher_MatchesSeries(t *testing.T) { } } -func preAllocDynamicSliceToSlice(prealloc preAllocDynamicSlice) []int { +func preAllocDynamicSliceToSlice(prealloc PreAllocDynamicSlice) []int { slice := make([]int, prealloc.len()) for i := 0; i < prealloc.len(); i++ { slice[i] = int(prealloc.get(i)) From 779d0a8acf15ef5ea3399e66fa31cc87ef1e7bef Mon Sep 17 00:00:00 2001 From: Janos Date: Mon, 24 Oct 2022 09:12:36 +0200 Subject: [PATCH 09/14] Making preAllocDynamicSlice and also activeseries.mathches unexported --- pkg/ingester/activeseries/active_series.go | 4 ++-- pkg/ingester/activeseries/matchers.go | 16 ++++++++-------- pkg/ingester/activeseries/matchers_test.go | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pkg/ingester/activeseries/active_series.go b/pkg/ingester/activeseries/active_series.go index 705d3ccda7b..9c1dedf6b8e 100644 --- a/pkg/ingester/activeseries/active_series.go +++ b/pkg/ingester/activeseries/active_series.go @@ -49,7 +49,7 @@ type seriesStripe struct { type seriesEntry struct { lbs labels.Labels nanos *atomic.Int64 // Unix timestamp in nanoseconds. Needs to be a pointer because we don't store pointers to entries in the stripe. - matches PreAllocDynamicSlice // Index of the matcher matching + matches preAllocDynamicSlice // Index of the matcher matching } func NewActiveSeries(asm *Matchers, timeout time.Duration) *ActiveSeries { @@ -190,7 +190,7 @@ func (s *seriesStripe) findOrCreateEntryForSeries(fingerprint uint64, series lab } } - matches := s.matchers.Matches(series) + matches := s.matchers.matches(series) matchesLen := matches.len() s.active++ diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index 5b63d6230f2..910ca97cbf8 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -35,12 +35,12 @@ func (m *Matchers) Config() CustomTrackersConfig { return m.cfg } -// Matches returns a PreAllocDynamicSlice containing only matcher indexes which are matching -func (m *Matchers) Matches(series labels.Labels) PreAllocDynamicSlice { +// matches returns a PreAllocDynamicSlice containing only matcher indexes which are matching +func (m *Matchers) matches(series labels.Labels) preAllocDynamicSlice { if len(m.matchers) == 0 { - return PreAllocDynamicSlice{} + return preAllocDynamicSlice{} } - var matches PreAllocDynamicSlice + var matches preAllocDynamicSlice for i, sm := range m.matchers { if sm.Matches(series) { matches.append(i) @@ -83,13 +83,13 @@ func amlabelMatcherToProm(m *amlabels.Matcher) *labels.Matcher { const preAllocatedSize = 3 -type PreAllocDynamicSlice struct { +type preAllocDynamicSlice struct { arr [preAllocatedSize]uint16 arrl byte rest []uint16 } -func (fs *PreAllocDynamicSlice) append(val int) { +func (fs *preAllocDynamicSlice) append(val int) { if fs.arrl < preAllocatedSize { fs.arr[fs.arrl] = uint16(val) fs.arrl++ @@ -98,13 +98,13 @@ func (fs *PreAllocDynamicSlice) append(val int) { fs.rest = append(fs.rest, uint16(val)) } -func (fs *PreAllocDynamicSlice) get(idx int) uint16 { +func (fs *preAllocDynamicSlice) get(idx int) uint16 { if idx < preAllocatedSize { return fs.arr[idx] } return fs.rest[idx-preAllocatedSize] } -func (fs *PreAllocDynamicSlice) len() int { +func (fs *preAllocDynamicSlice) len() int { return int(fs.arrl) + len(fs.rest) } diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index 3126758bb38..6097a806219 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -67,13 +67,13 @@ func TestMatcher_MatchesSeries(t *testing.T) { }, } { t.Run(tc.series.String(), func(t *testing.T) { - got := asm.Matches(tc.series) + got := asm.matches(tc.series) assert.Equal(t, tc.expected, preAllocDynamicSliceToSlice(got)) }) } } -func preAllocDynamicSliceToSlice(prealloc PreAllocDynamicSlice) []int { +func preAllocDynamicSliceToSlice(prealloc preAllocDynamicSlice) []int { slice := make([]int, prealloc.len()) for i := 0; i < prealloc.len(); i++ { slice[i] = int(prealloc.get(i)) @@ -113,7 +113,7 @@ func BenchmarkMatchesSeries(b *testing.B) { for j, labelCount := range labelCounts { b.Run(fmt.Sprintf("TrackerCount: %d, LabelCount: %d", trackerCount, labelCount), func(b *testing.B) { for x := 0; x < b.N; x++ { - got := asms[i].Matches(series[j]) + got := asms[i].matches(series[j]) if got.len() > 2 { b.FailNow() } From f460b60372b74f8acbd8c5a3a8228f2209ce984c Mon Sep 17 00:00:00 2001 From: Janos Date: Mon, 24 Oct 2022 09:19:02 +0200 Subject: [PATCH 10/14] Commenting and cleaning up preAllocDynamicSlice --- pkg/ingester/activeseries/custom_trackers_config.go | 4 +++- pkg/ingester/activeseries/matchers.go | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pkg/ingester/activeseries/custom_trackers_config.go b/pkg/ingester/activeseries/custom_trackers_config.go index 6e749a3bccd..0982f5c805c 100644 --- a/pkg/ingester/activeseries/custom_trackers_config.go +++ b/pkg/ingester/activeseries/custom_trackers_config.go @@ -4,6 +4,7 @@ package activeseries import ( "fmt" + "math" "sort" "strings" @@ -11,7 +12,8 @@ import ( "gopkg.in/yaml.v3" ) -const maxNumberOfTrackers = 64000 +// preAllocDynamicSlice is using uint16 to represent custom tracker matches +const maxNumberOfTrackers = math.MaxUint16 // CustomTrackersConfig configures active series custom trackers. // It can be set using a flag, or parsed from yaml. diff --git a/pkg/ingester/activeseries/matchers.go b/pkg/ingester/activeseries/matchers.go index 910ca97cbf8..5d2820ad583 100644 --- a/pkg/ingester/activeseries/matchers.go +++ b/pkg/ingester/activeseries/matchers.go @@ -83,6 +83,10 @@ func amlabelMatcherToProm(m *amlabels.Matcher) *labels.Matcher { const preAllocatedSize = 3 +// preAllocDynamicSlice is a slice-like uint16 data structure that allocates space for the first `preAllocatedSize` elements. +// When more than `preAllocatedSize` elements are appended, it allocates a slice that escapes to heap. +// This trades in extra allocated space (2x more with `preAllocatedSize=3`) for zero-allocations in most of the cases, +// relying on the assumption that most of the matchers will not match more than `preAllocatedSize` trackers. type preAllocDynamicSlice struct { arr [preAllocatedSize]uint16 arrl byte From da93b2f6add4d3163e300e22312c0d023de7bd19 Mon Sep 17 00:00:00 2001 From: Janos Date: Mon, 24 Oct 2022 09:37:17 +0200 Subject: [PATCH 11/14] Failing the benchmark if the config fails to initizalize --- pkg/ingester/activeseries/matchers_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index 6097a806219..81dd7ea4be5 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -91,7 +91,8 @@ func BenchmarkMatchesSeries(b *testing.B) { for j := 0; j < matcherCount; j++ { configMap[strconv.Itoa(j)] = fmt.Sprintf("{grafanacloud_usage_group=~%d.*}", j) } - config, _ := NewCustomTrackersConfig(configMap) + config, err := NewCustomTrackersConfig(configMap) + require.NoError(b, err) asms[i] = NewMatchers(config) } From 569306132506526acbd89c7cba167aebba57ca92 Mon Sep 17 00:00:00 2001 From: Janos Date: Mon, 24 Oct 2022 09:55:25 +0200 Subject: [PATCH 12/14] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e2962dc126..649e1719fc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * [ENHANCEMENT] Ingester: Reduce activity tracker memory allocation. #3203 * [ENHANCEMENT] Query-frontend: Log more detailed information in the case of a failed query. #3190 * [ENHANCEMENT] Compactor: Add new `cortex_compactor_block_max_time_delta_seconds` histogram for detecting if compaction of blocks is lagging behind. #3240 +* [ENHANCEMENT] Reducing the memory footprint of active series custom trackers. #2568 * [BUGFIX] Flusher: Add `Overrides` as a dependency to prevent panics when starting with `-target=flusher`. #3151 ### Mixin From 26a4d6ccbe9aa3bb795d98e87904cdf6ddc53e02 Mon Sep 17 00:00:00 2001 From: Janos Gub Date: Mon, 24 Oct 2022 20:16:53 +0200 Subject: [PATCH 13/14] Update CHANGELOG.md Co-authored-by: Oleg Zaytsev --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 649e1719fc0..2967ad3a4a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ * [ENHANCEMENT] Ingester: Reduce activity tracker memory allocation. #3203 * [ENHANCEMENT] Query-frontend: Log more detailed information in the case of a failed query. #3190 * [ENHANCEMENT] Compactor: Add new `cortex_compactor_block_max_time_delta_seconds` histogram for detecting if compaction of blocks is lagging behind. #3240 -* [ENHANCEMENT] Reducing the memory footprint of active series custom trackers. #2568 +* [ENHANCEMENT] Ingester: reduced the memory footprint of active series custom trackers. #2568 * [BUGFIX] Flusher: Add `Overrides` as a dependency to prevent panics when starting with `-target=flusher`. #3151 ### Mixin From 9def4f39338406e8db5ba65be2c50f694f79d8e0 Mon Sep 17 00:00:00 2001 From: Janos Gub Date: Mon, 24 Oct 2022 20:22:15 +0200 Subject: [PATCH 14/14] Update pkg/ingester/activeseries/matchers_test.go Co-authored-by: Oleg Zaytsev --- pkg/ingester/activeseries/matchers_test.go | 50 ++++++++++++++-------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/pkg/ingester/activeseries/matchers_test.go b/pkg/ingester/activeseries/matchers_test.go index 81dd7ea4be5..a0343862454 100644 --- a/pkg/ingester/activeseries/matchers_test.go +++ b/pkg/ingester/activeseries/matchers_test.go @@ -82,42 +82,54 @@ func preAllocDynamicSliceToSlice(prealloc preAllocDynamicSlice) []int { } func BenchmarkMatchesSeries(b *testing.B) { - - trackerCounts := []int{10, 100, 1000, 10000} + trackerCounts := []int{10, 100, 1000} asms := make([]*Matchers, len(trackerCounts)) for i, matcherCount := range trackerCounts { configMap := map[string]string{} for j := 0; j < matcherCount; j++ { - configMap[strconv.Itoa(j)] = fmt.Sprintf("{grafanacloud_usage_group=~%d.*}", j) + configMap[strconv.Itoa(j)] = fmt.Sprintf(`{this_will_match_%d="true"}`, j) } config, err := NewCustomTrackersConfig(configMap) require.NoError(b, err) asms[i] = NewMatchers(config) - } - labelCounts := []int{1, 10, 100} - series := make([]labels.Labels, len(labelCounts)) - for i, labelCount := range labelCounts { - l := labels.Labels{ - {Name: "grafanacloud_usage_group", Value: "1"}, // going to match exactly to one matcher + makeLabels := func(total, matching int) labels.Labels { + if total < matching { + b.Fatal("wrong test setup, total < matching") + } + lbs := make(labels.Labels, 0, total) + for i := 0; i < matching; i++ { + lbs = append(lbs, labels.Label{Name: fmt.Sprintf("this_will_match_%d", i), Value: "true"}) } - for j := 1; j < labelCount; j++ { - labelEntry := labels.Label{Name: fmt.Sprintf("foo%d", j), Value: "true"} - l = append(l, labelEntry) + for i := matching; i < total; i++ { + lbs = append(lbs, labels.Label{Name: fmt.Sprintf("something_else_%d", i), Value: "true"}) } - series[i] = l + return lbs } for i, trackerCount := range trackerCounts { - for j, labelCount := range labelCounts { - b.Run(fmt.Sprintf("TrackerCount: %d, LabelCount: %d", trackerCount, labelCount), func(b *testing.B) { + for _, bc := range []struct { + total, matching int + }{ + {1, 0}, + {1, 1}, + {10, 1}, + {10, 2}, + {10, 5}, + {25, 1}, + {25, 2}, + {25, 5}, + {100, 1}, + {100, 2}, + {100, 5}, + } { + series := makeLabels(bc.total, bc.matching) + b.Run(fmt.Sprintf("TrackerCount: %d, Labels: %d, Matching: %d", trackerCount, bc.total, bc.matching), func(b *testing.B) { for x := 0; x < b.N; x++ { - got := asms[i].matches(series[j]) - if got.len() > 2 { - b.FailNow() - } + got := asms[i].matches(series) + require.Equal(b, bc.matching, got.len()) } }) }