Skip to content

Commit

Permalink
Store: improve index header reading performance by sorting values fir…
Browse files Browse the repository at this point in the history
…st (thanos-io#5588)

Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>

Signed-off-by: Xiaochao Dong (@damnever) <the.xcdong@gmail.com>
  • Loading branch information
damnever authored and GiedriusS committed Aug 25, 2022
1 parent 9632157 commit d960d0e
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#5451](https://github.com/thanos-io/thanos/pull/5451) Azure: Reduce memory usage by not buffering file downloads entirely in memory.
- [#5484](https://github.com/thanos-io/thanos/pull/5484) Update Prometheus deps to v2.36.2.
- [#5511](https://github.com/thanos-io/thanos/pull/5511) Update Prometheus deps to v2.37.0.
- [#5588](https://github.com/thanos-io/thanos/pull/5588) Store: improve index header reading performance by sorting values first.

### Removed

Expand Down
17 changes: 11 additions & 6 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -1976,13 +1976,18 @@ func checkNilPosting(l labels.Label, p index.Postings) index.Postings {

// NOTE: Derived from tsdb.postingsForMatcher. index.Merge is equivalent to map duplication.
func toPostingGroup(lvalsFn func(name string) ([]string, error), m *labels.Matcher) (*postingGroup, error) {
if m.Type == labels.MatchRegexp && len(findSetMatches(m.Value)) > 0 {
vals := findSetMatches(m.Value)
toAdd := make([]labels.Label, 0, len(vals))
for _, val := range vals {
toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val})
if m.Type == labels.MatchRegexp {
if vals := findSetMatches(m.Value); len(vals) > 0 {
// Sorting will improve the performance dramatically if the dataset is relatively large
// since entries in the postings offset table was sorted by label name and value,
// the sequential reading is much faster.
sort.Strings(vals)
toAdd := make([]labels.Label, 0, len(vals))
for _, val := range vals {
toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val})
}
return newPostingGroup(false, toAdd, nil), nil
}
return newPostingGroup(false, toAdd, nil), nil
}

// If the matcher selects an empty value, it selects all the series which don't
Expand Down
27 changes: 22 additions & 5 deletions pkg/store/bucket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"regexp"
"sort"
"strconv"
"strings"
"sync"
"testing"
"time"
Expand Down Expand Up @@ -1113,14 +1114,16 @@ func appendTestData(t testing.TB, app storage.Appender, series int) {
}

series = series / 5
uniq := 0
for n := 0; n < 10; n++ {
for i := 0; i < series/10; i++ {
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "foo"))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "foo", "uniq", fmt.Sprintf("%08d", uniq)))
// Have some series that won't be matched, to properly test inverted matches.
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "bar"))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", "0_"+strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "bar"))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", "1_"+strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "bar"))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", "2_"+strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "foo"))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "bar", "uniq", fmt.Sprintf("%08d", uniq+1)))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", "0_"+strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "bar", "uniq", fmt.Sprintf("%08d", uniq+2)))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", "1_"+strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "bar", "uniq", fmt.Sprintf("%08d", uniq+3)))
addSeries(labels.FromStrings("i", strconv.Itoa(i)+storetestutil.LabelLongSuffix, "n", "2_"+strconv.Itoa(n)+storetestutil.LabelLongSuffix, "j", "foo", "uniq", fmt.Sprintf("%08d", uniq+4)))
uniq += 5
}
}
testutil.Ok(t, app.Commit())
Expand Down Expand Up @@ -1161,6 +1164,19 @@ func benchmarkExpandedPostings(
iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "n", "2"+storetestutil.LabelLongSuffix)
iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$")
iRegexSet := labels.MustNewMatcher(labels.MatchRegexp, "i", "0"+storetestutil.LabelLongSuffix+"|1"+storetestutil.LabelLongSuffix+"|2"+storetestutil.LabelLongSuffix)
bigValueSetSize := series / 10
if bigValueSetSize > 50000 {
bigValueSetSize = 50000
}
bigValueSet := make([]string, 0, bigValueSetSize)
for i := 0; i < series; i += series / bigValueSetSize {
bigValueSet = append(bigValueSet, fmt.Sprintf("%08d", i))
}
bigValueSetSize = len(bigValueSet)
rand.New(rand.NewSource(time.Now().UnixNano())).Shuffle(len(bigValueSet), func(i, j int) {
bigValueSet[i], bigValueSet[j] = bigValueSet[j], bigValueSet[i]
})
iRegexBigValueSet := labels.MustNewMatcher(labels.MatchRegexp, "uniq", strings.Join(bigValueSet, "|"))

series = series / 5
cases := []struct {
Expand All @@ -1186,6 +1202,7 @@ func benchmarkExpandedPostings(
{`n="1",i=~".+",i!="2",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2, jFoo}, int(float64(series) * 0.1)},
{`n="1",i=~".+",i!~"2.*",j="foo"`, []*labels.Matcher{n1, iPlus, iNot2Star, jFoo}, int(1 + float64(series)*0.088888)},
{`i=~"0|1|2"`, []*labels.Matcher{iRegexSet}, 150}, // 50 series for "1", 50 for "2" and 50 for "3".
{`uniq=~"9|random-shuffled-values|1"`, []*labels.Matcher{iRegexBigValueSet}, bigValueSetSize},
}

for _, c := range cases {
Expand Down

0 comments on commit d960d0e

Please sign in to comment.