Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Optimize queries using regex matchers for set lookups #602

Merged
merged 19 commits into from
May 27, 2019
113 changes: 113 additions & 0 deletions querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"path/filepath"
"sort"
"strconv"
"testing"

"github.com/pkg/errors"
Expand Down Expand Up @@ -1691,6 +1692,118 @@ func BenchmarkQuerySeek(b *testing.B) {
}
}

func BenchmarkSetMatcher(b *testing.B) {
cases := []struct {
numBlocks int
numSeries int
numSamplesPerSeriesPerBlock int
setPattern string
regexPattern string
}{
{
numBlocks: 1,
numSeries: 15,
naivewong marked this conversation as resolved.
Show resolved Hide resolved
numSamplesPerSeriesPerBlock: 10,
setPattern: "^(?:1|2|3)$",
regexPattern: "1|2|3",
naivewong marked this conversation as resolved.
Show resolved Hide resolved
},
{
numBlocks: 1,
numSeries: 15,
numSamplesPerSeriesPerBlock: 10,
setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
regexPattern: "1|2|3|4|5|6|7|8|9|10",
},
{
numBlocks: 1,
numSeries: 200,
numSamplesPerSeriesPerBlock: 10,
setPattern: "^(?:1|2|3)$",
regexPattern: "1|2|3",
},
{
numBlocks: 1,
numSeries: 200,
numSamplesPerSeriesPerBlock: 10,
setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$",
regexPattern: "1|2|3|4|5|6|7|8|9|10",
},
}

for _, c := range cases {
dir, err := ioutil.TempDir("", "bench_postings_for_matchers")
testutil.Ok(b, err)
defer func() {
testutil.Ok(b, os.RemoveAll(dir))
}()

var (
blocks []*Block
prefilledLabels []map[string]string
generatedSeries []Series
)
for i := int64(0); i < int64(c.numBlocks); i++ {
mint := i * int64(c.numSamplesPerSeriesPerBlock)
maxt := mint + int64(c.numSamplesPerSeriesPerBlock) - 1
if len(prefilledLabels) == 0 {
generatedSeries = make([]Series, c.numSeries)
naivewong marked this conversation as resolved.
Show resolved Hide resolved
for i := 0; i < c.numSeries; i++ {
lbls := make(map[string]string, 10)
// The first label pair is {"test", "i"} which is for benchmarking set matcher.
lbls["test"] = strconv.Itoa(i)
naivewong marked this conversation as resolved.
Show resolved Hide resolved
for len(lbls) < 10 {
lbls[randString()] = randString()
}
samples := make([]tsdbutil.Sample, 0, maxt-mint+1)
for t := mint; t <= maxt; t++ {
samples = append(samples, sample{t: t, v: rand.Float64()})
}
generatedSeries[i] = newSeries(lbls, samples)
}
for _, s := range generatedSeries {
prefilledLabels = append(prefilledLabels, s.Labels().Map())
}
} else {
generatedSeries = populateSeries(prefilledLabels, mint, maxt)
}
block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil)
testutil.Ok(b, err)
blocks = append(blocks, block)
defer block.Close()
}

que := &querier{
blocks: make([]Querier, 0, len(blocks)),
}
for _, blk := range blocks {
q, err := NewBlockQuerier(blk, math.MinInt64, math.MaxInt64)
testutil.Ok(b, err)
que.blocks = append(que.blocks, q)
}
defer que.Close()

benchMsg1 := fmt.Sprintf("SetMatch,nSeries=%d,pattern=\"%s\"", c.numSeries, c.setPattern)
benchMsg2 := fmt.Sprintf("RegexMatch,nSeries=%d,pattern=\"%s\"", c.numSeries, c.regexPattern)
b.Run(benchMsg1, func(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for n := 0; n < b.N; n++ {
_, err := que.Select(labels.NewMustRegexpMatcher("test", c.setPattern))
testutil.Ok(b, err)

}
})
b.Run(benchMsg2, func(b *testing.B) {
b.ResetTimer()
b.ReportAllocs()
for n := 0; n < b.N; n++ {
_, err := que.Select(labels.NewMustRegexpMatcher("test", c.regexPattern))
testutil.Ok(b, err)
}
})
}
}

// Refer to https://github.com/prometheus/prometheus/issues/2651.
func TestFindSetMatches(t *testing.T) {
cases := []struct {
Expand Down