Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Optimize queries using regex matchers for set lookups #602

Merged
merged 19 commits into from
May 27, 2019
39 changes: 9 additions & 30 deletions block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"math/rand"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/go-kit/kit/log"
Expand Down Expand Up @@ -184,6 +185,11 @@ func createBlock(tb testing.TB, dir string, series []Series) string {
return filepath.Join(dir, ulid.String())
}

// Fixed label name and value prefixes used by genSeries: the first label of
// every series is defaultLabelName with the series index as its value, and
// further labels append a running number to both the name and the value.
const (
defaultLabelName = "labelName"
defaultLabelValue = "labelValue"
)

// genSeries generates series with a given number of labels and values.
func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series {
if totalSeries == 0 || labelCount == 0 {
Expand All @@ -193,8 +199,9 @@ func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series {
series := make([]Series, totalSeries)
for i := 0; i < totalSeries; i++ {
lbls := make(map[string]string, labelCount)
for len(lbls) < labelCount {
lbls[randString()] = randString()
lbls[defaultLabelName] = strconv.Itoa(i)
for j := 1; len(lbls) < labelCount; j++ {
codesome marked this conversation as resolved.
Show resolved Hide resolved
lbls[defaultLabelName+strconv.Itoa(j)] = defaultLabelValue + strconv.Itoa(j)
}
samples := make([]tsdbutil.Sample, 0, maxt-mint+1)
for t := mint; t <= maxt; t++ {
Expand Down Expand Up @@ -224,31 +231,3 @@ func populateSeries(lbls []map[string]string, mint, maxt int64) []Series {
}
return series
}

// Alphabet and bit-packing parameters used by randString.
const (
	// letterBytes is the set of characters a generated string may contain.
	letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

	letterIdxBits = 6                    // Number of bits consumed per candidate letter index.
	letterIdxMask = 1<<letterIdxBits - 1 // Mask selecting the lowest letterIdxBits bits.
	letterIdxMax  = 63 / letterIdxBits   // Candidate indices obtainable from one 63-bit value.
)

// randString returns a random string of 1 to 50 characters drawn from
// letterBytes.
func randString() string {
	const maxLength int32 = 50

	// last is the highest index to fill, so the result holds last+1 bytes.
	last := int(rand.Int31n(maxLength))
	out := make([]byte, last+1)

	// One rand.Int63() call supplies letterIdxMax candidate indices; draw a
	// fresh value only once those are used up.
	bits, left := rand.Int63(), letterIdxMax
	for pos := last; pos >= 0; {
		if left == 0 {
			bits, left = rand.Int63(), letterIdxMax
		}
		// A 6-bit candidate can exceed the alphabet size; such candidates are
		// discarded without advancing the write position.
		idx := int(bits & letterIdxMask)
		if idx < len(letterBytes) {
			out[pos] = letterBytes[idx]
			pos--
		}
		bits >>= letterIdxBits
		left--
	}

	return string(out)
}
13 changes: 7 additions & 6 deletions labels/selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,15 @@ func NewEqualMatcher(name, value string) Matcher {
return &EqualMatcher{name: name, value: value}
}

type regexpMatcher struct {
// RegexpMatcher is a matcher that carries a name and a compiled regular
// expression; values are tested against that expression.
type RegexpMatcher struct {
name string
re *regexp.Regexp
}

func (m regexpMatcher) Name() string { return m.name }
func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
// Name returns the name the matcher was created with.
func (m RegexpMatcher) Name() string { return m.name }
// Matches reports whether v matches the matcher's regular expression.
func (m RegexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
// String renders the matcher as name=~"pattern", with the pattern quoted via %q.
func (m RegexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
// Value returns the string form of the matcher's regular expression (re.String()).
func (m RegexpMatcher) Value() string { return m.re.String() }

// NewRegexpMatcher returns a new matcher verifying that a value matches
// the regular expression pattern.
Expand All @@ -79,7 +80,7 @@ func NewRegexpMatcher(name, pattern string) (Matcher, error) {
if err != nil {
return nil, err
}
return &regexpMatcher{name: name, re: re}, nil
return &RegexpMatcher{name: name, re: re}, nil
}

// NewMustRegexpMatcher returns a new matcher verifying that a value matches
Expand All @@ -90,7 +91,7 @@ func NewMustRegexpMatcher(name, pattern string) Matcher {
if err != nil {
panic(err)
}
return &regexpMatcher{name: name, re: re}
return &RegexpMatcher{name: name, re: re}

}

Expand Down
77 changes: 77 additions & 0 deletions querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"sort"
"strings"
"unicode/utf8"

"github.com/pkg/errors"
"github.com/prometheus/tsdb/chunkenc"
Expand Down Expand Up @@ -266,6 +267,62 @@ func (q *blockQuerier) Close() error {
return merr.Err()
}

// Bitmap used by func isRegexMetaCharacter to check whether a character needs to be escaped.
var regexMetaCharacterBytes [16]byte

// isRegexMetaCharacter reports whether byte b needs to be escaped, i.e. whether
// its bit is set in the regexMetaCharacterBytes bitmap. Bytes at or above
// utf8.RuneSelf are never reported. The bitmap entry is chosen by b%16 and the
// bit inside that entry by b/16.
func isRegexMetaCharacter(b byte) bool {
return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0
gouthamve marked this conversation as resolved.
Show resolved Hide resolved
}

// init fills the regexMetaCharacterBytes bitmap: for every regex metacharacter
// it sets bit (c/16) in entry (c%16).
func init() {
	const metaChars = `.+*?()|[]{}^$`
	for i := 0; i < len(metaChars); i++ {
		c := metaChars[i]
		regexMetaCharacterBytes[c%16] |= 1 << (c / 16)
	}
}

// findSetMatches extracts the literal alternatives from a pattern of the form
// "^(?:a|b|c)$". It returns nil when the pattern lacks that wrapper, contains
// an unescaped regex metacharacter other than '|', or contains an escape
// sequence that is neither an escaped metacharacter nor an escaped backslash.
// Empty alternatives are dropped, so the returned slice may be empty.
func findSetMatches(pattern string) []string {
// Return empty matches if the wrapper from Prometheus is missing.
if len(pattern) < 6 || pattern[:4] != "^(?:" || pattern[len(pattern)-2:] != ")$" {
return nil
}
// escaped is true when the previous byte was an unescaped backslash.
escaped := false
// sets holds one builder per alternative; a new one is started at each unescaped '|'.
sets := []*strings.Builder{&strings.Builder{}}
// Scan only the bytes between the "^(?:" prefix and the ")$" suffix.
for i := 4; i < len(pattern)-2; i++ {
if escaped {
switch {
case isRegexMetaCharacter(pattern[i]):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this check necessary?
Why not just allow every escaped character? (Or perhaps disallow \0... since that’s complicated).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@brian-brazil Should I include the cases above?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, though check what Go does for escaping characters that don't need to be escaped.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sorry, I was thinking about the wrong language.
The set of escapes we care about here is documented at https://github.com/google/re2/wiki/Syntax and is painfully complicated.
There are lots of escapes we don't want to accept, like \g, \p, \x, so you do need something like what you have.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have rechecked, the special characters you meant like \n, \a are actually already included in the else {} part of findSetMatches. What I detect here are the special characters like \\., \\+ in regexp, which means after I find \\, I determine if the next char is special.

sets[len(sets)-1].WriteByte(pattern[i])
// An escaped backslash contributes a literal backslash.
case pattern[i] == '\\':
sets[len(sets)-1].WriteByte('\\')
// Any other escaped byte is rejected.
default:
return nil
}
escaped = false
} else {
switch {
case isRegexMetaCharacter(pattern[i]):
// '|' starts a new alternative; any other unescaped metacharacter is rejected.
if pattern[i] == '|' {
sets = append(sets, &strings.Builder{})
} else {
return nil
}
case pattern[i] == '\\':
escaped = true
default:
sets[len(sets)-1].WriteByte(pattern[i])
}
naivewong marked this conversation as resolved.
Show resolved Hide resolved
}
}
// Collect the non-empty alternatives.
matches := make([]string, 0, len(sets))
for _, s := range sets {
if s.Len() > 0 {
matches = append(matches, s.String())
}
}
return matches
}

// PostingsForMatchers assembles a single postings iterator against the index reader
// based on the given matchers.
func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) {
Expand Down Expand Up @@ -346,6 +403,14 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error
return ix.Postings(em.Name(), em.Value())
}

// Fast-path for set matching.
if em, ok := m.(*labels.RegexpMatcher); ok {
setMatches := findSetMatches(em.Value())
if len(setMatches) > 0 {
return postingsForSetMatcher(ix, em.Name(), setMatches)
}
}

tpls, err := ix.LabelValues(m.Name())
if err != nil {
return nil, err
Expand Down Expand Up @@ -411,6 +476,18 @@ func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings
return index.Merge(rit...), nil
}

// postingsForSetMatcher fetches the postings for each (name, match) pair via
// ix.Postings and combines them with index.Merge. The first lookup error stops
// the loop and is returned as is, together with nil postings.
func postingsForSetMatcher(ix IndexReader, name string, matches []string) (index.Postings, error) {
var its []index.Postings
for _, match := range matches {
if it, err := ix.Postings(name, match); err == nil {
naivewong marked this conversation as resolved.
Show resolved Hide resolved
its = append(its, it)
} else {
return nil, err
}
}
return index.Merge(its...), nil
}

func mergeStrings(a, b []string) []string {
maxl := len(a)
if len(b) > len(a) {
Expand Down
Loading