From d249d1d074c0db6e96b835abf0e54c8124721e1e Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Mon, 13 May 2019 21:53:28 +0800 Subject: [PATCH 01/19] Original version of the set optimization Signed-off-by: naivewong <867245430@qq.com> --- labels/selector.go | 13 +- querier.go | 362 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 369 insertions(+), 6 deletions(-) diff --git a/labels/selector.go b/labels/selector.go index a0565f57..c94ebb33 100644 --- a/labels/selector.go +++ b/labels/selector.go @@ -63,14 +63,15 @@ func NewEqualMatcher(name, value string) Matcher { return &EqualMatcher{name: name, value: value} } -type regexpMatcher struct { +type RegexpMatcher struct { name string re *regexp.Regexp } -func (m regexpMatcher) Name() string { return m.name } -func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) } -func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) } +func (m RegexpMatcher) Name() string { return m.name } +func (m RegexpMatcher) Matches(v string) bool { return m.re.MatchString(v) } +func (m RegexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) } +func (m RegexpMatcher) Value() string { return m.re.String() } // NewRegexpMatcher returns a new matcher verifying that a value matches // the regular expression pattern. 
@@ -79,7 +80,7 @@ func NewRegexpMatcher(name, pattern string) (Matcher, error) { if err != nil { return nil, err } - return &regexpMatcher{name: name, re: re}, nil + return &RegexpMatcher{name: name, re: re}, nil } // NewMustRegexpMatcher returns a new matcher verifying that a value matches @@ -90,7 +91,7 @@ func NewMustRegexpMatcher(name, pattern string) Matcher { if err != nil { panic(err) } - return &regexpMatcher{name: name, re: re} + return &RegexpMatcher{name: name, re: re} } diff --git a/querier.go b/querier.go index 9d99de08..99dc52c4 100644 --- a/querier.go +++ b/querier.go @@ -266,6 +266,350 @@ func (q *blockQuerier) Close() error { return merr.Err() } +func addStrToLevel(level []*strings.Builder, strs []string, idx int) []*strings.Builder { + levelSize := len(level) - idx + for j := idx + levelSize; j < idx + levelSize * len(strs); j ++ { + level = append(level, &strings.Builder{}) + level[j].WriteString(level[j % levelSize + idx].String()) + } + for i, s := range strs { + for j := idx + i * levelSize; j < idx + (i + 1) * levelSize; j ++ { + level[j].WriteString(s) + } + } + return level +} + +func combineLevels(left []*strings.Builder, right []*strings.Builder, idx int) []*strings.Builder { + levelSize := len(left) - idx + for j := idx + levelSize; j < idx + levelSize * len(right); j ++ { + left = append(left, &strings.Builder{}) + left[j].WriteString(left[idx + j % levelSize].String()) + } + for i, s := range right { + for j := idx + i * levelSize; j < idx + (i + 1) * levelSize; j ++ { + left[j].WriteString(s.String()) + } + } + return left +} + +func handleEnd(pattern string, idx int) int { + escaped := false + for idx < len(pattern) { + switch c := pattern[idx]; { + case c == '$': + break + case c == '\\': + escaped = true + break + case c == 'B' && escaped: + escaped = false + break + default: + + } + idx += 1 + } + if idx == len(pattern) { + return idx + } else { + return -1 + } +} + +// Return empty array if not found. 
+func findSetMatches(pattern string) []string { + // To detect the character '\'. + escaped := false + insideBrackets := false + insideBraces := false + combinePending := false + matches := []string{} + // This is to handle the nested parentheses. + levels := [][]*strings.Builder{[]*strings.Builder{&strings.Builder{}}} + barIdx := map[int]int{} + bracket := []string{} + i := 0 + // Handle the beginning part of the pattern. +Loop: + for i < len(pattern) { + switch c := pattern[i]; { + case c == '^': + break + case c == '\\': + escaped = true + break + case c == 'b' && escaped: + escaped = false + break + default: + break Loop + } + i += 1 + } + // Handle the middle part of the pattern. + for i < len(pattern) { + if escaped { + switch c := pattern[i]; { + case c == 'B': + escaped = false + i = handleEnd(pattern, i + 1) + if i == -1 { + return []string{} + } + break + case c == '(' || c == ')' || c == '[' || c == '{' || c == '$' || c == '*' || c == '+' || c == '.' || c == '?' || c == '\\' || c == '^' || c == '|': + escaped = false + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, 0) + } + default: + return []string{} + } + } else if insideBrackets { + switch c := pattern[i]; { + case c == '^': + return []string{} + case c == ']': + if !(i + 1 < len(pattern) && pattern[i + 1] == '{') { + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, 0) + } + bracket = bracket[:0] + } + insideBrackets = false + break + case c == '-': + for ch := pattern[i - 1] + 1; ch <= pattern[i + 1]; ch ++ { + bracket = append(bracket, string(ch)) + } + i += 1 + break; + default: + bracket = append(bracket, string(pattern[i])) + break + } + } else if 
insideBraces { + start := 0 + end := 0 + left := false + comma := false + right := false + j := i + for pattern[j] != '}' { + if '0' <= pattern[j] && pattern[j] <= '9' { + if !comma { + left = true + start = start * 10 + int(pattern[j] - '0') + } else { + right = true + end = end * 10 + int(pattern[j] - '0') + } + } else if pattern[j] == ',' { + comma = true + } else { + return []string{} + } + j += 1 + } + if !left { + if len(bracket) > 0 { + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, 0) + } + bracket = bracket[:0] + } + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{pattern[i - 1: j + 1]}, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{pattern[i - 1: j + 1]}, 0) + } + } else if comma { + if !right { + return []string{} + } else { + // Need further discussion. + l := 1 + if len(bracket) > l { + l = len(bracket) + } + if combinePending && len(levels[len(levels) - 1]) > l { + l = len(levels[len(levels) - 1]) + } + addStrs := make([]string, 0, (end - start + 1) * l) + if combinePending { + idx, ok := barIdx[len(levels) - 1] + if !ok { + idx = 0 + } + for k := start; k <= end; k ++ { + for _, s := range levels[len(levels) - 1] { + addStrs = append(addStrs, strings.Repeat(s.String(), k)) + } + } + levels[len(levels) - 1] = levels[len(levels) - 1][: idx + 1] + levels[len(levels) - 1][idx].Reset() + // Delete the bar index on the top level. 
+ if _, ok = barIdx[len(levels) - 1]; ok { + delete(barIdx, len(levels) - 1) + } + levels = levels[: len(levels) - 1] + combinePending = false + } else if len(bracket) > 0 { + for k := start; k <= end; k ++ { + for _, s := range bracket { + addStrs = append(addStrs, strings.Repeat(s, k)) + } + } + bracket = bracket[:0] + } else { + for k := start; k <= end; k ++ { + addStrs = append(addStrs, strings.Repeat(string(pattern[i - 2]), k)) + } + } + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, 0) + } + } + } else { + l := 1 + if len(bracket) > l { + l = len(bracket) + } + if combinePending && len(levels[len(levels) - 1]) > l { + l = len(levels[len(levels) - 1]) + } + addStrs := make([]string, 0, l) + if combinePending { + idx, ok := barIdx[len(levels) - 1] + if !ok { + idx = 0 + } + for _, s := range levels[len(levels) - 1] { + addStrs = append(addStrs, strings.Repeat(s.String(), start)) + } + levels[len(levels) - 1] = levels[len(levels) - 1][: idx + 1] + levels[len(levels) - 1][idx].Reset() + // Delete the bar index on the top level. 
+ if _, ok = barIdx[len(levels) - 1]; ok { + delete(barIdx, len(levels) - 1) + } + levels = levels[: len(levels) - 1] + combinePending = false + } else if len(bracket) > 0 { + for _, s := range bracket { + addStrs = append(addStrs, strings.Repeat(s, start)) + } + bracket = bracket[:0] + } else { + addStrs = append(addStrs, strings.Repeat(string(pattern[i - 2]), start)) + } + // fmt.Println("len addStrs", len(addStrs)) + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, 0) + } + } + if combinePending { + if idx, ok := barIdx[len(levels) - 2]; ok { + levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], idx) + } else { + levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], 0) + } + // Delete the bar index on the top level. + if _, ok := barIdx[len(levels) - 1]; ok { + delete(barIdx, len(levels) - 1) + } + levels = levels[: len(levels) - 1] + combinePending = false + } + i = j + insideBraces = false + } else { + switch c := pattern[i]; { + case c == '*' || c == '+' || c == '.' || c == '?': + return []string{} + case c == '$': + i = handleEnd(pattern, i + 1) + if i == -1 { + return []string{} + } + break + case c == '\\': + escaped = true + break + case c == '[': + insideBrackets = true + break + case c == '{': + insideBraces = true + break + case c == '(': + if i + 2 < len(pattern) && pattern[i + 1] == '?' 
&& pattern[i + 2] == ':' { + i += 2 + } + levels = append(levels, []*strings.Builder{&strings.Builder{}}) + break + case c == ')': + // fmt.Println(len(levels)) + if i + 1 < len(pattern) && pattern[i + 1] == '{' { + combinePending = true + } else { + if idx, ok := barIdx[len(levels) - 2]; ok { + levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], idx) + } else { + levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], 0) + } + // Delete the bar index on the top level. + if _, ok := barIdx[len(levels) - 1]; ok { + delete(barIdx, len(levels) - 1) + } + levels = levels[: len(levels) - 1] + } + break + case c == '|': + levels[len(levels) - 1] = append(levels[len(levels) - 1], &strings.Builder{}) + barIdx[len(levels) - 1] = len(levels[len(levels) - 1]) - 1 + break + default: + if len(bracket) > 0 { + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, 0) + } + bracket = bracket[:0] + } + if !(i + 1 < len(pattern) && pattern[i + 1] == '{') { + if idx, ok := barIdx[len(levels) - 1]; ok { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, idx) + } else { + levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, 0) + } + } + break + } + } + i += 1 + } + for _, s := range levels[0] { + if s.Len() > 0{ + matches = append(matches, s.String()) + } + } + return matches +} + // PostingsForMatchers assembles a single postings iterator against the index reader // based on the given matchers. func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) { @@ -346,6 +690,14 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error return ix.Postings(em.Name(), em.Value()) } + // Fast-path for set matching. 
+ if em, ok := m.(*labels.RegexpMatcher); ok { + setMatches := findSetMatches(em.Value()) + if len(setMatches) > 0 { + return postingsForSetMatcher(ix, em.Name(), setMatches) + } + } + tpls, err := ix.LabelValues(m.Name()) if err != nil { return nil, err @@ -411,6 +763,16 @@ func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings return index.Merge(rit...), nil } +func postingsForSetMatcher(ix IndexReader, name string, matches []string) (index.Postings, error) { + var its []index.Postings + for _, match := range matches { + if it, err := ix.Postings(name, match); err == nil { + its = append(its, it) + } + } + return index.Merge(its...), nil +} + func mergeStrings(a, b []string) []string { maxl := len(a) if len(b) > len(a) { From 32080decc7755b2680e5f6841911429a7780ec75 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Tue, 14 May 2019 12:16:37 +0800 Subject: [PATCH 02/19] simple set matcher Signed-off-by: naivewong <867245430@qq.com> --- querier.go | 348 ++++-------------------------------------------- querier_test.go | 87 ++++++++++++ 2 files changed, 115 insertions(+), 320 deletions(-) diff --git a/querier.go b/querier.go index 99dc52c4..24db4bed 100644 --- a/querier.go +++ b/querier.go @@ -266,344 +266,52 @@ func (q *blockQuerier) Close() error { return merr.Err() } -func addStrToLevel(level []*strings.Builder, strs []string, idx int) []*strings.Builder { - levelSize := len(level) - idx - for j := idx + levelSize; j < idx + levelSize * len(strs); j ++ { - level = append(level, &strings.Builder{}) - level[j].WriteString(level[j % levelSize + idx].String()) - } - for i, s := range strs { - for j := idx + i * levelSize; j < idx + (i + 1) * levelSize; j ++ { - level[j].WriteString(s) - } - } - return level -} +// Bitmap used by func special to check whether a character needs to be escaped. 
+var specialBytes [16]byte -func combineLevels(left []*strings.Builder, right []*strings.Builder, idx int) []*strings.Builder { - levelSize := len(left) - idx - for j := idx + levelSize; j < idx + levelSize * len(right); j ++ { - left = append(left, &strings.Builder{}) - left[j].WriteString(left[idx + j % levelSize].String()) - } - for i, s := range right { - for j := idx + i * levelSize; j < idx + (i + 1) * levelSize; j ++ { - left[j].WriteString(s.String()) - } - } - return left +// special reports whether byte b needs to be escaped. +func special(b byte) bool { + return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0 } -func handleEnd(pattern string, idx int) int { - escaped := false - for idx < len(pattern) { - switch c := pattern[idx]; { - case c == '$': - break - case c == '\\': - escaped = true - break - case c == 'B' && escaped: - escaped = false - break - default: - - } - idx += 1 - } - if idx == len(pattern) { - return idx - } else { - return -1 +func init() { + for _, b := range []byte(`.+*?()|[]{}^$`) { + specialBytes[b%16] |= 1 << (b / 16) } } -// Return empty array if not found. func findSetMatches(pattern string) []string { - // To detect the character '\'. escaped := false - insideBrackets := false - insideBraces := false - combinePending := false - matches := []string{} - // This is to handle the nested parentheses. - levels := [][]*strings.Builder{[]*strings.Builder{&strings.Builder{}}} - barIdx := map[int]int{} - bracket := []string{} - i := 0 - // Handle the beginning part of the pattern. -Loop: - for i < len(pattern) { - switch c := pattern[i]; { - case c == '^': - break - case c == '\\': - escaped = true - break - case c == 'b' && escaped: - escaped = false - break - default: - break Loop - } - i += 1 - } - // Handle the middle part of the pattern. 
- for i < len(pattern) { + sets := []*strings.Builder{&strings.Builder{}} + for i := 0; i < len(pattern); i ++ { if escaped { - switch c := pattern[i]; { - case c == 'B': - escaped = false - i = handleEnd(pattern, i + 1) - if i == -1 { - return []string{} - } - break - case c == '(' || c == ')' || c == '[' || c == '{' || c == '$' || c == '*' || c == '+' || c == '.' || c == '?' || c == '\\' || c == '^' || c == '|': - escaped = false - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, idx) - } else { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, 0) - } - default: - return []string{} - } - } else if insideBrackets { - switch c := pattern[i]; { - case c == '^': - return []string{} - case c == ']': - if !(i + 1 < len(pattern) && pattern[i + 1] == '{') { - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, idx) - } else { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, 0) - } - bracket = bracket[:0] - } - insideBrackets = false - break - case c == '-': - for ch := pattern[i - 1] + 1; ch <= pattern[i + 1]; ch ++ { - bracket = append(bracket, string(ch)) - } - i += 1 - break; - default: - bracket = append(bracket, string(pattern[i])) - break - } - } else if insideBraces { - start := 0 - end := 0 - left := false - comma := false - right := false - j := i - for pattern[j] != '}' { - if '0' <= pattern[j] && pattern[j] <= '9' { - if !comma { - left = true - start = start * 10 + int(pattern[j] - '0') - } else { - right = true - end = end * 10 + int(pattern[j] - '0') - } - } else if pattern[j] == ',' { - comma = true - } else { - return []string{} - } - j += 1 - } - if !left { - if len(bracket) > 0 { - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, idx) - } else { - levels[len(levels) - 1] 
= addStrToLevel(levels[len(levels) - 1], bracket, 0) - } - bracket = bracket[:0] - } - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{pattern[i - 1: j + 1]}, idx) - } else { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{pattern[i - 1: j + 1]}, 0) - } - } else if comma { - if !right { - return []string{} - } else { - // Need further discussion. - l := 1 - if len(bracket) > l { - l = len(bracket) - } - if combinePending && len(levels[len(levels) - 1]) > l { - l = len(levels[len(levels) - 1]) - } - addStrs := make([]string, 0, (end - start + 1) * l) - if combinePending { - idx, ok := barIdx[len(levels) - 1] - if !ok { - idx = 0 - } - for k := start; k <= end; k ++ { - for _, s := range levels[len(levels) - 1] { - addStrs = append(addStrs, strings.Repeat(s.String(), k)) - } - } - levels[len(levels) - 1] = levels[len(levels) - 1][: idx + 1] - levels[len(levels) - 1][idx].Reset() - // Delete the bar index on the top level. - if _, ok = barIdx[len(levels) - 1]; ok { - delete(barIdx, len(levels) - 1) - } - levels = levels[: len(levels) - 1] - combinePending = false - } else if len(bracket) > 0 { - for k := start; k <= end; k ++ { - for _, s := range bracket { - addStrs = append(addStrs, strings.Repeat(s, k)) - } - } - bracket = bracket[:0] - } else { - for k := start; k <= end; k ++ { - addStrs = append(addStrs, strings.Repeat(string(pattern[i - 2]), k)) - } - } - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, idx) - } else { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, 0) - } - } + // Add the escaped special character to the sets. 
+ if special(pattern[i]) { + sets[len(sets)-1].WriteByte(pattern[i]) + } else if pattern[i] == '\\' { + sets[len(sets)-1].WriteByte('\\') } else { - l := 1 - if len(bracket) > l { - l = len(bracket) - } - if combinePending && len(levels[len(levels) - 1]) > l { - l = len(levels[len(levels) - 1]) - } - addStrs := make([]string, 0, l) - if combinePending { - idx, ok := barIdx[len(levels) - 1] - if !ok { - idx = 0 - } - for _, s := range levels[len(levels) - 1] { - addStrs = append(addStrs, strings.Repeat(s.String(), start)) - } - levels[len(levels) - 1] = levels[len(levels) - 1][: idx + 1] - levels[len(levels) - 1][idx].Reset() - // Delete the bar index on the top level. - if _, ok = barIdx[len(levels) - 1]; ok { - delete(barIdx, len(levels) - 1) - } - levels = levels[: len(levels) - 1] - combinePending = false - } else if len(bracket) > 0 { - for _, s := range bracket { - addStrs = append(addStrs, strings.Repeat(s, start)) - } - bracket = bracket[:0] - } else { - addStrs = append(addStrs, strings.Repeat(string(pattern[i - 2]), start)) - } - // fmt.Println("len addStrs", len(addStrs)) - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, idx) - } else { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], addStrs, 0) - } - } - if combinePending { - if idx, ok := barIdx[len(levels) - 2]; ok { - levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], idx) - } else { - levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], 0) - } - // Delete the bar index on the top level. - if _, ok := barIdx[len(levels) - 1]; ok { - delete(barIdx, len(levels) - 1) - } - levels = levels[: len(levels) - 1] - combinePending = false + return []string{} } - i = j - insideBraces = false + escaped = false } else { - switch c := pattern[i]; { - case c == '*' || c == '+' || c == '.' 
|| c == '?': - return []string{} - case c == '$': - i = handleEnd(pattern, i + 1) - if i == -1 { + // Return empty sets when there are special characters excluding '|'. + if special(pattern[i]) { + if pattern[i] == '|' { + sets = append(sets, &strings.Builder{}) + } else { return []string{} } - break - case c == '\\': + } else if pattern[i] == '\\' { escaped = true - break - case c == '[': - insideBrackets = true - break - case c == '{': - insideBraces = true - break - case c == '(': - if i + 2 < len(pattern) && pattern[i + 1] == '?' && pattern[i + 2] == ':' { - i += 2 - } - levels = append(levels, []*strings.Builder{&strings.Builder{}}) - break - case c == ')': - // fmt.Println(len(levels)) - if i + 1 < len(pattern) && pattern[i + 1] == '{' { - combinePending = true - } else { - if idx, ok := barIdx[len(levels) - 2]; ok { - levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], idx) - } else { - levels[len(levels) - 2] = combineLevels(levels[len(levels) - 2], levels[len(levels) - 1], 0) - } - // Delete the bar index on the top level. 
- if _, ok := barIdx[len(levels) - 1]; ok { - delete(barIdx, len(levels) - 1) - } - levels = levels[: len(levels) - 1] - } - break - case c == '|': - levels[len(levels) - 1] = append(levels[len(levels) - 1], &strings.Builder{}) - barIdx[len(levels) - 1] = len(levels[len(levels) - 1]) - 1 - break - default: - if len(bracket) > 0 { - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, idx) - } else { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], bracket, 0) - } - bracket = bracket[:0] - } - if !(i + 1 < len(pattern) && pattern[i + 1] == '{') { - if idx, ok := barIdx[len(levels) - 1]; ok { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, idx) - } else { - levels[len(levels) - 1] = addStrToLevel(levels[len(levels) - 1], []string{string(c)}, 0) - } - } - break + } else { + sets[len(sets)-1].WriteByte(pattern[i]) } } - i += 1 } - for _, s := range levels[0] { - if s.Len() > 0{ + matches := make([]string, 0, len(sets)) + for _, s := range sets { + if s.Len() > 0 { matches = append(matches, s.String()) } } diff --git a/querier_test.go b/querier_test.go index cb53462a..de3ca405 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1691,6 +1691,57 @@ func BenchmarkQuerySeek(b *testing.B) { } } +// Refer to https://github.com/prometheus/prometheus/issues/2651. +func TestFindSetMatches(t *testing.T) { + cases := []struct { + pattern string + exp []string + }{ + // Simple sets. + { + pattern: "foo|bar|baz", + exp: []string{ + "foo", + "bar", + "baz", + }, + }, + // Simple sets containing escaped characters. + { + pattern: "fo\\.o|bar\\?|\\^baz", + exp: []string{ + "fo.o", + "bar?", + "^baz", + }, + }, + // Simple sets containing special characters without escaping. 
+ { + pattern: "fo.o|bar?|^baz", + exp: []string{}, + }, + } + + for _, c := range cases { + matches := findSetMatches(c.pattern) + if len(c.exp) == 0 { + if len(matches) != 0 { + t.Errorf("Evaluating %s, unexpected result %v", c.pattern, matches) + } + } else { + if len(matches) != len(c.exp) { + t.Errorf("Evaluating %s, length of result not equal to exp", c.pattern) + } else { + for i := 0; i < len(c.exp); i ++ { + if c.exp[i] != matches[i] { + t.Errorf("Evaluating %s, unexpected result %s", c.pattern, matches[i]) + } + } + } + } + } +} + func TestPostingsForMatchers(t *testing.T) { h, err := NewHead(nil, nil, nil, 1000) testutil.Ok(t, err) @@ -1703,6 +1754,7 @@ func TestPostingsForMatchers(t *testing.T) { app.Add(labels.FromStrings("n", "1", "i", "a"), 0, 0) app.Add(labels.FromStrings("n", "1", "i", "b"), 0, 0) app.Add(labels.FromStrings("n", "2"), 0, 0) + app.Add(labels.FromStrings("n", "2.5"), 0, 0) testutil.Ok(t, app.Commit()) cases := []struct { @@ -1735,6 +1787,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), }, }, // Not equals. 
@@ -1742,6 +1795,7 @@ func TestPostingsForMatchers(t *testing.T) { matchers: []labels.Matcher{labels.Not(labels.NewEqualMatcher("n", "1"))}, exp: []labels.Labels{ labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), }, }, { @@ -1796,6 +1850,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), }, }, { @@ -1824,6 +1879,7 @@ func TestPostingsForMatchers(t *testing.T) { matchers: []labels.Matcher{labels.Not(labels.NewMustRegexpMatcher("n", "^1$"))}, exp: []labels.Labels{ labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), }, }, { @@ -1869,6 +1925,37 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1", "i", "a"), }, }, + // Set optimization for Regex. + // Refer to https://github.com/prometheus/prometheus/issues/2651. + { + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "1|2")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "2"), + }, + }, + { + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "a|b")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, + { + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "x1|2")}, + exp: []labels.Labels{ + labels.FromStrings("n", "2"), + }, + }, + { + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "2|2\\.5")}, + exp: []labels.Labels{ + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, } ir, err := h.Index() From 3f60bf8a877ae636b6499d5d8fc9567d10daf331 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Tue, 14 May 2019 12:23:46 +0800 Subject: [PATCH 03/19] simple set matcher Signed-off-by: naivewong <867245430@qq.com> --- querier.go | 1 + 1 file changed, 1 insertion(+) diff --git a/querier.go b/querier.go 
index 24db4bed..9acf651d 100644 --- a/querier.go +++ b/querier.go @@ -17,6 +17,7 @@ import ( "fmt" "sort" "strings" + "unicode/utf8" "github.com/pkg/errors" "github.com/prometheus/tsdb/chunkenc" From 02dfa44a19b197e3c8128bd3f70b37ea8f42091d Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Tue, 14 May 2019 12:37:48 +0800 Subject: [PATCH 04/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier.go | 2 +- querier_test.go | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/querier.go b/querier.go index 9acf651d..97d989c4 100644 --- a/querier.go +++ b/querier.go @@ -284,7 +284,7 @@ func init() { func findSetMatches(pattern string) []string { escaped := false sets := []*strings.Builder{&strings.Builder{}} - for i := 0; i < len(pattern); i ++ { + for i := 0; i < len(pattern); i++ { if escaped { // Add the escaped special character to the sets. if special(pattern[i]) { diff --git a/querier_test.go b/querier_test.go index de3ca405..45fc2a48 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1701,8 +1701,8 @@ func TestFindSetMatches(t *testing.T) { { pattern: "foo|bar|baz", exp: []string{ - "foo", - "bar", + "foo", + "bar", "baz", }, }, @@ -1710,8 +1710,8 @@ func TestFindSetMatches(t *testing.T) { { pattern: "fo\\.o|bar\\?|\\^baz", exp: []string{ - "fo.o", - "bar?", + "fo.o", + "bar?", "^baz", }, }, @@ -1732,7 +1732,7 @@ func TestFindSetMatches(t *testing.T) { if len(matches) != len(c.exp) { t.Errorf("Evaluating %s, length of result not equal to exp", c.pattern) } else { - for i := 0; i < len(c.exp); i ++ { + for i := 0; i < len(c.exp); i++ { if c.exp[i] != matches[i] { t.Errorf("Evaluating %s, unexpected result %s", c.pattern, matches[i]) } From 842e2a465dcc18ff29782b9de294c2b4678f7968 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Tue, 14 May 2019 17:45:56 +0800 Subject: [PATCH 05/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier.go | 6 +++++- querier_test.go | 19 
++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/querier.go b/querier.go index 97d989c4..f6f7f658 100644 --- a/querier.go +++ b/querier.go @@ -282,9 +282,13 @@ func init() { } func findSetMatches(pattern string) []string { + // Return empty matches if the wrapper from Prometheus is missing. + if len(pattern) < 6 || pattern[:4] != "^(?:" || pattern[len(pattern)-2:] != ")$" { + return []string{} + } escaped := false sets := []*strings.Builder{&strings.Builder{}} - for i := 0; i < len(pattern); i++ { + for i := 4; i < len(pattern)-2; i++ { if escaped { // Add the escaped special character to the sets. if special(pattern[i]) { diff --git a/querier_test.go b/querier_test.go index 45fc2a48..15138467 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1699,7 +1699,7 @@ func TestFindSetMatches(t *testing.T) { }{ // Simple sets. { - pattern: "foo|bar|baz", + pattern: "^(?:foo|bar|baz)$", exp: []string{ "foo", "bar", @@ -1708,7 +1708,7 @@ func TestFindSetMatches(t *testing.T) { }, // Simple sets containing escaped characters. { - pattern: "fo\\.o|bar\\?|\\^baz", + pattern: "^(?:fo\\.o|bar\\?|\\^baz)$", exp: []string{ "fo.o", "bar?", @@ -1717,7 +1717,12 @@ func TestFindSetMatches(t *testing.T) { }, // Simple sets containing special characters without escaping. { - pattern: "fo.o|bar?|^baz", + pattern: "^(?:fo.o|bar?|^baz)$", + exp: []string{}, + }, + // Missing wrapper. + { + pattern: "foo|bar|baz", exp: []string{}, }, } @@ -1928,7 +1933,7 @@ func TestPostingsForMatchers(t *testing.T) { // Set optimization for Regex. // Refer to https://github.com/prometheus/prometheus/issues/2651. 
{ - matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "1|2")}, + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^(?:1|2)$")}, exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), @@ -1937,20 +1942,20 @@ func TestPostingsForMatchers(t *testing.T) { }, }, { - matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "a|b")}, + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "^(?:a|b)$")}, exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), }, }, { - matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "x1|2")}, + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^(?:x1|2)$")}, exp: []labels.Labels{ labels.FromStrings("n", "2"), }, }, { - matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "2|2\\.5")}, + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("n", "^(?:2|2\\.5)$")}, exp: []labels.Labels{ labels.FromStrings("n", "2"), labels.FromStrings("n", "2.5"), From 1e3cf631d7cadf8a032801484df7fc743373176a Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Wed, 15 May 2019 11:09:47 +0800 Subject: [PATCH 06/19] add benchmark Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 112 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/querier_test.go b/querier_test.go index 15138467..feae89c3 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1691,6 +1691,118 @@ func BenchmarkQuerySeek(b *testing.B) { } } +func BenchmarkSetMatcher(b *testing.B) { + cases := []struct { + numBlocks int + numSeries int + numSamplesPerSeriesPerBlock int + setPattern string + regexPattern string + }{ + { + numBlocks: 1, + numSeries: 15, + numSamplesPerSeriesPerBlock: 10, + setPattern: "^(?:1|2|3)$", + regexPattern: "1|2|3", + }, + { + numBlocks: 1, + numSeries: 15, + numSamplesPerSeriesPerBlock: 10, + setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + regexPattern: 
"1|2|3|4|5|6|7|8|9|10", + }, + { + numBlocks: 1, + numSeries: 200, + numSamplesPerSeriesPerBlock: 10, + setPattern: "^(?:1|2|3)$", + regexPattern: "1|2|3", + }, + { + numBlocks: 1, + numSeries: 200, + numSamplesPerSeriesPerBlock: 10, + setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + regexPattern: "1|2|3|4|5|6|7|8|9|10", + }, + } + + for _, c := range cases { + dir, err := ioutil.TempDir("", "bench_postings_for_matchers") + testutil.Ok(b, err) + defer func() { + testutil.Ok(b, os.RemoveAll(dir)) + }() + + var ( + blocks []*Block + prefilledLabels []map[string]string + generatedSeries []Series + ) + for i := int64(0); i < int64(c.numBlocks); i++ { + mint := i*int64(c.numSamplesPerSeriesPerBlock) + maxt := mint + int64(c.numSamplesPerSeriesPerBlock) - 1 + if len(prefilledLabels) == 0 { + generatedSeries = make([]Series, c.numSeries) + for i := 0; i < c.numSeries; i++ { + lbls := make(map[string]string, 10) + // The first label pair is {"test", "i"} which is for benchmarking set matcher. + lbls["test"] = strconv.Itoa(i) + for len(lbls) < 10 { + lbls[randString()] = randString() + } + samples := make([]tsdbutil.Sample, 0, maxt-mint+1) + for t := mint; t <= maxt; t++ { + samples = append(samples, sample{t: t, v: rand.Float64()}) + } + generatedSeries[i] = newSeries(lbls, samples) + } + for _, s := range generatedSeries { + prefilledLabels = append(prefilledLabels, s.Labels().Map()) + } + } else { + generatedSeries = populateSeries(prefilledLabels, mint, maxt) + } + block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil) + testutil.Ok(b, err) + blocks = append(blocks, block) + defer block.Close() + } + + que := &querier{ + blocks: make([]Querier, 0, len(blocks)), + } + for _, blk := range blocks { + q, err := NewBlockQuerier(blk, math.MinInt64, math.MaxInt64) + testutil.Ok(b, err) + que.blocks = append(que.blocks, q) + } + defer que.Close() + + benchMsg1 := fmt.Sprintf("SetMatch,nSeries=%d,pattern=\"%s\"", c.numSeries, c.setPattern) + benchMsg2 := 
fmt.Sprintf("RegexMatch,nSeries=%d,pattern=\"%s\"", c.numSeries, c.regexPattern) + b.Run(benchMsg1, func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + _, err := que.Select(labels.NewMustRegexpMatcher("test", c.setPattern)) + testutil.Ok(b, err) + + } + }) + b.Run(benchMsg2, func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + for n := 0; n < b.N; n++ { + _, err := que.Select(labels.NewMustRegexpMatcher("test", c.regexPattern)) + testutil.Ok(b, err) + } + }) + } +} + // Refer to https://github.com/prometheus/prometheus/issues/2651. func TestFindSetMatches(t *testing.T) { cases := []struct { From 4a66e349951a8555d0e43bd46118377d9261560b Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Wed, 15 May 2019 11:12:56 +0800 Subject: [PATCH 07/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/querier_test.go b/querier_test.go index feae89c3..2d713435 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1709,7 +1709,7 @@ func BenchmarkSetMatcher(b *testing.B) { { numBlocks: 1, numSeries: 15, - numSamplesPerSeriesPerBlock: 10, + numSamplesPerSeriesPerBlock: 10, setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", regexPattern: "1|2|3|4|5|6|7|8|9|10", }, @@ -1723,7 +1723,7 @@ func BenchmarkSetMatcher(b *testing.B) { { numBlocks: 1, numSeries: 200, - numSamplesPerSeriesPerBlock: 10, + numSamplesPerSeriesPerBlock: 10, setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", regexPattern: "1|2|3|4|5|6|7|8|9|10", }, @@ -1742,7 +1742,7 @@ func BenchmarkSetMatcher(b *testing.B) { generatedSeries []Series ) for i := int64(0); i < int64(c.numBlocks); i++ { - mint := i*int64(c.numSamplesPerSeriesPerBlock) + mint := i * int64(c.numSamplesPerSeriesPerBlock) maxt := mint + int64(c.numSamplesPerSeriesPerBlock) - 1 if len(prefilledLabels) == 0 { generatedSeries = make([]Series, c.numSeries) From 87c3186db6fcc1dbad6a9bbac8cc670109cd7a2a Mon Sep 17 00:00:00 2001 
From: naivewong <867245430@qq.com> Date: Wed, 15 May 2019 11:20:42 +0800 Subject: [PATCH 08/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/querier_test.go b/querier_test.go index 2d713435..17e80218 100644 --- a/querier_test.go +++ b/querier_test.go @@ -21,6 +21,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "testing" "github.com/pkg/errors" From 017871f2b62c5a3b65cfe61f499f6a47176ebfcb Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Wed, 15 May 2019 16:03:57 +0800 Subject: [PATCH 09/19] update benchmark Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 49 ++++++++++++++++++------------------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/querier_test.go b/querier_test.go index 17e80218..18d23a58 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1692,41 +1692,37 @@ func BenchmarkQuerySeek(b *testing.B) { } } +// Refer to https://github.com/prometheus/prometheus/issues/2651. 
func BenchmarkSetMatcher(b *testing.B) { cases := []struct { numBlocks int numSeries int numSamplesPerSeriesPerBlock int - setPattern string - regexPattern string + pattern string }{ { numBlocks: 1, numSeries: 15, numSamplesPerSeriesPerBlock: 10, - setPattern: "^(?:1|2|3)$", - regexPattern: "1|2|3", + pattern: "^(?:1|2|3)$", }, { numBlocks: 1, numSeries: 15, - numSamplesPerSeriesPerBlock: 10, - setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", - regexPattern: "1|2|3|4|5|6|7|8|9|10", + numSamplesPerSeriesPerBlock: 10, + pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, { - numBlocks: 1, - numSeries: 200, + numBlocks: 20, + numSeries: 1000, numSamplesPerSeriesPerBlock: 10, - setPattern: "^(?:1|2|3)$", - regexPattern: "1|2|3", + pattern: "^(?:1|2|3)$", }, { - numBlocks: 1, - numSeries: 200, - numSamplesPerSeriesPerBlock: 10, - setPattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", - regexPattern: "1|2|3|4|5|6|7|8|9|10", + numBlocks: 20, + numSeries: 1000, + numSamplesPerSeriesPerBlock: 10, + pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, } @@ -1743,14 +1739,14 @@ func BenchmarkSetMatcher(b *testing.B) { generatedSeries []Series ) for i := int64(0); i < int64(c.numBlocks); i++ { - mint := i * int64(c.numSamplesPerSeriesPerBlock) + mint := i*int64(c.numSamplesPerSeriesPerBlock) maxt := mint + int64(c.numSamplesPerSeriesPerBlock) - 1 if len(prefilledLabels) == 0 { generatedSeries = make([]Series, c.numSeries) for i := 0; i < c.numSeries; i++ { lbls := make(map[string]string, 10) - // The first label pair is {"test", "i"} which is for benchmarking set matcher. - lbls["test"] = strconv.Itoa(i) + // The first label pair is {"test", "i%50"} which is for benchmarking set matcher. 
+ lbls["test"] = strconv.Itoa(i%50) for len(lbls) < 10 { lbls[randString()] = randString() } @@ -1782,25 +1778,16 @@ func BenchmarkSetMatcher(b *testing.B) { } defer que.Close() - benchMsg1 := fmt.Sprintf("SetMatch,nSeries=%d,pattern=\"%s\"", c.numSeries, c.setPattern) - benchMsg2 := fmt.Sprintf("RegexMatch,nSeries=%d,pattern=\"%s\"", c.numSeries, c.regexPattern) - b.Run(benchMsg1, func(b *testing.B) { + benchMsg := fmt.Sprintf("nSeries=%d,pattern=\"%s\"", c.numSeries, c.pattern) + b.Run(benchMsg, func(b *testing.B) { b.ResetTimer() b.ReportAllocs() for n := 0; n < b.N; n++ { - _, err := que.Select(labels.NewMustRegexpMatcher("test", c.setPattern)) + _, err := que.Select(labels.NewMustRegexpMatcher("test", c.pattern)) testutil.Ok(b, err) } }) - b.Run(benchMsg2, func(b *testing.B) { - b.ResetTimer() - b.ReportAllocs() - for n := 0; n < b.N; n++ { - _, err := que.Select(labels.NewMustRegexpMatcher("test", c.regexPattern)) - testutil.Ok(b, err) - } - }) } } From b81e86b2e62ce08857afd8ba49382f3a4a1f3b4d Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Wed, 15 May 2019 16:07:24 +0800 Subject: [PATCH 10/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/querier_test.go b/querier_test.go index 18d23a58..3dd67b3c 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1709,7 +1709,7 @@ func BenchmarkSetMatcher(b *testing.B) { { numBlocks: 1, numSeries: 15, - numSamplesPerSeriesPerBlock: 10, + numSamplesPerSeriesPerBlock: 10, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, { @@ -1721,7 +1721,7 @@ func BenchmarkSetMatcher(b *testing.B) { { numBlocks: 20, numSeries: 1000, - numSamplesPerSeriesPerBlock: 10, + numSamplesPerSeriesPerBlock: 10, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, } @@ -1739,14 +1739,14 @@ func BenchmarkSetMatcher(b *testing.B) { generatedSeries []Series ) for i := int64(0); i < int64(c.numBlocks); i++ { - mint := i*int64(c.numSamplesPerSeriesPerBlock) 
+ mint := i * int64(c.numSamplesPerSeriesPerBlock) maxt := mint + int64(c.numSamplesPerSeriesPerBlock) - 1 if len(prefilledLabels) == 0 { generatedSeries = make([]Series, c.numSeries) for i := 0; i < c.numSeries; i++ { lbls := make(map[string]string, 10) // The first label pair is {"test", "i%50"} which is for benchmarking set matcher. - lbls["test"] = strconv.Itoa(i%50) + lbls["test"] = strconv.Itoa(i % 50) for len(lbls) < 10 { lbls[randString()] = randString() } From 0f88eed28e9565e22e103d62002c1ef9afa352ff Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Wed, 15 May 2019 20:25:22 +0800 Subject: [PATCH 11/19] update benchmark Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/querier_test.go b/querier_test.go index 3dd67b3c..5fb43194 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1700,11 +1700,13 @@ func BenchmarkSetMatcher(b *testing.B) { numSamplesPerSeriesPerBlock int pattern string }{ + // The first three cases are to find out whether the set + // matcher is always faster than regex matcher. { numBlocks: 1, - numSeries: 15, + numSeries: 1, numSamplesPerSeriesPerBlock: 10, - pattern: "^(?:1|2|3)$", + pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, { numBlocks: 1, @@ -1712,6 +1714,13 @@ func BenchmarkSetMatcher(b *testing.B) { numSamplesPerSeriesPerBlock: 10, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, + { + numBlocks: 1, + numSeries: 15, + numSamplesPerSeriesPerBlock: 10, + pattern: "^(?:1|2|3)$", + }, + // Big data sizes benchmarks. 
{ numBlocks: 20, numSeries: 1000, @@ -1724,6 +1733,18 @@ func BenchmarkSetMatcher(b *testing.B) { numSamplesPerSeriesPerBlock: 10, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, + { + numBlocks: 1, + numSeries: 100000, + numSamplesPerSeriesPerBlock: 10, + pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + }, + { + numBlocks: 1, + numSeries: 500000, + numSamplesPerSeriesPerBlock: 10, + pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + }, } for _, c := range cases { @@ -1745,8 +1766,8 @@ func BenchmarkSetMatcher(b *testing.B) { generatedSeries = make([]Series, c.numSeries) for i := 0; i < c.numSeries; i++ { lbls := make(map[string]string, 10) - // The first label pair is {"test", "i%50"} which is for benchmarking set matcher. - lbls["test"] = strconv.Itoa(i % 50) + // The first label pair is {"test", "i%100"} which is for benchmarking set matcher. + lbls["test"] = strconv.Itoa(i % 100) for len(lbls) < 10 { lbls[randString()] = randString() } @@ -1778,7 +1799,7 @@ func BenchmarkSetMatcher(b *testing.B) { } defer que.Close() - benchMsg := fmt.Sprintf("nSeries=%d,pattern=\"%s\"", c.numSeries, c.pattern) + benchMsg := fmt.Sprintf("nSeries=%d,nBlocks=%d,pattern=\"%s\"", c.numSeries, c.numBlocks, c.pattern) b.Run(benchMsg, func(b *testing.B) { b.ResetTimer() b.ReportAllocs() From 81cb0282e5c9ebad52153a8b98f02014afb68f63 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Wed, 15 May 2019 21:03:09 +0800 Subject: [PATCH 12/19] update benchmark Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/querier_test.go b/querier_test.go index 5fb43194..4183cd66 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1745,6 +1745,12 @@ func BenchmarkSetMatcher(b *testing.B) { numSamplesPerSeriesPerBlock: 10, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, + { + numBlocks: 10, + numSeries: 500000, + numSamplesPerSeriesPerBlock: 10, + pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + }, } for _, c := range cases { From 
954df56feb971bc1a9ec1ae36b38d54153de9879 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Thu, 16 May 2019 11:40:03 +0800 Subject: [PATCH 13/19] update benchmark Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/querier_test.go b/querier_test.go index 4183cd66..5d070125 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1698,6 +1698,7 @@ func BenchmarkSetMatcher(b *testing.B) { numBlocks int numSeries int numSamplesPerSeriesPerBlock int + cardinality int pattern string }{ // The first three cases are to find out whether the set @@ -1706,18 +1707,21 @@ func BenchmarkSetMatcher(b *testing.B) { numBlocks: 1, numSeries: 1, numSamplesPerSeriesPerBlock: 10, + cardinality: 100, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, { numBlocks: 1, numSeries: 15, numSamplesPerSeriesPerBlock: 10, + cardinality: 100, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, { numBlocks: 1, numSeries: 15, numSamplesPerSeriesPerBlock: 10, + cardinality: 100, pattern: "^(?:1|2|3)$", }, // Big data sizes benchmarks. @@ -1725,30 +1729,43 @@ func BenchmarkSetMatcher(b *testing.B) { numBlocks: 20, numSeries: 1000, numSamplesPerSeriesPerBlock: 10, + cardinality: 100, pattern: "^(?:1|2|3)$", }, { numBlocks: 20, numSeries: 1000, numSamplesPerSeriesPerBlock: 10, + cardinality: 100, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, + // Increase cardinality. 
{ numBlocks: 1, numSeries: 100000, numSamplesPerSeriesPerBlock: 10, + cardinality: 100000, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, { numBlocks: 1, numSeries: 500000, numSamplesPerSeriesPerBlock: 10, + cardinality: 500000, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, { numBlocks: 10, numSeries: 500000, numSamplesPerSeriesPerBlock: 10, + cardinality: 500000, + pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", + }, + { + numBlocks: 1, + numSeries: 1000000, + numSamplesPerSeriesPerBlock: 10, + cardinality: 1000000, pattern: "^(?:1|2|3|4|5|6|7|8|9|10)$", }, } @@ -1772,8 +1789,8 @@ func BenchmarkSetMatcher(b *testing.B) { generatedSeries = make([]Series, c.numSeries) for i := 0; i < c.numSeries; i++ { lbls := make(map[string]string, 10) - // The first label pair is {"test", "i%100"} which is for benchmarking set matcher. - lbls["test"] = strconv.Itoa(i % 100) + // The first label pair is {"test", "i%cardinality"} which is for benchmarking set matcher. + lbls["test"] = strconv.Itoa(i % c.cardinality) for len(lbls) < 10 { lbls[randString()] = randString() } From c929d7ad0243b9a9faaea9de7f7a270cd3536466 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Thu, 16 May 2019 11:46:07 +0800 Subject: [PATCH 14/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/querier_test.go b/querier_test.go index 5d070125..287ea04d 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1822,7 +1822,7 @@ func BenchmarkSetMatcher(b *testing.B) { } defer que.Close() - benchMsg := fmt.Sprintf("nSeries=%d,nBlocks=%d,pattern=\"%s\"", c.numSeries, c.numBlocks, c.pattern) + benchMsg := fmt.Sprintf("nSeries=%d,nBlocks=%d,cardinality=%d,pattern=\"%s\"", c.numSeries, c.numBlocks, c.cardinality, c.pattern) b.Run(benchMsg, func(b *testing.B) { b.ResetTimer() b.ReportAllocs() From 959158eb3ad63e7f2591736d7b2a26e80adbf804 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Thu, 16 May 2019 17:07:05 +0800 
Subject: [PATCH 15/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/querier_test.go b/querier_test.go index 287ea04d..25f32165 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1791,8 +1791,10 @@ func BenchmarkSetMatcher(b *testing.B) { lbls := make(map[string]string, 10) // The first label pair is {"test", "i%cardinality"} which is for benchmarking set matcher. lbls["test"] = strconv.Itoa(i % c.cardinality) + j := 1 for len(lbls) < 10 { - lbls[randString()] = randString() + lbls["labelName"+strconv.Itoa(j)] = "labelValue" + strconv.Itoa(j) + j += 1 } samples := make([]tsdbutil.Sample, 0, maxt-mint+1) for t := mint; t <= maxt; t++ { From 591ae7caab693050638bf4936427dff591ac85c6 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Thu, 16 May 2019 17:54:48 +0800 Subject: [PATCH 16/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier.go | 36 +++++++++++++++++++----------------- querier_test.go | 4 ++-- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/querier.go b/querier.go index f6f7f658..253102b0 100644 --- a/querier.go +++ b/querier.go @@ -267,49 +267,49 @@ func (q *blockQuerier) Close() error { return merr.Err() } -// Bitmap used by func special to check whether a character needs to be escaped. -var specialBytes [16]byte +// Bitmap used by func isRegexMetaCharacter to check whether a character needs to be escaped. +var regexMetaCharacterBytes [16]byte -// special reports whether byte b needs to be escaped. -func special(b byte) bool { - return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0 +// isRegexMetaCharacter reports whether byte b needs to be escaped. 
+func isRegexMetaCharacter(b byte) bool { + return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0 } func init() { for _, b := range []byte(`.+*?()|[]{}^$`) { - specialBytes[b%16] |= 1 << (b / 16) + regexMetaCharacterBytes[b%16] |= 1 << (b / 16) } } func findSetMatches(pattern string) []string { // Return empty matches if the wrapper from Prometheus is missing. if len(pattern) < 6 || pattern[:4] != "^(?:" || pattern[len(pattern)-2:] != ")$" { - return []string{} + return nil } escaped := false sets := []*strings.Builder{&strings.Builder{}} for i := 4; i < len(pattern)-2; i++ { if escaped { - // Add the escaped special character to the sets. - if special(pattern[i]) { + switch { + case isRegexMetaCharacter(pattern[i]): sets[len(sets)-1].WriteByte(pattern[i]) - } else if pattern[i] == '\\' { + case pattern[i] == '\\': sets[len(sets)-1].WriteByte('\\') - } else { - return []string{} + default: + return nil } escaped = false } else { - // Return empty sets when there are special characters excluding '|'. - if special(pattern[i]) { + switch { + case isRegexMetaCharacter(pattern[i]): if pattern[i] == '|' { sets = append(sets, &strings.Builder{}) } else { - return []string{} + return nil } - } else if pattern[i] == '\\' { + case pattern[i] == '\\': escaped = true - } else { + default: sets[len(sets)-1].WriteByte(pattern[i]) } } @@ -481,6 +481,8 @@ func postingsForSetMatcher(ix IndexReader, name string, matches []string) (index for _, match := range matches { if it, err := ix.Postings(name, match); err == nil { its = append(its, it) + } else { + return nil, err } } return index.Merge(its...), nil diff --git a/querier_test.go b/querier_test.go index 25f32165..af94a77e 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1864,12 +1864,12 @@ func TestFindSetMatches(t *testing.T) { // Simple sets containing special characters without escaping. { pattern: "^(?:fo.o|bar?|^baz)$", - exp: []string{}, + exp: nil, }, // Missing wrapper. 
{ pattern: "foo|bar|baz", - exp: []string{}, + exp: nil, }, } From 2749e565529f2951dab594fa51b415213395bb3f Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Fri, 17 May 2019 11:08:56 +0800 Subject: [PATCH 17/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/querier_test.go b/querier_test.go index af94a77e..1b0ba271 100644 --- a/querier_test.go +++ b/querier_test.go @@ -2107,6 +2107,15 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "2.5"), }, }, + // Empty value. + { + matchers: []labels.Matcher{labels.NewMustRegexpMatcher("i", "^(?:c||d)$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, } ir, err := h.Index() From d30f3a2e797740be349a52b146aea7983aaf3a09 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Fri, 17 May 2019 22:25:01 +0800 Subject: [PATCH 18/19] use genSeries from #467 Signed-off-by: naivewong <867245430@qq.com> --- block_test.go | 39 +++++++++------------------------------ querier_test.go | 1 - 2 files changed, 9 insertions(+), 31 deletions(-) diff --git a/block_test.go b/block_test.go index bdfd58fb..3ae2ec24 100644 --- a/block_test.go +++ b/block_test.go @@ -21,6 +21,7 @@ import ( "math/rand" "os" "path/filepath" + "strconv" "testing" "github.com/go-kit/kit/log" @@ -184,6 +185,11 @@ func createBlock(tb testing.TB, dir string, series []Series) string { return filepath.Join(dir, ulid.String()) } +const ( + defaultLabelName = "labelName" + defaultLabelValue = "labelValue" +) + // genSeries generates series with a given number of labels and values. 
func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series { if totalSeries == 0 || labelCount == 0 { @@ -193,8 +199,9 @@ func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series { series := make([]Series, totalSeries) for i := 0; i < totalSeries; i++ { lbls := make(map[string]string, labelCount) - for len(lbls) < labelCount { - lbls[randString()] = randString() + lbls[defaultLabelName] = strconv.Itoa(i) + for j := 1; len(lbls) < labelCount; j++ { + lbls[defaultLabelName+strconv.Itoa(j)] = defaultLabelValue + strconv.Itoa(j) } samples := make([]tsdbutil.Sample, 0, maxt-mint+1) for t := mint; t <= maxt; t++ { @@ -224,31 +231,3 @@ func populateSeries(lbls []map[string]string, mint, maxt int64) []Series { } return series } - -const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -const ( - letterIdxBits = 6 // 6 bits to represent a letter index - letterIdxMask = 1<= 0; { - if remain == 0 { - cache, remain = rand.Int63(), letterIdxMax - } - if idx := int(cache & letterIdxMask); idx < len(letterBytes) { - b[i] = letterBytes[idx] - i-- - } - cache >>= letterIdxBits - remain-- - } - - return string(b) -} diff --git a/querier_test.go b/querier_test.go index 1b0ba271..ff27cdc1 100644 --- a/querier_test.go +++ b/querier_test.go @@ -21,7 +21,6 @@ import ( "os" "path/filepath" "sort" - "strconv" "testing" "github.com/pkg/errors" From 6049a19ad8bac905422d5ecf960459c768410695 Mon Sep 17 00:00:00 2001 From: naivewong <867245430@qq.com> Date: Fri, 17 May 2019 22:35:18 +0800 Subject: [PATCH 19/19] update Signed-off-by: naivewong <867245430@qq.com> --- querier_test.go | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/querier_test.go b/querier_test.go index ff27cdc1..dfbc6a75 100644 --- a/querier_test.go +++ b/querier_test.go @@ -1785,22 +1785,7 @@ func BenchmarkSetMatcher(b *testing.B) { mint := i * int64(c.numSamplesPerSeriesPerBlock) maxt := mint + int64(c.numSamplesPerSeriesPerBlock) - 1 if 
len(prefilledLabels) == 0 { - generatedSeries = make([]Series, c.numSeries) - for i := 0; i < c.numSeries; i++ { - lbls := make(map[string]string, 10) - // The first label pair is {"test", "i%cardinality"} which is for benchmarking set matcher. - lbls["test"] = strconv.Itoa(i % c.cardinality) - j := 1 - for len(lbls) < 10 { - lbls["labelName"+strconv.Itoa(j)] = "labelValue" + strconv.Itoa(j) - j += 1 - } - samples := make([]tsdbutil.Sample, 0, maxt-mint+1) - for t := mint; t <= maxt; t++ { - samples = append(samples, sample{t: t, v: rand.Float64()}) - } - generatedSeries[i] = newSeries(lbls, samples) - } + generatedSeries = genSeries(c.numSeries, 10, mint, maxt) for _, s := range generatedSeries { prefilledLabels = append(prefilledLabels, s.Labels().Map()) }