Skip to content

Commit

Permalink
Fix sharding of vector and time functions (#2355) (#2361)
Browse files Browse the repository at this point in the history
Functions `vector` and `time` can't be sharded, as they produce same set
of series (with zero labels) and can't be concatenated later.

Additionally, functions that have a default `time()` argument,
like `month()`, were not propagating the shardeability checks properly.

Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
(cherry picked from commit 27154b3)
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
  • Loading branch information
colega committed Jul 11, 2022
1 parent d2c82dd commit f81037e
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Mimirtool

* [BUGFIX] Make mimirtool build for Windows work again. #2273
* [BUGFIX] Query-frontend: `vector` and `time` functions were sharded, which made expressions like `vector(1) > 0 and vector(1)` fail. #2355

## 2.2.0-rc.0

Expand Down
29 changes: 28 additions & 1 deletion pkg/frontend/querymiddleware/astmapper/parallel.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,21 @@ var NonParallelFuncs = []string{
"histogram_quantile",
"sort_desc",
"sort",
"time",
"vector",
}

// FuncsWithDefaultTimeArg is the list of functions that extract date information from a variadic list of params,
// which defaults to be just time() otherwise.
var FuncsWithDefaultTimeArg = []string{
"day_of_month",
"day_of_week",
"day_of_year",
"days_in_month",
"hour",
"minute",
"month",
"year",
}

// CanParallelize tests if a subtree is parallelizable.
Expand Down Expand Up @@ -81,7 +96,7 @@ func CanParallelize(node parser.Node, logger log.Logger) bool {
return false
}

for _, e := range n.Args {
for _, e := range argsWithDefaults(n) {
if !CanParallelize(e, logger) {
return false
}
Expand Down Expand Up @@ -133,6 +148,18 @@ func ParallelizableFunc(f parser.Function) bool {
return true
}

// argsWithDefaults returns the arguments of the call, including the omitted defaults.
func argsWithDefaults(call *parser.Call) parser.Expressions {
for _, fn := range FuncsWithDefaultTimeArg {
if fn == call.Func.Name && len(call.Args) == 0 {
return parser.Expressions{
&parser.Call{Func: parser.Functions["time"]},
}
}
}
return call.Args
}

func noAggregates(n parser.Node) bool {
hasAggregates, _ := anyNode(n, isAggregateExpr)
return !hasAggregates
Expand Down
21 changes: 21 additions & 0 deletions pkg/frontend/querymiddleware/astmapper/parallel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,24 @@ func TestCanParallel_String(t *testing.T) {
})
}
}

func TestFunctionsWithDefaultsIsUpToDate(t *testing.T) {
for name, f := range parser.Functions {
t.Run(name, func(t *testing.T) {
if f.Variadic == 0 {
return
}
if f.Name == "label_join" {
// label_join has no defaults, it just accepts any number of labels
return
}
if f.Name == "round" {
// round has a default value for the second scalar value, which is not relevant for sharding purposes.
return
}

// Rest of the functions with known defaults are functions with a default time() argument.
require.Containsf(t, FuncsWithDefaultTimeArg, name, "Function %q has variable arguments, and it's not in the list of functions with default time() argument.")
})
}
}
10 changes: 10 additions & 0 deletions pkg/frontend/querymiddleware/astmapper/sharding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,16 @@ func TestShardSummer(t *testing.T) {
`)`,
expectedShardedQueries: 6,
},
{
in: `vector(1) > 0 and vector(1)`,
out: `vector(1) > 0 and vector(1)`,
expectedShardedQueries: 0,
},
{
in: `sum(foo) > 0 and vector(1)`,
out: `sum(` + concatShards(3, `sum(foo{__query_shard__="x_of_y"})`) + `) > 0 and vector(1)`,
expectedShardedQueries: 3,
},
} {
tt := tt

Expand Down
32 changes: 32 additions & 0 deletions pkg/frontend/querymiddleware/querysharding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,38 @@ func TestQueryShardingCorrectness(t *testing.T) {
expectedShardedQueries: 0,
noRangeQuery: true,
},
"day_of_month() >= 1 and day_of_month()": {
query: `day_of_month() >= 1 and day_of_month()`,
expectedShardedQueries: 0,
},
"month() >= 1 and month()": {
query: `month() >= 1 and month()`,
expectedShardedQueries: 0,
},
"vector(1) > 0 and vector(1)": {
query: `vector(1) > 0 and vector(1)`,
expectedShardedQueries: 0,
},
"sum(metric_counter) > 0 and vector(1)": {
query: `sum(metric_counter) > 0 and vector(1)`,
expectedShardedQueries: 1,
},
"vector(1)": {
query: `vector(1)`,
expectedShardedQueries: 0,
},
"time()": {
query: `time()`,
expectedShardedQueries: 0,
},
"month(sum(metric_counter))": {
query: `month(sum(metric_counter))`,
expectedShardedQueries: 1, // Sharded because the contents of `sum()` is sharded.
},
"month(sum(metric_counter)) > 0 and vector(1)": {
query: `month(sum(metric_counter)) > 0 and vector(1)`,
expectedShardedQueries: 1, // Sharded because the contents of `sum()` is sharded.
},
}

series := make([]*promql.StorageSeries, 0, numSeries+(numHistograms*len(histogramBuckets)))
Expand Down

0 comments on commit f81037e

Please sign in to comment.