Skip to content

Commit

Permalink
Non-ingesters should not fail on its config validation (#7990)
Browse files Browse the repository at this point in the history
* mimir: non-ingesters should not fail on its config validation

Signed-off-by: Vladimir Varankin <vladimir.varankin@grafana.com>

* ingester: regroup internal constants for readability

Signed-off-by: Vladimir Varankin <vladimir.varankin@grafana.com>

* pkg/ingester: add ErrSpreadMinimizingValidation sentinel error to ring validation

Signed-off-by: Vladimir Varankin <vladimir.varankin@grafana.com>

* mimir: skip specific ingester config validation errors

Signed-off-by: Vladimir Varankin <vladimir.varankin@grafana.com>

* update CHANGELOG

Signed-off-by: Vladimir Varankin <vladimir.varankin@grafana.com>

---------

Signed-off-by: Vladimir Varankin <vladimir.varankin@grafana.com>
  • Loading branch information
narqo committed Apr 30, 2024
1 parent 48b05ed commit c55ed05
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 45 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
* [BUGFIX] Ingester: do not reuse exemplars slice in the write request if there are more than 10 exemplars per series. This should help to reduce the in-use memory in case of few requests with a very large number of exemplars. #7936
* [BUGFIX] Distributor: fix down scaling of native histograms in the distributor when timeseries unmarshal cache is in use. #7947
* [BUGFIX] Distributor: fix cardinality API to return more accurate number of in-memory series when number of zones is larger than replication factor. #7984
* [BUGFIX] All: fix config validation for non-ingester modules, when ingester's ring is configured with spread-minimizing token generation strategy. #7990

### Mixin

Expand Down
51 changes: 30 additions & 21 deletions pkg/ingester/ingester_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,22 @@ import (
)

const (
// sharedOptionWithRingClient is a message appended to all config options that should be also
// set on the components running the ingester ring client.
sharedOptionWithRingClient = " This option needs be set on ingesters, distributors, queriers and rulers when running in microservices mode."
tokensFilePathFlag = "tokens-file-path"
tokenGenerationStrategyFlag = "token-generation-strategy"
randomTokenGeneration = "random"
spreadMinimizingTokenGeneration = "spread-minimizing"
spreadMinimizingJoinRingInOrderFlag = "spread-minimizing-join-ring-in-order"
flagTokensFilePath = "tokens-file-path"
flagTokenGenerationStrategy = "token-generation-strategy"
flagSpreadMinimizingJoinRingInOrder = "spread-minimizing-join-ring-in-order"
// allowed values for token-generation-strategy
tokenGenerationRandom = "random"
tokenGenerationSpreadMinimizing = "spread-minimizing"
)

// sharedOptionWithRingClient is a message appended to all config options that should be also
// set on the components running the ingester ring client.
const sharedOptionWithRingClient = " This option needs be set on ingesters, distributors, queriers and rulers when running in microservices mode."

// ErrSpreadMinimizingValidation is a sentinel error that indicates a failure
// in the validation of spread minimizing token generation config.
var ErrSpreadMinimizingValidation = fmt.Errorf("%q token generation strategy is misconfigured", tokenGenerationSpreadMinimizing)

type RingConfig struct {
KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances. This option needs be set on ingesters, distributors, queriers and rulers when running in microservices mode."`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period" category:"advanced"`
Expand Down Expand Up @@ -67,21 +73,24 @@ type RingConfig struct {
}

func (cfg *RingConfig) Validate() error {
if cfg.TokenGenerationStrategy != randomTokenGeneration && cfg.TokenGenerationStrategy != spreadMinimizingTokenGeneration {
return fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", cfg.TokenGenerationStrategy, tokenGenerationStrategyFlag)
if cfg.TokenGenerationStrategy != tokenGenerationRandom && cfg.TokenGenerationStrategy != tokenGenerationSpreadMinimizing {
return fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", cfg.TokenGenerationStrategy, flagTokenGenerationStrategy)
}

if cfg.TokenGenerationStrategy == spreadMinimizingTokenGeneration {
if cfg.TokenGenerationStrategy == tokenGenerationSpreadMinimizing {
if cfg.TokensFilePath != "" {
return fmt.Errorf("%q token generation strategy requires %q to be empty", spreadMinimizingTokenGeneration, tokensFilePathFlag)
return fmt.Errorf("%w: strategy requires %q to be empty", ErrSpreadMinimizingValidation, flagTokensFilePath)
}
_, err := ring.NewSpreadMinimizingTokenGenerator(cfg.InstanceID, cfg.InstanceZone, cfg.SpreadMinimizingZones, cfg.SpreadMinimizingJoinRingInOrder)
return err
if err != nil {
return fmt.Errorf("%w: %w", ErrSpreadMinimizingValidation, err)
}
return nil
}

// at this point cfg.TokenGenerationStrategy is not spreadMinimizingTokenGeneration
// at this point cfg.TokenGenerationStrategy is not tokenGenerationSpreadMinimizing
if cfg.SpreadMinimizingJoinRingInOrder {
return fmt.Errorf("%q must be false when using %q token generation strategy", spreadMinimizingJoinRingInOrderFlag, cfg.TokenGenerationStrategy)
return fmt.Errorf("%q must be false when using %q token generation strategy", flagSpreadMinimizingJoinRingInOrder, cfg.TokenGenerationStrategy)
}

return nil
Expand All @@ -107,7 +116,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
f.BoolVar(&cfg.ZoneAwarenessEnabled, prefix+"zone-awareness-enabled", false, "True to enable the zone-awareness and replicate ingested samples across different availability zones."+sharedOptionWithRingClient)
f.Var(&cfg.ExcludedZones, prefix+"excluded-zones", "Comma-separated list of zones to exclude from the ring. Instances in excluded zones will be filtered out from the ring."+sharedOptionWithRingClient)

f.StringVar(&cfg.TokensFilePath, prefix+tokensFilePathFlag, "", fmt.Sprintf("File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup. Must be empty if -%s is set to %q.", prefix+tokenGenerationStrategyFlag, spreadMinimizingTokenGeneration))
f.StringVar(&cfg.TokensFilePath, prefix+flagTokensFilePath, "", fmt.Sprintf("File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup. Must be empty if -%s is set to %q.", prefix+flagTokenGenerationStrategy, tokenGenerationSpreadMinimizing))
f.IntVar(&cfg.NumTokens, prefix+"num-tokens", 128, "Number of tokens for each ingester.")

// Instance flags
Expand All @@ -128,9 +137,9 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
f.DurationVar(&cfg.FinalSleep, prefix+"final-sleep", 0, "Duration to sleep for before exiting, to ensure metrics are scraped.")

// TokenGenerator
f.StringVar(&cfg.TokenGenerationStrategy, prefix+tokenGenerationStrategyFlag, randomTokenGeneration, fmt.Sprintf("Specifies the strategy used for generating tokens for ingesters. Supported values are: %s.", strings.Join([]string{randomTokenGeneration, spreadMinimizingTokenGeneration}, ",")))
f.BoolVar(&cfg.SpreadMinimizingJoinRingInOrder, prefix+spreadMinimizingJoinRingInOrderFlag, false, fmt.Sprintf("True to allow this ingester registering tokens in the ring only after all previous ingesters (with ID lower than the current one) have already been registered. This configuration option is supported only when the token generation strategy is set to %q.", spreadMinimizingTokenGeneration))
f.Var(&cfg.SpreadMinimizingZones, prefix+"spread-minimizing-zones", fmt.Sprintf("Comma-separated list of zones in which spread minimizing strategy is used for token generation. This value must include all zones in which ingesters are deployed, and must not change over time. This configuration is used only when %q is set to %q.", tokenGenerationStrategyFlag, spreadMinimizingTokenGeneration))
f.StringVar(&cfg.TokenGenerationStrategy, prefix+flagTokenGenerationStrategy, tokenGenerationRandom, fmt.Sprintf("Specifies the strategy used for generating tokens for ingesters. Supported values are: %s.", strings.Join([]string{tokenGenerationRandom, tokenGenerationSpreadMinimizing}, ",")))
f.BoolVar(&cfg.SpreadMinimizingJoinRingInOrder, prefix+flagSpreadMinimizingJoinRingInOrder, false, fmt.Sprintf("True to allow this ingester registering tokens in the ring only after all previous ingesters (with ID lower than the current one) have already been registered. This configuration option is supported only when the token generation strategy is set to %q.", tokenGenerationSpreadMinimizing))
f.Var(&cfg.SpreadMinimizingZones, prefix+"spread-minimizing-zones", fmt.Sprintf("Comma-separated list of zones in which spread minimizing strategy is used for token generation. This value must include all zones in which ingesters are deployed, and must not change over time. This configuration is used only when %q is set to %q.", flagTokenGenerationStrategy, tokenGenerationSpreadMinimizing))
}

// ToRingConfig returns a ring.Config based on the ingester
Expand Down Expand Up @@ -187,10 +196,10 @@ func (cfg *RingConfig) ToLifecyclerConfig() ring.LifecyclerConfig {
// an instance of ring.RandomTokenGenerator. Otherwise, nil is returned.
func (cfg *RingConfig) customTokenGenerator() ring.TokenGenerator {
switch cfg.TokenGenerationStrategy {
case spreadMinimizingTokenGeneration:
case tokenGenerationSpreadMinimizing:
tokenGenerator, _ := ring.NewSpreadMinimizingTokenGenerator(cfg.InstanceID, cfg.InstanceZone, cfg.SpreadMinimizingZones, cfg.SpreadMinimizingJoinRingInOrder)
return tokenGenerator
case randomTokenGeneration:
case tokenGenerationRandom:
return ring.NewRandomTokenGenerator()
default:
return nil
Expand Down
63 changes: 40 additions & 23 deletions pkg/ingester/ingester_ring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func TestRingConfig_CustomConfigToLifecyclerConfig(t *testing.T) {
cfg.MinReadyDuration = 3 * time.Minute
cfg.FinalSleep = 2 * time.Minute
cfg.ListenPort = 10
cfg.TokenGenerationStrategy = spreadMinimizingTokenGeneration
cfg.TokenGenerationStrategy = tokenGenerationSpreadMinimizing
cfg.SpreadMinimizingZones = []string{"zone-a", "zone-b", "zone-c"}
cfg.SpreadMinimizingJoinRingInOrder = true

Expand Down Expand Up @@ -114,6 +114,7 @@ func TestRingConfig_CustomConfigToLifecyclerConfig(t *testing.T) {

func TestRingConfig_Validate(t *testing.T) {
tests := map[string]struct {
instanceID string
zone string
tokenGenerationStrategy string
spreadMinimizingJoinRingInOrder bool
Expand All @@ -122,59 +123,75 @@ func TestRingConfig_Validate(t *testing.T) {
tokensFilePath string
}{
"spread-minimizing and correct zones pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
},
"spread-minimizing and a wrong instance-id don't pass validation": {
instanceID: "broken-instance-48pgb", // InstanceID is expected to end up with a number.
zone: instanceZone,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
expectedError: fmt.Errorf("%s: unable to extract instance id", ErrSpreadMinimizingValidation),
},
"spread-minimizing and no spread-minimizing-zones zone don't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
expectedError: fmt.Errorf("number of zones 0 is not correct: it must be greater than 0 and less or equal than 8"),
},
"spread-minimizing and a wrong zone don't pass validation": {
instanceID: instanceID,
zone: wrongZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
expectedError: fmt.Errorf("zone %s is not valid", wrongZone),
},
"spread-minimizing and tokens-file-path set don't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
tokensFilePath: "/path/tokens",
expectedError: fmt.Errorf("%q token generation strategy requires %q to be empty", spreadMinimizingTokenGeneration, tokensFilePathFlag),
expectedError: fmt.Errorf("strategy requires %q to be empty", flagTokensFilePath),
},
"spread-minimizing and spread-minimizing-join-ring-in-order pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
spreadMinimizingJoinRingInOrder: true,
},
"random passes validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
},
"random and tokens-file-path set pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
tokensFilePath: "/path/tokens",
},
"random and spread-minimizing-join-ring-in-order don't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
spreadMinimizingJoinRingInOrder: true,
expectedError: fmt.Errorf("%q must be false when using %q token generation strategy", spreadMinimizingJoinRingInOrderFlag, randomTokenGeneration),
expectedError: fmt.Errorf("%q must be false when using %q token generation strategy", flagSpreadMinimizingJoinRingInOrder, tokenGenerationRandom),
},
"unknown token generation doesn't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: "bla-bla-tokens",
expectedError: fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", "bla-bla-tokens", tokenGenerationStrategyFlag),
expectedError: fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", "bla-bla-tokens", flagTokenGenerationStrategy),
},
}

for _, testData := range tests {
cfg := RingConfig{}
cfg.InstanceID = instanceID
cfg.InstanceID = testData.instanceID
cfg.InstanceZone = testData.zone
cfg.TokenGenerationStrategy = testData.tokenGenerationStrategy
cfg.SpreadMinimizingJoinRingInOrder = testData.spreadMinimizingJoinRingInOrder
Expand All @@ -185,7 +202,7 @@ func TestRingConfig_Validate(t *testing.T) {
require.NoError(t, err)
} else {
require.Error(t, err)
require.Equal(t, testData.expectedError, err)
require.ErrorContains(t, err, testData.expectedError.Error())
}
}
}
Expand All @@ -199,14 +216,14 @@ func TestRingConfig_CustomTokenGenerator(t *testing.T) {
}{
"spread-minimizing and correct zones give a SpreadMinimizingTokenGenerator": {
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
expectedResultStrategy: spreadMinimizingTokenGeneration,
expectedResultStrategy: tokenGenerationSpreadMinimizing,
},
"random gives a RandomTokenGenerator": {
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
expectedResultStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
expectedResultStrategy: tokenGenerationRandom,
},
}

Expand All @@ -217,11 +234,11 @@ func TestRingConfig_CustomTokenGenerator(t *testing.T) {
cfg.TokenGenerationStrategy = testData.tokenGenerationStrategy
cfg.SpreadMinimizingZones = testData.spreadMinimizingZones
lifecyclerConfig := cfg.ToLifecyclerConfig()
if testData.expectedResultStrategy == randomTokenGeneration {
if testData.expectedResultStrategy == tokenGenerationRandom {
tokenGenerator, ok := lifecyclerConfig.RingTokenGenerator.(*ring.RandomTokenGenerator)
assert.True(t, ok)
assert.NotNil(t, tokenGenerator)
} else if testData.expectedResultStrategy == spreadMinimizingTokenGeneration {
} else if testData.expectedResultStrategy == tokenGenerationSpreadMinimizing {
tokenGenerator, ok := lifecyclerConfig.RingTokenGenerator.(*ring.SpreadMinimizingTokenGenerator)
assert.True(t, ok)
assert.NotNil(t, tokenGenerator)
Expand All @@ -240,19 +257,19 @@ func TestRingConfig_SpreadMinimizingJoinRingInOrder(t *testing.T) {
}{
"spread-minimizing and spread-minimizing-join-ring-in-order allow can join": {
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingJoinRingInOrder: true,
expectedCanJoinEnabled: true,
},
"spread-minimizing without spread-minimizing-join-ring-in-order doesn't allow can join": {
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingJoinRingInOrder: false,
expectedCanJoinEnabled: false,
},
"random doesn't allow can join": {
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
expectedCanJoinEnabled: false,
},
}
Expand Down
9 changes: 8 additions & 1 deletion pkg/mimir/mimir.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,14 @@ func (c *Config) Validate(log log.Logger) error {
return errors.Wrap(err, "invalid ingester_client config")
}
if err := c.Ingester.Validate(log); err != nil {
return errors.Wrap(err, "invalid ingester config")
// We check for "ingester" module here because, as of today, its config has a special mode, that assumes
// passing a unique set of per instance flags, e.g. "-ingester.ring.instance-id".
// Such a scenario breaks the validation of other modules if those flags aren't also passed to each instance (ref
// grafana/mimir#7822). Otherwise, log the fact and move on.
if c.isAnyModuleEnabled(Ingester, Write, All) || !errors.Is(err, ingester.ErrSpreadMinimizingValidation) {
return errors.Wrap(err, "invalid ingester config")
}
level.Debug(log).Log("ingester config is invalid; moving on because the \"ingester\" module is not in this process's targets", "err", err.Error())
}
if err := c.Worker.Validate(); err != nil {
return errors.Wrap(err, "invalid frontend_worker config")
Expand Down
28 changes: 28 additions & 0 deletions pkg/mimir/mimir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,34 @@ func TestConfigValidation(t *testing.T) {
},
expectAnyError: true,
},
{
name: "should fail if ingester ring is misconfigured with spread-minimizing token generation strategy when target is ingester",
getTestConfig: func() *Config {
cfg := newDefaultConfig()
_ = cfg.Target.Set("ingester")
cfg.Ingester.IngesterRing.InstanceID = "" // empty string is not a valid instance-id
cfg.Ingester.IngesterRing.InstanceZone = "zone-1"
cfg.Ingester.IngesterRing.SpreadMinimizingZones = []string{"zone-1", "zone-2"}
cfg.Ingester.IngesterRing.TokenGenerationStrategy = "spread-minimizing"

return cfg
},
expectAnyError: true,
},
{
name: "should pass if ingester ring is misconfigured with spread-minimizing token generation strategy when target is not ingester",
getTestConfig: func() *Config {
cfg := newDefaultConfig()
_ = cfg.Target.Set("distributor")
cfg.Ingester.IngesterRing.InstanceID = "" // empty string is not a valid instance-id
cfg.Ingester.IngesterRing.InstanceZone = "zone-1"
cfg.Ingester.IngesterRing.SpreadMinimizingZones = []string{"zone-1", "zone-2"}
cfg.Ingester.IngesterRing.TokenGenerationStrategy = "spread-minimizing"

return cfg
},
expectAnyError: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
err := tc.getTestConfig().Validate(log.NewNopLogger())
Expand Down

0 comments on commit c55ed05

Please sign in to comment.