Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Non-ingesters should not fail on its config validation #7990

Merged
merged 5 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
* [BUGFIX] Ingester: do not reuse exemplars slice in the write request if there are more than 10 exemplars per series. This should help to reduce the in-use memory in case of few requests with a very large number of exemplars. #7936
* [BUGFIX] Distributor: fix down scaling of native histograms in the distributor when timeseries unmarshal cache is in use. #7947
* [BUGFIX] Distributor: fix cardinality API to return more accurate number of in-memory series when number of zones is larger than replication factor. #7984
* [BUGFIX] All: fix config validation for non-ingester modules, when ingester's ring is configured with spread-minimizing token generation strategy. #7990

### Mixin

Expand Down
51 changes: 30 additions & 21 deletions pkg/ingester/ingester_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,22 @@ import (
)

const (
// sharedOptionWithRingClient is a message appended to all config options that should be also
// set on the components running the ingester ring client.
sharedOptionWithRingClient = " This option needs be set on ingesters, distributors, queriers and rulers when running in microservices mode."
tokensFilePathFlag = "tokens-file-path"
tokenGenerationStrategyFlag = "token-generation-strategy"
randomTokenGeneration = "random"
spreadMinimizingTokenGeneration = "spread-minimizing"
spreadMinimizingJoinRingInOrderFlag = "spread-minimizing-join-ring-in-order"
flagTokensFilePath = "tokens-file-path"
flagTokenGenerationStrategy = "token-generation-strategy"
flagSpreadMinimizingJoinRingInOrder = "spread-minimizing-join-ring-in-order"
// allowed values for token-generation-strategy
tokenGenerationRandom = "random"
tokenGenerationSpreadMinimizing = "spread-minimizing"
)

// sharedOptionWithRingClient is a message appended to all config options that should be also
// set on the components running the ingester ring client.
const sharedOptionWithRingClient = " This option needs be set on ingesters, distributors, queriers and rulers when running in microservices mode."

// ErrSpreadMinimizingValidation is a sentinel error that indicates a failure
// in the validation of spread minimizing token generation config.
var ErrSpreadMinimizingValidation = fmt.Errorf("%q token generation strategy is misconfigured", tokenGenerationSpreadMinimizing)

type RingConfig struct {
KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances. This option needs be set on ingesters, distributors, queriers and rulers when running in microservices mode."`
HeartbeatPeriod time.Duration `yaml:"heartbeat_period" category:"advanced"`
Expand Down Expand Up @@ -67,21 +73,24 @@ type RingConfig struct {
}

func (cfg *RingConfig) Validate() error {
if cfg.TokenGenerationStrategy != randomTokenGeneration && cfg.TokenGenerationStrategy != spreadMinimizingTokenGeneration {
return fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", cfg.TokenGenerationStrategy, tokenGenerationStrategyFlag)
if cfg.TokenGenerationStrategy != tokenGenerationRandom && cfg.TokenGenerationStrategy != tokenGenerationSpreadMinimizing {
return fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", cfg.TokenGenerationStrategy, flagTokenGenerationStrategy)
}

if cfg.TokenGenerationStrategy == spreadMinimizingTokenGeneration {
if cfg.TokenGenerationStrategy == tokenGenerationSpreadMinimizing {
if cfg.TokensFilePath != "" {
return fmt.Errorf("%q token generation strategy requires %q to be empty", spreadMinimizingTokenGeneration, tokensFilePathFlag)
return fmt.Errorf("%w: strategy requires %q to be empty", ErrSpreadMinimizingValidation, flagTokensFilePath)
}
_, err := ring.NewSpreadMinimizingTokenGenerator(cfg.InstanceID, cfg.InstanceZone, cfg.SpreadMinimizingZones, cfg.SpreadMinimizingJoinRingInOrder)
return err
if err != nil {
return fmt.Errorf("%w: %w", ErrSpreadMinimizingValidation, err)
}
return nil
}

// at this point cfg.TokenGenerationStrategy is not spreadMinimizingTokenGeneration
// at this point cfg.TokenGenerationStrategy is not tokenGenerationSpreadMinimizing
if cfg.SpreadMinimizingJoinRingInOrder {
return fmt.Errorf("%q must be false when using %q token generation strategy", spreadMinimizingJoinRingInOrderFlag, cfg.TokenGenerationStrategy)
return fmt.Errorf("%q must be false when using %q token generation strategy", flagSpreadMinimizingJoinRingInOrder, cfg.TokenGenerationStrategy)
}

return nil
Expand All @@ -107,7 +116,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
f.BoolVar(&cfg.ZoneAwarenessEnabled, prefix+"zone-awareness-enabled", false, "True to enable the zone-awareness and replicate ingested samples across different availability zones."+sharedOptionWithRingClient)
f.Var(&cfg.ExcludedZones, prefix+"excluded-zones", "Comma-separated list of zones to exclude from the ring. Instances in excluded zones will be filtered out from the ring."+sharedOptionWithRingClient)

f.StringVar(&cfg.TokensFilePath, prefix+tokensFilePathFlag, "", fmt.Sprintf("File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup. Must be empty if -%s is set to %q.", prefix+tokenGenerationStrategyFlag, spreadMinimizingTokenGeneration))
f.StringVar(&cfg.TokensFilePath, prefix+flagTokensFilePath, "", fmt.Sprintf("File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup. Must be empty if -%s is set to %q.", prefix+flagTokenGenerationStrategy, tokenGenerationSpreadMinimizing))
f.IntVar(&cfg.NumTokens, prefix+"num-tokens", 128, "Number of tokens for each ingester.")

// Instance flags
Expand All @@ -128,9 +137,9 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
f.DurationVar(&cfg.FinalSleep, prefix+"final-sleep", 0, "Duration to sleep for before exiting, to ensure metrics are scraped.")

// TokenGenerator
f.StringVar(&cfg.TokenGenerationStrategy, prefix+tokenGenerationStrategyFlag, randomTokenGeneration, fmt.Sprintf("Specifies the strategy used for generating tokens for ingesters. Supported values are: %s.", strings.Join([]string{randomTokenGeneration, spreadMinimizingTokenGeneration}, ",")))
f.BoolVar(&cfg.SpreadMinimizingJoinRingInOrder, prefix+spreadMinimizingJoinRingInOrderFlag, false, fmt.Sprintf("True to allow this ingester registering tokens in the ring only after all previous ingesters (with ID lower than the current one) have already been registered. This configuration option is supported only when the token generation strategy is set to %q.", spreadMinimizingTokenGeneration))
f.Var(&cfg.SpreadMinimizingZones, prefix+"spread-minimizing-zones", fmt.Sprintf("Comma-separated list of zones in which spread minimizing strategy is used for token generation. This value must include all zones in which ingesters are deployed, and must not change over time. This configuration is used only when %q is set to %q.", tokenGenerationStrategyFlag, spreadMinimizingTokenGeneration))
f.StringVar(&cfg.TokenGenerationStrategy, prefix+flagTokenGenerationStrategy, tokenGenerationRandom, fmt.Sprintf("Specifies the strategy used for generating tokens for ingesters. Supported values are: %s.", strings.Join([]string{tokenGenerationRandom, tokenGenerationSpreadMinimizing}, ",")))
f.BoolVar(&cfg.SpreadMinimizingJoinRingInOrder, prefix+flagSpreadMinimizingJoinRingInOrder, false, fmt.Sprintf("True to allow this ingester registering tokens in the ring only after all previous ingesters (with ID lower than the current one) have already been registered. This configuration option is supported only when the token generation strategy is set to %q.", tokenGenerationSpreadMinimizing))
f.Var(&cfg.SpreadMinimizingZones, prefix+"spread-minimizing-zones", fmt.Sprintf("Comma-separated list of zones in which spread minimizing strategy is used for token generation. This value must include all zones in which ingesters are deployed, and must not change over time. This configuration is used only when %q is set to %q.", flagTokenGenerationStrategy, tokenGenerationSpreadMinimizing))
}

// ToRingConfig returns a ring.Config based on the ingester
Expand Down Expand Up @@ -187,10 +196,10 @@ func (cfg *RingConfig) ToLifecyclerConfig() ring.LifecyclerConfig {
// an instance of ring.RandomTokenGenerator. Otherwise, nil is returned.
func (cfg *RingConfig) customTokenGenerator() ring.TokenGenerator {
switch cfg.TokenGenerationStrategy {
case spreadMinimizingTokenGeneration:
case tokenGenerationSpreadMinimizing:
tokenGenerator, _ := ring.NewSpreadMinimizingTokenGenerator(cfg.InstanceID, cfg.InstanceZone, cfg.SpreadMinimizingZones, cfg.SpreadMinimizingJoinRingInOrder)
return tokenGenerator
case randomTokenGeneration:
case tokenGenerationRandom:
return ring.NewRandomTokenGenerator()
default:
return nil
Expand Down
63 changes: 40 additions & 23 deletions pkg/ingester/ingester_ring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func TestRingConfig_CustomConfigToLifecyclerConfig(t *testing.T) {
cfg.MinReadyDuration = 3 * time.Minute
cfg.FinalSleep = 2 * time.Minute
cfg.ListenPort = 10
cfg.TokenGenerationStrategy = spreadMinimizingTokenGeneration
cfg.TokenGenerationStrategy = tokenGenerationSpreadMinimizing
cfg.SpreadMinimizingZones = []string{"zone-a", "zone-b", "zone-c"}
cfg.SpreadMinimizingJoinRingInOrder = true

Expand Down Expand Up @@ -114,6 +114,7 @@ func TestRingConfig_CustomConfigToLifecyclerConfig(t *testing.T) {

func TestRingConfig_Validate(t *testing.T) {
tests := map[string]struct {
instanceID string
zone string
tokenGenerationStrategy string
spreadMinimizingJoinRingInOrder bool
Expand All @@ -122,59 +123,75 @@ func TestRingConfig_Validate(t *testing.T) {
tokensFilePath string
}{
"spread-minimizing and correct zones pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
},
"spread-minimizing and a wrong instance-id don't pass validation": {
instanceID: "broken-instance-48pgb", // InstanceID is expected to end up with a number.
zone: instanceZone,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
expectedError: fmt.Errorf("%s: unable to extract instance id", ErrSpreadMinimizingValidation),
},
"spread-minimizing and no spread-minimizing-zones zone don't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
expectedError: fmt.Errorf("number of zones 0 is not correct: it must be greater than 0 and less or equal than 8"),
},
"spread-minimizing and a wrong zone don't pass validation": {
instanceID: instanceID,
zone: wrongZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
expectedError: fmt.Errorf("zone %s is not valid", wrongZone),
},
"spread-minimizing and tokens-file-path set don't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
tokensFilePath: "/path/tokens",
expectedError: fmt.Errorf("%q token generation strategy requires %q to be empty", spreadMinimizingTokenGeneration, tokensFilePathFlag),
expectedError: fmt.Errorf("strategy requires %q to be empty", flagTokensFilePath),
},
"spread-minimizing and spread-minimizing-join-ring-in-order pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
spreadMinimizingJoinRingInOrder: true,
},
"random passes validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
},
"random and tokens-file-path set pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
tokensFilePath: "/path/tokens",
},
"random and spread-minimizing-join-ring-in-order don't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
spreadMinimizingJoinRingInOrder: true,
expectedError: fmt.Errorf("%q must be false when using %q token generation strategy", spreadMinimizingJoinRingInOrderFlag, randomTokenGeneration),
expectedError: fmt.Errorf("%q must be false when using %q token generation strategy", flagSpreadMinimizingJoinRingInOrder, tokenGenerationRandom),
},
"unknown token generation doesn't pass validation": {
instanceID: instanceID,
zone: instanceZone,
tokenGenerationStrategy: "bla-bla-tokens",
expectedError: fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", "bla-bla-tokens", tokenGenerationStrategyFlag),
expectedError: fmt.Errorf("unsupported token generation strategy (%q) has been chosen for %s", "bla-bla-tokens", flagTokenGenerationStrategy),
},
}

for _, testData := range tests {
cfg := RingConfig{}
cfg.InstanceID = instanceID
cfg.InstanceID = testData.instanceID
cfg.InstanceZone = testData.zone
cfg.TokenGenerationStrategy = testData.tokenGenerationStrategy
cfg.SpreadMinimizingJoinRingInOrder = testData.spreadMinimizingJoinRingInOrder
Expand All @@ -185,7 +202,7 @@ func TestRingConfig_Validate(t *testing.T) {
require.NoError(t, err)
} else {
require.Error(t, err)
require.Equal(t, testData.expectedError, err)
require.ErrorContains(t, err, testData.expectedError.Error())
}
}
}
Expand All @@ -199,14 +216,14 @@ func TestRingConfig_CustomTokenGenerator(t *testing.T) {
}{
"spread-minimizing and correct zones give a SpreadMinimizingTokenGenerator": {
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingZones: spreadMinimizingZones,
expectedResultStrategy: spreadMinimizingTokenGeneration,
expectedResultStrategy: tokenGenerationSpreadMinimizing,
},
"random gives a RandomTokenGenerator": {
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
expectedResultStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
expectedResultStrategy: tokenGenerationRandom,
},
}

Expand All @@ -217,11 +234,11 @@ func TestRingConfig_CustomTokenGenerator(t *testing.T) {
cfg.TokenGenerationStrategy = testData.tokenGenerationStrategy
cfg.SpreadMinimizingZones = testData.spreadMinimizingZones
lifecyclerConfig := cfg.ToLifecyclerConfig()
if testData.expectedResultStrategy == randomTokenGeneration {
if testData.expectedResultStrategy == tokenGenerationRandom {
tokenGenerator, ok := lifecyclerConfig.RingTokenGenerator.(*ring.RandomTokenGenerator)
assert.True(t, ok)
assert.NotNil(t, tokenGenerator)
} else if testData.expectedResultStrategy == spreadMinimizingTokenGeneration {
} else if testData.expectedResultStrategy == tokenGenerationSpreadMinimizing {
tokenGenerator, ok := lifecyclerConfig.RingTokenGenerator.(*ring.SpreadMinimizingTokenGenerator)
assert.True(t, ok)
assert.NotNil(t, tokenGenerator)
Expand All @@ -240,19 +257,19 @@ func TestRingConfig_SpreadMinimizingJoinRingInOrder(t *testing.T) {
}{
"spread-minimizing and spread-minimizing-join-ring-in-order allow can join": {
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingJoinRingInOrder: true,
expectedCanJoinEnabled: true,
},
"spread-minimizing without spread-minimizing-join-ring-in-order doesn't allow can join": {
zone: instanceZone,
tokenGenerationStrategy: spreadMinimizingTokenGeneration,
tokenGenerationStrategy: tokenGenerationSpreadMinimizing,
spreadMinimizingJoinRingInOrder: false,
expectedCanJoinEnabled: false,
},
"random doesn't allow can join": {
zone: instanceZone,
tokenGenerationStrategy: randomTokenGeneration,
tokenGenerationStrategy: tokenGenerationRandom,
expectedCanJoinEnabled: false,
},
}
Expand Down
9 changes: 8 additions & 1 deletion pkg/mimir/mimir.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,14 @@ func (c *Config) Validate(log log.Logger) error {
return errors.Wrap(err, "invalid ingester_client config")
}
if err := c.Ingester.Validate(log); err != nil {
return errors.Wrap(err, "invalid ingester config")
// We check for "ingester" module here because, as of today, its config has a special mode, that assumes
// passing a unique set of per instance flags, e.g. "-ingester.ring.instance-id".
// Such a scenario breaks the validation of other modules if those flags aren't also passed to each instance (ref
// grafana/mimir#7822). Otherwise, log the fact and move on.
if c.isAnyModuleEnabled(Ingester, Write, All) || !errors.Is(err, ingester.ErrSpreadMinimizingValidation) {
return errors.Wrap(err, "invalid ingester config")
}
level.Debug(log).Log("ingester config is invalid; moving on because the \"ingester\" module is not in this process's targets", "err", err.Error())
}
if err := c.Worker.Validate(); err != nil {
return errors.Wrap(err, "invalid frontend_worker config")
Expand Down
28 changes: 28 additions & 0 deletions pkg/mimir/mimir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,34 @@ func TestConfigValidation(t *testing.T) {
},
expectAnyError: true,
},
{
name: "should fail if ingester ring is misconfigured with spread-minimizing token generation strategy when target is ingester",
getTestConfig: func() *Config {
cfg := newDefaultConfig()
_ = cfg.Target.Set("ingester")
cfg.Ingester.IngesterRing.InstanceID = "" // empty string is not a valid instance-id
cfg.Ingester.IngesterRing.InstanceZone = "zone-1"
cfg.Ingester.IngesterRing.SpreadMinimizingZones = []string{"zone-1", "zone-2"}
cfg.Ingester.IngesterRing.TokenGenerationStrategy = "spread-minimizing"

return cfg
},
expectAnyError: true,
},
{
name: "should pass if ingester ring is misconfigured with spread-minimizing token generation strategy when target is not ingester",
getTestConfig: func() *Config {
cfg := newDefaultConfig()
_ = cfg.Target.Set("distributor")
cfg.Ingester.IngesterRing.InstanceID = "" // empty string is not a valid instance-id
cfg.Ingester.IngesterRing.InstanceZone = "zone-1"
cfg.Ingester.IngesterRing.SpreadMinimizingZones = []string{"zone-1", "zone-2"}
cfg.Ingester.IngesterRing.TokenGenerationStrategy = "spread-minimizing"

return cfg
},
expectAnyError: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
err := tc.getTestConfig().Validate(log.NewNopLogger())
Expand Down
Loading