From 812b2a440c9da68776112976c0a1c735b8f796ff Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 2 Mar 2022 12:12:30 +0100 Subject: [PATCH 1/6] Added multi-zone support to jsonnet Signed-off-by: Marco Pracucci --- operations/mimir/ingester.libsonnet | 5 + operations/mimir/mimir.libsonnet | 3 +- operations/mimir/multi-zone.libsonnet | 360 ++++++++++++++++++++++++++ 3 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 operations/mimir/multi-zone.libsonnet diff --git a/operations/mimir/ingester.libsonnet b/operations/mimir/ingester.libsonnet index 4a80a57b3e8..ca57f29feb6 100644 --- a/operations/mimir/ingester.libsonnet +++ b/operations/mimir/ingester.libsonnet @@ -21,6 +21,11 @@ 'ingester.ring.heartbeat-period': '15s', 'ingester.ring.unregister-on-shutdown': $._config.unregister_ingesters_on_shutdown, + // Disable the ring health check in the readiness endpoint so that we can quickly rollout + // multiple ingesters in multi-zone deployments. It's also safe to disable it everywhere, + // given we deploy all ingesters with StatefulSets. + 'ingester.readiness-check-ring-health': false, + // Limits config. 
'runtime-config.file': '%s/overrides.yaml' % $._config.overrides_configmap_mountpoint, 'server.grpc-max-concurrent-streams': 10000, diff --git a/operations/mimir/mimir.libsonnet b/operations/mimir/mimir.libsonnet index 4b2f1cf8c66..eb21ebdceac 100644 --- a/operations/mimir/mimir.libsonnet +++ b/operations/mimir/mimir.libsonnet @@ -22,4 +22,5 @@ // Mimir features (import 'shuffle-sharding.libsonnet') + -(import 'query-sharding.libsonnet') +(import 'query-sharding.libsonnet') + +(import 'multi-zone.libsonnet') diff --git a/operations/mimir/multi-zone.libsonnet b/operations/mimir/multi-zone.libsonnet new file mode 100644 index 00000000000..f6aa4e016bc --- /dev/null +++ b/operations/mimir/multi-zone.libsonnet @@ -0,0 +1,360 @@ +local k = import 'ksonnet-util/kausal.libsonnet'; +local container = k.core.v1.container; +local deployment = k.apps.v1.deployment; +local statefulSet = k.apps.v1.statefulSet; +local podDisruptionBudget = k.policy.v1beta1.podDisruptionBudget; +local volume = k.core.v1.volume; +local roleBinding = k.rbac.v1.roleBinding; +local role = k.rbac.v1.role; +local service = k.core.v1.service; +local serviceAccount = k.core.v1.serviceAccount; +local servicePort = k.core.v1.servicePort; +local policyRule = k.rbac.v1.policyRule; +local podAntiAffinity = deployment.mixin.spec.template.spec.affinity.podAntiAffinity; + +{ + _images+:: { + // See: https://github.com/grafana/rollout-operator + rollout_operator: 'grafana/rollout-operator:v0.1.1', + }, + + _config+: { + cortex_multi_zone_ingester_enabled: false, + cortex_multi_zone_ingester_migration_enabled: false, + cortex_multi_zone_ingester_replication_write_path_enabled: true, + cortex_multi_zone_ingester_replication_read_path_enabled: true, + cortex_multi_zone_ingester_replicas: 0, + cortex_multi_zone_ingester_max_unavailable: 10, + + cortex_multi_zone_store_gateway_enabled: false, + cortex_multi_zone_store_gateway_read_path_enabled: $._config.cortex_multi_zone_store_gateway_enabled, + 
cortex_multi_zone_store_gateway_migration_enabled: false, + cortex_multi_zone_store_gateway_replicas: 0, + cortex_multi_zone_store_gateway_max_unavailable: 10, + + // We can update the queryBlocksStorageConfig only once the migration is over. During the migration + // we don't want to apply these changes to single-zone store-gateways too. + queryBlocksStorageConfig+:: if !$._config.cortex_multi_zone_store_gateway_enabled || !$._config.cortex_multi_zone_store_gateway_read_path_enabled || $._config.cortex_multi_zone_store_gateway_migration_enabled then {} else { + 'store-gateway.sharding-ring.zone-awareness-enabled': 'true', + 'store-gateway.sharding-ring.prefix': 'multi-zone/', + }, + }, + + // + // Zone-aware replication. + // + + distributor_args+:: if !($._config.cortex_multi_zone_ingester_enabled && $._config.cortex_multi_zone_ingester_replication_write_path_enabled) then {} else { + 'ingester.ring.zone-awareness-enabled': 'true', + }, + + ruler_args+:: ( + if !($._config.cortex_multi_zone_ingester_enabled && $._config.cortex_multi_zone_ingester_replication_write_path_enabled) then {} else { + 'ingester.ring.zone-awareness-enabled': 'true', + } + ) + ( + // During the migration, if read path switch is enabled we need to apply changes directly to rulers instead of queryBlocksStorageConfig. 
+ if !($._config.cortex_multi_zone_store_gateway_enabled && $._config.cortex_multi_zone_store_gateway_read_path_enabled && $._config.cortex_multi_zone_store_gateway_migration_enabled) then {} else { + 'store-gateway.sharding-ring.zone-awareness-enabled': 'true', + 'store-gateway.sharding-ring.prefix': 'multi-zone/', + } + ), + + querier_args+:: ( + if !($._config.cortex_multi_zone_ingester_enabled && $._config.cortex_multi_zone_ingester_replication_read_path_enabled) then {} else { + 'ingester.ring.zone-awareness-enabled': 'true', + } + ) + ( + // During the migration, if read path switch is enabled we need to apply changes directly to queriers instead of queryBlocksStorageConfig. + if !($._config.cortex_multi_zone_store_gateway_enabled && $._config.cortex_multi_zone_store_gateway_read_path_enabled && $._config.cortex_multi_zone_store_gateway_migration_enabled) then {} else { + 'store-gateway.sharding-ring.zone-awareness-enabled': 'true', + 'store-gateway.sharding-ring.prefix': 'multi-zone/', + } + ), + + // + // Multi-zone ingesters. 
+ // + + ingester_zone_a_args:: {}, + ingester_zone_b_args:: {}, + ingester_zone_c_args:: {}, + + newIngesterZoneContainer(zone, zone_args):: + $.ingester_container + + container.withArgs($.util.mapToFlags( + $.ingester_args + zone_args + { + 'ingester.ring.zone-awareness-enabled': 'true', + 'ingester.ring.instance-availability-zone': 'zone-%s' % zone, + }, + )), + + newIngesterZoneStatefulSet(zone, container):: + local name = 'ingester-zone-%s' % zone; + + self.newIngesterStatefulSet(name, container, with_anti_affinity=false) + + statefulSet.mixin.metadata.withLabels({ 'rollout-group': 'ingester' }) + + statefulSet.mixin.metadata.withAnnotations({ 'rollout-max-unavailable': std.toString($._config.cortex_multi_zone_ingester_max_unavailable) }) + + statefulSet.mixin.spec.template.metadata.withLabels({ name: name, 'rollout-group': 'ingester' }) + + statefulSet.mixin.spec.selector.withMatchLabels({ name: name, 'rollout-group': 'ingester' }) + + statefulSet.mixin.spec.updateStrategy.withType('OnDelete') + + statefulSet.mixin.spec.template.spec.withTerminationGracePeriodSeconds(1200) + + statefulSet.mixin.spec.withReplicas(std.ceil($._config.cortex_multi_zone_ingester_replicas / 3)) + + { + spec+: + // Allow to schedule 2+ ingesters in the same zone on the same node, but do not schedule 2+ ingesters in + // different zones on the same node. In case of 1 node failure in the Kubernetes cluster, only ingesters + // in 1 zone will be affected. 
+ podAntiAffinity.withRequiredDuringSchedulingIgnoredDuringExecution([ + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.new() + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.mixin.labelSelector.withMatchExpressions([ + { key: 'rollout-group', operator: 'In', values: ['ingester'] }, + { key: 'name', operator: 'NotIn', values: [name] }, + ]) + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.withTopologyKey('kubernetes.io/hostname'), + ]).spec, + }, + + // Creates a headless service for the per-zone ingesters StatefulSet. We don't use it + // but we need to create it anyway because it's responsible for the network identity of + // the StatefulSet pods. For more information, see: + // https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#statefulset-v1-apps + newIngesterZoneService(sts):: + $.util.serviceFor(sts, $._config.service_ignored_labels) + + service.mixin.spec.withClusterIp('None'), // Headless. 
+ + ingester_zone_a_container:: if !$._config.cortex_multi_zone_ingester_enabled then null else + self.newIngesterZoneContainer('a', $.ingester_zone_a_args), + + ingester_zone_a_statefulset: if !$._config.cortex_multi_zone_ingester_enabled then null else + self.newIngesterZoneStatefulSet('a', $.ingester_zone_a_container), + + ingester_zone_a_service: if !$._config.cortex_multi_zone_ingester_enabled then null else + $.newIngesterZoneService($.ingester_zone_a_statefulset), + + ingester_zone_b_container:: if !$._config.cortex_multi_zone_ingester_enabled then null else + self.newIngesterZoneContainer('b', $.ingester_zone_b_args), + + ingester_zone_b_statefulset: if !$._config.cortex_multi_zone_ingester_enabled then null else + self.newIngesterZoneStatefulSet('b', $.ingester_zone_b_container), + + ingester_zone_b_service: if !$._config.cortex_multi_zone_ingester_enabled then null else + $.newIngesterZoneService($.ingester_zone_b_statefulset), + + ingester_zone_c_container:: if !$._config.cortex_multi_zone_ingester_enabled then null else + self.newIngesterZoneContainer('c', $.ingester_zone_c_args), + + ingester_zone_c_statefulset: if !$._config.cortex_multi_zone_ingester_enabled then null else + self.newIngesterZoneStatefulSet('c', $.ingester_zone_c_container), + + ingester_zone_c_service: if !$._config.cortex_multi_zone_ingester_enabled then null else + $.newIngesterZoneService($.ingester_zone_c_statefulset), + + ingester_rollout_pdb: if !$._config.cortex_multi_zone_ingester_enabled then null else + podDisruptionBudget.new() + + podDisruptionBudget.mixin.metadata.withName('ingester-rollout-pdb') + + podDisruptionBudget.mixin.metadata.withLabels({ name: 'ingester-rollout-pdb' }) + + podDisruptionBudget.mixin.spec.selector.withMatchLabels({ 'rollout-group': 'ingester' }) + + podDisruptionBudget.mixin.spec.withMaxUnavailable(1), + + // + // Single-zone ingesters shouldn't be configured when multi-zone is enabled. 
+ // + + ingester_statefulset: + // Remove the default "ingester" StatefulSet if multi-zone is enabled and no migration is in progress. + if $._config.cortex_multi_zone_ingester_enabled && !$._config.cortex_multi_zone_ingester_migration_enabled + then null + else super.ingester_statefulset, + + ingester_service: + // Remove the default "ingester" service if multi-zone is enabled and no migration is in progress. + if $._config.cortex_multi_zone_ingester_enabled && !$._config.cortex_multi_zone_ingester_migration_enabled + then null + else super.ingester_service, + + ingester_pdb: + // Keep it if multi-zone is disabled. + if !$._config.cortex_multi_zone_ingester_enabled + then super.ingester_pdb + // We don’t want Kubernetes to terminate any "ingester" StatefulSet's pod while migration is in progress. + else if $._config.cortex_multi_zone_ingester_migration_enabled + then super.ingester_pdb + podDisruptionBudget.mixin.spec.withMaxUnavailable(0) + // Remove it if multi-zone is enabled and no migration is in progress. + else null, + + // + // Multi-zone store-gateways. + // + + newStoreGatewayZoneContainer(zone):: + $.store_gateway_container + + container.withArgs($.util.mapToFlags($.store_gateway_args { + 'store-gateway.sharding-ring.instance-availability-zone': 'zone-%s' % zone, + 'store-gateway.sharding-ring.zone-awareness-enabled': true, + + // Use a different prefix so that both single-zone and multi-zone store-gateway rings can co-exist. + 'store-gateway.sharding-ring.prefix': 'multi-zone/', + + // Do not unregister from ring at shutdown, so that no blocks re-shuffling occurs during rollouts. 
+ 'store-gateway.sharding-ring.unregister-on-shutdown': false, + })), + + newStoreGatewayZoneStatefulSet(zone, container):: + local name = 'store-gateway-zone-%s' % zone; + + self.newStoreGatewayStatefulSet(name, container) + + statefulSet.mixin.metadata.withLabels({ 'rollout-group': 'store-gateway' }) + + statefulSet.mixin.metadata.withAnnotations({ 'rollout-max-unavailable': std.toString($._config.cortex_multi_zone_store_gateway_max_unavailable) }) + + statefulSet.mixin.spec.template.metadata.withLabels({ name: name, 'rollout-group': 'store-gateway' }) + + statefulSet.mixin.spec.selector.withMatchLabels({ name: name, 'rollout-group': 'store-gateway' }) + + statefulSet.mixin.spec.updateStrategy.withType('OnDelete') + + statefulSet.mixin.spec.withReplicas(std.ceil($._config.cortex_multi_zone_store_gateway_replicas / 3)), + + // Creates a headless service for the per-zone store-gateways StatefulSet. We don't use it + // but we need to create it anyway because it's responsible for the network identity of + // the StatefulSet pods. For more information, see: + // https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#statefulset-v1-apps + newStoreGatewayZoneService(sts):: + $.util.serviceFor(sts, $._config.service_ignored_labels) + + service.mixin.spec.withClusterIp('None'), // Headless. 
+ + local nonRetainablePVCs = { + _config+: { + store_gateway_data_disk_class: + if super.store_gateway_data_disk_class == 'fast' then 'fast-dont-retain' + else super.store_gateway_data_disk_class, + }, + }, + + store_gateway_zone_a_container:: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + self.newStoreGatewayZoneContainer('a'), + + store_gateway_zone_a_statefulset: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + (self + nonRetainablePVCs).newStoreGatewayZoneStatefulSet('a', $.store_gateway_zone_a_container), + + store_gateway_zone_a_service: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + self.newStoreGatewayZoneService($.store_gateway_zone_a_statefulset), + + store_gateway_zone_b_container:: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + self.newStoreGatewayZoneContainer('b'), + + store_gateway_zone_b_statefulset: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + (self + nonRetainablePVCs).newStoreGatewayZoneStatefulSet('b', $.store_gateway_zone_b_container), + + store_gateway_zone_b_service: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + self.newStoreGatewayZoneService($.store_gateway_zone_b_statefulset), + + store_gateway_zone_c_container:: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + self.newStoreGatewayZoneContainer('c'), + + store_gateway_zone_c_statefulset: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + (self + nonRetainablePVCs).newStoreGatewayZoneStatefulSet('c', $.store_gateway_zone_c_container), + + store_gateway_zone_c_service: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + self.newStoreGatewayZoneService($.store_gateway_zone_c_statefulset), + + // Create a service backed by all store-gateway replicas (in all zones). + // This service is used to access the store-gateway admin UI. 
+ store_gateway_multi_zone_service: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + local name = 'store-gateway-multi-zone'; + local labels = { 'rollout-group': 'store-gateway' }; + local ports = [ + servicePort.newNamed(name='store-gateway-http-metrics', port=80, targetPort=80) + + servicePort.withProtocol('TCP'), + ]; + + service.new(name, labels, ports) + + service.mixin.metadata.withLabels({ name: name }), + + store_gateway_rollout_pdb: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + podDisruptionBudget.new() + + podDisruptionBudget.mixin.metadata.withName('store-gateway-rollout-pdb') + + podDisruptionBudget.mixin.metadata.withLabels({ name: 'store-gateway-rollout-pdb' }) + + podDisruptionBudget.mixin.spec.selector.withMatchLabels({ 'rollout-group': 'store-gateway' }) + + podDisruptionBudget.mixin.spec.withMaxUnavailable(1), + + // + // Single-zone store-gateways shouldn't be configured when multi-zone is enabled. + // + + store_gateway_statefulset: + // Remove the default store-gateway StatefulSet if multi-zone is enabled and no migration is in progress. + if $._config.cortex_multi_zone_store_gateway_enabled && !$._config.cortex_multi_zone_store_gateway_migration_enabled + then null + else super.store_gateway_statefulset, + + store_gateway_service: + // Remove the default store-gateway service if multi-zone is enabled and no migration is in progress. + if $._config.cortex_multi_zone_store_gateway_enabled && !$._config.cortex_multi_zone_store_gateway_migration_enabled + then null + else super.store_gateway_service, + + store_gateway_pdb: + // Remove the default store-gateway PodDisruptionBudget if multi-zone is enabled and no migration is in progress. + if $._config.cortex_multi_zone_store_gateway_enabled && !$._config.cortex_multi_zone_store_gateway_migration_enabled + then null + else super.store_gateway_pdb, + + // + // Rollout operator. 
+ // + + local rollout_operator_enabled = $._config.cortex_multi_zone_ingester_enabled || $._config.cortex_multi_zone_store_gateway_enabled, + + rollout_operator_args:: { + 'kubernetes.namespace': $._config.namespace, + }, + + rollout_operator_container:: + container.new('rollout-operator', $._images.rollout_operator) + + container.withArgsMixin($.util.mapToFlags($.rollout_operator_args)) + + container.withPorts([ + k.core.v1.containerPort.new('http-metrics', 8001), + ]) + + $.util.resourcesRequests('100m', '100Mi') + + $.util.resourcesLimits('1', '200Mi') + + container.mixin.readinessProbe.httpGet.withPath('/ready') + + container.mixin.readinessProbe.httpGet.withPort(8001) + + container.mixin.readinessProbe.withInitialDelaySeconds(5) + + container.mixin.readinessProbe.withTimeoutSeconds(1), + + rollout_operator_deployment: if !rollout_operator_enabled then null else + deployment.new('rollout-operator', 1, [$.rollout_operator_container]) + + deployment.mixin.metadata.withName('rollout-operator') + + deployment.mixin.spec.template.spec.withImagePullSecrets({ name: $.pentagon.gcr_secret_name }) + + deployment.mixin.spec.template.spec.withServiceAccountName('rollout-operator') + + // Ensure Kubernetes doesn't run 2 operators at the same time. 
+ deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), + + rollout_operator_role: if !rollout_operator_enabled then null else + role.new('rollout-operator-role') + + role.mixin.metadata.withNamespace($._config.namespace) + + role.withRulesMixin([ + policyRule.withApiGroups('') + + policyRule.withResources(['pods']) + + policyRule.withVerbs(['list', 'get', 'watch', 'delete']), + policyRule.withApiGroups('apps') + + policyRule.withResources(['statefulsets']) + + policyRule.withVerbs(['list', 'get', 'watch']), + policyRule.withApiGroups('apps') + + policyRule.withResources(['statefulsets/status']) + + policyRule.withVerbs(['update']), + ]), + + rollout_operator_rolebinding: if !rollout_operator_enabled then null else + roleBinding.new('rollout-operator-rolebinding') + + roleBinding.mixin.metadata.withNamespace($._config.namespace) + + roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') + + roleBinding.mixin.roleRef.withKind('Role') + + roleBinding.mixin.roleRef.withName('rollout-operator-role') + + roleBinding.withSubjectsMixin({ + kind: 'ServiceAccount', + name: 'rollout-operator', + namespace: $._config.namespace, + }), + + rollout_operator_service_account: if !rollout_operator_enabled then null else + serviceAccount.new('rollout-operator'), +} From d0a510d107f04c3686ed9eb9eab5851949e5a88a Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 2 Mar 2022 12:35:32 +0100 Subject: [PATCH 2/6] Added CHANGELOG entry Signed-off-by: Marco Pracucci --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ce8518134e..46a0139d7dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ ### Jsonnet +* [FEATURE] Added multi-zone ingesters and store-gateways support. #1352 + ### Mimirtool * [CHANGE] `analyse` command has been renamed to `analyze`. 
#1318 From 00f7961a8ba5539228444539997fe163c64a441f Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 2 Mar 2022 12:43:54 +0100 Subject: [PATCH 3/6] Added jsonnet tests Signed-off-by: Marco Pracucci --- CHANGELOG.md | 1 + .../mimir-tests/test-defaults-generated.yaml | 1 + ...est-disable-chunk-streaming-generated.yaml | 1 + .../mimir-tests/test-gossip-generated.yaml | 1 + .../test-gossip-multikv-generated.yaml | 1 + .../test-multi-zone-generated.yaml | 2421 +++++++++++++++ ...zone-with-ongoing-migration-generated.yaml | 2692 +++++++++++++++++ ...-multi-zone-with-ongoing-migration.jsonnet | 29 + .../mimir-tests/test-multi-zone.jsonnet | 27 + .../test-query-sharding-generated.yaml | 1 + .../test-shuffle-sharding-generated.yaml | 1 + .../test-storage-azure-generated.yaml | 1 + .../test-storage-gcs-generated.yaml | 1 + .../test-storage-s3-generated.yaml | 1 + operations/mimir/multi-zone.libsonnet | 1 - 15 files changed, 5179 insertions(+), 1 deletion(-) create mode 100644 operations/mimir-tests/test-multi-zone-generated.yaml create mode 100644 operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml create mode 100644 operations/mimir-tests/test-multi-zone-with-ongoing-migration.jsonnet create mode 100644 operations/mimir-tests/test-multi-zone.jsonnet diff --git a/CHANGELOG.md b/CHANGELOG.md index 46a0139d7dd..8d663782832 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ### Jsonnet +* [CHANGE] Disabled `-ingester.readiness-check-ring-health`. #1352 * [FEATURE] Added multi-zone ingesters and store-gateways support. 
#1352 ### Mimirtool diff --git a/operations/mimir-tests/test-defaults-generated.yaml b/operations/mimir-tests/test-defaults-generated.yaml index 62faaa33177..10afb56ba27 100644 --- a/operations/mimir-tests/test-defaults-generated.yaml +++ b/operations/mimir-tests/test-defaults-generated.yaml @@ -1009,6 +1009,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml b/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml index 421b38f3ef3..40f252fd139 100644 --- a/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml +++ b/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml @@ -1327,6 +1327,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir-tests/test-gossip-generated.yaml b/operations/mimir-tests/test-gossip-generated.yaml index bcd125514b4..e64de8cf753 100644 --- a/operations/mimir-tests/test-gossip-generated.yaml +++ b/operations/mimir-tests/test-gossip-generated.yaml @@ -1368,6 +1368,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m - -ingester.ring.num-tokens=512 diff --git 
a/operations/mimir-tests/test-gossip-multikv-generated.yaml b/operations/mimir-tests/test-gossip-multikv-generated.yaml index 3d63bbeb3f3..9353bc057b4 100644 --- a/operations/mimir-tests/test-gossip-multikv-generated.yaml +++ b/operations/mimir-tests/test-gossip-multikv-generated.yaml @@ -1380,6 +1380,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir-tests/test-multi-zone-generated.yaml b/operations/mimir-tests/test-multi-zone-generated.yaml new file mode 100644 index 00000000000..5ac94a45c92 --- /dev/null +++ b/operations/mimir-tests/test-multi-zone-generated.yaml @@ -0,0 +1,2421 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: default +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: ingester-rollout-pdb + name: ingester-rollout-pdb + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + rollout-group: ingester +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: store-gateway-rollout-pdb + name: store-gateway-rollout-pdb + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + rollout-group: store-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: consul-sidekick + namespace: default +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rollout-operator + namespace: default +--- +apiVersion: v1 +data: + consul-config.json: '{"leave_on_terminate": true, "raft_snapshot_threshold": 128, + "raft_trailing_logs": 10000, "telemetry": {"dogstatsd_addr": "127.0.0.1:9125"}}' + mapping: | + mappings: + - match: consul.*.runtime.* + name: consul_runtime + labels: + type: $2 + - match: 
consul.runtime.total_gc_pause_ns + name: consul_runtime_total_gc_pause_ns + labels: + type: $2 + - match: consul.consul.health.service.query-tag.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3 + - match: consul.consul.health.service.query-tag.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4 + - match: consul.consul.health.service.query-tag.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10.$11 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10.$11.$12 + - match: consul.consul.catalog.deregister + name: consul_catalog_deregister + labels: {} + - match: consul.consul.dns.domain_query.*.*.*.*.* + name: consul_dns_domain_query + labels: + query: $1.$2.$3.$4.$5 + - match: consul.consul.health.service.not-found.* + name: consul_health_service_not_found + labels: + query: $1 + - match: consul.consul.health.service.query.* + name: consul_health_service_query + labels: + query: $1 + - 
match: consul.*.memberlist.health.score + name: consul_memberlist_health_score + labels: {} + - match: consul.serf.queue.* + name: consul_serf_events + labels: + type: $1 + - match: consul.serf.snapshot.appendLine + name: consul_serf_snapshot_appendLine + labels: + type: $1 + - match: consul.serf.coordinate.adjustment-ms + name: consul_serf_coordinate_adjustment_ms + labels: {} + - match: consul.consul.rpc.query + name: consul_rpc_query + labels: {} + - match: consul.*.consul.session_ttl.active + name: consul_session_ttl_active + labels: {} + - match: consul.raft.rpc.* + name: consul_raft_rpc + labels: + type: $1 + - match: consul.raft.rpc.appendEntries.storeLogs + name: consul_raft_rpc_appendEntries_storeLogs + labels: + type: $1 + - match: consul.consul.fsm.persist + name: consul_fsm_persist + labels: {} + - match: consul.raft.fsm.apply + name: consul_raft_fsm_apply + labels: {} + - match: consul.raft.leader.lastContact + name: consul_raft_leader_lastcontact + labels: {} + - match: consul.raft.leader.dispatchLog + name: consul_raft_leader_dispatchLog + labels: {} + - match: consul.raft.commitTime + name: consul_raft_commitTime + labels: {} + - match: consul.raft.replication.appendEntries.logs.*.*.*.* + name: consul_raft_replication_appendEntries_logs + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.raft.replication.appendEntries.rpc.*.*.*.* + name: consul_raft_replication_appendEntries_rpc + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.raft.replication.heartbeat.*.*.*.* + name: consul_raft_replication_heartbeat + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.consul.rpc.request + name: consul_rpc_requests + labels: {} + - match: consul.consul.rpc.accept_conn + name: consul_rpc_accept_conn + labels: {} + - match: consul.memberlist.udp.* + name: consul_memberlist_udp + labels: + type: $1 + - match: consul.memberlist.tcp.* + name: consul_memberlist_tcp + labels: + type: $1 + - match: consul.memberlist.gossip + name: consul_memberlist_gossip 
+ labels: {} + - match: consul.memberlist.probeNode + name: consul_memberlist_probenode + labels: {} + - match: consul.memberlist.pushPullNode + name: consul_memberlist_pushpullnode + labels: {} + - match: consul.http.* + name: consul_http_request + labels: + method: $1 + path: / + - match: consul.http.*.* + name: consul_http_request + labels: + method: $1 + path: /$2 + - match: consul.http.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3 + - match: consul.http.*.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3/$4 + - match: consul.http.*.*.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3/$4/$5 + - match: consul.consul.leader.barrier + name: consul_leader_barrier + labels: {} + - match: consul.consul.leader.reconcileMember + name: consul_leader_reconcileMember + labels: {} + - match: consul.consul.leader.reconcile + name: consul_leader_reconcile + labels: {} + - match: consul.consul.fsm.coordinate.batch-update + name: consul_fsm_coordinate_batch_update + labels: {} + - match: consul.consul.fsm.autopilot + name: consul_fsm_autopilot + labels: {} + - match: consul.consul.fsm.kvs.cas + name: consul_fsm_kvs_cas + labels: {} + - match: consul.consul.fsm.register + name: consul_fsm_register + labels: {} + - match: consul.consul.fsm.deregister + name: consul_fsm_deregister + labels: {} + - match: consul.consul.fsm.tombstone.reap + name: consul_fsm_tombstone_reap + labels: {} + - match: consul.consul.catalog.register + name: consul_catalog_register + labels: {} + - match: consul.consul.catalog.deregister + name: consul_catalog_deregister + labels: {} + - match: consul.consul.leader.reapTombstones + name: consul_leader_reapTombstones + labels: {} +kind: ConfigMap +metadata: + name: consul + namespace: default +--- +apiVersion: v1 +data: + overrides.yaml: | + overrides: {} +kind: ConfigMap +metadata: + name: overrides + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: 
+ name: consul-sidekick + namespace: default +rules: +- apiGroups: + - "" + - extensions + - apps + resources: + - pods + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: rollout-operator-role + namespace: default +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - get + - watch + - delete +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - list + - get + - watch +- apiGroups: + - apps + resources: + - statefulsets/status + verbs: + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: consul-sidekick + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: consul-sidekick +subjects: +- kind: ServiceAccount + name: consul-sidekick + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: rollout-operator-rolebinding + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: rollout-operator-role +subjects: +- kind: ServiceAccount + name: rollout-operator + namespace: default +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + clusterIP: None + ports: + - name: alertmanager-http-metrics + port: 8080 + targetPort: 8080 + - name: alertmanager-grpc + port: 9095 + targetPort: 9095 + selector: + name: alertmanager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: consul + name: consul + namespace: default +spec: + ports: + - name: consul-server + port: 8300 + targetPort: 8300 + - name: consul-serf + port: 8301 + targetPort: 8301 + - name: consul-client + port: 8400 + targetPort: 8400 + - name: consul-api + port: 8500 + targetPort: 8500 + - name: statsd-exporter-http-metrics + port: 8000 + targetPort: 8000 + - name: consul-exporter-http-metrics + port: 9107 + targetPort: 9107 + selector: + name: consul +--- +apiVersion: 
v1 +kind: Service +metadata: + labels: + name: distributor + name: distributor + namespace: default +spec: + clusterIP: None + ports: + - name: distributor-http-metrics + port: 8080 + targetPort: 8080 + - name: distributor-grpc + port: 9095 + targetPort: 9095 + selector: + name: distributor +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-a + name: ingester-zone-a + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + selector: + name: ingester-zone-a + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-b + name: ingester-zone-b + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + selector: + name: ingester-zone-b + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-c + name: ingester-zone-c + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + selector: + name: ingester-zone-c + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached + name: memcached + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-frontend + name: memcached-frontend + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-frontend +--- +apiVersion: v1 +kind: Service +metadata: + 
labels: + name: memcached-index-queries + name: memcached-index-queries + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-index-queries +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-metadata + name: memcached-metadata + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-metadata +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: querier + name: querier + namespace: default +spec: + ports: + - name: querier-http-metrics + port: 8080 + targetPort: 8080 + - name: querier-grpc + port: 9095 + targetPort: 9095 + selector: + name: querier +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-frontend + name: query-frontend + namespace: default +spec: + ports: + - name: query-frontend-http-metrics + port: 8080 + targetPort: 8080 + - name: query-frontend-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-frontend + name: query-frontend-discovery + namespace: default +spec: + clusterIP: None + ports: + - name: query-frontend-http-metrics + port: 8080 + targetPort: 8080 + - name: query-frontend-grpc + port: 9095 + targetPort: 9095 + publishNotReadyAddresses: true + selector: + name: query-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-scheduler + name: query-scheduler + namespace: default +spec: + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-scheduler + name: query-scheduler-discovery 
+ namespace: default +spec: + clusterIP: None + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + publishNotReadyAddresses: true + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ruler + name: ruler + namespace: default +spec: + ports: + - name: ruler-http-metrics + port: 8080 + targetPort: 8080 + - name: ruler-grpc + port: 9095 + targetPort: 9095 + selector: + name: ruler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-multi-zone + name: store-gateway-multi-zone + namespace: default +spec: + ports: + - name: store-gateway-http-metrics + port: 80 + protocol: TCP + targetPort: 80 + selector: + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-a + name: store-gateway-zone-a + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + selector: + name: store-gateway-zone-a + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-b + name: store-gateway-zone-b + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + selector: + name: store-gateway-zone-b + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-c + name: store-gateway-zone-c + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + selector: + name: store-gateway-zone-c + rollout-group: store-gateway +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: consul + 
namespace: default +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: consul + template: + metadata: + annotations: + consul-hash: e56ef6821a3557604caccaf6d5820239 + labels: + name: consul + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: consul + topologyKey: kubernetes.io/hostname + - labelSelector: + matchLabels: + name: ingester + namespaces: + - default + topologyKey: kubernetes.io/hostname + containers: + - args: + - agent + - -ui + - -server + - -client=0.0.0.0 + - -config-file=/etc/config/consul-config.json + - -bootstrap-expect=1 + - -ui-content-path=/default/consul/ + env: + - name: CHECKPOINT_DISABLE + value: "1" + image: consul:1.5.3 + imagePullPolicy: IfNotPresent + name: consul + ports: + - containerPort: 8300 + name: server + - containerPort: 8301 + name: serf + - containerPort: 8400 + name: client + - containerPort: 8500 + name: api + resources: + requests: + cpu: "4" + memory: 4Gi + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --namespace=$(POD_NAMESPACE) + - --pod-name=$(POD_NAME) + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + image: weaveworks/consul-sidekick:master-f18ad13 + imagePullPolicy: IfNotPresent + name: sidekick + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --web.listen-address=:8000 + - --statsd.mapping-config=/etc/config/mapping + image: prom/statsd-exporter:v0.12.2 + imagePullPolicy: IfNotPresent + name: statsd-exporter + ports: + - containerPort: 8000 + name: http-metrics + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --consul.server=localhost:8500 + - --web.listen-address=:9107 + - 
--consul.timeout=1s + - --no-consul.health-summary + - --consul.allow_stale + image: prom/consul-exporter:v0.5.0 + imagePullPolicy: IfNotPresent + name: consul-exporter + ports: + - containerPort: 9107 + name: http-metrics + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + serviceAccount: consul-sidekick + volumes: + - configMap: + name: consul + name: consul + - emptyDir: + medium: Memory + name: data +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: distributor + namespace: default +spec: + minReadySeconds: 10 + replicas: 3 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: distributor + strategy: + rollingUpdate: + maxSurge: 5 + maxUnavailable: 1 + template: + metadata: + labels: + name: distributor + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: distributor + topologyKey: kubernetes.io/hostname + containers: + - args: + - -distributor.extend-writes=true + - -distributor.ha-tracker.enable=true + - -distributor.ha-tracker.enable-for-all-users=true + - -distributor.ha-tracker.etcd.endpoints=etcd-client.default.svc.cluster.local.:2379 + - -distributor.ha-tracker.prefix=prom_ha/ + - -distributor.ha-tracker.store=etcd + - -distributor.health-check-ingesters=true + - -distributor.ingestion-burst-size=200000 + - -distributor.ingestion-rate-limit=10000 + - -distributor.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -distributor.ring.prefix= + - -distributor.ring.store=consul + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.zone-awareness-enabled=true + - -mem-ballast-size-bytes=1073741824 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.max-connection-age=2m + - 
-server.grpc.keepalive.max-connection-age-grace=5m + - -server.grpc.keepalive.max-connection-idle=1m + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=distributor + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: distributor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 4Gi + requests: + cpu: "2" + memory: 2Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: querier + namespace: default +spec: + minReadySeconds: 10 + replicas: 6 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: querier + strategy: + rollingUpdate: + maxSurge: 5 + maxUnavailable: 1 + template: + metadata: + labels: + name: querier + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: querier + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -distributor.health-check-ingesters=true + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= 
+ - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.zone-awareness-enabled=true + - -mem-ballast-size-bytes=268435456 + - -querier.frontend-client.grpc-max-send-msg-size=104857600 + - -querier.max-concurrent=8 + - -querier.query-ingesters-within=13h + - -querier.query-store-after=12h + - -querier.scheduler-address=query-scheduler-discovery.default.svc.cluster.local:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -server.http-write-timeout=1m + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -store.max-query-length=768h + - -target=querier + env: + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "1024" + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: querier + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 24Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-frontend + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-frontend + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + template: + metadata: + labels: + name: query-frontend + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: 
+ name: query-frontend + topologyKey: kubernetes.io/hostname + containers: + - args: + - -query-frontend.align-querier-with-step=false + - -query-frontend.cache-results=true + - -query-frontend.max-cache-freshness=10m + - -query-frontend.results-cache.backend=memcached + - -query-frontend.results-cache.memcached.addresses=dnssrvnoa+memcached-frontend.default.svc.cluster.local:11211 + - -query-frontend.results-cache.memcached.timeout=500ms + - -query-frontend.scheduler-address=query-scheduler-discovery.default.svc.cluster.local:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-recv-msg-size-bytes=104857600 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -server.http-write-timeout=1m + - -store.max-query-length=12000h + - -target=query-frontend + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: query-frontend + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 1200Mi + requests: + cpu: "2" + memory: 600Mi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-scheduler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-scheduler + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: query-scheduler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: query-scheduler + topologyKey: kubernetes.io/hostname + containers: + - args: + - -query-scheduler.max-outstanding-requests-per-tenant=100 + - 
-server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=query-scheduler + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: query-scheduler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 1Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rollout-operator + namespace: default +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: rollout-operator + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: rollout-operator + spec: + containers: + - args: + - -kubernetes.namespace=default + image: grafana/rollout-operator:v0.1.1 + imagePullPolicy: IfNotPresent + name: rollout-operator + ports: + - containerPort: 8001 + name: http-metrics + readinessProbe: + httpGet: + path: /ready + port: 8001 + initialDelaySeconds: 5 + timeoutSeconds: 1 + resources: + limits: + cpu: "1" + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + serviceAccountName: rollout-operator +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ruler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: ruler + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: ruler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: ruler + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - 
-blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -distributor.extend-writes=true + - -distributor.health-check-ingesters=true + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.zone-awareness-enabled=true + - -querier.query-ingesters-within=13h + - -querier.query-store-after=12h + - -ruler-storage.backend=gcs + - -ruler-storage.gcs.bucket-name=rules-bucket + - -ruler.alertmanager-url=http://alertmanager.default.svc.cluster.local/alertmanager + - -ruler.max-rule-groups-per-tenant=35 + - -ruler.max-rules-per-rule-group=20 + - -ruler.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ruler.ring.store=consul + - -ruler.rule-path=/rules + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -store.max-query-length=768h + - -target=ruler + image: grafana/mimir:mimir-2.0.0 + 
imagePullPolicy: IfNotPresent + name: ruler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + cpu: "16" + memory: 16Gi + requests: + cpu: "1" + memory: 6Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + terminationGracePeriodSeconds: 600 + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: alertmanager + serviceName: alertmanager + template: + metadata: + labels: + name: alertmanager + spec: + containers: + - args: + - -alertmanager-storage.backend=gcs + - -alertmanager-storage.gcs.bucket-name=alerts-bucket + - -alertmanager.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -alertmanager.sharding-ring.replication-factor=3 + - -alertmanager.sharding-ring.store=consul + - -alertmanager.storage.path=/data + - -alertmanager.web.external-url=http://test/alertmanager + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=alertmanager + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: alertmanager + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + requests: + cpu: 100m + memory: 1Gi + volumeMounts: + - mountPath: /data + name: alertmanager-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - 
configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: alertmanager-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: compactor + name: compactor + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: compactor + serviceName: compactor + template: + metadata: + labels: + name: compactor + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -compactor.block-ranges=2h,12h,24h + - -compactor.blocks-retention-period=0 + - -compactor.cleanup-interval=15m + - -compactor.compaction-concurrency=1 + - -compactor.compaction-interval=30m + - -compactor.compactor-tenant-shard-size=1 + - -compactor.data-dir=/data + - -compactor.deletion-delay=2h + - -compactor.max-closing-blocks-concurrency=2 + - -compactor.max-opening-blocks-concurrency=4 + - -compactor.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -compactor.ring.prefix= + - -compactor.ring.store=consul + - -compactor.ring.wait-stability-min-duration=1m + - -compactor.split-and-merge-shards=0 + - -compactor.split-groups=1 + - -compactor.symbols-flushers-concurrency=4 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=compactor + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: compactor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 6Gi + requests: + cpu: 1 + memory: 6Gi + volumeMounts: + - mountPath: 
/data + name: compactor-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: compactor-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 250Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-a + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-a + rollout-group: ingester + serviceName: ingester-zone-a + template: + metadata: + labels: + name: ingester-zone-a + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-a + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-a + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - 
-ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-b + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-b + rollout-group: ingester + serviceName: ingester-zone-b + template: + metadata: + labels: + name: ingester-zone-b + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + 
operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-b + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-b + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + 
runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-c + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-c + rollout-group: ingester + serviceName: ingester-zone-c + template: + metadata: + labels: + name: ingester-zone-c + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-c + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-c + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.tokens-file-path=/data/tokens + 
- -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached + serviceName: memcached + template: + metadata: + labels: + name: memcached + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 6144 + - -I 1m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 9Gi + requests: + cpu: 500m + memory: 6552Mi + - 
args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-frontend + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-frontend + serviceName: memcached-frontend + template: + metadata: + labels: + name: memcached-frontend + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-frontend + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 1024 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 1536Mi + requests: + cpu: 500m + memory: 1329Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-index-queries + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-index-queries + serviceName: memcached-index-queries + template: + metadata: + labels: + name: memcached-index-queries + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-index-queries + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 
1536Mi + requests: + cpu: 500m + memory: 1329Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-metadata + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + name: memcached-metadata + serviceName: memcached-metadata + template: + metadata: + labels: + name: memcached-metadata + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-metadata + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 512 + - -I 1m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 768Mi + requests: + cpu: 500m + memory: 715Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-a + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-a + rollout-group: store-gateway + serviceName: store-gateway-zone-a + template: + metadata: + labels: + name: store-gateway-zone-a + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - 
-blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - 
-runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.instance-availability-zone=zone-a + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-b + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-b + rollout-group: store-gateway + serviceName: 
store-gateway-zone-b + template: + metadata: + labels: + name: store-gateway-zone-b + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - 
-blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.instance-availability-zone=zone-b + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet 
+metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-c + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-c + rollout-group: store-gateway + serviceName: store-gateway-zone-c + template: + metadata: + labels: + name: store-gateway-zone-c + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - 
-blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.instance-availability-zone=zone-c + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + 
updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: etcd.database.coreos.com/v1beta2 +kind: EtcdCluster +metadata: + annotations: + etcd.database.coreos.com/scope: clusterwide + name: etcd + namespace: default +spec: + pod: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + etcd_cluster: etcd + topologyKey: kubernetes.io/hostname + annotations: + prometheus.io/port: "2379" + prometheus.io/scrape: "true" + etcdEnv: + - name: ETCD_AUTO_COMPACTION_RETENTION + value: 1h + labels: + name: etcd + resources: + limits: + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + size: 3 + version: 3.3.13 diff --git a/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml b/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml new file mode 100644 index 00000000000..57ab6bfc1ac --- /dev/null +++ b/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml @@ -0,0 +1,2692 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: default +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: ingester-pdb + name: ingester-pdb + namespace: default +spec: + maxUnavailable: 0 + selector: + matchLabels: + name: ingester +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: ingester-rollout-pdb + name: ingester-rollout-pdb + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + rollout-group: ingester +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: store-gateway-pdb + name: store-gateway-pdb + namespace: default +spec: + maxUnavailable: 2 + selector: + matchLabels: + name: store-gateway +--- +apiVersion: 
policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: store-gateway-rollout-pdb + name: store-gateway-rollout-pdb + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + rollout-group: store-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: consul-sidekick + namespace: default +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rollout-operator + namespace: default +--- +apiVersion: v1 +data: + consul-config.json: '{"leave_on_terminate": true, "raft_snapshot_threshold": 128, + "raft_trailing_logs": 10000, "telemetry": {"dogstatsd_addr": "127.0.0.1:9125"}}' + mapping: | + mappings: + - match: consul.*.runtime.* + name: consul_runtime + labels: + type: $2 + - match: consul.runtime.total_gc_pause_ns + name: consul_runtime_total_gc_pause_ns + labels: + type: $2 + - match: consul.consul.health.service.query-tag.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3 + - match: consul.consul.health.service.query-tag.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4 + - match: consul.consul.health.service.query-tag.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10 + - match: 
consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10.$11 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10.$11.$12 + - match: consul.consul.catalog.deregister + name: consul_catalog_deregister + labels: {} + - match: consul.consul.dns.domain_query.*.*.*.*.* + name: consul_dns_domain_query + labels: + query: $1.$2.$3.$4.$5 + - match: consul.consul.health.service.not-found.* + name: consul_health_service_not_found + labels: + query: $1 + - match: consul.consul.health.service.query.* + name: consul_health_service_query + labels: + query: $1 + - match: consul.*.memberlist.health.score + name: consul_memberlist_health_score + labels: {} + - match: consul.serf.queue.* + name: consul_serf_events + labels: + type: $1 + - match: consul.serf.snapshot.appendLine + name: consul_serf_snapshot_appendLine + labels: + type: $1 + - match: consul.serf.coordinate.adjustment-ms + name: consul_serf_coordinate_adjustment_ms + labels: {} + - match: consul.consul.rpc.query + name: consul_rpc_query + labels: {} + - match: consul.*.consul.session_ttl.active + name: consul_session_ttl_active + labels: {} + - match: consul.raft.rpc.* + name: consul_raft_rpc + labels: + type: $1 + - match: consul.raft.rpc.appendEntries.storeLogs + name: consul_raft_rpc_appendEntries_storeLogs + labels: + type: $1 + - match: consul.consul.fsm.persist + name: consul_fsm_persist + labels: {} + - match: consul.raft.fsm.apply + name: consul_raft_fsm_apply + labels: {} + - match: consul.raft.leader.lastContact + name: consul_raft_leader_lastcontact + labels: {} + - match: consul.raft.leader.dispatchLog + name: consul_raft_leader_dispatchLog + labels: {} + - match: consul.raft.commitTime + name: consul_raft_commitTime + labels: {} + - match: consul.raft.replication.appendEntries.logs.*.*.*.* + name: 
consul_raft_replication_appendEntries_logs + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.raft.replication.appendEntries.rpc.*.*.*.* + name: consul_raft_replication_appendEntries_rpc + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.raft.replication.heartbeat.*.*.*.* + name: consul_raft_replication_heartbeat + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.consul.rpc.request + name: consul_rpc_requests + labels: {} + - match: consul.consul.rpc.accept_conn + name: consul_rpc_accept_conn + labels: {} + - match: consul.memberlist.udp.* + name: consul_memberlist_udp + labels: + type: $1 + - match: consul.memberlist.tcp.* + name: consul_memberlist_tcp + labels: + type: $1 + - match: consul.memberlist.gossip + name: consul_memberlist_gossip + labels: {} + - match: consul.memberlist.probeNode + name: consul_memberlist_probenode + labels: {} + - match: consul.memberlist.pushPullNode + name: consul_memberlist_pushpullnode + labels: {} + - match: consul.http.* + name: consul_http_request + labels: + method: $1 + path: / + - match: consul.http.*.* + name: consul_http_request + labels: + method: $1 + path: /$2 + - match: consul.http.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3 + - match: consul.http.*.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3/$4 + - match: consul.http.*.*.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3/$4/$5 + - match: consul.consul.leader.barrier + name: consul_leader_barrier + labels: {} + - match: consul.consul.leader.reconcileMember + name: consul_leader_reconcileMember + labels: {} + - match: consul.consul.leader.reconcile + name: consul_leader_reconcile + labels: {} + - match: consul.consul.fsm.coordinate.batch-update + name: consul_fsm_coordinate_batch_update + labels: {} + - match: consul.consul.fsm.autopilot + name: consul_fsm_autopilot + labels: {} + - match: consul.consul.fsm.kvs.cas + name: consul_fsm_kvs_cas + labels: {} + - match: 
consul.consul.fsm.register + name: consul_fsm_register + labels: {} + - match: consul.consul.fsm.deregister + name: consul_fsm_deregister + labels: {} + - match: consul.consul.fsm.tombstone.reap + name: consul_fsm_tombstone_reap + labels: {} + - match: consul.consul.catalog.register + name: consul_catalog_register + labels: {} + - match: consul.consul.catalog.deregister + name: consul_catalog_deregister + labels: {} + - match: consul.consul.leader.reapTombstones + name: consul_leader_reapTombstones + labels: {} +kind: ConfigMap +metadata: + name: consul + namespace: default +--- +apiVersion: v1 +data: + overrides.yaml: | + overrides: {} +kind: ConfigMap +metadata: + name: overrides + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: consul-sidekick + namespace: default +rules: +- apiGroups: + - "" + - extensions + - apps + resources: + - pods + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: rollout-operator-role + namespace: default +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - get + - watch + - delete +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - list + - get + - watch +- apiGroups: + - apps + resources: + - statefulsets/status + verbs: + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: consul-sidekick + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: consul-sidekick +subjects: +- kind: ServiceAccount + name: consul-sidekick + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: rollout-operator-rolebinding + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: rollout-operator-role +subjects: +- kind: ServiceAccount + name: rollout-operator + namespace: default +--- +apiVersion: v1 +kind: Service +metadata: + labels: + 
name: alertmanager + name: alertmanager + namespace: default +spec: + clusterIP: None + ports: + - name: alertmanager-http-metrics + port: 8080 + targetPort: 8080 + - name: alertmanager-grpc + port: 9095 + targetPort: 9095 + selector: + name: alertmanager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: consul + name: consul + namespace: default +spec: + ports: + - name: consul-server + port: 8300 + targetPort: 8300 + - name: consul-serf + port: 8301 + targetPort: 8301 + - name: consul-client + port: 8400 + targetPort: 8400 + - name: consul-api + port: 8500 + targetPort: 8500 + - name: statsd-exporter-http-metrics + port: 8000 + targetPort: 8000 + - name: consul-exporter-http-metrics + port: 9107 + targetPort: 9107 + selector: + name: consul +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: distributor + name: distributor + namespace: default +spec: + clusterIP: None + ports: + - name: distributor-http-metrics + port: 8080 + targetPort: 8080 + - name: distributor-grpc + port: 9095 + targetPort: 9095 + selector: + name: distributor +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester + name: ingester + namespace: default +spec: + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + selector: + name: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-a + name: ingester-zone-a + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + selector: + name: ingester-zone-a + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-b + name: ingester-zone-b + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + selector: + name: 
ingester-zone-b + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-c + name: ingester-zone-c + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + selector: + name: ingester-zone-c + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached + name: memcached + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-frontend + name: memcached-frontend + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-index-queries + name: memcached-index-queries + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-index-queries +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-metadata + name: memcached-metadata + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-metadata +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: querier + name: querier + namespace: default +spec: + ports: + - name: querier-http-metrics + port: 8080 + targetPort: 8080 + - name: querier-grpc + port: 9095 + targetPort: 9095 + selector: + name: querier +--- +apiVersion: v1 +kind: Service 
+metadata: + labels: + name: query-frontend + name: query-frontend + namespace: default +spec: + ports: + - name: query-frontend-http-metrics + port: 8080 + targetPort: 8080 + - name: query-frontend-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-frontend + name: query-frontend-discovery + namespace: default +spec: + clusterIP: None + ports: + - name: query-frontend-http-metrics + port: 8080 + targetPort: 8080 + - name: query-frontend-grpc + port: 9095 + targetPort: 9095 + publishNotReadyAddresses: true + selector: + name: query-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-scheduler + name: query-scheduler + namespace: default +spec: + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-scheduler + name: query-scheduler-discovery + namespace: default +spec: + clusterIP: None + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + publishNotReadyAddresses: true + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ruler + name: ruler + namespace: default +spec: + ports: + - name: ruler-http-metrics + port: 8080 + targetPort: 8080 + - name: ruler-grpc + port: 9095 + targetPort: 9095 + selector: + name: ruler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway + name: store-gateway + namespace: default +spec: + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + selector: + name: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-multi-zone + name: 
store-gateway-multi-zone + namespace: default +spec: + ports: + - name: store-gateway-http-metrics + port: 80 + protocol: TCP + targetPort: 80 + selector: + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-a + name: store-gateway-zone-a + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + selector: + name: store-gateway-zone-a + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-b + name: store-gateway-zone-b + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + selector: + name: store-gateway-zone-b + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-c + name: store-gateway-zone-c + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + selector: + name: store-gateway-zone-c + rollout-group: store-gateway +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: consul + namespace: default +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: consul + template: + metadata: + annotations: + consul-hash: e56ef6821a3557604caccaf6d5820239 + labels: + name: consul + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: consul + topologyKey: kubernetes.io/hostname + - labelSelector: + matchLabels: + name: ingester + namespaces: + - default + topologyKey: kubernetes.io/hostname + containers: + - args: + - agent + - -ui + - -server + - -client=0.0.0.0 + - 
-config-file=/etc/config/consul-config.json + - -bootstrap-expect=1 + - -ui-content-path=/default/consul/ + env: + - name: CHECKPOINT_DISABLE + value: "1" + image: consul:1.5.3 + imagePullPolicy: IfNotPresent + name: consul + ports: + - containerPort: 8300 + name: server + - containerPort: 8301 + name: serf + - containerPort: 8400 + name: client + - containerPort: 8500 + name: api + resources: + requests: + cpu: "4" + memory: 4Gi + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --namespace=$(POD_NAMESPACE) + - --pod-name=$(POD_NAME) + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + image: weaveworks/consul-sidekick:master-f18ad13 + imagePullPolicy: IfNotPresent + name: sidekick + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --web.listen-address=:8000 + - --statsd.mapping-config=/etc/config/mapping + image: prom/statsd-exporter:v0.12.2 + imagePullPolicy: IfNotPresent + name: statsd-exporter + ports: + - containerPort: 8000 + name: http-metrics + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --consul.server=localhost:8500 + - --web.listen-address=:9107 + - --consul.timeout=1s + - --no-consul.health-summary + - --consul.allow_stale + image: prom/consul-exporter:v0.5.0 + imagePullPolicy: IfNotPresent + name: consul-exporter + ports: + - containerPort: 9107 + name: http-metrics + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + serviceAccount: consul-sidekick + volumes: + - configMap: + name: consul + name: consul + - emptyDir: + medium: Memory + name: data +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: distributor + namespace: default +spec: + minReadySeconds: 10 + replicas: 3 + revisionHistoryLimit: 
10 + selector: + matchLabels: + name: distributor + strategy: + rollingUpdate: + maxSurge: 5 + maxUnavailable: 1 + template: + metadata: + labels: + name: distributor + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: distributor + topologyKey: kubernetes.io/hostname + containers: + - args: + - -distributor.extend-writes=true + - -distributor.ha-tracker.enable=true + - -distributor.ha-tracker.enable-for-all-users=true + - -distributor.ha-tracker.etcd.endpoints=etcd-client.default.svc.cluster.local.:2379 + - -distributor.ha-tracker.prefix=prom_ha/ + - -distributor.ha-tracker.store=etcd + - -distributor.health-check-ingesters=true + - -distributor.ingestion-burst-size=200000 + - -distributor.ingestion-rate-limit=10000 + - -distributor.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -distributor.ring.prefix= + - -distributor.ring.store=consul + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.zone-awareness-enabled=true + - -mem-ballast-size-bytes=1073741824 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.max-connection-age=2m + - -server.grpc.keepalive.max-connection-age-grace=5m + - -server.grpc.keepalive.max-connection-idle=1m + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=distributor + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: distributor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 4Gi + requests: + cpu: "2" + memory: 2Gi + volumeMounts: + - 
mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: querier + namespace: default +spec: + minReadySeconds: 10 + replicas: 6 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: querier + strategy: + rollingUpdate: + maxSurge: 5 + maxUnavailable: 1 + template: + metadata: + labels: + name: querier + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: querier + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -distributor.health-check-ingesters=true + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.zone-awareness-enabled=true + - -mem-ballast-size-bytes=268435456 + - -querier.frontend-client.grpc-max-send-msg-size=104857600 + - -querier.max-concurrent=8 + - -querier.query-ingesters-within=13h + - -querier.query-store-after=12h + - -querier.scheduler-address=query-scheduler-discovery.default.svc.cluster.local:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -server.http-write-timeout=1m + - 
-store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -store.max-query-length=768h + - -target=querier + env: + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "1024" + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: querier + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 24Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-frontend + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-frontend + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + template: + metadata: + labels: + name: query-frontend + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: query-frontend + topologyKey: kubernetes.io/hostname + containers: + - args: + - -query-frontend.align-querier-with-step=false + - -query-frontend.cache-results=true + - -query-frontend.max-cache-freshness=10m + - -query-frontend.results-cache.backend=memcached + - -query-frontend.results-cache.memcached.addresses=dnssrvnoa+memcached-frontend.default.svc.cluster.local:11211 + - -query-frontend.results-cache.memcached.timeout=500ms + - -query-frontend.scheduler-address=query-scheduler-discovery.default.svc.cluster.local:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-recv-msg-size-bytes=104857600 + - 
-server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -server.http-write-timeout=1m + - -store.max-query-length=12000h + - -target=query-frontend + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: query-frontend + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 1200Mi + requests: + cpu: "2" + memory: 600Mi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-scheduler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-scheduler + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: query-scheduler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: query-scheduler + topologyKey: kubernetes.io/hostname + containers: + - args: + - -query-scheduler.max-outstanding-requests-per-tenant=100 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=query-scheduler + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: query-scheduler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 1Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: rollout-operator + namespace: default +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: rollout-operator + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: rollout-operator + spec: + containers: + - args: + - -kubernetes.namespace=default + image: grafana/rollout-operator:v0.1.1 + imagePullPolicy: IfNotPresent + name: rollout-operator + ports: + - containerPort: 8001 + name: http-metrics + readinessProbe: + httpGet: + path: /ready + port: 8001 + initialDelaySeconds: 5 + timeoutSeconds: 1 + resources: + limits: + cpu: "1" + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + serviceAccountName: rollout-operator +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ruler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: ruler + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: ruler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: ruler + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -distributor.extend-writes=true + - -distributor.health-check-ingesters=true + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + 
- -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.zone-awareness-enabled=true + - -querier.query-ingesters-within=13h + - -querier.query-store-after=12h + - -ruler-storage.backend=gcs + - -ruler-storage.gcs.bucket-name=rules-bucket + - -ruler.alertmanager-url=http://alertmanager.default.svc.cluster.local/alertmanager + - -ruler.max-rule-groups-per-tenant=35 + - -ruler.max-rules-per-rule-group=20 + - -ruler.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ruler.ring.store=consul + - -ruler.rule-path=/rules + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -store.max-query-length=768h + - -target=ruler + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ruler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + cpu: "16" + memory: 16Gi + requests: + cpu: "1" + memory: 6Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + terminationGracePeriodSeconds: 600 + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: 
alertmanager + serviceName: alertmanager + template: + metadata: + labels: + name: alertmanager + spec: + containers: + - args: + - -alertmanager-storage.backend=gcs + - -alertmanager-storage.gcs.bucket-name=alerts-bucket + - -alertmanager.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -alertmanager.sharding-ring.replication-factor=3 + - -alertmanager.sharding-ring.store=consul + - -alertmanager.storage.path=/data + - -alertmanager.web.external-url=http://test/alertmanager + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=alertmanager + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: alertmanager + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + requests: + cpu: 100m + memory: 1Gi + volumeMounts: + - mountPath: /data + name: alertmanager-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: alertmanager-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: compactor + name: compactor + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: compactor + serviceName: compactor + template: + metadata: + labels: + name: compactor + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - 
-blocks-storage.gcs.bucket-name=blocks-bucket + - -compactor.block-ranges=2h,12h,24h + - -compactor.blocks-retention-period=0 + - -compactor.cleanup-interval=15m + - -compactor.compaction-concurrency=1 + - -compactor.compaction-interval=30m + - -compactor.compactor-tenant-shard-size=1 + - -compactor.data-dir=/data + - -compactor.deletion-delay=2h + - -compactor.max-closing-blocks-concurrency=2 + - -compactor.max-opening-blocks-concurrency=4 + - -compactor.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -compactor.ring.prefix= + - -compactor.ring.store=consul + - -compactor.ring.wait-stability-min-duration=1m + - -compactor.split-and-merge-shards=0 + - -compactor.split-groups=1 + - -compactor.symbols-flushers-concurrency=4 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=compactor + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: compactor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 6Gi + requests: + cpu: 1 + memory: 6Gi + volumeMounts: + - mountPath: /data + name: compactor-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: compactor-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 250Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: ingester + name: ingester + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 3 + 
selector: + matchLabels: + name: ingester + serviceName: ingester + template: + metadata: + labels: + name: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: ingester + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: 
/etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-a + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-a + rollout-group: ingester + serviceName: ingester-zone-a + template: + metadata: + labels: + name: ingester-zone-a + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-a + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-a + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - 
-ingester.ring.store=consul + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-b + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-b + rollout-group: ingester + serviceName: ingester-zone-b + template: + metadata: + labels: + name: ingester-zone-b + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + operator: In + values: + - ingester + - 
key: name + operator: NotIn + values: + - ingester-zone-b + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-b + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 
1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-c + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-c + rollout-group: ingester + serviceName: ingester-zone-c + template: + metadata: + labels: + name: ingester-zone-c + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-c + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-c + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=consul + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true 
+ - -ingester.ring.zone-awareness-enabled=true + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached + serviceName: memcached + template: + metadata: + labels: + name: memcached + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 6144 + - -I 1m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 9Gi + requests: + cpu: 500m + memory: 6552Mi + - args: + - --memcached.address=localhost:11211 + - 
--web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-frontend + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-frontend + serviceName: memcached-frontend + template: + metadata: + labels: + name: memcached-frontend + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-frontend + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 1024 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 1536Mi + requests: + cpu: 500m + memory: 1329Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-index-queries + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-index-queries + serviceName: memcached-index-queries + template: + metadata: + labels: + name: memcached-index-queries + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-index-queries + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 1536Mi + requests: + cpu: 500m + memory: 1329Mi + - 
args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-metadata + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + name: memcached-metadata + serviceName: memcached-metadata + template: + metadata: + labels: + name: memcached-metadata + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-metadata + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 512 + - -I 1m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 768Mi + requests: + cpu: 500m + memory: 715Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: store-gateway + name: store-gateway + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 3 + selector: + matchLabels: + name: store-gateway + serviceName: store-gateway + template: + metadata: + labels: + name: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - 
-blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - 
-store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.prefix= + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-a + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-a + rollout-group: store-gateway + serviceName: store-gateway-zone-a + template: + metadata: + labels: + name: store-gateway-zone-a + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - 
-blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -runtime-config.file=/etc/mimir/overrides.yaml + - 
-server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.instance-availability-zone=zone-a + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-b + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-b + rollout-group: store-gateway + serviceName: store-gateway-zone-b + template: + metadata: + labels: + name: 
store-gateway-zone-b + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - 
-blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.instance-availability-zone=zone-b + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: 
store-gateway + name: store-gateway-zone-c + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-c + rollout-group: store-gateway + serviceName: store-gateway-zone-c + template: + metadata: + labels: + name: store-gateway-zone-c + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - 
-blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -store-gateway.sharding-ring.instance-availability-zone=zone-c + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=consul + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: 
store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: etcd.database.coreos.com/v1beta2 +kind: EtcdCluster +metadata: + annotations: + etcd.database.coreos.com/scope: clusterwide + name: etcd + namespace: default +spec: + pod: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + etcd_cluster: etcd + topologyKey: kubernetes.io/hostname + annotations: + prometheus.io/port: "2379" + prometheus.io/scrape: "true" + etcdEnv: + - name: ETCD_AUTO_COMPACTION_RETENTION + value: 1h + labels: + name: etcd + resources: + limits: + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + size: 3 + version: 3.3.13 diff --git a/operations/mimir-tests/test-multi-zone-with-ongoing-migration.jsonnet b/operations/mimir-tests/test-multi-zone-with-ongoing-migration.jsonnet new file mode 100644 index 00000000000..5f7357995d5 --- /dev/null +++ b/operations/mimir-tests/test-multi-zone-with-ongoing-migration.jsonnet @@ -0,0 +1,29 @@ +local mimir = import 'mimir/mimir.libsonnet'; + +mimir { + _config+:: { + namespace: 'default', + external_url: 'http://test', + + blocks_storage_backend: 'gcs', + blocks_storage_bucket_name: 'blocks-bucket', + bucket_index_enabled: true, + query_scheduler_enabled: true, + + ruler_enabled: true, + ruler_client_type: 'gcs', + ruler_storage_bucket_name: 'rules-bucket', + + alertmanager_enabled: true, + alertmanager_client_type: 'gcs', + alertmanager_gcs_bucket_name: 'alerts-bucket', + + cortex_multi_zone_ingester_enabled: true, + cortex_multi_zone_ingester_migration_enabled: true, + cortex_multi_zone_ingester_replicas: 3, + + cortex_multi_zone_store_gateway_enabled: true, + cortex_multi_zone_store_gateway_migration_enabled: true, + cortex_multi_zone_store_gateway_replicas: 3, + }, +} diff --git a/operations/mimir-tests/test-multi-zone.jsonnet b/operations/mimir-tests/test-multi-zone.jsonnet new file mode 
100644 index 00000000000..660c473dee5 --- /dev/null +++ b/operations/mimir-tests/test-multi-zone.jsonnet @@ -0,0 +1,27 @@ +local mimir = import 'mimir/mimir.libsonnet'; + +mimir { + _config+:: { + namespace: 'default', + external_url: 'http://test', + + blocks_storage_backend: 'gcs', + blocks_storage_bucket_name: 'blocks-bucket', + bucket_index_enabled: true, + query_scheduler_enabled: true, + + ruler_enabled: true, + ruler_client_type: 'gcs', + ruler_storage_bucket_name: 'rules-bucket', + + alertmanager_enabled: true, + alertmanager_client_type: 'gcs', + alertmanager_gcs_bucket_name: 'alerts-bucket', + + cortex_multi_zone_ingester_enabled: true, + cortex_multi_zone_ingester_replicas: 3, + + cortex_multi_zone_store_gateway_enabled: true, + cortex_multi_zone_store_gateway_replicas: 3, + }, +} diff --git a/operations/mimir-tests/test-query-sharding-generated.yaml b/operations/mimir-tests/test-query-sharding-generated.yaml index 8dc1c0124b7..8157b743e30 100644 --- a/operations/mimir-tests/test-query-sharding-generated.yaml +++ b/operations/mimir-tests/test-query-sharding-generated.yaml @@ -1330,6 +1330,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir-tests/test-shuffle-sharding-generated.yaml b/operations/mimir-tests/test-shuffle-sharding-generated.yaml index f114b1b0f78..126fd5b72bc 100644 --- a/operations/mimir-tests/test-shuffle-sharding-generated.yaml +++ b/operations/mimir-tests/test-shuffle-sharding-generated.yaml @@ -1337,6 +1337,7 @@ spec: - -distributor.ingestion-tenant-shard-size=3 - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - 
-ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir-tests/test-storage-azure-generated.yaml b/operations/mimir-tests/test-storage-azure-generated.yaml index fbf94bc4fe5..aef350c1d1e 100644 --- a/operations/mimir-tests/test-storage-azure-generated.yaml +++ b/operations/mimir-tests/test-storage-azure-generated.yaml @@ -1338,6 +1338,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir-tests/test-storage-gcs-generated.yaml b/operations/mimir-tests/test-storage-gcs-generated.yaml index 0a4b0e89797..2e3398c4b6e 100644 --- a/operations/mimir-tests/test-storage-gcs-generated.yaml +++ b/operations/mimir-tests/test-storage-gcs-generated.yaml @@ -1326,6 +1326,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - -ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir-tests/test-storage-s3-generated.yaml b/operations/mimir-tests/test-storage-s3-generated.yaml index 63ca6420ca7..790dc68795d 100644 --- a/operations/mimir-tests/test-storage-s3-generated.yaml +++ b/operations/mimir-tests/test-storage-s3-generated.yaml @@ -1333,6 +1333,7 @@ spec: - -distributor.health-check-ingesters=true - -ingester.max-global-series-per-metric=20000 - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false - 
-ingester.ring.consul.hostname=consul.default.svc.cluster.local:8500 - -ingester.ring.heartbeat-period=15s - -ingester.ring.heartbeat-timeout=10m diff --git a/operations/mimir/multi-zone.libsonnet b/operations/mimir/multi-zone.libsonnet index f6aa4e016bc..a324b29a3a3 100644 --- a/operations/mimir/multi-zone.libsonnet +++ b/operations/mimir/multi-zone.libsonnet @@ -322,7 +322,6 @@ local podAntiAffinity = deployment.mixin.spec.template.spec.affinity.podAntiAffi rollout_operator_deployment: if !rollout_operator_enabled then null else deployment.new('rollout-operator', 1, [$.rollout_operator_container]) + deployment.mixin.metadata.withName('rollout-operator') + - deployment.mixin.spec.template.spec.withImagePullSecrets({ name: $.pentagon.gcr_secret_name }) + deployment.mixin.spec.template.spec.withServiceAccountName('rollout-operator') + // Ensure Kubernetes doesn't run 2 operators at the same time. deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) + From 4d35aeedbdd74a5a9641771dada76848c68cbed4 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 2 Mar 2022 14:15:21 +0100 Subject: [PATCH 4/6] Addressed review comments Signed-off-by: Marco Pracucci --- operations/mimir/images.libsonnet | 3 +++ operations/mimir/multi-zone.libsonnet | 32 +++++++++++---------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/operations/mimir/images.libsonnet b/operations/mimir/images.libsonnet index 942ba7eb59c..1935b2c2ff0 100644 --- a/operations/mimir/images.libsonnet +++ b/operations/mimir/images.libsonnet @@ -20,5 +20,8 @@ overrides_exporter: self.mimir, query_tee: 'grafana/query-tee:mimir-2.0.0', + + // See: https://github.com/grafana/rollout-operator + rollout_operator: 'grafana/rollout-operator:v0.1.1', }, } diff --git a/operations/mimir/multi-zone.libsonnet b/operations/mimir/multi-zone.libsonnet index a324b29a3a3..50588b18467 100644 --- a/operations/mimir/multi-zone.libsonnet +++ b/operations/mimir/multi-zone.libsonnet @@ -1,22 +1,16 @@ -local 
k = import 'ksonnet-util/kausal.libsonnet'; -local container = k.core.v1.container; -local deployment = k.apps.v1.deployment; -local statefulSet = k.apps.v1.statefulSet; -local podDisruptionBudget = k.policy.v1beta1.podDisruptionBudget; -local volume = k.core.v1.volume; -local roleBinding = k.rbac.v1.roleBinding; -local role = k.rbac.v1.role; -local service = k.core.v1.service; -local serviceAccount = k.core.v1.serviceAccount; -local servicePort = k.core.v1.servicePort; -local policyRule = k.rbac.v1.policyRule; -local podAntiAffinity = deployment.mixin.spec.template.spec.affinity.podAntiAffinity; - { - _images+:: { - // See: https://github.com/grafana/rollout-operator - rollout_operator: 'grafana/rollout-operator:v0.1.1', - }, + local container = $.core.v1.container, + local deployment = $.apps.v1.deployment, + local statefulSet = $.apps.v1.statefulSet, + local podDisruptionBudget = $.policy.v1beta1.podDisruptionBudget, + local volume = $.core.v1.volume, + local roleBinding = $.rbac.v1.roleBinding, + local role = $.rbac.v1.role, + local service = $.core.v1.service, + local serviceAccount = $.core.v1.serviceAccount, + local servicePort = $.core.v1.servicePort, + local policyRule = $.rbac.v1.policyRule, + local podAntiAffinity = deployment.mixin.spec.template.spec.affinity.podAntiAffinity, _config+: { cortex_multi_zone_ingester_enabled: false, @@ -310,7 +304,7 @@ local podAntiAffinity = deployment.mixin.spec.template.spec.affinity.podAntiAffi container.new('rollout-operator', $._images.rollout_operator) + container.withArgsMixin($.util.mapToFlags($.rollout_operator_args)) + container.withPorts([ - k.core.v1.containerPort.new('http-metrics', 8001), + $.core.v1.containerPort.new('http-metrics', 8001), ]) + $.util.resourcesRequests('100m', '100Mi') + $.util.resourcesLimits('1', '200Mi') + From c4f299c3fd393b0ab362e17d6adcc699aa192aa9 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 2 Mar 2022 14:23:29 +0100 Subject: [PATCH 5/6] Upstreaming gossip + multi-zone 
too Signed-off-by: Marco Pracucci --- .../test-gossip-multi-zone-generated.yaml | 2507 +++++++++++++++++ .../test-gossip-multi-zone.jsonnet | 28 + operations/mimir/gossip.libsonnet | 26 +- 3 files changed, 2557 insertions(+), 4 deletions(-) create mode 100644 operations/mimir-tests/test-gossip-multi-zone-generated.yaml create mode 100644 operations/mimir-tests/test-gossip-multi-zone.jsonnet diff --git a/operations/mimir-tests/test-gossip-multi-zone-generated.yaml b/operations/mimir-tests/test-gossip-multi-zone-generated.yaml new file mode 100644 index 00000000000..83b7fd76427 --- /dev/null +++ b/operations/mimir-tests/test-gossip-multi-zone-generated.yaml @@ -0,0 +1,2507 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: default +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: ingester-rollout-pdb + name: ingester-rollout-pdb + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + rollout-group: ingester +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + labels: + name: store-gateway-rollout-pdb + name: store-gateway-rollout-pdb + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + rollout-group: store-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: consul-sidekick + namespace: default +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rollout-operator + namespace: default +--- +apiVersion: v1 +data: + consul-config.json: '{"leave_on_terminate": true, "raft_snapshot_threshold": 128, + "raft_trailing_logs": 10000, "telemetry": {"dogstatsd_addr": "127.0.0.1:9125"}}' + mapping: | + mappings: + - match: consul.*.runtime.* + name: consul_runtime + labels: + type: $2 + - match: consul.runtime.total_gc_pause_ns + name: consul_runtime_total_gc_pause_ns + labels: + type: $2 + - match: consul.consul.health.service.query-tag.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3 + - match: 
consul.consul.health.service.query-tag.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4 + - match: consul.consul.health.service.query-tag.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10.$11 + - match: consul.consul.health.service.query-tag.*.*.*.*.*.*.*.*.*.*.*.* + name: consul_health_service_query_tag + labels: + query: $1.$2.$3.$4.$5.$6.$7.$8.$9.$10.$11.$12 + - match: consul.consul.catalog.deregister + name: consul_catalog_deregister + labels: {} + - match: consul.consul.dns.domain_query.*.*.*.*.* + name: consul_dns_domain_query + labels: + query: $1.$2.$3.$4.$5 + - match: consul.consul.health.service.not-found.* + name: consul_health_service_not_found + labels: + query: $1 + - match: consul.consul.health.service.query.* + name: consul_health_service_query + labels: + query: $1 + - match: consul.*.memberlist.health.score + name: consul_memberlist_health_score + labels: {} + - match: consul.serf.queue.* + name: consul_serf_events + labels: + type: $1 + - match: consul.serf.snapshot.appendLine + name: 
consul_serf_snapshot_appendLine + labels: + type: $1 + - match: consul.serf.coordinate.adjustment-ms + name: consul_serf_coordinate_adjustment_ms + labels: {} + - match: consul.consul.rpc.query + name: consul_rpc_query + labels: {} + - match: consul.*.consul.session_ttl.active + name: consul_session_ttl_active + labels: {} + - match: consul.raft.rpc.* + name: consul_raft_rpc + labels: + type: $1 + - match: consul.raft.rpc.appendEntries.storeLogs + name: consul_raft_rpc_appendEntries_storeLogs + labels: + type: $1 + - match: consul.consul.fsm.persist + name: consul_fsm_persist + labels: {} + - match: consul.raft.fsm.apply + name: consul_raft_fsm_apply + labels: {} + - match: consul.raft.leader.lastContact + name: consul_raft_leader_lastcontact + labels: {} + - match: consul.raft.leader.dispatchLog + name: consul_raft_leader_dispatchLog + labels: {} + - match: consul.raft.commitTime + name: consul_raft_commitTime + labels: {} + - match: consul.raft.replication.appendEntries.logs.*.*.*.* + name: consul_raft_replication_appendEntries_logs + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.raft.replication.appendEntries.rpc.*.*.*.* + name: consul_raft_replication_appendEntries_rpc + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.raft.replication.heartbeat.*.*.*.* + name: consul_raft_replication_heartbeat + labels: + query: ${1}.${2}.${3}.${4} + - match: consul.consul.rpc.request + name: consul_rpc_requests + labels: {} + - match: consul.consul.rpc.accept_conn + name: consul_rpc_accept_conn + labels: {} + - match: consul.memberlist.udp.* + name: consul_memberlist_udp + labels: + type: $1 + - match: consul.memberlist.tcp.* + name: consul_memberlist_tcp + labels: + type: $1 + - match: consul.memberlist.gossip + name: consul_memberlist_gossip + labels: {} + - match: consul.memberlist.probeNode + name: consul_memberlist_probenode + labels: {} + - match: consul.memberlist.pushPullNode + name: consul_memberlist_pushpullnode + labels: {} + - match: consul.http.* + 
name: consul_http_request + labels: + method: $1 + path: / + - match: consul.http.*.* + name: consul_http_request + labels: + method: $1 + path: /$2 + - match: consul.http.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3 + - match: consul.http.*.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3/$4 + - match: consul.http.*.*.*.*.* + name: consul_http_request + labels: + method: $1 + path: /$2/$3/$4/$5 + - match: consul.consul.leader.barrier + name: consul_leader_barrier + labels: {} + - match: consul.consul.leader.reconcileMember + name: consul_leader_reconcileMember + labels: {} + - match: consul.consul.leader.reconcile + name: consul_leader_reconcile + labels: {} + - match: consul.consul.fsm.coordinate.batch-update + name: consul_fsm_coordinate_batch_update + labels: {} + - match: consul.consul.fsm.autopilot + name: consul_fsm_autopilot + labels: {} + - match: consul.consul.fsm.kvs.cas + name: consul_fsm_kvs_cas + labels: {} + - match: consul.consul.fsm.register + name: consul_fsm_register + labels: {} + - match: consul.consul.fsm.deregister + name: consul_fsm_deregister + labels: {} + - match: consul.consul.fsm.tombstone.reap + name: consul_fsm_tombstone_reap + labels: {} + - match: consul.consul.catalog.register + name: consul_catalog_register + labels: {} + - match: consul.consul.catalog.deregister + name: consul_catalog_deregister + labels: {} + - match: consul.consul.leader.reapTombstones + name: consul_leader_reapTombstones + labels: {} +kind: ConfigMap +metadata: + name: consul + namespace: default +--- +apiVersion: v1 +data: + overrides.yaml: | + overrides: {} +kind: ConfigMap +metadata: + name: overrides + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: consul-sidekick + namespace: default +rules: +- apiGroups: + - "" + - extensions + - apps + resources: + - pods + - replicasets + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 
+kind: Role +metadata: + name: rollout-operator-role + namespace: default +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - get + - watch + - delete +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - list + - get + - watch +- apiGroups: + - apps + resources: + - statefulsets/status + verbs: + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: consul-sidekick + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: consul-sidekick +subjects: +- kind: ServiceAccount + name: consul-sidekick + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: rollout-operator-rolebinding + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: rollout-operator-role +subjects: +- kind: ServiceAccount + name: rollout-operator + namespace: default +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + clusterIP: None + ports: + - name: alertmanager-http-metrics + port: 8080 + targetPort: 8080 + - name: alertmanager-grpc + port: 9095 + targetPort: 9095 + selector: + name: alertmanager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: consul + name: consul + namespace: default +spec: + ports: + - name: consul-server + port: 8300 + targetPort: 8300 + - name: consul-serf + port: 8301 + targetPort: 8301 + - name: consul-client + port: 8400 + targetPort: 8400 + - name: consul-api + port: 8500 + targetPort: 8500 + - name: statsd-exporter-http-metrics + port: 8000 + targetPort: 8000 + - name: consul-exporter-http-metrics + port: 9107 + targetPort: 9107 + selector: + name: consul +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: distributor + name: distributor + namespace: default +spec: + clusterIP: None + ports: + - name: distributor-http-metrics + port: 8080 + targetPort: 8080 + - name: 
distributor-grpc + port: 9095 + targetPort: 9095 + - name: distributor-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: distributor +--- +apiVersion: v1 +kind: Service +metadata: + name: gossip-ring + namespace: default +spec: + clusterIP: None + ports: + - name: gossip-ring + port: 7946 + protocol: TCP + targetPort: 7946 + selector: + gossip_ring_member: "true" +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-a + name: ingester-zone-a + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + - name: ingester-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: ingester-zone-a + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-b + name: ingester-zone-b + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + - name: ingester-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: ingester-zone-b + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester-zone-c + name: ingester-zone-c + namespace: default +spec: + clusterIP: None + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + - name: ingester-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: ingester-zone-c + rollout-group: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached + name: memcached + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-frontend + name: 
memcached-frontend + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-index-queries + name: memcached-index-queries + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-index-queries +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-metadata + name: memcached-metadata + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-metadata +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: querier + name: querier + namespace: default +spec: + ports: + - name: querier-http-metrics + port: 8080 + targetPort: 8080 + - name: querier-grpc + port: 9095 + targetPort: 9095 + - name: querier-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: querier +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-frontend + name: query-frontend + namespace: default +spec: + ports: + - name: query-frontend-http-metrics + port: 8080 + targetPort: 8080 + - name: query-frontend-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-frontend + name: query-frontend-discovery + namespace: default +spec: + clusterIP: None + ports: + - name: query-frontend-http-metrics + port: 8080 + targetPort: 8080 + - name: query-frontend-grpc + port: 9095 + targetPort: 9095 + publishNotReadyAddresses: true + selector: + name: query-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: 
query-scheduler + name: query-scheduler + namespace: default +spec: + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-scheduler + name: query-scheduler-discovery + namespace: default +spec: + clusterIP: None + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + publishNotReadyAddresses: true + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ruler + name: ruler + namespace: default +spec: + ports: + - name: ruler-http-metrics + port: 8080 + targetPort: 8080 + - name: ruler-grpc + port: 9095 + targetPort: 9095 + selector: + name: ruler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-multi-zone + name: store-gateway-multi-zone + namespace: default +spec: + ports: + - name: store-gateway-http-metrics + port: 80 + protocol: TCP + targetPort: 80 + selector: + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-a + name: store-gateway-zone-a + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + - name: store-gateway-gossip-ring + port: 7946 + targetPort: 7946 + selector: + gossip_ring_member: "true" + name: store-gateway-zone-a + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-b + name: store-gateway-zone-b + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + - name: store-gateway-gossip-ring + port: 7946 + targetPort: 
7946 + selector: + gossip_ring_member: "true" + name: store-gateway-zone-b + rollout-group: store-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway-zone-c + name: store-gateway-zone-c + namespace: default +spec: + clusterIP: None + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + - name: store-gateway-gossip-ring + port: 7946 + targetPort: 7946 + selector: + gossip_ring_member: "true" + name: store-gateway-zone-c + rollout-group: store-gateway +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: consul + namespace: default +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: consul + template: + metadata: + annotations: + consul-hash: e56ef6821a3557604caccaf6d5820239 + labels: + name: consul + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: consul + topologyKey: kubernetes.io/hostname + - labelSelector: + matchLabels: + name: ingester + namespaces: + - default + topologyKey: kubernetes.io/hostname + containers: + - args: + - agent + - -ui + - -server + - -client=0.0.0.0 + - -config-file=/etc/config/consul-config.json + - -bootstrap-expect=1 + - -ui-content-path=/default/consul/ + env: + - name: CHECKPOINT_DISABLE + value: "1" + image: consul:1.5.3 + imagePullPolicy: IfNotPresent + name: consul + ports: + - containerPort: 8300 + name: server + - containerPort: 8301 + name: serf + - containerPort: 8400 + name: client + - containerPort: 8500 + name: api + resources: + requests: + cpu: "4" + memory: 4Gi + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --namespace=$(POD_NAMESPACE) + - --pod-name=$(POD_NAME) + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + 
fieldPath: metadata.name + image: weaveworks/consul-sidekick:master-f18ad13 + imagePullPolicy: IfNotPresent + name: sidekick + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --web.listen-address=:8000 + - --statsd.mapping-config=/etc/config/mapping + image: prom/statsd-exporter:v0.12.2 + imagePullPolicy: IfNotPresent + name: statsd-exporter + ports: + - containerPort: 8000 + name: http-metrics + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + - args: + - --consul.server=localhost:8500 + - --web.listen-address=:9107 + - --consul.timeout=1s + - --no-consul.health-summary + - --consul.allow_stale + image: prom/consul-exporter:v0.5.0 + imagePullPolicy: IfNotPresent + name: consul-exporter + ports: + - containerPort: 9107 + name: http-metrics + volumeMounts: + - mountPath: /etc/config + name: consul + - mountPath: /consul/data/ + name: data + serviceAccount: consul-sidekick + volumes: + - configMap: + name: consul + name: consul + - emptyDir: + medium: Memory + name: data +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: distributor + namespace: default +spec: + minReadySeconds: 10 + replicas: 3 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: distributor + strategy: + rollingUpdate: + maxSurge: 5 + maxUnavailable: 1 + template: + metadata: + labels: + gossip_ring_member: "true" + name: distributor + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: distributor + topologyKey: kubernetes.io/hostname + containers: + - args: + - -distributor.extend-writes=true + - -distributor.ha-tracker.enable=true + - -distributor.ha-tracker.enable-for-all-users=true + - -distributor.ha-tracker.etcd.endpoints=etcd-client.default.svc.cluster.local.:2379 + - -distributor.ha-tracker.prefix=prom_ha/ + - -distributor.ha-tracker.store=etcd + - 
-distributor.health-check-ingesters=true + - -distributor.ingestion-burst-size=200000 + - -distributor.ingestion-rate-limit=10000 + - -distributor.ring.prefix= + - -distributor.ring.store=memberlist + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -ingester.ring.zone-awareness-enabled=true + - -mem-ballast-size-bytes=1073741824 + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.max-connection-age=2m + - -server.grpc.keepalive.max-connection-age-grace=5m + - -server.grpc.keepalive.max-connection-idle=1m + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=distributor + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: distributor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 4Gi + requests: + cpu: "2" + memory: 2Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: querier + namespace: default +spec: + minReadySeconds: 10 + replicas: 6 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: querier + strategy: + rollingUpdate: + maxSurge: 5 + maxUnavailable: 1 + template: + metadata: + labels: + gossip_ring_member: "true" + name: querier + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: querier + topologyKey: kubernetes.io/hostname + 
containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -distributor.health-check-ingesters=true + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -ingester.ring.zone-awareness-enabled=true + - -mem-ballast-size-bytes=268435456 + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -querier.frontend-client.grpc-max-send-msg-size=104857600 + - -querier.max-concurrent=8 + - -querier.query-ingesters-within=13h + - -querier.query-store-after=12h + - -querier.scheduler-address=query-scheduler-discovery.default.svc.cluster.local:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -server.http-write-timeout=1m + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -store.max-query-length=768h + - -target=querier + env: + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "1024" + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: querier + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + 
name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 24Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-frontend + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-frontend + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + template: + metadata: + labels: + name: query-frontend + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: query-frontend + topologyKey: kubernetes.io/hostname + containers: + - args: + - -query-frontend.align-querier-with-step=false + - -query-frontend.cache-results=true + - -query-frontend.max-cache-freshness=10m + - -query-frontend.results-cache.backend=memcached + - -query-frontend.results-cache.memcached.addresses=dnssrvnoa+memcached-frontend.default.svc.cluster.local:11211 + - -query-frontend.results-cache.memcached.timeout=500ms + - -query-frontend.scheduler-address=query-scheduler-discovery.default.svc.cluster.local:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-recv-msg-size-bytes=104857600 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -server.http-write-timeout=1m + - -store.max-query-length=12000h + - -target=query-frontend + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: query-frontend + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 1200Mi + 
requests: + cpu: "2" + memory: 600Mi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-scheduler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-scheduler + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: query-scheduler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: query-scheduler + topologyKey: kubernetes.io/hostname + containers: + - args: + - -query-scheduler.max-outstanding-requests-per-tenant=100 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=query-scheduler + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: query-scheduler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 1Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rollout-operator + namespace: default +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: rollout-operator + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + name: rollout-operator + spec: + containers: + - args: + - -kubernetes.namespace=default + image: grafana/rollout-operator:v0.1.1 + imagePullPolicy: IfNotPresent + name: rollout-operator + ports: + - containerPort: 8001 + name: http-metrics + 
readinessProbe: + httpGet: + path: /ready + port: 8001 + initialDelaySeconds: 5 + timeoutSeconds: 1 + resources: + limits: + cpu: "1" + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + serviceAccountName: rollout-operator +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ruler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: ruler + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + gossip_ring_member: "true" + name: ruler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: ruler + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -distributor.extend-writes=true + - -distributor.health-check-ingesters=true + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -ingester.ring.zone-awareness-enabled=true + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -querier.query-ingesters-within=13h + - -querier.query-store-after=12h + - -ruler-storage.backend=gcs + - -ruler-storage.gcs.bucket-name=rules-bucket + - -ruler.alertmanager-url=http://alertmanager.default.svc.cluster.local/alertmanager + - 
-ruler.max-rule-groups-per-tenant=35 + - -ruler.max-rules-per-rule-group=20 + - -ruler.ring.store=memberlist + - -ruler.rule-path=/rules + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -store.max-query-length=768h + - -target=ruler + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ruler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + cpu: "16" + memory: 16Gi + requests: + cpu: "1" + memory: 6Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + terminationGracePeriodSeconds: 600 + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: alertmanager + serviceName: alertmanager + template: + metadata: + labels: + name: alertmanager + spec: + containers: + - args: + - -alertmanager-storage.backend=gcs + - -alertmanager-storage.gcs.bucket-name=alerts-bucket + - -alertmanager.sharding-ring.consul.hostname=consul.default.svc.cluster.local:8500 + - -alertmanager.sharding-ring.replication-factor=3 + - -alertmanager.sharding-ring.store=consul + - -alertmanager.storage.path=/data + - -alertmanager.web.external-url=http://test/alertmanager + - -runtime-config.file=/etc/mimir/overrides.yaml + - 
-server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=alertmanager + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: alertmanager + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + requests: + cpu: 100m + memory: 1Gi + volumeMounts: + - mountPath: /data + name: alertmanager-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: alertmanager-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: compactor + name: compactor + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: compactor + serviceName: compactor + template: + metadata: + labels: + gossip_ring_member: "true" + name: compactor + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -compactor.block-ranges=2h,12h,24h + - -compactor.blocks-retention-period=0 + - -compactor.cleanup-interval=15m + - -compactor.compaction-concurrency=1 + - -compactor.compaction-interval=30m + - -compactor.compactor-tenant-shard-size=1 + - -compactor.data-dir=/data + - -compactor.deletion-delay=2h + - -compactor.max-closing-blocks-concurrency=2 + - -compactor.max-opening-blocks-concurrency=4 + - -compactor.ring.prefix= + - -compactor.ring.store=memberlist + - 
-compactor.ring.wait-stability-min-duration=1m + - -compactor.split-and-merge-shards=0 + - -compactor.split-groups=1 + - -compactor.symbols-flushers-concurrency=4 + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=compactor + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: compactor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 6Gi + requests: + cpu: 1 + memory: 6Gi + volumeMounts: + - mountPath: /data + name: compactor-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: compactor-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 250Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-a + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-a + rollout-group: ingester + serviceName: ingester-zone-a + template: + metadata: + labels: + gossip_ring_member: "true" + name: ingester-zone-a + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: 
+ - key: rollout-group + operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-a + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-a + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 
15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-b + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-b + rollout-group: ingester + serviceName: ingester-zone-b + template: + metadata: + labels: + gossip_ring_member: "true" + name: ingester-zone-b + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-b + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-b + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - 
-ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: ingester + name: ingester-zone-c + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: ingester-zone-c + rollout-group: ingester + serviceName: ingester-zone-c + template: + metadata: + labels: + 
gossip_ring_member: "true" + name: ingester-zone-c + rollout-group: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: rollout-group + operator: In + values: + - ingester + - key: name + operator: NotIn + values: + - ingester-zone-c + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.close-idle-tsdb-timeout=13h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.isolation-enabled=false + - -blocks-storage.tsdb.ship-interval=1m + - -distributor.health-check-ingesters=true + - -ingester.max-global-series-per-metric=20000 + - -ingester.max-global-series-per-user=150000 + - -ingester.readiness-check-ring-health=false + - -ingester.ring.heartbeat-period=15s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.instance-availability-zone=zone-c + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -ingester.ring.zone-awareness-enabled=true + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=10000 + - -server.grpc-max-recv-msg-size-bytes=10485760 + - -server.grpc-max-send-msg-size-bytes=10485760 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - 
containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached + serviceName: memcached + template: + metadata: + labels: + name: memcached + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 6144 + - -I 1m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 9Gi + requests: + cpu: 500m + memory: 6552Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-frontend + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-frontend + serviceName: memcached-frontend + template: + metadata: + labels: + name: memcached-frontend + spec: + affinity: + podAntiAffinity: + 
requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-frontend + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 1024 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 1536Mi + requests: + cpu: 500m + memory: 1329Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-index-queries + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-index-queries + serviceName: memcached-index-queries + template: + metadata: + labels: + name: memcached-index-queries + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-index-queries + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 1536Mi + requests: + cpu: 500m + memory: 1329Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-metadata + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + name: memcached-metadata + serviceName: memcached-metadata + template: + metadata: + labels: + name: 
memcached-metadata + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-metadata + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 512 + - -I 1m + - -c 16384 + - -v + image: memcached:1.6.9-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 768Mi + requests: + cpu: 500m + memory: 715Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.6.0 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-a + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-a + rollout-group: store-gateway + serviceName: store-gateway-zone-a + template: + metadata: + labels: + gossip_ring_member: "true" + name: store-gateway-zone-a + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - 
-blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.instance-availability-zone=zone-a + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - 
-store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-b + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-b + rollout-group: store-gateway + serviceName: store-gateway-zone-b + template: + metadata: + labels: + gossip_ring_member: "true" + name: store-gateway-zone-b + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - 
-blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - 
-server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.instance-availability-zone=zone-b + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + rollout-max-unavailable: "10" + labels: + rollout-group: store-gateway + name: store-gateway-zone-c + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: store-gateway-zone-c + rollout-group: store-gateway + serviceName: store-gateway-zone-c + template: + metadata: + labels: + gossip_ring_member: "true" + name: 
store-gateway-zone-c + rollout-group: store-gateway + spec: + containers: + - args: + - -blocks-storage.backend=gcs + - -blocks-storage.bucket-store.chunks-cache.attributes-in-memory-max-items=50000 + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.ignore-blocks-within=10h + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-header-lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.max-chunk-pool-bytes=12884901888 + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=100 + - 
-blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -memberlist.abort-if-join-fails=false + - -memberlist.bind-port=7946 + - -memberlist.join=gossip-ring.default.svc.cluster.local:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.instance-availability-zone=zone-c + - -store-gateway.sharding-ring.prefix=multi-zone/ + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -store-gateway.sharding-ring.zone-awareness-enabled=true + - -target=store-gateway + image: grafana/mimir:mimir-2.0.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: OnDelete + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: 
etcd.database.coreos.com/v1beta2 +kind: EtcdCluster +metadata: + annotations: + etcd.database.coreos.com/scope: clusterwide + name: etcd + namespace: default +spec: + pod: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + etcd_cluster: etcd + topologyKey: kubernetes.io/hostname + annotations: + prometheus.io/port: "2379" + prometheus.io/scrape: "true" + etcdEnv: + - name: ETCD_AUTO_COMPACTION_RETENTION + value: 1h + labels: + name: etcd + resources: + limits: + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + size: 3 + version: 3.3.13 diff --git a/operations/mimir-tests/test-gossip-multi-zone.jsonnet b/operations/mimir-tests/test-gossip-multi-zone.jsonnet new file mode 100644 index 00000000000..8f87bde6613 --- /dev/null +++ b/operations/mimir-tests/test-gossip-multi-zone.jsonnet @@ -0,0 +1,28 @@ +local gossip = import 'mimir/gossip.libsonnet'; +local mimir = import 'mimir/mimir.libsonnet'; + +mimir + gossip { + _config+:: { + namespace: 'default', + external_url: 'http://test', + + blocks_storage_backend: 'gcs', + blocks_storage_bucket_name: 'blocks-bucket', + bucket_index_enabled: true, + query_scheduler_enabled: true, + + ruler_enabled: true, + ruler_client_type: 'gcs', + ruler_storage_bucket_name: 'rules-bucket', + + alertmanager_enabled: true, + alertmanager_client_type: 'gcs', + alertmanager_gcs_bucket_name: 'alerts-bucket', + + cortex_multi_zone_ingester_enabled: true, + cortex_multi_zone_ingester_replicas: 3, + + cortex_multi_zone_store_gateway_enabled: true, + cortex_multi_zone_store_gateway_replicas: 3, + }, +} diff --git a/operations/mimir/gossip.libsonnet b/operations/mimir/gossip.libsonnet index 4921992db75..5d1ed5b237b 100644 --- a/operations/mimir/gossip.libsonnet +++ b/operations/mimir/gossip.libsonnet @@ -66,8 +66,17 @@ distributor_deployment+: gossipLabel, - ingester_statefulset+: - gossipLabel, + ingester_statefulset: if $._config.cortex_multi_zone_ingester_enabled && 
!$._config.cortex_multi_zone_ingester_migration_enabled then null else + super.ingester_statefulset + gossipLabel, + + ingester_zone_a_statefulset: if !$._config.cortex_multi_zone_ingester_enabled then null else + super.ingester_zone_a_statefulset + gossipLabel, + + ingester_zone_b_statefulset: if !$._config.cortex_multi_zone_ingester_enabled then null else + super.ingester_zone_b_statefulset + gossipLabel, + + ingester_zone_c_statefulset: if !$._config.cortex_multi_zone_ingester_enabled then null else + super.ingester_zone_c_statefulset + gossipLabel, querier_deployment+: gossipLabel, @@ -75,8 +84,17 @@ ruler_deployment+: if $._config.ruler_enabled then gossipLabel else {}, - store_gateway_statefulset+: - gossipLabel, + store_gateway_statefulset: if $._config.cortex_multi_zone_store_gateway_enabled && !$._config.cortex_multi_zone_store_gateway_migration_enabled then null else + super.store_gateway_statefulset + gossipLabel, + + store_gateway_zone_a_statefulset: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + super.store_gateway_zone_a_statefulset + gossipLabel, + + store_gateway_zone_b_statefulset: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + super.store_gateway_zone_b_statefulset + gossipLabel, + + store_gateway_zone_c_statefulset: if !$._config.cortex_multi_zone_store_gateway_enabled then null else + super.store_gateway_zone_c_statefulset + gossipLabel, // Headless service (= no assigned IP, DNS returns all targets instead) pointing to gossip network members. 
gossip_ring_service: From 909317522d9cb360e20dc92f4172bd395516cf73 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 2 Mar 2022 14:39:38 +0100 Subject: [PATCH 6/6] Remove gossip_ring_member selector from store-gateway multi-zone service Signed-off-by: Marco Pracucci --- operations/mimir-tests/test-gossip-multi-zone-generated.yaml | 3 --- operations/mimir/multi-zone.libsonnet | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/operations/mimir-tests/test-gossip-multi-zone-generated.yaml b/operations/mimir-tests/test-gossip-multi-zone-generated.yaml index 83b7fd76427..4b38360e274 100644 --- a/operations/mimir-tests/test-gossip-multi-zone-generated.yaml +++ b/operations/mimir-tests/test-gossip-multi-zone-generated.yaml @@ -721,7 +721,6 @@ spec: port: 7946 targetPort: 7946 selector: - gossip_ring_member: "true" name: store-gateway-zone-a rollout-group: store-gateway --- @@ -745,7 +744,6 @@ spec: port: 7946 targetPort: 7946 selector: - gossip_ring_member: "true" name: store-gateway-zone-b rollout-group: store-gateway --- @@ -769,7 +767,6 @@ spec: port: 7946 targetPort: 7946 selector: - gossip_ring_member: "true" name: store-gateway-zone-c rollout-group: store-gateway --- diff --git a/operations/mimir/multi-zone.libsonnet b/operations/mimir/multi-zone.libsonnet index 50588b18467..ada14851e00 100644 --- a/operations/mimir/multi-zone.libsonnet +++ b/operations/mimir/multi-zone.libsonnet @@ -210,7 +210,7 @@ // the StatefulSet pods. For more information, see: // https://v1-18.docs.kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#statefulset-v1-apps newStoreGatewayZoneService(sts):: - $.util.serviceFor(sts) + + $.util.serviceFor(sts, $._config.service_ignored_labels) + service.mixin.spec.withClusterIp('None'), // Headless. local nonRetainablePVCs = {