From a7ce333e9316c90ad5f5476733dc4accc6281352 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Wed, 24 Jan 2024 15:23:12 +0100 Subject: [PATCH] Jsonnet: allow to configure ruler-querier max skew separately from querier (#7204) * Jsonnet: allow to configure ruler-querier max skew separately from querier Signed-off-by: Marco Pracucci * Add jsonnet test Signed-off-by: Marco Pracucci --------- Signed-off-by: Marco Pracucci --- CHANGELOG.md | 1 + ...onents-with-custom-max-skew-generated.yaml | 1582 +++++++++++++++++ ...ll-components-with-custom-max-skew.jsonnet | 11 + operations/mimir/config.libsonnet | 1 + operations/mimir/ruler.libsonnet | 2 +- 5 files changed, 1596 insertions(+), 1 deletion(-) create mode 100644 operations/mimir-tests/test-all-components-with-custom-max-skew-generated.yaml create mode 100644 operations/mimir-tests/test-all-components-with-custom-max-skew.jsonnet diff --git a/CHANGELOG.md b/CHANGELOG.md index 15e0bfcbdfa..fb239499db6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ * `-store-gateway.sharding-ring.heartbeat-timeout` set to `4m` * `-compactor.ring.heartbeat-period` set to `1m` * `-compactor.ring.heartbeat-timeout` set to `4m` +* [CHANGE] Ruler-querier: the topology spread constrain max skew is now configured through the configuration option `ruler_querier_topology_spread_max_skew` instead of `querier_topology_spread_max_skew`. #7204 * [FEATURE] Added support for the following root-level settings to configure the list of matchers to apply to node affinity: #6782 #6829 * `alertmanager_node_affinity_matchers` * `compactor_node_affinity_matchers` diff --git a/operations/mimir-tests/test-all-components-with-custom-max-skew-generated.yaml b/operations/mimir-tests/test-all-components-with-custom-max-skew-generated.yaml new file mode 100644 index 00000000000..2032c43f3e6 --- /dev/null +++ b/operations/mimir-tests/test-all-components-with-custom-max-skew-generated.yaml @@ -0,0 +1,1582 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: default +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: alertmanager +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: compactor + name: compactor + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: compactor +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: distributor + name: distributor + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: distributor +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: ingester + name: ingester + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: ingester +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: memcached + name: memcached + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: memcached +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: memcached-frontend + name: memcached-frontend + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: memcached-frontend +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: memcached-index-queries + name: memcached-index-queries + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: memcached-index-queries +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: memcached-metadata + name: memcached-metadata + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: memcached-metadata +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: querier + name: querier + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: querier +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: query-frontend + name: query-frontend + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: query-frontend +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: query-scheduler + name: query-scheduler + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: query-scheduler +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: ruler + name: ruler + namespace: default +spec: + maxUnavailable: 1 + selector: + matchLabels: + name: ruler +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + labels: + name: store-gateway + name: store-gateway + namespace: default +spec: + maxUnavailable: 2 + selector: + matchLabels: + name: store-gateway +--- +apiVersion: v1 +data: + overrides.yaml: | + overrides: {} +kind: ConfigMap +metadata: + name: overrides + namespace: default +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + clusterIP: None + ports: + - name: alertmanager-http-metrics + port: 8080 + targetPort: 8080 + - name: alertmanager-grpc + port: 9095 + targetPort: 9095 + - name: alertmanager-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: alertmanager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: compactor + name: compactor + namespace: default +spec: + clusterIP: None + ports: + - name: compactor-http-metrics + port: 8080 + targetPort: 8080 + - name: compactor-grpc + port: 9095 + targetPort: 9095 + - name: compactor-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: compactor +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: distributor + name: distributor + namespace: default +spec: + clusterIP: None + ports: + - name: distributor-http-metrics + port: 8080 + targetPort: 8080 + - name: distributor-grpc + port: 9095 + targetPort: 9095 + - name: distributor-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: distributor +--- +apiVersion: v1 +kind: Service +metadata: + name: gossip-ring + namespace: default +spec: + clusterIP: None + ports: + - appProtocol: tcp + name: gossip-ring + port: 7946 + protocol: TCP + targetPort: 7946 + selector: + gossip_ring_member: "true" +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ingester + name: ingester + namespace: default +spec: + ports: + - name: ingester-http-metrics + port: 8080 + targetPort: 8080 + - name: ingester-grpc + port: 9095 + targetPort: 9095 + - name: ingester-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: ingester +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached + name: memcached + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-frontend + name: memcached-frontend + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-index-queries + name: memcached-index-queries + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-index-queries +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: memcached-metadata + name: memcached-metadata + namespace: default +spec: + clusterIP: None + ports: + - name: memcached-client + port: 11211 + targetPort: 11211 + - name: exporter-http-metrics + port: 9150 + targetPort: 9150 + selector: + name: memcached-metadata +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: querier + name: querier + namespace: default +spec: + ports: + - name: querier-http-metrics + port: 8080 + targetPort: 8080 + - name: querier-grpc + port: 9095 + targetPort: 9095 + - name: querier-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: querier +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-frontend + name: query-frontend + namespace: default +spec: + ports: + - name: query-frontend-http-metrics + port: 8080 + targetPort: 8080 + - name: query-frontend-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-frontend +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-scheduler + name: query-scheduler + namespace: default +spec: + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: query-scheduler + name: query-scheduler-discovery + namespace: default +spec: + clusterIP: None + ports: + - name: query-scheduler-http-metrics + port: 8080 + targetPort: 8080 + - name: query-scheduler-grpc + port: 9095 + targetPort: 9095 + publishNotReadyAddresses: true + selector: + name: query-scheduler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: ruler + name: ruler + namespace: default +spec: + ports: + - name: ruler-http-metrics + port: 8080 + targetPort: 8080 + - name: ruler-grpc + port: 9095 + targetPort: 9095 + selector: + name: ruler +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: store-gateway + name: store-gateway + namespace: default +spec: + ports: + - name: store-gateway-http-metrics + port: 8080 + targetPort: 8080 + - name: store-gateway-grpc + port: 9095 + targetPort: 9095 + - name: store-gateway-gossip-ring + port: 7946 + targetPort: 7946 + selector: + name: store-gateway +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: distributor + namespace: default +spec: + minReadySeconds: 10 + replicas: 3 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: distributor + strategy: + rollingUpdate: + maxSurge: 15% + maxUnavailable: 0 + template: + metadata: + labels: + gossip_ring_member: "true" + name: distributor + spec: + containers: + - args: + - -distributor.ha-tracker.enable=true + - -distributor.ha-tracker.enable-for-all-users=true + - -distributor.ha-tracker.etcd.endpoints=etcd-client.default.svc.cluster.local.:2379 + - -distributor.ha-tracker.prefix=prom_ha/ + - -distributor.ha-tracker.store=etcd + - -distributor.health-check-ingesters=true + - -distributor.ingestion-burst-size=200000 + - -distributor.ingestion-rate-limit=10000 + - -distributor.ring.heartbeat-period=1m + - -distributor.ring.heartbeat-timeout=4m + - -distributor.ring.prefix= + - -distributor.ring.store=memberlist + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -mem-ballast-size-bytes=1073741824 + - -memberlist.bind-port=7946 + - -memberlist.join=dns+gossip-ring.default.svc.cluster.local.:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.max-connection-age=2m + - -server.grpc.keepalive.max-connection-age-grace=5m + - -server.grpc.keepalive.max-connection-idle=1m + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=distributor + - -usage-stats.installation-mode=jsonnet + env: + - name: GOMAXPROCS + value: "8" + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: distributor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 4Gi + requests: + cpu: "2" + memory: 2Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + topologySpreadConstraints: + - labelSelector: + matchLabels: + name: distributor + maxSkew: 2 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: querier + namespace: default +spec: + minReadySeconds: 10 + replicas: 6 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: querier + strategy: + rollingUpdate: + maxSurge: 15% + maxUnavailable: 0 + template: + metadata: + labels: + gossip_ring_member: "true" + name: querier + spec: + containers: + - args: + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local.:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -common.storage.backend=gcs + - -distributor.health-check-ingesters=true + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -mem-ballast-size-bytes=268435456 + - -memberlist.bind-port=7946 + - -memberlist.join=dns+gossip-ring.default.svc.cluster.local.:7946 + - -querier.frontend-client.grpc-max-send-msg-size=104857600 + - -querier.max-concurrent=8 + - -querier.max-partial-query-length=768h + - -querier.scheduler-address=query-scheduler-discovery.default.svc.cluster.local.:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.heartbeat-timeout=4m + - -store-gateway.sharding-ring.prefix= + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - -target=querier + - -usage-stats.installation-mode=jsonnet + env: + - name: GOMAXPROCS + value: "5" + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "5000" + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: querier + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 24Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + topologySpreadConstraints: + - labelSelector: + matchLabels: + name: querier + maxSkew: 2 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-frontend + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-frontend + strategy: + rollingUpdate: + maxSurge: 15% + maxUnavailable: 0 + template: + metadata: + labels: + name: query-frontend + spec: + containers: + - args: + - -query-frontend.cache-results=true + - -query-frontend.max-cache-freshness=10m + - -query-frontend.max-total-query-length=12000h + - -query-frontend.query-sharding-target-series-per-shard=2500 + - -query-frontend.results-cache.backend=memcached + - -query-frontend.results-cache.memcached.addresses=dnssrvnoa+memcached-frontend.default.svc.cluster.local.:11211 + - -query-frontend.results-cache.memcached.max-item-size=5242880 + - -query-frontend.results-cache.memcached.timeout=500ms + - -query-frontend.scheduler-address=query-scheduler-discovery.default.svc.cluster.local.:9095 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.max-connection-age=30s + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -shutdown-delay=90s + - -target=query-frontend + - -usage-stats.installation-mode=jsonnet + env: + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "5000" + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: query-frontend + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 1200Mi + requests: + cpu: "2" + memory: 600Mi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + terminationGracePeriodSeconds: 390 + topologySpreadConstraints: + - labelSelector: + matchLabels: + name: query-frontend + maxSkew: 2 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: query-scheduler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: query-scheduler + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + name: query-scheduler + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: query-scheduler + topologyKey: kubernetes.io/hostname + containers: + - args: + - -query-scheduler.max-outstanding-requests-per-tenant=100 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=query-scheduler + - -usage-stats.installation-mode=jsonnet + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: query-scheduler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 2Gi + requests: + cpu: "2" + memory: 1Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ruler + namespace: default +spec: + minReadySeconds: 10 + replicas: 2 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: ruler + strategy: + rollingUpdate: + maxSurge: 50% + maxUnavailable: 0 + template: + metadata: + labels: + gossip_ring_member: "true" + name: ruler + spec: + containers: + - args: + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local.:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -common.storage.backend=gcs + - -distributor.health-check-ingesters=true + - -distributor.remote-timeout=10s + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -memberlist.bind-port=7946 + - -memberlist.join=dns+gossip-ring.default.svc.cluster.local.:7946 + - -querier.max-partial-query-length=768h + - -ruler-storage.cache.backend=memcached + - -ruler-storage.cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local.:11211 + - -ruler-storage.cache.memcached.max-async-concurrency=50 + - -ruler-storage.cache.memcached.max-item-size=1048576 + - -ruler-storage.gcs.bucket-name=rules-bucket + - -ruler.alertmanager-url=http://alertmanager.default.svc.cluster.local./alertmanager + - -ruler.max-rule-groups-per-tenant=70 + - -ruler.max-rules-per-rule-group=20 + - -ruler.ring.store=memberlist + - -ruler.rule-path=/rules + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.heartbeat-timeout=4m + - -store-gateway.sharding-ring.prefix= + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - -target=ruler + - -usage-stats.installation-mode=jsonnet + env: + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "1000" + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: ruler + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + cpu: "16" + memory: 16Gi + requests: + cpu: "1" + memory: 6Gi + volumeMounts: + - mountPath: /etc/mimir + name: overrides + terminationGracePeriodSeconds: 600 + topologySpreadConstraints: + - labelSelector: + matchLabels: + name: ruler + maxSkew: 2 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - configMap: + name: overrides + name: overrides +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: alertmanager + name: alertmanager + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: alertmanager + serviceName: alertmanager + template: + metadata: + labels: + gossip_ring_member: "true" + name: alertmanager + spec: + containers: + - args: + - -alertmanager-storage.gcs.bucket-name=alerts-bucket + - -alertmanager.sharding-ring.replication-factor=3 + - -alertmanager.sharding-ring.store=memberlist + - -alertmanager.storage.path=/data + - -alertmanager.web.external-url=http://test/alertmanager + - -common.storage.backend=gcs + - -memberlist.bind-port=7946 + - -memberlist.join=dns+gossip-ring.default.svc.cluster.local.:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=alertmanager + - -usage-stats.installation-mode=jsonnet + env: + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: alertmanager + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 15Gi + requests: + cpu: "2" + memory: 10Gi + volumeMounts: + - mountPath: /data + name: alertmanager-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: alertmanager-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: compactor + name: compactor + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 1 + selector: + matchLabels: + name: compactor + serviceName: compactor + template: + metadata: + labels: + gossip_ring_member: "true" + name: compactor + spec: + containers: + - args: + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -common.storage.backend=gcs + - -compactor.block-ranges=2h,12h,24h + - -compactor.blocks-retention-period=0 + - -compactor.cleanup-interval=15m + - -compactor.compaction-concurrency=1 + - -compactor.compaction-interval=30m + - -compactor.compactor-tenant-shard-size=1 + - -compactor.data-dir=/data + - -compactor.deletion-delay=2h + - -compactor.first-level-compaction-wait-period=25m + - -compactor.max-closing-blocks-concurrency=2 + - -compactor.max-opening-blocks-concurrency=4 + - -compactor.ring.heartbeat-period=1m + - -compactor.ring.heartbeat-timeout=4m + - -compactor.ring.prefix= + - -compactor.ring.store=memberlist + - -compactor.ring.wait-stability-min-duration=1m + - -compactor.split-and-merge-shards=0 + - -compactor.split-groups=1 + - -compactor.symbols-flushers-concurrency=4 + - -memberlist.bind-port=7946 + - -memberlist.join=dns+gossip-ring.default.svc.cluster.local.:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=compactor + - -usage-stats.installation-mode=jsonnet + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: compactor + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 6Gi + requests: + cpu: 1 + memory: 6Gi + volumeMounts: + - mountPath: /data + name: compactor-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 900 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: compactor-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 250Gi + storageClassName: standard +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: ingester + name: ingester + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 3 + selector: + matchLabels: + name: ingester + serviceName: ingester + template: + metadata: + labels: + gossip_ring_member: "true" + name: ingester + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: ingester + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -blocks-storage.tsdb.block-ranges-period=2h + - -blocks-storage.tsdb.dir=/data/tsdb + - -blocks-storage.tsdb.head-compaction-interval=15m + - -blocks-storage.tsdb.ship-interval=1m + - -blocks-storage.tsdb.wal-replay-concurrency=3 + - -common.storage.backend=gcs + - -distributor.health-check-ingesters=true + - -ingester.max-global-metadata-per-metric=10 + - -ingester.max-global-metadata-per-user=30000 + - -ingester.max-global-series-per-user=150000 + - -ingester.ring.heartbeat-period=2m + - -ingester.ring.heartbeat-timeout=10m + - -ingester.ring.num-tokens=512 + - -ingester.ring.prefix= + - -ingester.ring.replication-factor=3 + - -ingester.ring.store=memberlist + - -ingester.ring.tokens-file-path=/data/tokens + - -ingester.ring.unregister-on-shutdown=true + - -memberlist.bind-port=7946 + - -memberlist.join=dns+gossip-ring.default.svc.cluster.local.:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc-max-concurrent-streams=500 + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -target=ingester + - -usage-stats.installation-mode=jsonnet + env: + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "1000" + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: ingester + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 25Gi + requests: + cpu: "4" + memory: 15Gi + volumeMounts: + - mountPath: /data + name: ingester-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 1200 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: ingester-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + storageClassName: fast +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached + serviceName: memcached + template: + metadata: + labels: + name: memcached + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 6144 + - -I 1m + - -c 16384 + - -v + - --extended=track_sizes + image: memcached:1.6.22-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 9Gi + requests: + cpu: 500m + memory: 6552Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.14.2 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-frontend + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-frontend + serviceName: memcached-frontend + template: + metadata: + labels: + name: memcached-frontend + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-frontend + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 16384 + - -v + - --extended=track_sizes + image: memcached:1.6.22-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 1536Mi + requests: + cpu: 500m + memory: 1176Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.14.2 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-index-queries + namespace: default +spec: + replicas: 3 + selector: + matchLabels: + name: memcached-index-queries + serviceName: memcached-index-queries + template: + metadata: + labels: + name: memcached-index-queries + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-index-queries + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 1024 + - -I 5m + - -c 16384 + - -v + - --extended=track_sizes + image: memcached:1.6.22-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 1536Mi + requests: + cpu: 500m + memory: 1176Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.14.2 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: memcached-metadata + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + name: memcached-metadata + serviceName: memcached-metadata + template: + metadata: + labels: + name: memcached-metadata + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: memcached-metadata + topologyKey: kubernetes.io/hostname + containers: + - args: + - -m 512 + - -I 1m + - -c 16384 + - -v + - --extended=track_sizes + image: memcached:1.6.22-alpine + imagePullPolicy: IfNotPresent + name: memcached + ports: + - containerPort: 11211 + name: client + resources: + limits: + memory: 768Mi + requests: + cpu: 500m + memory: 638Mi + - args: + - --memcached.address=localhost:11211 + - --web.listen-address=0.0.0.0:9150 + image: prom/memcached-exporter:v0.14.2 + imagePullPolicy: IfNotPresent + name: exporter + ports: + - containerPort: 9150 + name: http-metrics + updateStrategy: + type: RollingUpdate +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: store-gateway + name: store-gateway + namespace: default +spec: + podManagementPolicy: Parallel + replicas: 3 + selector: + matchLabels: + name: store-gateway + serviceName: store-gateway + template: + metadata: + labels: + gossip_ring_member: "true" + name: store-gateway + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: store-gateway + topologyKey: kubernetes.io/hostname + containers: + - args: + - -blocks-storage.bucket-store.chunks-cache.backend=memcached + - -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dnssrvnoa+memcached.default.svc.cluster.local.:11211 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections=150 + - -blocks-storage.bucket-store.chunks-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.chunks-cache.memcached.timeout=450ms + - -blocks-storage.bucket-store.index-cache.backend=memcached + - -blocks-storage.bucket-store.index-cache.memcached.addresses=dnssrvnoa+memcached-index-queries.default.svc.cluster.local.:11211 + - -blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.index-cache.memcached.max-idle-connections=150 + - -blocks-storage.bucket-store.index-cache.memcached.max-item-size=5242880 + - -blocks-storage.bucket-store.index-cache.memcached.timeout=450ms + - -blocks-storage.bucket-store.index-header.lazy-loading-enabled=true + - -blocks-storage.bucket-store.index-header.lazy-loading-idle-timeout=60m + - -blocks-storage.bucket-store.metadata-cache.backend=memcached + - -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dnssrvnoa+memcached-metadata.default.svc.cluster.local.:11211 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency=50 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency=100 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections=150 + - -blocks-storage.bucket-store.metadata-cache.memcached.max-item-size=1048576 + - -blocks-storage.bucket-store.sync-dir=/data/tsdb + - -blocks-storage.bucket-store.sync-interval=15m + - -blocks-storage.gcs.bucket-name=blocks-bucket + - -common.storage.backend=gcs + - -memberlist.bind-port=7946 + - -memberlist.join=dns+gossip-ring.default.svc.cluster.local.:7946 + - -runtime-config.file=/etc/mimir/overrides.yaml + - -server.grpc.keepalive.min-time-between-pings=10s + - -server.grpc.keepalive.ping-without-stream-allowed=true + - -server.http-listen-port=8080 + - -store-gateway.sharding-ring.heartbeat-period=1m + - -store-gateway.sharding-ring.heartbeat-timeout=4m + - -store-gateway.sharding-ring.prefix= + - -store-gateway.sharding-ring.replication-factor=3 + - -store-gateway.sharding-ring.store=memberlist + - -store-gateway.sharding-ring.tokens-file-path=/data/tokens + - -store-gateway.sharding-ring.unregister-on-shutdown=false + - -store-gateway.sharding-ring.wait-stability-min-duration=1m + - -target=store-gateway + - -usage-stats.installation-mode=jsonnet + env: + - name: GOMAXPROCS + value: "5" + - name: GOMEMLIMIT + value: "12884901888" + - name: JAEGER_REPORTER_MAX_QUEUE_SIZE + value: "1000" + image: grafana/mimir:2.11.0 + imagePullPolicy: IfNotPresent + name: store-gateway + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 9095 + name: grpc + - containerPort: 7946 + name: gossip-ring + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + memory: 18Gi + requests: + cpu: "1" + memory: 12Gi + volumeMounts: + - mountPath: /data + name: store-gateway-data + - mountPath: /etc/mimir + name: overrides + securityContext: + runAsUser: 0 + terminationGracePeriodSeconds: 120 + volumes: + - configMap: + name: overrides + name: overrides + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: store-gateway-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + storageClassName: standard +--- +apiVersion: etcd.database.coreos.com/v1beta2 +kind: EtcdCluster +metadata: + annotations: + etcd.database.coreos.com/scope: clusterwide + name: etcd + namespace: default +spec: + pod: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + etcd_cluster: etcd + topologyKey: kubernetes.io/hostname + annotations: + prometheus.io/port: "2379" + prometheus.io/scrape: "true" + etcdEnv: + - name: ETCD_AUTO_COMPACTION_RETENTION + value: 1h + labels: + name: etcd + resources: + limits: + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + size: 3 + version: 3.3.13 diff --git a/operations/mimir-tests/test-all-components-with-custom-max-skew.jsonnet b/operations/mimir-tests/test-all-components-with-custom-max-skew.jsonnet new file mode 100644 index 00000000000..3a4465999ee --- /dev/null +++ b/operations/mimir-tests/test-all-components-with-custom-max-skew.jsonnet @@ -0,0 +1,11 @@ +// Based on test-all-components.jsonnet. +(import 'test-all-components.jsonnet') { + _config+:: { + // Increase all defaults by 1. + distributor_topology_spread_max_skew+: 1, + query_frontend_topology_spread_max_skew+: 1, + querier_topology_spread_max_skew+: 1, + ruler_topology_spread_max_skew+: 1, + ruler_querier_topology_spread_max_skew+: 1, + }, +} diff --git a/operations/mimir/config.libsonnet b/operations/mimir/config.libsonnet index 68f31afd9d7..749e93bf307 100644 --- a/operations/mimir/config.libsonnet +++ b/operations/mimir/config.libsonnet @@ -37,6 +37,7 @@ query_frontend_topology_spread_max_skew: 1, querier_topology_spread_max_skew: 1, ruler_topology_spread_max_skew: 1, + ruler_querier_topology_spread_max_skew: 1, // Controls how many concurrent queries are run in the querier. querier_max_concurrency: 8, diff --git a/operations/mimir/ruler.libsonnet b/operations/mimir/ruler.libsonnet index 8f121d6873d..fe5febd70ac 100644 --- a/operations/mimir/ruler.libsonnet +++ b/operations/mimir/ruler.libsonnet @@ -64,7 +64,7 @@ deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge('50%') + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) + deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(600) + - $.newMimirSpreadTopology(name, $._config.querier_topology_spread_max_skew) + + $.newMimirSpreadTopology(name, $._config.ruler_querier_topology_spread_max_skew) + $.mimirVolumeMounts, ruler_service: if !$._config.is_microservices_deployment_mode || !$._config.ruler_enabled then null else