Skip to content

Commit

Permalink
sidecar: not ready when Prometheus is unavailable
Browse files Browse the repository at this point in the history
Set Sidecar to not be ready when Prometheus is unavailable. This avoids
problems when Prometheus is replaying the WAL and Sidecar is "up" but
Query then hangs while waiting for a response from Prometheus/Sidecar.
Also, during this time the Sidecar is shown as UP on the Stores page with
no external labels, which is confusing.

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
  • Loading branch information
GiedriusS committed Dec 9, 2021
1 parent d1acaea commit febb55c
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

### Changed

- [#4939](https://github.com/thanos-io/thanos/pull/4939) Sidecar: set Sidecar to NOT READY when it cannot establish a connection with Prometheus
- [#4864](https://github.com/thanos-io/thanos/pull/4864) UI: Remove the old PromQL editor

## [v0.23.1](https://github.com/thanos-io/thanos/tree/release-0.23) - 2021.10.1
Expand Down
2 changes: 2 additions & 0 deletions cmd/thanos/sidecar.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,10 @@ func runSidecar(
if err := m.UpdateLabels(iterCtx); err != nil {
level.Warn(logger).Log("msg", "heartbeat failed", "err", err)
promUp.Set(0)
statusProber.NotReady(err)
} else {
promUp.Set(1)
statusProber.Ready()
}

return nil
Expand Down
35 changes: 35 additions & 0 deletions test/e2e/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package e2e_test
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
Expand Down Expand Up @@ -103,6 +104,40 @@ func sortResults(res model.Vector) {
})
}

// TestSidecarNotReady checks that the sidecar's /-/ready endpoint stops
// answering with 200 OK after the Prometheus instance it was started with
// has been stopped.
func TestSidecarNotReady(t *testing.T) {
	t.Parallel()

	// NOTE(review): the environment name looks shared with the query tests in
	// this file; confirm it cannot clash with other tests running in parallel.
	e, err := e2e.NewDockerEnvironment("e2e_test_query")
	testutil.Ok(t, err)
	t.Cleanup(e2ethanos.CleanScenario(t, e))

	prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage())
	testutil.Ok(t, err)
	// Start both and wait for them, then take Prometheus away while leaving
	// the sidecar running.
	testutil.Ok(t, e2e.StartAndWaitReady(prom, sidecar))
	testutil.Ok(t, prom.Stop())

	// Bound the wait: without a deadline a sidecar that never leaves the ready
	// state would keep this loop spinning until the global test timeout fires.
	// The limit is deliberately generous because the sidecar is expected to
	// change readiness only on a later heartbeat (interval not visible here).
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	// Sidecar should not be ready - it cannot accept traffic if Prometheus is down.
	// The probe reports an error for as long as the endpoint still returns 200,
	// so Retry keeps polling once per second until readiness drops or ctx ends.
	testutil.Ok(t, runutil.Retry(1*time.Second, ctx.Done(), func() (rerr error) {
		readyURL := "http://" + sidecar.Endpoint("http") + "/-/ready"

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, readyURL, nil)
		if err != nil {
			return err
		}
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return err
		}
		// A failure to close the body is surfaced through the named result.
		defer runutil.CloseWithErrCapture(&rerr, resp.Body, "closing resp body")

		if resp.StatusCode == http.StatusOK {
			return fmt.Errorf("got status code %d", resp.StatusCode)
		}
		return nil
	}))
}

func TestQuery(t *testing.T) {
t.Parallel()

Expand Down

0 comments on commit febb55c

Please sign in to comment.