Skip to content

Commit

Permalink
add existing http health check handler e2e test
Browse files Browse the repository at this point in the history
Signed-off-by: Chao Chen <chaochn@amazon.com>
  • Loading branch information
chaochn47 committed Oct 12, 2023
1 parent 6d68ab0 commit 94b9348
Show file tree
Hide file tree
Showing 5 changed files with 239 additions and 0 deletions.
1 change: 1 addition & 0 deletions server/etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1822,6 +1822,7 @@ func (s *EtcdServer) apply(
zap.Stringer("type", e.Type))
switch e.Type {
case raftpb.EntryNormal:
// gofail: var beforeApplyOneEntryNormal struct{}
s.applyEntryNormal(&e)
s.setAppliedIndex(e.Index)
s.setTerm(e.Term)
Expand Down
229 changes: 229 additions & 0 deletions tests/e2e/http_health_check_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
// Copyright 2023 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !cluster_proxy

package e2e

import (
	"context"
	"errors"
	"io"
	"net/http"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"go.etcd.io/etcd/tests/v3/framework/config"
	"go.etcd.io/etcd/tests/v3/framework/e2e"
	"go.etcd.io/etcd/tests/v3/framework/testutils"
)

// healthCheckConfig describes one probe of the HTTP health endpoint and the
// outcome the test expects from it.
type healthCheckConfig struct {
	// url is the path (plus optional query string) appended to the member's
	// HTTP endpoint, e.g. "/health?serializable=true".
	url string
	// expectedStatusCode is the HTTP status the response must carry when a
	// response is received.
	expectedStatusCode int
	// expectedTimeoutError marks probes for which a request that runs into
	// its deadline is an acceptable outcome.
	expectedTimeoutError bool
}

// TestHTTPHealthHandler exercises the existing HTTP /health handler. For every
// scenario it starts a fresh cluster with the scenario's options, optionally
// injects a failure, and then sends each configured health check to the first
// member's HTTP endpoint, verifying the status code or tolerated timeout.
func TestHTTPHealthHandler(t *testing.T) {
	e2e.BeforeTest(t)

	// scenario is one row of the test table.
	type scenario struct {
		name           string
		injectFailure  func(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster)
		clusterOptions []e2e.EPClusterOption
		healthChecks   []healthCheckConfig
	}

	// A single client is shared by all scenarios; each request carries its own deadline.
	httpClient := &http.Client{}

	scenarios := []scenario{
		{
			// Happy case: nothing is injected.
			name:           "no failures",
			clusterOptions: []e2e.EPClusterOption{e2e.WithClusterSize(1)},
			healthChecks: []healthCheckConfig{
				{url: "/health", expectedStatusCode: http.StatusOK},
			},
		},
		{
			name:          "activated no space alarm",
			injectFailure: triggerNoSpaceAlarm,
			clusterOptions: []e2e.EPClusterOption{
				e2e.WithClusterSize(1),
				e2e.WithQuotaBackendBytes(int64(13 * os.Getpagesize())),
			},
			healthChecks: []healthCheckConfig{
				{url: "/health", expectedStatusCode: http.StatusServiceUnavailable},
				{url: "/health?exclude=NOSPACE", expectedStatusCode: http.StatusOK},
			},
		},
		{
			name:          "overloaded server slow apply",
			injectFailure: triggerSlowApply,
			clusterOptions: []e2e.EPClusterOption{
				e2e.WithClusterSize(3),
				e2e.WithGoFailEnabled(true),
			},
			healthChecks: []healthCheckConfig{
				{url: "/health?serializable=true", expectedStatusCode: http.StatusOK},
				{url: "/health?serializable=false", expectedTimeoutError: true},
			},
		},
		{
			name:          "network partitioned",
			injectFailure: blackhole,
			clusterOptions: []e2e.EPClusterOption{
				e2e.WithClusterSize(3),
				e2e.WithIsPeerTLS(true),
				e2e.WithPeerProxy(true),
			},
			healthChecks: []healthCheckConfig{
				{url: "/health?serializable=true", expectedStatusCode: http.StatusOK},
				{
					// Either outcome is accepted: the request times out, or the
					// old leader answers 503 with "etcdserver: leader changed"
					// (ReadIndex leaderChangedNotifier).
					url:                  "/health?serializable=false",
					expectedTimeoutError: true,
					expectedStatusCode:   http.StatusServiceUnavailable,
				},
			},
		},
		{
			name:          "raft loop deadlock",
			injectFailure: triggerRaftLoopDeadLock,
			clusterOptions: []e2e.EPClusterOption{
				e2e.WithClusterSize(1),
				e2e.WithGoFailEnabled(true),
			},
			healthChecks: []healthCheckConfig{
				{
					// The current kubeadm etcd liveness check fails to detect a
					// raft loop deadlock in steady state, see
					// https://github.com/kubernetes/kubernetes/blob/master/cmd/kubeadm/app/phases/etcd/local.go#L225-L226
					// That liveness probe relies on the etcd /health check; this
					// is a flaw the new /livez check should resolve.
					url:                "/health?serializable=true",
					expectedStatusCode: http.StatusOK,
				},
				{url: "/health?serializable=false", expectedTimeoutError: true},
			},
		},
		{
			// Verifies that a serializable read with auth enabled must go through mvcc.
			// This case is skipped until https://github.com/etcd-io/etcd/pull/16697 is merged.
			name:          "slow buffer write back with auth enabled",
			injectFailure: triggerSlowBufferWriteBackWithAuth,
			clusterOptions: []e2e.EPClusterOption{
				e2e.WithClusterSize(1),
				e2e.WithGoFailEnabled(true),
			},
			healthChecks: []healthCheckConfig{
				{url: "/health?serializable=true", expectedTimeoutError: true},
			},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			// The whole scenario (cluster start, injection, probes) shares a 20s budget.
			ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
			defer cancel()

			clus, err := e2e.NewEtcdProcessCluster(ctx, t, sc.clusterOptions...)
			require.NoError(t, err)
			defer clus.Close()

			testutils.ExecuteUntil(ctx, t, func() {
				if inject := sc.injectFailure; inject != nil {
					inject(ctx, t, clus)
				}

				for i := range sc.healthChecks {
					check := sc.healthChecks[i]
					// Probes always target the first member of the cluster.
					target := clus.Procs[0].EndpointsHTTP()[0] + check.url
					t.Logf("health check URL is %s", target)
					doHealthCheckAndVerify(t, httpClient, target, check.expectedStatusCode, check.expectedTimeoutError)
				}
			})
		})
	}
}

// doHealthCheckAndVerify sends a GET request to url with a 2-second deadline
// and verifies the outcome.
//
// When expectTimeoutError is true, a request that fails because the deadline
// was exceeded is accepted and the function returns without further checks.
// Any other request failure fails the test. Whenever a response is received
// (including when a timeout would have been tolerated), its body is logged and
// its status code must equal expectStatusCode.
func doHealthCheckAndVerify(t *testing.T, client *http.Client, url string, expectStatusCode int, expectTimeoutError bool) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	// The context must stay alive until the body has been consumed; cancelling
	// it right after Do can abort the body read below.
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	require.NoErrorf(t, err, "failed to create request %+v", err)

	resp, herr := client.Do(req)
	// client.Do wraps failures in *url.Error, so the error text is prefixed
	// with the method and URL. Match on the wrapped cause instead of comparing
	// messages; errors.Is reports false for a nil error.
	if expectTimeoutError && errors.Is(herr, context.DeadlineExceeded) {
		return
	}
	require.NoErrorf(t, herr, "failed to get response %+v", herr)
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	require.NoErrorf(t, err, "failed to read response %+v", err)

	t.Logf("health check response body is: %s", body)
	require.Equal(t, expectStatusCode, resp.StatusCode)
}

// triggerNoSpaceAlarm keeps overwriting key "foo" with a page-sized value
// until a Put is rejected with the "database space exceeded" error. Any other
// Put error fails the test immediately.
func triggerNoSpaceAlarm(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) {
	const quotaExceeded = "etcdserver: mvcc: database space exceeded"

	payload := strings.Repeat("b", os.Getpagesize())
	ctl := clus.Etcdctl()
	for {
		putErr := ctl.Put(ctx, "foo", payload, config.PutOptions{})
		if putErr == nil {
			// Still under quota; write again.
			continue
		}
		if !strings.Contains(putErr.Error(), quotaExceeded) {
			t.Fatal(putErr)
		}
		return
	}
}

// triggerSlowApply makes the first member slow at applying entries and then
// submits a write through the second member.
//
// The "beforeApplyOneEntryNormal" failpoint is set to sleep 3s on member 0, so
// the proposal written below gets stuck in that member's apply stage. While
// its applied index is behind the committed index, a linearizable read on
// that member would time out.
func triggerSlowApply(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) {
	failpointErr := clus.Procs[0].Failpoints().SetupHTTP(ctx, "beforeApplyOneEntryNormal", `sleep("3s")`)
	require.NoError(t, failpointErr)

	putErr := clus.Procs[1].Etcdctl().Put(ctx, "foo", "bar", config.PutOptions{})
	require.NoError(t, putErr)
}

// blackhole cuts the first member off from its peers by blackholing both the
// outgoing (Tx) and incoming (Rx) direction of its peer proxy. The context
// argument is unused; it exists so the function matches the injectFailure
// signature.
func blackhole(_ context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) {
	target := clus.Procs[0]
	peerProxy := target.PeerProxy()

	t.Logf("Blackholing traffic from and to member %q", target.Config().Name)
	peerProxy.BlackholeTx()
	peerProxy.BlackholeRx()
}

// triggerRaftLoopDeadLock sets the "raftBeforeSave" failpoint on the first
// member to sleep 3s and then issues a write against that member so the
// failpoint is hit.
func triggerRaftLoopDeadLock(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) {
	first := clus.Procs[0]
	require.NoError(t, first.Failpoints().SetupHTTP(ctx, "raftBeforeSave", `sleep("3s")`))

	// The outcome of this write is not checked and it runs on a background
	// context rather than ctx.
	// NOTE(review): presumably the Put only serves to drive the failpoint — confirm.
	_ = first.Etcdctl().Put(context.Background(), "foo", "bar", config.PutOptions{})
}

// triggerSlowBufferWriteBackWithAuth is currently skipped on entry. Once
// re-enabled it creates a root user, grants it the root role, enables auth,
// sets the "beforeWritebackBuf" failpoint on the first member to sleep 3s, and
// finally issues an authenticated write limited to 200ms.
func triggerSlowBufferWriteBackWithAuth(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) {
	t.Skip("wait until https://github.com/etcd-io/etcd/pull/16697 merged that using root permissions doing serializable read")

	ctl := clus.Etcdctl()
	_, addErr := ctl.UserAdd(ctx, "root", "root", config.UserAddOptions{})
	require.NoError(t, addErr)
	_, grantErr := ctl.UserGrantRole(ctx, "root", "root")
	require.NoError(t, grantErr)
	authErr := ctl.AuthEnable(ctx)
	require.NoError(t, authErr)

	member := clus.Procs[0]
	require.NoError(t, member.Failpoints().SetupHTTP(ctx, "beforeWritebackBuf", `sleep("3s")`))

	// The result of this short-lived write is not checked.
	// NOTE(review): presumably it only serves to reach the failpoint — confirm.
	writeOpts := config.PutOptions{Timeout: 200 * time.Millisecond}
	_ = member.Etcdctl(e2e.WithAuth("root", "root")).Put(context.Background(), "foo", "bar", writeOpts)
}
1 change: 1 addition & 0 deletions tests/framework/config/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type GetOptions struct {

// PutOptions carries the optional settings of a Put call.
type PutOptions struct {
// LeaseID, when non-zero, is the lease passed along with the written key.
LeaseID clientv3.LeaseID
// Timeout, when non-zero, limits how long the Put may take.
Timeout time.Duration
}

type DeleteOptions struct {
Expand Down
3 changes: 3 additions & 0 deletions tests/framework/e2e/etcdctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ func (ctl *EtcdctlV3) Put(ctx context.Context, key, value string, opts config.Pu
if opts.LeaseID != 0 {
args = append(args, "--lease", strconv.FormatInt(int64(opts.LeaseID), 16))
}
if opts.Timeout != 0 {
args = append(args, fmt.Sprintf("--command-timeout=%s", opts.Timeout))
}
_, err := SpawnWithExpectLines(ctx, args, nil, expect.ExpectedResponse{Value: "OK"})
return err
}
Expand Down
5 changes: 5 additions & 0 deletions tests/framework/integration/integration.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,11 @@ func (c integrationClient) Get(ctx context.Context, key string, o config.GetOpti
}

func (c integrationClient) Put(ctx context.Context, key, value string, opts config.PutOptions) error {
if opts.Timeout != 0 {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, opts.Timeout)
defer cancel()
}
var clientOpts []clientv3.OpOption
if opts.LeaseID != 0 {
clientOpts = append(clientOpts, clientv3.WithLease(opts.LeaseID))
Expand Down

0 comments on commit 94b9348

Please sign in to comment.