diff --git a/etcdserver/api/rafthttp/http.go b/etcdserver/api/rafthttp/http.go index 0b05282c04fd..32804be5fa94 100644 --- a/etcdserver/api/rafthttp/http.go +++ b/etcdserver/api/rafthttp/http.go @@ -51,7 +51,7 @@ var ( RaftSnapshotPrefix = path.Join(RaftPrefix, "snapshot") errIncompatibleVersion = errors.New("incompatible version") - errClusterIDMismatch = errors.New("cluster ID mismatch") + ErrClusterIDMismatch = errors.New("cluster ID mismatch") ) type peerGetter interface { @@ -558,7 +558,7 @@ func checkClusterCompatibilityFromHeader(lg *zap.Logger, localID types.ID, heade } else { plog.Errorf("request cluster ID mismatch (got %s want %s)", gcid, cid) } - return errClusterIDMismatch + return ErrClusterIDMismatch } return nil } diff --git a/etcdserver/api/rafthttp/stream.go b/etcdserver/api/rafthttp/stream.go index cf7d8ccf62cf..94bcf0ba0e0b 100644 --- a/etcdserver/api/rafthttp/stream.go +++ b/etcdserver/api/rafthttp/stream.go @@ -677,7 +677,7 @@ func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) { } return nil, errIncompatibleVersion - case errClusterIDMismatch.Error(): + case ErrClusterIDMismatch.Error(): if cr.lg != nil { cr.lg.Warn( "request sent was ignored by remote peer due to cluster ID mismatch", @@ -685,13 +685,13 @@ func (cr *streamReader) dial(t streamType) (io.ReadCloser, error) { zap.String("remote-peer-cluster-id", resp.Header.Get("X-Etcd-Cluster-ID")), zap.String("local-member-id", cr.tr.ID.String()), zap.String("local-member-cluster-id", cr.tr.ClusterID.String()), - zap.Error(errClusterIDMismatch), + zap.Error(ErrClusterIDMismatch), ) } else { plog.Errorf("request sent was ignored (cluster ID mismatch: peer[%s]=%s, local=%s)", cr.peerID, resp.Header.Get("X-Etcd-Cluster-ID"), cr.tr.ClusterID) } - return nil, errClusterIDMismatch + return nil, ErrClusterIDMismatch default: return nil, fmt.Errorf("unhandled error %q when precondition failed", string(b)) diff --git a/etcdserver/api/rafthttp/util.go b/etcdserver/api/rafthttp/util.go index 20938647c7a6..0c0f39dca3a6 100644 --- a/etcdserver/api/rafthttp/util.go +++ b/etcdserver/api/rafthttp/util.go @@ -86,10 +86,10 @@ func checkPostResponse(resp *http.Response, body []byte, req *http.Request, to t case errIncompatibleVersion.Error(): plog.Errorf("request sent was ignored by peer %s (server version incompatible)", to) return errIncompatibleVersion - case errClusterIDMismatch.Error(): + case ErrClusterIDMismatch.Error(): plog.Errorf("request sent was ignored (cluster ID mismatch: remote[%s]=%s, local=%s)", to, resp.Header.Get("X-Etcd-Cluster-ID"), req.Header.Get("X-Etcd-Cluster-ID")) - return errClusterIDMismatch + return ErrClusterIDMismatch default: return fmt.Errorf("unhandled error %q when precondition failed", string(body)) } diff --git a/etcdserver/api/v3rpc/rpctypes/error.go b/etcdserver/api/v3rpc/rpctypes/error.go index 18e85925e005..63827d202b96 100644 --- a/etcdserver/api/v3rpc/rpctypes/error.go +++ b/etcdserver/api/v3rpc/rpctypes/error.go @@ -45,6 +45,7 @@ var ( ErrGRPCMemberNotLearner = status.New(codes.FailedPrecondition, "etcdserver: can only promote a learner member").Err() ErrGRPCLearnerNotReady = status.New(codes.FailedPrecondition, "etcdserver: can only promote a learner member which is in sync with leader").Err() ErrGRPCTooManyLearners = status.New(codes.FailedPrecondition, "etcdserver: too many learner members in cluster").Err() + ErrGRPCClusterIdMismatch = status.New(codes.FailedPrecondition, "etcdserver: cluster ID mismatch").Err() ErrGRPCRequestTooLarge = status.New(codes.InvalidArgument, "etcdserver: request is too large").Err() ErrGRPCRequestTooManyRequests = status.New(codes.ResourceExhausted, "etcdserver: too many requests").Err() @@ -105,6 +106,7 @@ var ( ErrorDesc(ErrGRPCMemberNotLearner): ErrGRPCMemberNotLearner, ErrorDesc(ErrGRPCLearnerNotReady): ErrGRPCLearnerNotReady, ErrorDesc(ErrGRPCTooManyLearners): ErrGRPCTooManyLearners, + ErrorDesc(ErrGRPCClusterIdMismatch): ErrGRPCClusterIdMismatch, ErrorDesc(ErrGRPCRequestTooLarge): ErrGRPCRequestTooLarge, ErrorDesc(ErrGRPCRequestTooManyRequests): ErrGRPCRequestTooManyRequests, @@ -186,6 +188,7 @@ var ( ErrInvalidAuthToken = Error(ErrGRPCInvalidAuthToken) ErrAuthOldRevision = Error(ErrGRPCAuthOldRevision) ErrInvalidAuthMgmt = Error(ErrGRPCInvalidAuthMgmt) + ErrClusterIdMismatch = Error(ErrGRPCClusterIdMismatch) ErrNoLeader = Error(ErrGRPCNoLeader) ErrNotLeader = Error(ErrGRPCNotLeader) diff --git a/etcdserver/corrupt.go b/etcdserver/corrupt.go index e66848dac40f..a7ce2ae88e40 100644 --- a/etcdserver/corrupt.go +++ b/etcdserver/corrupt.go @@ -24,6 +24,7 @@ import ( "strings" "time" + "go.etcd.io/etcd/etcdserver/api/rafthttp" "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes" pb "go.etcd.io/etcd/etcdserver/etcdserverpb" "go.etcd.io/etcd/mvcc" @@ -126,6 +127,19 @@ func (s *EtcdServer) CheckInitialHashKV() error { } else { plog.Warningf("%s cannot check the hash of peer(%q) at revision %d: local node is lagging behind(%q)", s.ID(), p.eps, rev, p.err.Error()) } + case rpctypes.ErrClusterIdMismatch: + if lg != nil { + lg.Warn( + "cluster ID mismatch", + zap.String("local-member-id", s.ID().String()), + zap.Int64("local-member-revision", rev), + zap.Int64("local-member-compact-revision", crev), + zap.Uint32("local-member-hash", h), + zap.String("remote-peer-id", p.id.String()), + zap.Strings("remote-peer-endpoints", p.eps), + zap.Error(err), + ) + } } } } @@ -353,7 +367,7 @@ func (s *EtcdServer) getPeerHashKVs(rev int64) []*peerHashKVResp { ctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout()) var resp *pb.HashKVResponse - resp, lastErr = s.getPeerHashKVHTTP(ctx, ep, rev) + resp, lastErr = s.getPeerHashKVHTTP(ctx, s.cluster.ID(), ep, rev) cancel() if lastErr == nil { resps = append(resps, &peerHashKVResp{peerInfo: p, resp: resp, err: nil}) @@ -440,6 +454,10 @@ func (h *hashKVHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, "bad path", http.StatusBadRequest) return } + if gcid := r.Header.Get("X-Etcd-Cluster-ID"); gcid != "" && gcid != h.server.cluster.ID().String() { + http.Error(w, rafthttp.ErrClusterIDMismatch.Error(), http.StatusPreconditionFailed) + return + } defer r.Body.Close() b, err := ioutil.ReadAll(r.Body) @@ -478,7 +496,7 @@ func (h *hashKVHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } // getPeerHashKVHTTP fetch hash of kv store at the given rev via http call to the given url -func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, url string, rev int64) (*pb.HashKVResponse, error) { +func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, cid types.ID, url string, rev int64) (*pb.HashKVResponse, error) { cc := &http.Client{Transport: s.peerRt} hashReq := &pb.HashKVRequest{Revision: rev} hashReqBytes, err := json.Marshal(hashReq) @@ -492,6 +510,7 @@ func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, url string, rev int6 } req = req.WithContext(ctx) req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Etcd-Cluster-ID", cid.String()) req.Cancel = ctx.Done() resp, err := cc.Do(req) @@ -511,6 +530,10 @@ func (s *EtcdServer) getPeerHashKVHTTP(ctx context.Context, url string, rev int6 if strings.Contains(string(b), mvcc.ErrFutureRev.Error()) { return nil, rpctypes.ErrFutureRev } + } else if resp.StatusCode == http.StatusPreconditionFailed { + if strings.Contains(string(b), rafthttp.ErrClusterIDMismatch.Error()) { + return nil, rpctypes.ErrClusterIdMismatch + } } if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("unknown error: %s", string(b)) diff --git a/etcdserver/corrupt_test.go b/etcdserver/corrupt_test.go new file mode 100644 index 000000000000..7c3f0e98e8f4 --- /dev/null +++ b/etcdserver/corrupt_test.go @@ -0,0 +1,124 @@ +// Copyright 2023 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etcdserver + +import ( + "bytes" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "go.uber.org/zap" + + pb "go.etcd.io/etcd/etcdserver/etcdserverpb" + "go.etcd.io/etcd/lease" + "go.etcd.io/etcd/mvcc" + betesting "go.etcd.io/etcd/mvcc/backend" + "go.etcd.io/etcd/pkg/types" +) + +func TestHashKVHandler(t *testing.T) { + var remoteClusterID = 111195 + var localClusterID = 111196 + var revision = 1 + + etcdSrv := &EtcdServer{} + etcdSrv.cluster = newTestCluster(nil) + etcdSrv.cluster.SetID(types.ID(localClusterID), types.ID(localClusterID)) + be, _ := betesting.NewDefaultTmpBackend() + defer func() { + assert.NoError(t, be.Close()) + }() + //etcdSrv.kv = mvcc.New(zap.NewNop(), be, &lease.FakeLessor{}, mvcc.StoreConfig{}) + etcdSrv.kv = mvcc.New(zap.NewNop(), be, &lease.FakeLessor{}, nil, nil, mvcc.StoreConfig{}) + ph := &hashKVHandler{ + lg: zap.NewNop(), + server: etcdSrv, + } + srv := httptest.NewServer(ph) + defer srv.Close() + + tests := []struct { + name string + remoteClusterID int + wcode int + wKeyWords string + }{ + { + name: "HashKV returns 200 if cluster hash matches", + remoteClusterID: localClusterID, + wcode: http.StatusOK, + wKeyWords: "", + }, + { + name: "HashKV returns 400 if cluster hash doesn't matche", + remoteClusterID: remoteClusterID, + wcode: http.StatusPreconditionFailed, + wKeyWords: "cluster ID mismatch", + }, + } + for i, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hashReq := &pb.HashKVRequest{Revision: int64(revision)} + hashReqBytes, err := json.Marshal(hashReq) + if err != nil { + t.Fatalf("failed to marshal request: %v", err) + } + req, err := http.NewRequest(http.MethodGet, srv.URL+PeerHashKVPath, bytes.NewReader(hashReqBytes)) + if err != nil { + t.Fatalf("failed to create request: %v", err) + } + req.Header.Set("X-Etcd-Cluster-ID", strconv.FormatUint(uint64(tt.remoteClusterID), 16)) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("failed to get http response: %v", err) + } + body, err := io.ReadAll(resp.Body) + resp.Body.Close() + if err != nil { + t.Fatalf("unexpected io.ReadAll error: %v", err) + } + if resp.StatusCode != tt.wcode { + t.Fatalf("#%d: code = %d, want %d", i, resp.StatusCode, tt.wcode) + } + if resp.StatusCode != http.StatusOK { + if !strings.Contains(string(body), tt.wKeyWords) { + t.Errorf("#%d: body: %s, want body to contain keywords: %s", i, string(body), tt.wKeyWords) + } + return + } + + hashKVResponse := pb.HashKVResponse{} + err = json.Unmarshal(body, &hashKVResponse) + if err != nil { + t.Fatalf("unmarshal response error: %v", err) + } + //hashValue, _, err := etcdSrv.KV().HashStorage().HashByRev(int64(revision)) + hashValue, _, _, err := etcdSrv.KV().HashByRev(int64(revision)) + if err != nil { + t.Fatalf("etcd server hash failed: %v", err) + } + if hashKVResponse.Hash != hashValue { + t.Fatalf("hash value inconsistent: %d != %d", hashKVResponse.Hash, hashValue) + } + }) + } +} diff --git a/tests/e2e/cluster_test.go b/tests/e2e/cluster_test.go index deac4ed1e5eb..046eaa107fcd 100644 --- a/tests/e2e/cluster_test.go +++ b/tests/e2e/cluster_test.go @@ -128,6 +128,7 @@ type etcdProcessClusterConfig struct { noStrictReconfig bool enableV2 bool initialCorruptCheck bool + corruptCheckTime time.Duration authTokenOpts string MaxConcurrentStreams uint32 // default is math.MaxUint32 @@ -141,6 +142,17 @@ type etcdProcessClusterConfig struct { // newEtcdProcessCluster launches a new cluster from etcd processes, returning // a new etcdProcessCluster once all nodes are ready to accept client requests. func newEtcdProcessCluster(cfg *etcdProcessClusterConfig) (*etcdProcessCluster, error) { + epc, err := initEtcdProcessCluster(cfg) + if err != nil { + return nil, err + } + + return startEtcdProcessCluster(epc, cfg) +} + +// `initEtcdProcessCluster` initializes a new cluster based on the given config. +// It doesn't start the cluster. +func initEtcdProcessCluster(cfg *etcdProcessClusterConfig) (*etcdProcessCluster, error) { etcdCfgs := cfg.etcdServerProcessConfigs() epc := &etcdProcessCluster{ cfg: cfg, @@ -158,6 +170,11 @@ func newEtcdProcessCluster(cfg *etcdProcessClusterConfig) (*etcdProcessCluster, epc.procs[i] = proc } + return epc, nil +} + +// `startEtcdProcessCluster` launches a new cluster from etcd processes. +func startEtcdProcessCluster(epc *etcdProcessCluster, cfg *etcdProcessClusterConfig) (*etcdProcessCluster, error) { if err := epc.Start(); err != nil { return nil, err } @@ -262,6 +279,9 @@ func (cfg *etcdProcessClusterConfig) etcdServerProcessConfigs() []*etcdServerPro if cfg.initialCorruptCheck { args = append(args, "--experimental-initial-corrupt-check") } + if cfg.corruptCheckTime != 0 { + args = append(args, "--experimental-corrupt-check-time", cfg.corruptCheckTime.String()) + } var murl string if cfg.metricsURLScheme != "" { murl = (&url.URL{ diff --git a/tests/e2e/etcd_corrupt_test.go b/tests/e2e/etcd_corrupt_test.go index 5fb35557895d..51f8ba534f47 100644 --- a/tests/e2e/etcd_corrupt_test.go +++ b/tests/e2e/etcd_corrupt_test.go @@ -20,11 +20,16 @@ import ( "fmt" "os" "path/filepath" + "sync" "testing" "time" + "github.com/stretchr/testify/assert" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/etcdserver/etcdserverpb" "go.etcd.io/etcd/mvcc/mvccpb" + "go.etcd.io/etcd/pkg/testutil" bolt "go.etcd.io/bbolt" ) @@ -127,3 +132,96 @@ func corruptHash(fpath string) error { return nil }) } + +func TestInPlaceRecovery(t *testing.T) { + defer testutil.AfterTest(t) + + basePort := 20000 + + // Initialize the cluster. + cfgOld := etcdProcessClusterConfig{ + clusterSize: 3, + initialToken: "old", + keepDataDir: false, + clientTLS: clientNonTLS, + corruptCheckTime: time.Second, + basePort: basePort, + } + epcOld, err := newEtcdProcessCluster(&cfgOld) + if err != nil { + t.Fatalf("could not start etcd process cluster (%v)", err) + } + t.Cleanup(func() { + if errC := epcOld.Close(); errC != nil { + t.Fatalf("error closing etcd processes (%v)", errC) + } + }) + t.Log("Old cluster started.") + + //Put some data into the old cluster, so that after recovering from a blank db, the hash diverges. + t.Log("putting 10 keys...") + oldEtcdctl := NewEtcdctl(epcOld.EndpointsV3(), cfgOld.clientTLS, false, false) + for i := 0; i < 10; i++ { + err := oldEtcdctl.Put(fmt.Sprintf("%d", i), fmt.Sprintf("%d", i)) + assert.NoError(t, err, "error on put") + } + + // Create a new cluster config, but with the same port numbers. In this way the new servers can stay in + // contact with the old ones. + cfgNew := etcdProcessClusterConfig{ + clusterSize: 3, + initialToken: "new", + keepDataDir: false, + clientTLS: clientNonTLS, + initialCorruptCheck: true, + corruptCheckTime: time.Second, + basePort: basePort, + } + epcNew, err := initEtcdProcessCluster(&cfgNew) + if err != nil { + t.Fatalf("could not start etcd process cluster (%v)", err) + } + t.Cleanup(func() { + if errC := epcNew.Close(); errC != nil { + t.Fatalf("error closing etcd processes (%v)", errC) + } + }) + t.Log("New cluster initialized.") + + newEtcdctl := NewEtcdctl(epcNew.EndpointsV3(), cfgNew.clientTLS, false, false) + // Rolling recovery of the servers. + var wg sync.WaitGroup + t.Log("rolling updating servers in place...") + for i, newProc := range epcNew.procs { + oldProc := epcOld.procs[i] + err = oldProc.Close() + if err != nil { + t.Fatalf("could not stop etcd process (%v)", err) + } + t.Logf("old cluster server %d: %s stopped.", i, oldProc.Config().name) + + wg.Add(1) + go func(proc etcdProcess) { + defer wg.Done() + perr := proc.Start() + if perr != nil { + t.Fatalf("failed to start etcd process: %v", perr) + return + } + t.Logf("new etcd server %q started in-place with blank db", proc.Config().name) + }(newProc) + t.Log("sleeping 5 sec to let nodes do periodical check...") + time.Sleep(5 * time.Second) + } + wg.Wait() + t.Log("new cluster started.") + + alarmResponse, err := newEtcdctl.AlarmList() + assert.NoError(t, err, "error on alarm list") + for _, alarm := range alarmResponse.Alarms { + if alarm.Alarm == etcdserverpb.AlarmType_CORRUPT { + t.Fatalf("there is no corruption after in-place recovery, but corruption reported.") + } + } + t.Log("no corruption detected.") +} diff --git a/tests/e2e/etcdctl.go b/tests/e2e/etcdctl.go index 2b30fb2fbf82..0b7794b8f1d3 100644 --- a/tests/e2e/etcdctl.go +++ b/tests/e2e/etcdctl.go @@ -40,7 +40,7 @@ func NewEtcdctl(endpoints []string, connType clientConnType, isAutoTLS bool, v2 func (ctl *Etcdctl) Get(key string) (*clientv3.GetResponse, error) { var resp clientv3.GetResponse - err := ctl.spawnJsonCmd(&resp, "get", key) + err := ctl.spawnJsonCmd(&resp, "", "get", key) return &resp, err } @@ -67,7 +67,7 @@ func (ctl *Etcdctl) AlarmList() (*clientv3.AlarmResponse, error) { panic("Unsupported method for v2") } var resp clientv3.AlarmResponse - err := ctl.spawnJsonCmd(&resp, "alarm", "list") + err := ctl.spawnJsonCmd(&resp, "{", "alarm", "list") return &resp, err } @@ -76,7 +76,7 @@ func (ctl *Etcdctl) MemberList() (*clientv3.MemberListResponse, error) { panic("Unsupported method for v2") } var resp clientv3.MemberListResponse - err := ctl.spawnJsonCmd(&resp, "member", "list") + err := ctl.spawnJsonCmd(&resp, "", "member", "list") return &resp, err } @@ -88,13 +88,16 @@ func (ctl *Etcdctl) Compact(rev int64) (*clientv3.CompactResponse, error) { return nil, spawnWithExpect(args, fmt.Sprintf("compacted revision %v", rev)) } -func (ctl *Etcdctl) spawnJsonCmd(output interface{}, args ...string) error { +func (ctl *Etcdctl) spawnJsonCmd(output interface{}, expectedOutput string, args ...string) error { args = append(args, "-w", "json") cmd, err := spawnCmd(append(ctl.cmdArgs(), args...)) if err != nil { return err } - line, err := cmd.Expect("header") + if expectedOutput == "" { + expectedOutput = "header" + } + line, err := cmd.Expect(expectedOutput) if err != nil { return err }