Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.5] etcdserver: intentionally set the memberID as 0 in corruption alarm #14852

Merged
merged 1 commit into from
Nov 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions scripts/test_lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ function relativePath {
local commonPart=$source
local result=""

while [[ "${target#$commonPart}" == "${target}" ]]; do
# Refer to https://www.shellcheck.net/wiki/SC2295
while [[ "${target#"$commonPart"}" == "${target}" ]]; do
# no match, means that candidate common part is not correct
# go up one level (reduce common part)
commonPart="$(dirname "$commonPart")"
Expand All @@ -90,7 +91,8 @@ function relativePath {

# since we now have identified the common part,
# compute the non-common part
local forwardPart="${target#$commonPart}"
# Refer to https://www.shellcheck.net/wiki/SC2295
local forwardPart="${target#"$commonPart"}"

# and now stick all parts together
if [[ -n $result ]] && [[ -n $forwardPart ]]; then
Expand Down
12 changes: 10 additions & 2 deletions server/etcdserver/corrupt.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,11 @@ func (cm *corruptionChecker) PeriodicCheck() error {
return
}
alarmed = true
cm.hasher.TriggerCorruptAlarm(id)
// It isn't clear which member's data is corrupted, so we
// intentionally set the memberID as 0. We will identify
// the corrupted members using quorum in 3.6. Please see
// discussion in https://github.com/etcd-io/etcd/pull/14828.
cm.hasher.TriggerCorruptAlarm(types.ID(0))
}

if h2.Hash != h.Hash && rev2 == rev && h.CompactRevision == h2.CompactRevision {
Expand Down Expand Up @@ -276,7 +280,11 @@ func (cm *corruptionChecker) CompactHashCheck() {

// follower's compact revision is leader's old one, then hashes must match
if p.resp.Hash != hash.Hash {
cm.hasher.TriggerCorruptAlarm(p.id)
// It isn't clear which member's data is corrupted, so we
// intentionally set the memberID as 0. We will identify
// the corrupted members using quorum in 3.6. Please see
// discussion in https://github.com/etcd-io/etcd/pull/14828.
cm.hasher.TriggerCorruptAlarm(types.ID(0))
cm.lg.Error("failed compaction hash check",
zap.Int64("revision", hash.Revision),
zap.Int64("leader-compact-revision", hash.CompactRevision),
Expand Down
12 changes: 6 additions & 6 deletions server/etcdserver/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func TestPeriodicCheck(t *testing.T) {
{
name: "Different local hash and same revisions",
hasher: fakeHasher{hashByRevResponses: []hashByRev{{hash: mvcc.KeyValueHash{Hash: 1, CompactRevision: 1}, revision: 1}, {hash: mvcc.KeyValueHash{Hash: 2, CompactRevision: 1}, revision: 1}}},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "MemberId()", "TriggerCorruptAlarm(1)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "MemberId()", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand All @@ -163,15 +163,15 @@ func TestPeriodicCheck(t *testing.T) {
hasher: fakeHasher{
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(42)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
name: "Peer with newer compact revision",
hasher: fakeHasher{
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 88}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand All @@ -188,7 +188,7 @@ func TestPeriodicCheck(t *testing.T) {
hashByRevResponses: []hashByRev{{hash: mvcc.KeyValueHash{Hash: 1, CompactRevision: 1}, revision: 1}, {hash: mvcc.KeyValueHash{Hash: 2, CompactRevision: 2}, revision: 2}},
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 666}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 1}, CompactRevision: 1, Hash: 2}}},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(666)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(1)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand All @@ -199,7 +199,7 @@ func TestPeriodicCheck(t *testing.T) {
{peerInfo: peerInfo{id: 89}, resp: &pb.HashKVResponse{Header: &pb.ResponseHeader{Revision: 10}, CompactRevision: 2}},
},
},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(88)"},
expectActions: []string{"HashByRev(0)", "PeerHashByRev(0)", "ReqTimeout()", "LinearizableReadNotify()", "HashByRev(0)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
}
Expand Down Expand Up @@ -267,7 +267,7 @@ func TestCompactHashCheck(t *testing.T) {
hashes: []mvcc.KeyValueHash{{Revision: 1, CompactRevision: 1, Hash: 1}, {Revision: 2, CompactRevision: 1, Hash: 2}},
peerHashes: []*peerHashKVResp{{peerInfo: peerInfo{id: 42}, resp: &pb.HashKVResponse{CompactRevision: 1, Hash: 3}}},
},
expectActions: []string{"MemberId()", "ReqTimeout()", "Hashes()", "PeerHashByRev(2)", "TriggerCorruptAlarm(42)"},
expectActions: []string{"MemberId()", "ReqTimeout()", "Hashes()", "PeerHashByRev(2)", "TriggerCorruptAlarm(0)"},
expectCorrupt: true,
},
{
Expand Down
12 changes: 3 additions & 9 deletions tests/e2e/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
time.Sleep(checkTime * 11 / 10)
alarmResponse, err := cc.AlarmList()
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}

func TestCompactHashCheckDetectCorruption(t *testing.T) {
Expand All @@ -164,14 +164,8 @@ func TestCompactHashCheckDetectCorruption(t *testing.T) {
err := cc.Put(testutil.PickKey(int64(i)), fmt.Sprint(i))
assert.NoError(t, err, "error on put")
}
members, err := cc.MemberList()
_, err = cc.MemberList()
assert.NoError(t, err, "error on member list")
var memberID uint64
for _, m := range members.Members {
if m.Name == epc.procs[0].Config().name {
memberID = m.ID
}
}

epc.procs[0].Stop()
err = testutil.CorruptBBolt(datadir.ToBackendFileName(epc.procs[0].Config().dataDirPath))
Expand All @@ -184,5 +178,5 @@ func TestCompactHashCheckDetectCorruption(t *testing.T) {
time.Sleep(checkTime * 11 / 10)
alarmResponse, err := cc.AlarmList()
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}
4 changes: 2 additions & 2 deletions tests/integration/corrupt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
time.Sleep(50 * time.Millisecond)
alarmResponse, err := cc.AlarmList(ctx)
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: uint64(clus.Members[0].ID())}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}

func TestCompactHashCheck(t *testing.T) {
Expand Down Expand Up @@ -169,5 +169,5 @@ func TestCompactHashCheckDetectCorruption(t *testing.T) {
time.Sleep(50 * time.Millisecond)
alarmResponse, err := cc.AlarmList(ctx)
assert.NoError(t, err, "error on alarm list")
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: uint64(clus.Members[0].ID())}}, alarmResponse.Alarms)
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: 0}}, alarmResponse.Alarms)
}