From fb839c5e98576b9eb61574631cf6ae0ac868d59e Mon Sep 17 00:00:00 2001 From: Michal Maslanka Date: Tue, 2 Aug 2022 08:42:08 +0200 Subject: [PATCH 1/3] r/heartbeat_manager: pass target_node_id to self request Set target node id in self requested heartbeat metadata to make it possible to validate target node id in heartbeat responses. Signed-off-by: Michal Maslanka --- src/v/raft/heartbeat_manager.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/v/raft/heartbeat_manager.cc b/src/v/raft/heartbeat_manager.cc index 266bab861278..63c4fd8a39b6 100644 --- a/src/v/raft/heartbeat_manager.cc +++ b/src/v/raft/heartbeat_manager.cc @@ -102,7 +102,8 @@ static heartbeat_requests requests_for_range( if (rni == ptr->self()) { auto hb_metadata = ptr->meta(); pending_beats[rni.id()].emplace_back( - heartbeat_metadata{hb_metadata, rni}, + heartbeat_metadata{ + .meta = hb_metadata, .node_id = rni, .target_node_id = rni}, heartbeat_manager::follower_request_meta( ptr, follower_req_seq(0), hb_metadata.prev_log_index, rni)); return; From fe6125b24186b0b6cafb00165219fa8001353348 Mon Sep 17 00:00:00 2001 From: Michal Maslanka Date: Tue, 2 Aug 2022 08:12:02 +0200 Subject: [PATCH 2/3] r/heartbeat_manager: refactored process_reply group Use local variable instead of the iterator directly for readability Signed-off-by: Michal Maslanka --- src/v/raft/heartbeat_manager.cc | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/v/raft/heartbeat_manager.cc b/src/v/raft/heartbeat_manager.cc index 63c4fd8a39b6..571d993074ca 100644 --- a/src/v/raft/heartbeat_manager.cc +++ b/src/v/raft/heartbeat_manager.cc @@ -285,16 +285,17 @@ void heartbeat_manager::process_reply( vlog(hbeatlog.error, "cannot find consensus group:{}", g); continue; } + auto consensus = *it; - (*it)->update_heartbeat_status(req_meta.follower_vnode, false); + consensus->update_heartbeat_status(req_meta.follower_vnode, false); // propagate error - (*it)->process_append_entries_reply( + consensus->process_append_entries_reply( n, result(r.error()), req_meta.seq, req_meta.dirty_offset); - (*it)->get_probe().heartbeat_request_error(); + consensus->get_probe().heartbeat_request_error(); } return; } @@ -307,15 +308,13 @@ void heartbeat_manager::process_reply( m.group); continue; } + auto consensus = *it; vlog(hbeatlog.trace, "Heartbeat reply from node: {} - {}", n, m); auto meta = std::move(groups.find(m.group)->second); - (*it)->update_heartbeat_status(meta.follower_vnode, true); + consensus->update_heartbeat_status(meta.follower_vnode, true); - (*it)->process_append_entries_reply( - n, - result(std::move(m)), - meta.seq, - meta.dirty_offset); + consensus->process_append_entries_reply( + n, result(m), meta.seq, meta.dirty_offset); } } From 21e8d13bcea06929aceee89727fc22f9bb2ae103 Mon Sep 17 00:00:00 2001 From: Michal Maslanka Date: Tue, 2 Aug 2022 08:14:20 +0200 Subject: [PATCH 3/3] r/heartbeat_manager: validate received heartbeat response group It may be possible (due to serialization issue or other logical error) that the node will receive a heartbeat response for the group which wasn't requested or response received is targeting an incorrect node. Added check if received group id exists in requested heartbeats map. Fixes: #4623 Signed-off-by: Michal Maslanka --- src/v/raft/heartbeat_manager.cc | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/v/raft/heartbeat_manager.cc b/src/v/raft/heartbeat_manager.cc index 571d993074ca..d398c416fe8e 100644 --- a/src/v/raft/heartbeat_manager.cc +++ b/src/v/raft/heartbeat_manager.cc @@ -310,11 +310,38 @@ void heartbeat_manager::process_reply( } auto consensus = *it; vlog(hbeatlog.trace, "Heartbeat reply from node: {} - {}", n, m); - auto meta = std::move(groups.find(m.group)->second); - consensus->update_heartbeat_status(meta.follower_vnode, true); + + if (unlikely(m.target_node_id != consensus->self())) { + vlog( + hbeatlog.warn, + "Heartbeat response addressed to different node: {}, current " + "node: {}, response: {}", + m.target_node_id, + consensus->self(), + m); + continue; + } + + auto meta_it = groups.find(m.group); + if (unlikely(meta_it == groups.end())) { + vlog( + hbeatlog.warn, + "Unexpected heartbeat reply for group {} from node {}, skipping: " + "{}", + m.group, + n, + m); + continue; + } + + consensus->update_heartbeat_status( + meta_it->second.follower_vnode, true); consensus->process_append_entries_reply( - n, result(m), meta.seq, meta.dirty_offset); + n, + result(m), + meta_it->second.seq, + meta_it->second.dirty_offset); } }