From 018f3586a9105305cfd759352c3ae07862d92d8a Mon Sep 17 00:00:00 2001 From: Michal Maslanka Date: Fri, 1 Apr 2022 08:37:05 +0200 Subject: [PATCH] r/vote: do not wait for majority of responses from unique voters In joint consensus, raft needs a majority of both the new and the previous quorums to make decisions. Voting for a new leader is one of the processes that requires majority agreement from both quorums. In the previous implementation we waited for a majority of the vote request responses. This approach would make leader election much slower in situations where the quorums differ by one node. Example: current voters: `[1,2,4]`, previous voters: `[1,2,3]` In this scenario, to elect a leader it is enough to wait for responses from nodes 1 and 2, as they form a majority in both quorums. In the previous implementation we waited for at least `(n/2)+1` responses, where `n` is the number of unique voter ids. In the example above `n = len([1,2,3,4]) = 4`. This way we had to wait for 3 replies while only 2 of them are enough to elect a new leader. Changed the implementation to check if we can make a definitive decision about the vote round result after receiving each of the replies. This way we will always use the smallest possible set of replies to make a decision. 
Signed-off-by: Michal Maslanka (cherry picked from commit bca6f1db00a32673cc6d476c06f16d9ce6bf3160) --- src/v/raft/prevote_stm.cc | 9 ++------- src/v/raft/vote_stm.cc | 17 +++++------------ 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/v/raft/prevote_stm.cc b/src/v/raft/prevote_stm.cc index 896c34dea253..a0d13e123d46 100644 --- a/src/v/raft/prevote_stm.cc +++ b/src/v/raft/prevote_stm.cc @@ -163,13 +163,8 @@ ss::future prevote_stm::do_prevote() { // dispatch requests to all voters _config->for_each_voter([this](vnode id) { (void)dispatch_prevote(id); }); - // wait until majority - const size_t majority = (_config->unique_voter_count() / 2) + 1; - - return _sem.wait(majority) - .then([this] { return process_replies(); }) - // process results - .then([this]() { return _success; }); + // process results + return process_replies().then([this]() { return _success; }); } ss::future<> prevote_stm::process_replies() { diff --git a/src/v/raft/vote_stm.cc b/src/v/raft/vote_stm.cc index 8c6511fc7e30..7052bd098823 100644 --- a/src/v/raft/vote_stm.cc +++ b/src/v/raft/vote_stm.cc @@ -144,18 +144,11 @@ ss::future<> vote_stm::do_vote() { // dispatch requests to all voters _config->for_each_voter([this](vnode id) { (void)dispatch_one(id); }); - // wait until majority - const size_t majority = (_config->unique_voter_count() / 2) + 1; - - return _sem.wait(majority) - .then([this] { return process_replies(); }) - // porcess results - .then([this]() { - return _ptr->_op_lock.get_units().then( - [this](ss::semaphore_units<> u) { - return update_vote_state(std::move(u)); - }); - }); + return process_replies().then([this]() { + return _ptr->_op_lock.get_units().then([this](ss::semaphore_units<> u) { + return update_vote_state(std::move(u)); + }); + }); } ss::future<> vote_stm::process_replies() {