Skip to content

Commit

Permalink
t/redpanda_test: skip healthy check until whole cluster is upgraded
Browse files Browse the repository at this point in the history
Without care, a topic could be under-replicated during the rolling
upgrade of the cluster. Since this transient state is prolonged, a
health check that would otherwise pass after the upgrade could
fail.

This commit delays the redpanda.healthy() check until after the whole
cluster is upgraded.
  • Loading branch information
andijcr committed Jun 29, 2023
1 parent be61a1f commit e7d7849
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 12 deletions.
17 changes: 10 additions & 7 deletions tests/rptest/services/redpanda.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,15 +970,18 @@ def rolling_restart_nodes(self,
start_timeout=None,
stop_timeout=None,
use_maintenance_mode=True,
omit_seeds_on_idx_one=True):
omit_seeds_on_idx_one=True,
skip_redpanda_healthy_check=False):
nodes = [nodes] if isinstance(nodes, ClusterNode) else nodes
restarter = RollingRestarter(self)
restarter.restart_nodes(nodes,
override_cfg_params=override_cfg_params,
start_timeout=start_timeout,
stop_timeout=stop_timeout,
use_maintenance_mode=use_maintenance_mode,
omit_seeds_on_idx_one=omit_seeds_on_idx_one)
restarter.restart_nodes(
nodes,
override_cfg_params=override_cfg_params,
start_timeout=start_timeout,
stop_timeout=stop_timeout,
use_maintenance_mode=use_maintenance_mode,
omit_seeds_on_idx_one=omit_seeds_on_idx_one,
skip_redpanda_healthy_check=skip_redpanda_healthy_check)

def set_cluster_config(self,
values: dict,
Expand Down
10 changes: 6 additions & 4 deletions tests/rptest/services/rolling_restarter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def restart_nodes(self,
start_timeout=None,
stop_timeout=None,
use_maintenance_mode=True,
omit_seeds_on_idx_one=True):
omit_seeds_on_idx_one=True,
skip_redpanda_healthy_check=False):
"""
Performs a rolling restart on the given nodes, optionally overriding
the given configs.
Expand Down Expand Up @@ -90,9 +91,10 @@ def wait_until_cluster_healthy(timeout_sec):
backoff_sec=1,
err_msg=f"Node {node.name} draining leaderships")

wait_until(lambda: self.redpanda.healthy(),
timeout_sec=stop_timeout,
backoff_sec=1)
wait_until(
lambda: skip_redpanda_healthy_check or self.redpanda.healthy(),
timeout_sec=stop_timeout,
backoff_sec=1)

self.redpanda.stop_node(node, timeout=stop_timeout)

Expand Down
4 changes: 3 additions & 1 deletion tests/rptest/tests/redpanda_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,9 @@ def _consumer_offsets_present():
canary_nodes,
start_timeout=90,
stop_timeout=90,
use_maintenance_mode=use_maintenance_mode)
use_maintenance_mode=use_maintenance_mode,
skip_redpanda_healthy_check=True
) # do not check for under replicated partitions, since we are in a transient state for a prolonged amount of time
mid_upgrade_check({n: current_version
for n in canary_nodes}
| {n: old_version
Expand Down

0 comments on commit e7d7849

Please sign in to comment.