Skip to content

Commit

Permalink
tests: fix ClusterConfigTest waits
Browse files Browse the repository at this point in the history
The wait_for_version_sync was correct for
waiting for the configuration to propagate
across the cluster, but it was _not_ correct
for waiting for the configuration status to
be symmetric on all nodes (i.e. for all nodes
to know the status of all other nodes)

For test cases that query the status via
arbitrary notes, a stricter wait is needed.

This issue becomes visible once wait_until is
improved to avoid spurious extra sleeps, in
redpanda-data#6003

Fixes redpanda-data#6010
  • Loading branch information
jcsp committed Aug 15, 2022
1 parent 8c3fb85 commit a5b9738
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
4 changes: 2 additions & 2 deletions tests/rptest/services/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,8 @@ def patch_cluster_config(self,
'remove': remove
}).json()

def get_cluster_config_status(self):
return self._request("GET", "cluster_config/status").json()
def get_cluster_config_status(self, node: ClusterNode = None):
return self._request("GET", "cluster_config/status", node=node).json()

def get_node_config(self):
return self._request("GET", "node_config").json()
Expand Down
21 changes: 18 additions & 3 deletions tests/rptest/tests/cluster_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,21 @@ def _wait_for_version_sync(self, version):
backoff_sec=0.5,
err_msg=f"Config status versions did not converge on {version}")

def _wait_for_version_status_sync(self, version):
"""
Stricter than _wait_for_version_sync: this requires not only that
the config version has propagated to all nodes, but also that the
consequent config status (of all the peers) has propagated to all nodes.
"""
for n in self.redpanda.nodes:
wait_until(lambda: set([
n['config_version']
for n in self.admin.get_cluster_config_status(node=n)
]) == {version},
timeout_sec=10,
backoff_sec=0.5,
err_msg=f"Config status did not converge on {version}")

def _check_restart_clears(self):
"""
After changing a setting with needs_restart=true, check that
Expand Down Expand Up @@ -305,7 +320,7 @@ def test_simple_live_change(self):
patch_result = self.admin.patch_cluster_config(
upsert=dict([norestart_new_setting]))
new_version = patch_result['config_version']
self._wait_for_version_sync(new_version)
self._wait_for_version_status_sync(new_version)

assert self.admin.get_cluster_config()[
norestart_new_setting[0]] == norestart_new_setting[1]
Expand Down Expand Up @@ -387,7 +402,7 @@ def test_invalid_settings_forced(self):
[invalid_setting]),
force=True)
new_version = patch_result['config_version']
self._wait_for_version_sync(new_version)
self._wait_for_version_status_sync(new_version)

assert self.admin.get_cluster_config()[
invalid_setting[0]] == default_value
Expand Down Expand Up @@ -534,7 +549,7 @@ def test_valid_settings(self):

patch_result = self.admin.patch_cluster_config(upsert=updates,
remove=[])
self._wait_for_version_sync(patch_result['config_version'])
self._wait_for_version_status_sync(patch_result['config_version'])

def check_status(expect_restart):
# Use one node's status, they should be symmetric
Expand Down

0 comments on commit a5b9738

Please sign in to comment.