tests/partition_balancer: update LOG_ALLOW_LIST
Add a "raft::offset_monitor::wait_aborted" message to allow list
redpanda-data#5154 is fixed
ztlpn committed Sep 12, 2022
1 parent c795e49 commit 5fb5ec0
Showing 1 changed file with 17 additions and 8 deletions.
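For context, a quick self-check (not part of the commit) that the new allow-list entry matches the error line quoted in the diff below, assuming the pattern is applied as an unanchored regular-expression search (as Python's re.search does):

    import re

    # Pattern added to LOG_ALLOW_LIST in this commit.
    pattern = r"rpc - .* raft::offset_monitor::wait_aborted"

    # Sample error line quoted in the TODO comment in the diff.
    sample = ("ERROR 2022-08-05 06:32:44,034 [shard 0] rpc - Service handler threw "
              "an exception: raft::offset_monitor::wait_aborted (offset monitor wait aborted)")

    assert re.search(pattern, sample) is not None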
25 changes: 17 additions & 8 deletions tests/rptest/tests/partition_balancer_test.py
@@ -31,6 +31,15 @@
# consumer timeout than the default 30 seconds
CONSUMER_TIMEOUT = 90

# TODO: remove after https://github.com/redpanda-data/redpanda/issues/5154
# is fixed
LOG_ALLOW_LIST = CHAOS_LOG_ALLOW_LIST + [
# ERROR 2022-08-05 06:32:44,034 [shard 0]
# rpc - Service handler threw an exception:
# raft::offset_monitor::wait_aborted (offset monitor wait aborted)
"rpc - .* raft::offset_monitor::wait_aborted"
]


class PartitionBalancerTest(EndToEndTest):
def __init__(self, ctx, *args, **kwargs):
@@ -237,7 +246,7 @@ def make_unavailable(self,
self.f_injector._start_func(self.cur_failure.type)(
self.cur_failure.node)

@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
def test_unavailable_nodes(self):
self.start_redpanda(num_nodes=5)

@@ -286,7 +295,7 @@ def _throttle_recovery(self, new_value):
self.redpanda.set_cluster_config(
{"raft_learner_recovery_rate": str(new_value)})

@cluster(num_nodes=6, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=6, log_allow_list=LOG_ALLOW_LIST)
def test_movement_cancellations(self):
self.start_redpanda(num_nodes=4)

@@ -334,7 +343,7 @@ def test_movement_cancellations(self):

self.run_validation(consumer_timeout_sec=CONSUMER_TIMEOUT)

@cluster(num_nodes=8, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=8, log_allow_list=LOG_ALLOW_LIST)
def test_rack_awareness(self):
extra_rp_conf = self._extra_rp_conf | {"enable_rack_awareness": True}
self.redpanda = RedpandaService(self.test_context,
@@ -378,7 +387,7 @@ def check_rack_placement():
@ok_to_fail # https://github.com/redpanda-data/redpanda/issues/5154
# https://github.com/redpanda-data/redpanda/issues/6075
# https://github.com/redpanda-data/redpanda/issues/5836
@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
def test_fuzz_admin_ops(self):
self.start_redpanda(num_nodes=5)

@@ -437,7 +446,7 @@ def get_node2partition_count():

@ok_to_fail # https://github.com/redpanda-data/redpanda/issues/5884
# https://github.com/redpanda-data/redpanda/issues/5980
@cluster(num_nodes=6, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=6, log_allow_list=LOG_ALLOW_LIST)
def test_full_nodes(self):
"""
Test partition balancer full disk node handling with the following scenario:
@@ -534,7 +543,7 @@ def func(s):
f"disk used percentage: {int(100.0 * used_ratio)}")
assert used_ratio < 0.8

@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
@matrix(kill_same_node=[True, False])
def test_maintenance_mode(self, kill_same_node):
"""
@@ -613,7 +622,7 @@ def entered_maintenance_mode(node):
self.run_validation(enable_idempotence=False,
consumer_timeout_sec=CONSUMER_TIMEOUT)

@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
@matrix(kill_same_node=[True, False], decommission_first=[True, False])
def test_decommission(self, kill_same_node, decommission_first):
"""
@@ -713,7 +722,7 @@ def node_removed():
self.run_validation(enable_idempotence=False,
consumer_timeout_sec=CONSUMER_TIMEOUT)

@cluster(num_nodes=4, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=4, log_allow_list=LOG_ALLOW_LIST)
def test_transfer_controller_leadership(self):
"""
Test that unavailability timeout is correctly restarted after controller
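As background, a minimal sketch of how an allow list like the one above can be used when scanning node logs: error lines that match any allowed pattern are ignored, and everything else is reported as unexpected. This is a simplified illustration under that assumption, not the actual rptest/ducktape log-scanning code.

    import re
    from typing import Iterable, List

    def unexpected_errors(log_lines: Iterable[str], allow_list: List[str]) -> List[str]:
        # Compile each allow-list entry; entries are treated as regular expressions.
        allowed = [re.compile(p) for p in allow_list]
        # Keep only ERROR lines that match none of the allowed patterns.
        return [
            line for line in log_lines
            if "ERROR" in line and not any(p.search(line) for p in allowed)
        ]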
