tests/partition_balancer: update LOG_ALLOW_LIST
Add a "raft::offset_monitor::wait_aborted" message to allow list
redpanda-data#5154 is fixed
ztlpn committed Sep 12, 2022
1 parent c795e49 commit 5fb5ec0
Showing 1 changed file with 17 additions and 8 deletions.
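For context, a quick self-check (not part of the commit) that the new allow-list entry matches the error line quoted in the diff below, assuming the pattern is applied as an unanchored regular-expression search (as Python's re.search does):

    import re

    # Pattern added to LOG_ALLOW_LIST in this commit.
    pattern = r"rpc - .* raft::offset_monitor::wait_aborted"

    # Sample error line quoted in the TODO comment in the diff.
    sample = ("ERROR 2022-08-05 06:32:44,034 [shard 0] rpc - Service handler threw "
              "an exception: raft::offset_monitor::wait_aborted (offset monitor wait aborted)")

    assert re.search(pattern, sample) is not None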
25 changes: 17 additions & 8 deletions tests/rptest/tests/partition_balancer_test.py
@@ -31,6 +31,15 @@
# consumer timeout than the default 30 seconds
CONSUMER_TIMEOUT = 90

# TODO: remove after https://github.com/redpanda-data/redpanda/issues/5154
# is fixed
LOG_ALLOW_LIST = CHAOS_LOG_ALLOW_LIST + [
# ERROR 2022-08-05 06:32:44,034 [shard 0]
# rpc - Service handler threw an exception:
# raft::offset_monitor::wait_aborted (offset monitor wait aborted)
"rpc - .* raft::offset_monitor::wait_aborted"
]


class PartitionBalancerTest(EndToEndTest):
def __init__(self, ctx, *args, **kwargs):
@@ -237,7 +246,7 @@ def make_unavailable(self,
self.f_injector._start_func(self.cur_failure.type)(
self.cur_failure.node)

@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
def test_unavailable_nodes(self):
self.start_redpanda(num_nodes=5)

@@ -286,7 +295,7 @@ def _throttle_recovery(self, new_value):
self.redpanda.set_cluster_config(
{"raft_learner_recovery_rate": str(new_value)})

@cluster(num_nodes=6, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=6, log_allow_list=LOG_ALLOW_LIST)
def test_movement_cancellations(self):
self.start_redpanda(num_nodes=4)

@@ -334,7 +343,7 @@ def test_movement_cancellations(self):

self.run_validation(consumer_timeout_sec=CONSUMER_TIMEOUT)

@cluster(num_nodes=8, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=8, log_allow_list=LOG_ALLOW_LIST)
def test_rack_awareness(self):
extra_rp_conf = self._extra_rp_conf | {"enable_rack_awareness": True}
self.redpanda = RedpandaService(self.test_context,
@@ -378,7 +387,7 @@ def check_rack_placement():
@ok_to_fail # https://github.com/redpanda-data/redpanda/issues/5154
# https://github.com/redpanda-data/redpanda/issues/6075
# https://github.com/redpanda-data/redpanda/issues/5836
@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
def test_fuzz_admin_ops(self):
self.start_redpanda(num_nodes=5)

@@ -437,7 +446,7 @@ def get_node2partition_count():

@ok_to_fail # https://github.com/redpanda-data/redpanda/issues/5884
# https://github.com/redpanda-data/redpanda/issues/5980
@cluster(num_nodes=6, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=6, log_allow_list=LOG_ALLOW_LIST)
def test_full_nodes(self):
"""
Test partition balancer full disk node handling with the following scenario:
@@ -534,7 +543,7 @@ def func(s):
f"disk used percentage: {int(100.0 * used_ratio)}")
assert used_ratio < 0.8

@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
@matrix(kill_same_node=[True, False])
def test_maintenance_mode(self, kill_same_node):
"""
@@ -613,7 +622,7 @@ def entered_maintenance_mode(node):
self.run_validation(enable_idempotence=False,
consumer_timeout_sec=CONSUMER_TIMEOUT)

@cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=7, log_allow_list=LOG_ALLOW_LIST)
@matrix(kill_same_node=[True, False], decommission_first=[True, False])
def test_decommission(self, kill_same_node, decommission_first):
"""
@@ -713,7 +722,7 @@ def node_removed():
self.run_validation(enable_idempotence=False,
consumer_timeout_sec=CONSUMER_TIMEOUT)

@cluster(num_nodes=4, log_allow_list=CHAOS_LOG_ALLOW_LIST)
@cluster(num_nodes=4, log_allow_list=LOG_ALLOW_LIST)
def test_transfer_controller_leadership(self):
"""
Test that unavailability timeout is correctly restarted after controller
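As background, a minimal sketch of how an allow list like the one above can be used when scanning node logs: error lines that match any allowed pattern are ignored, and everything else is reported as unexpected. This is a simplified illustration under that assumption, not the actual rptest/ducktape log-scanning code.

    import re
    from typing import Iterable, List

    def unexpected_errors(log_lines: Iterable[str], allow_list: List[str]) -> List[str]:
        # Compile each allow-list entry; entries are treated as regular expressions.
        allowed = [re.compile(p) for p in allow_list]
        # Keep only ERROR lines that match none of the allowed patterns.
        return [
            line for line in log_lines
            if "ERROR" in line and not any(p.search(line) for p in allowed)
        ]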
