From 190a3a0a2614b67ccd6550eb5d6e60d2b1d22b0a Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 27 Jul 2022 15:10:33 +0100
Subject: [PATCH] tests: don't ignore shutdown hangs in failure injector

Previously, a hang on SIGINT would be logged, but then the test would
proceed with a node in this zombie state.

Since shutdown hangs are always a bug, it is correct to fail the test
as early as we can with the right reason, so just re-raise the
exception instead of trying to proceed.

Related: https://github.com/redpanda-data/redpanda/issues/5178
---
 tests/rptest/services/failure_injector.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/rptest/services/failure_injector.py b/tests/rptest/services/failure_injector.py
index 092e9d5e1eb0..5e421e78ddf6 100644
--- a/tests/rptest/services/failure_injector.py
+++ b/tests/rptest/services/failure_injector.py
@@ -46,6 +46,13 @@ def inject_failure(self, spec):
             self._start_func(spec.type)(spec.node)
         except Exception as e:
             self.redpanda.logger.info(f"injecting failure error: {e}")
+            if spec.type == FailureSpec.FAILURE_TERMINATE and isinstance(
+                    e, TimeoutError):
+                # A timeout during termination indicates a shutdown hang in redpanda: this
+                # is a bug and we should fail the test on it. Otherwise we'd leave the node
+                # in a weird state & get some non-obvious failure later in the test, such
+                # as https://github.com/redpanda-data/redpanda/issues/5178
+                raise
         finally:
             if spec.length is not None:
                 if spec.length == 0: