From ed64407fe8637109d59ecb11c80e57bc98c84148 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 27 Jul 2022 15:10:33 +0100 Subject: [PATCH] tests: don't ignore shutdown hangs in failure injector Previously, a hang on SIGINT would be logged, but then the test would proceed with a node in this zombie state. Since shutdown hangs are always a bug, it is correct to fail the test as early as we can with the right reason, so just re-raise the exception instead of trying to proceed. Related: https://github.com/redpanda-data/redpanda/issues/5178 --- tests/rptest/services/failure_injector.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/rptest/services/failure_injector.py b/tests/rptest/services/failure_injector.py index 092e9d5e1eb07..f2fd280310824 100644 --- a/tests/rptest/services/failure_injector.py +++ b/tests/rptest/services/failure_injector.py @@ -40,12 +40,19 @@ def __enter__(self): def __exit__(self, type, value, traceback): self._heal_all() - def inject_failure(self, spec): + def inject_failure(self, spec, require_success=False): self.redpanda.logger.info(f"injecting failure: {spec}") try: self._start_func(spec.type)(spec.node) except Exception as e: self.redpanda.logger.info(f"injecting failure error: {e}") + if spec.type == FailureSpec.FAILURE_TERMINATE and isinstance( + e, TimeoutError): + # A timeout during termination indicates a shutdown hang in redpanda: this + # is a bug and we should fail the test on it. Otherwise we'd leave the node + # in a weird state & get some non-obvious failure later in the test, such + # as https://github.com/redpanda-data/redpanda/issues/5178 + raise finally: if spec.length is not None: if spec.length == 0: