diff --git a/mmengine/hooks/logger_hook.py b/mmengine/hooks/logger_hook.py index 04e5f07294..26cd35b7d3 100644 --- a/mmengine/hooks/logger_hook.py +++ b/mmengine/hooks/logger_hook.py @@ -304,4 +304,5 @@ def after_run(self, runner) -> None: if not self.keep_local: os.remove(local_filepath) runner.logger.info(f'{local_filepath} was removed due to the ' - '`self.keep_local=False`') + '`self.keep_local=False`. You can check ' + f'the running logs in {out_filepath}') diff --git a/mmengine/runner/runner.py b/mmengine/runner/runner.py index e2506dd8d1..345c9418dd 100644 --- a/mmengine/runner/runner.py +++ b/mmengine/runner/runner.py @@ -716,6 +716,11 @@ def build_logger(self, log_cfg = dict(log_level=log_level, log_file=log_file, **kwargs) log_cfg.setdefault('name', self._experiment_name) + # `torch.compile` in PyTorch 2.0 can close all user-defined handlers + # unexpectedly. Using file mode 'a' can help prevent abnormal + # termination of the FileHandler and ensure that the log file can + # be continuously updated during the lifespan of the runner. + log_cfg.setdefault('file_mode', 'a') return MMLogger.get_instance(**log_cfg) # type: ignore diff --git a/tests/test_runner/test_runner.py b/tests/test_runner/test_runner.py index a40c75317d..725b511ec7 100644 --- a/tests/test_runner/test_runner.py +++ b/tests/test_runner/test_runner.py @@ -1745,6 +1745,14 @@ def test_train_with_compile(self): runner = Runner.from_cfg(cfg) runner.train() + runner._maybe_compile('train_step') + # PyTorch 2.0.0 can close the FileHandler after + # ``torch.compile`` is called, so we need to test that our file handler still works. + with open(osp.join(f'{runner.log_dir}', + f'{runner.timestamp}.log')) as f: + last_line = f.readlines()[-1] + self.assertTrue(last_line.endswith('please be patient.\n')) + def test_val(self): cfg = copy.deepcopy(self.epoch_based_cfg) cfg.experiment_name = 'test_val1'