Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function to disable paddle signal handler #34577

Merged
merged 4 commits into from
Aug 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions paddle/fluid/platform/init.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <csignal>
#include <fstream>
#include <string>

Expand Down Expand Up @@ -245,15 +246,16 @@ void InitDevices(const std::vector<int> devices) {
// Description Quoted from
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html
// Table of signals with their symbolic names and descriptions.
// DisableSignalHandler() walks this table and reads signal_number from
// each entry.
const struct {
// Numeric signal identifier from <csignal> (e.g. SIGSEGV).
int signal_number;
// Symbolic name of the signal as text (e.g. "SIGSEGV").
const char *name;
// Human-readable description of the signal.
const char *error_string;
} SignalErrorStrings[] = {
// NOTE(review): the two-field rows below carry no signal_number and do not
// match the three-field struct above -- presumably the pre-change rows that
// the three-field rows further down replace; confirm before relying on them.
{"SIGSEGV", "Segmentation fault"},
{"SIGILL", "Illegal instruction"},
{"SIGFPE", "Erroneous arithmetic operation"},
{"SIGABRT", "Process abort signal"},
{"SIGBUS", "Access to an undefined portion of a memory object"},
{"SIGTERM", "Termination signal"},
// Rows in {signal_number, name, error_string} order.
{SIGSEGV, "SIGSEGV", "Segmentation fault"},
{SIGILL, "SIGILL", "Illegal instruction"},
{SIGFPE, "SIGFPE", "Erroneous arithmetic operation"},
{SIGABRT, "SIGABRT", "Process abort signal"},
{SIGBUS, "SIGBUS", "Access to an undefined portion of a memory object"},
{SIGTERM, "SIGTERM", "Termination signal"},
};

bool StartsWith(const char *str, const char *prefix) {
Expand Down Expand Up @@ -319,7 +321,21 @@ void SignalHandle(const char *data, int size) {
// will Kill program by the default signal handler
}
}
#endif // _WIN32

// Restores the default disposition (SIG_DFL) for every signal listed in
// SignalErrorStrings. The body is compiled out when _WIN32 is defined, so the
// function does nothing on that platform.
void DisableSignalHandler() {
#ifndef _WIN32
  for (const auto &entry : SignalErrorStrings) {
    // Start from an all-zero action so no flags are set.
    struct sigaction default_action;
    memset(&default_action, 0, sizeof(default_action));
    // Do not block any additional signals while the action runs.
    sigemptyset(&default_action.sa_mask);
    default_action.sa_handler = SIG_DFL;
    // The previous action is not needed, hence the null third argument.
    sigaction(entry.signal_number, &default_action, nullptr);
  }
#endif
}

#ifdef WITH_WIN_DUMP_DBG
typedef BOOL(WINAPI *MINIDUMP_WRITE_DUMP)(
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/platform/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,7 @@ class SignalMessageDumper {
void SignalHandle(const char* data, int size);
#endif

// Restores the default disposition (SIG_DFL) for the signals Paddle handles.
// The implementation is compiled out when _WIN32 is defined, so the call does
// nothing there.
void DisableSignalHandler();

} // namespace framework
} // namespace paddle
2 changes: 2 additions & 0 deletions paddle/fluid/pybind/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ PYBIND11_MODULE(core_noavx, m) {

m.def("set_num_threads", &platform::SetNumThreads);

m.def("disable_signal_handler", &DisableSignalHandler);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
m.def("cudnn_version", &platform::CudnnVersion);
#endif
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@
from .device import get_device # noqa: F401
from .fluid.framework import is_compiled_with_cuda # noqa: F401
from .fluid.framework import is_compiled_with_rocm # noqa: F401
from .fluid.framework import disable_signal_handler # noqa: F401
from .device import is_compiled_with_xpu # noqa: F401
from .device import is_compiled_with_npu # noqa: F401
from .device import XPUPlace # noqa: F401
Expand Down Expand Up @@ -483,6 +484,7 @@
'enable_static',
'scatter_nd',
'set_default_dtype',
'disable_signal_handler',
'expand_as',
'stack',
'sqrt',
Expand Down
25 changes: 25 additions & 0 deletions python/paddle/fluid/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,31 @@ def is_compiled_with_xpu():
return core.is_compiled_with_xpu()


def disable_signal_handler():
    """
    Reset the signal handlers registered by Paddle.

    Paddle installs signal handlers at the C++ level to log debug information when a failure occurs.
    These handlers can conflict with other Python modules that rely on the same signals.
    In that case, Paddle's signal handlers can be disabled through this interface.

    Frameworks known to require disabling the signal handlers include:
    1. TVM
    2. ADLIK

    Make sure to call paddle.disable_signal_handler() before using the frameworks mentioned above.

    Returns: None

    Examples:
        .. code-block:: python

            import paddle
            paddle.disable_signal_handler()
    """
    # All of the work happens in the C++ binding.
    core.disable_signal_handler()


def is_compiled_with_cuda():
"""
Whether this whl package can be used to run the model on GPU.
Expand Down
1 change: 1 addition & 0 deletions python/paddle/fluid/tests/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_memcpy_op)
LIST(REMOVE_ITEM TEST_OPS test_raw_program_optimizer)
LIST(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale)
LIST(REMOVE_ITEM TEST_OPS test_disable_signal_handler)
endif()

if(WIN32)
Expand Down
48 changes: 48 additions & 0 deletions python/paddle/fluid/tests/unittests/test_disable_signal_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import signal, os
import paddle
import subprocess

# Signals exercised by the test; the same six signals appear in the C++
# SignalErrorStrings table.
SignalsToTest = {
    signal.SIGTERM, signal.SIGBUS, signal.SIGABRT, signal.SIGSEGV,
    signal.SIGILL, signal.SIGFPE
}


class TestSignOpError(unittest.TestCase):
    """Checks that paddle.disable_signal_handler() silences Paddle's C++ signal handler."""

    def test_errors(self):
        """Raise every signal in SignalsToTest in its own child interpreter.

        Each child imports paddle, calls paddle.disable_signal_handler() and
        then sends the signal to itself. The child is expected to die from the
        signal; the test fails only if the child's combined stdout/stderr
        contains "paddle::framework::SignalHandle", which would mean Paddle's
        handler still ran.
        """
        # Local import: run the child with the same interpreter as this test
        # instead of whatever "python" resolves to on PATH.
        import sys

        for sig in sorted(SignalsToTest):
            # int(sig) keeps the generated code independent of how the enum
            # formats itself.
            script = ("import paddle; import signal,os; "
                      "paddle.disable_signal_handler(); "
                      f"os.kill(os.getpid(), {int(sig)})")
            # The try block lives inside the loop: a child killed by one
            # signal must not stop the remaining signals from being tested.
            with self.subTest(signal=sig):
                try:
                    subprocess.check_output(
                        [sys.executable, "-c", script],
                        stderr=subprocess.STDOUT)
                except subprocess.CalledProcessError as exc:
                    # A non-zero exit is expected (the child killed itself);
                    # only the presence of Paddle's handler output is an error.
                    message = (exc.output or b"").decode(errors="replace")
                    if "paddle::framework::SignalHandle" in message:
                        self.fail("Paddle signal handler not disabled")


if __name__ == "__main__":
unittest.main()