Skip to content

Commit

Permalink
add flag to check_kernel launch (PaddlePaddle#32692) (PaddlePaddle#32709)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeff41404 committed Apr 30, 2021
1 parent 097d5f5 commit 09adf20
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
13 changes: 9 additions & 4 deletions paddle/fluid/framework/op_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ limitations under the License. */
#include <unordered_set>

#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "gflags/gflags.h"
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/grad_op_desc_maker.h"
Expand Down Expand Up @@ -67,6 +68,8 @@ class Version;
} // namespace framework
} // namespace paddle

// Makes FLAGS_check_kernel_launch visible in this header; it is read by the
// CUDAPlace specialization of CheckKernelLaunch further down.
// NOTE(review): this declaration is unconditional, while the matching
// DEFINE_bool in paddle/fluid/platform/flags.cc is guarded by
// PADDLE_WITH_CUDA || PADDLE_WITH_HIP — keep every use of the flag behind
// one of those guards.
DECLARE_bool(check_kernel_launch);

namespace paddle {
namespace framework {

Expand Down Expand Up @@ -135,14 +138,16 @@ class OpRegistry {
};

// Generic CheckKernelLaunch: the body is empty, so calling it for a PlaceType
// that has no dedicated specialization does nothing and op_type is unused.
// NOTE(review): the two signature lines below appear to be the removed and
// added sides of the same diff line (the first carries a stray `;` after the
// body); only the second is expected in the real header — confirm against
// the repository.
template <typename PlaceType>
inline void CheckKernelLaunch(const char* op_type){};
inline void CheckKernelLaunch(const char* op_type) {}

#ifdef PADDLE_WITH_CUDA
// CUDAPlace specialization of CheckKernelLaunch; compiled only when
// PADDLE_WITH_CUDA is defined.
//
// op_type is forwarded unchanged to PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS, a
// macro defined elsewhere (its exact behavior is not visible here).
//
// NOTE(review): this span interleaves both sides of the diff. The first
// macro call and the `};` after it appear to be the removed lines
// (unconditional check); the `if (FLAGS_check_kernel_launch)` block and the
// final `}` appear to be the added lines, which run the check only when the
// flag is true.
// NOTE(review): the flag is defined for CUDA or HIP builds, but this
// specialization is guarded by PADDLE_WITH_CUDA only — confirm whether HIP
// builds are meant to get the check as well.
template <>
inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>(
const char* op_type) {
PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type);
};
// Check is opt-in: skipped unless FLAGS_check_kernel_launch is set.
if (FLAGS_check_kernel_launch) {
PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type);
}
}
#endif

template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
Expand Down
13 changes: 13 additions & 0 deletions paddle/fluid/platform/flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "",
DEFINE_string(tracer_mkldnn_ops_off, "",
"List of OneDNN operation types to be turned off");

/**
* Debug related FLAG
* Name: check_kernel_launch
* Since Version: 2.1.0
* Value Range: bool, default=false
* Example: FLAGS_check_kernel_launch=true turns the check on; with the
*          default (false) the check is skipped.
* Note: Check kernel launch status after every kernel compute.
*       The flag is only defined when PADDLE_WITH_CUDA or PADDLE_WITH_HIP
*       is set, so code that reads it must be compiled under the same
*       guards.
*/
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DEFINE_bool(check_kernel_launch, false,
"Check kernel launch status after every kernel compute");
#endif

/**
* CUDNN related FLAG
* Name: conv2d_disable_cudnn
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/pybind/global_value_getter_setter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level);
DECLARE_bool(sort_sum_gradient);
DECLARE_bool(check_kernel_launch);
// device management
DECLARE_int32(paddle_num_threads);
// executor
Expand Down Expand Up @@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() {
FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
FLAGS_conv2d_disable_cudnn);
FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
#endif
#ifdef PADDLE_WITH_XPU
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
Expand Down

0 comments on commit 09adf20

Please sign in to comment.