Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix inconsistent calls to nvml::Init and nvml::Shutdown #5317

Merged
merged 8 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions dali/core/mm/malloc_resource.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,8 @@ cuda_malloc_async_memory_resource::cuda_malloc_async_memory_resource(int device_
dummy_host_stream_ = CUDAStreamPool::instance().Get(device_id_);
#if NVML_ENABLED
static const float driverVersion = []() {
nvml::Init();
auto nvml_handle = nvml::NvmlInstance::CreateNvmlInstance();
auto ret = nvml::GetDriverVersion();
nvml::Shutdown();
return ret;
}();
if (driverVersion < 470.60) {
Expand Down
9 changes: 5 additions & 4 deletions dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class nvJPEGDecoder : public StatelessOperator<MixedBackend>, CachedDecoderImpl
// disable HW decoder for drivers < 455.x as the memory pool for it is not available
// and multi GPU performance is far from perfect due to frequent memory allocations
#if NVML_ENABLED
nvml::Init();
nvml_handle_ = nvml::NvmlInstance::CreateNvmlInstance();
float driverVersion = nvml::GetDriverVersion();
if (driverVersion < 455) {
try_init_hw_decoder = false,
Expand Down Expand Up @@ -297,9 +297,6 @@ class nvJPEGDecoder : public StatelessOperator<MixedBackend>, CachedDecoderImpl
try {
DeviceGuard g(device_id_);

#if NVML_ENABLED
nvml::Shutdown();
#endif
if (hw_decode_stream_)
CUDA_CALL(cudaStreamSynchronize(hw_decode_stream_));

Expand Down Expand Up @@ -1186,6 +1183,10 @@ class nvJPEGDecoder : public StatelessOperator<MixedBackend>, CachedDecoderImpl
int64_t task_priority_seq_ = 0;
unsigned int num_hw_engines_ = 1;
unsigned int num_hw_cores_per_engine_ = 1;

#if NVML_ENABLED
nvml::NvmlInstance nvml_handle_;
#endif
};

} // namespace dali
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ TYPED_TEST(nvjpegDecodeDecoupledAPITest, TestSingleTiffDecode4T) {
#if NVJPEG_VER_MAJOR >= 11 && NVML_ENABLED
void PrintDeviceInfo() {
unsigned int device_count;
nvml::Init();
auto nvml_handle = nvml::NvmlInstance::CreateNvmlInstance();
CUDA_CALL(nvmlDeviceGetCount_v2(&device_count));
for (unsigned int device_idx = 0; device_idx < device_count; device_idx++) {
auto info = nvml::GetDeviceInfo(device_idx);
Expand All @@ -195,7 +195,6 @@ void PrintDeviceInfo() {
<< " cc_m " << info.cap_minor
<< std::endl;
}
nvml::Shutdown();
}

/**
Expand All @@ -204,9 +203,8 @@ void PrintDeviceInfo() {
/**
 * @brief Decides whether the HW JPEG decoder should be used on this machine.
 *
 * HW decoder is disabled for drivers < 455.x, see
 * dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h for details.
 *
 * @return true when the device supports the HW decoder and the driver is
 *         recent enough (>= 455).
 */
bool ShouldUseHwDecoder() {
  // RAII handle keeps NVML initialized for the duration of the queries below;
  // nvml::Shutdown() runs automatically when the handle goes out of scope.
  auto nvml_handle = nvml::NvmlInstance::CreateNvmlInstance();
  // Both values are queried once per process — the statics cache the result.
  static float driver_version = nvml::GetDriverVersion();
  static bool device_supports_hw_decoder = nvml::isHWDecoderSupported();
  return device_supports_hw_decoder && driver_version >= 455;
}
Expand Down
3 changes: 1 addition & 2 deletions dali/operators/reader/gds_mem_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,8 @@ void SkipIfIncompatible(TestBody &&body) {
// skip test for aarch64 and CUDA < 12.2
#if NVML_ENABLED
static const int driverVersion = []() {
nvml::Init();
auto nvml_handle = nvml::NvmlInstance::CreateNvmlInstance();
auto ret = nvml::GetCudaDriverVersion();
nvml::Shutdown();
return ret;
}();
#if defined(__aarch64__)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ void VideoSampleGpu::Decode() {
void VideoLoaderDecoderGpu::InitCudaStream() {
#if NVML_ENABLED
{
nvml::Init();
auto nvml_handle = nvml::NvmlInstance::CreateNvmlInstance();
static float driver_version = nvml::GetDriverVersion();
nvml::Shutdown();
if (driver_version > 460 && driver_version < 470.21) {
DALI_WARN_ONCE("Warning: Decoding on a default stream. Performance may be affected.");
return;
Expand Down
10 changes: 2 additions & 8 deletions dali/operators/reader/nvdecoder/nvdecoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@
#include "dali/core/error_handling.h"
#include "dali/operators/reader/nvdecoder/imgproc.h"
#include "dali/core/device_guard.h"
#include "dali/util/nvml.h"

namespace dali {

static constexpr int kNvcuvid_success = 1;
Expand Down Expand Up @@ -64,7 +62,7 @@ NvDecoder::NvDecoder(int device_id,
bool use_default_stream = false;
#if NVML_ENABLED
{
nvml::Init();
nvml_handle_ = nvml::NvmlInstance::CreateNvmlInstance();
static float driver_version = nvml::GetDriverVersion();
if (driver_version > 460 && driver_version < 470.21)
use_default_stream = true;
Expand Down Expand Up @@ -135,11 +133,7 @@ bool NvDecoder::initialized() const {
return parser_.initialized();
}

NvDecoder::~NvDecoder() {
#if NVML_ENABLED
nvml::Shutdown();
#endif
}
NvDecoder::~NvDecoder() {}

VidReqStatus NvDecoder::decode_av_packet(AVPacket* avpkt, int64_t start_time,
AVRational stream_base) {
Expand Down
7 changes: 7 additions & 0 deletions dali/operators/reader/nvdecoder/nvdecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ extern "C" {
#include "dali/operators/reader/nvdecoder/cuvideodecoder.h"
#include "dali/operators/reader/nvdecoder/dynlink_nvcuvid.h"
#include "dali/util/thread_safe_queue.h"
#if NVML_ENABLED
#include "dali/util/nvml.h"
#endif


struct AVPacket;
#if HAVE_AVSTREAM_CODECPAR
Expand Down Expand Up @@ -230,6 +234,9 @@ class NvDecoder {
std::exception_ptr captured_exception_;

std::thread thread_convert_;
#if NVML_ENABLED
nvml::NvmlInstance nvml_handle_;
#endif
};

} // namespace dali
Expand Down
3 changes: 1 addition & 2 deletions dali/operators/reader/video_reader_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,8 @@ TEST_F(VideoReaderTest, MultipleVideoResolution) {
float driverVersion = 0;

#if NVML_ENABLED
nvml::Init();
auto nvml_handle = nvml::NvmlInstance::CreateNvmlInstance();
driverVersion = nvml::GetDriverVersion();
nvml::Shutdown();
#endif


Expand Down
13 changes: 7 additions & 6 deletions dali/operators/sequence/optical_flow/optical_flow.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,12 @@ class OpticalFlow : public StatelessOperator<Backend> {
std::to_string(spec.NumInput()));
sync_ = CUDAEvent::Create(device_id_);
#if NVML_ENABLED
nvml::Init();
nvml_handle_ = nvml::NvmlInstance::CreateNvmlInstance();
#endif
}

~OpticalFlow() {
#if NVML_ENABLED
nvml::Shutdown();
#endif
}
~OpticalFlow() {}

DISABLE_COPY_MOVE_ASSIGN(OpticalFlow);

protected:
Expand Down Expand Up @@ -230,6 +227,10 @@ class OpticalFlow : public StatelessOperator<Backend> {
std::vector<int> sequence_sizes_;
std::vector<DimsOrder> processing_order_;
CUDAEvent sync_;

#if NVML_ENABLED
nvml::NvmlInstance nvml_handle_;
#endif
};

} // namespace dali
Expand Down
5 changes: 1 addition & 4 deletions dali/pipeline/util/thread_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ ThreadPool::ThreadPool(int num_thread, int device_id, bool set_affinity, const c
#if NVML_ENABLED
// only for the CPU pipeline
if (device_id != CPU_ONLY_DEVICE_ID) {
nvml::Init();
nvml_handle_ = nvml::NvmlInstance::CreateNvmlInstance();
}
#endif
// Start the threads in the main loop
Expand All @@ -54,9 +54,6 @@ ThreadPool::~ThreadPool() {
for (auto &thread : threads_) {
thread.join();
}
#if NVML_ENABLED
nvml::Shutdown();
#endif
}

void ThreadPool::AddWork(Work work, int64_t priority, bool start_immediately) {
Expand Down
6 changes: 6 additions & 0 deletions dali/pipeline/util/thread_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
#include <vector>
#include <string>
#include "dali/core/common.h"
#if NVML_ENABLED
#include "dali/util/nvml.h"
#endif


namespace dali {
Expand Down Expand Up @@ -93,6 +96,9 @@ class DLL_PUBLIC ThreadPool {

// Stored error strings for each thread
vector<std::queue<string>> tl_errors_;
#if NVML_ENABLED
nvml::NvmlInstance nvml_handle_;
#endif
};

} // namespace dali
Expand Down
8 changes: 4 additions & 4 deletions dali/pipeline/util/worker_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class WorkerThread {
running_(true), work_complete_(true), barrier_(2) {
#if NVML_ENABLED
if (device_id != CPU_ONLY_DEVICE_ID) {
nvml::Init();
nvml_handle_ = nvml::NvmlInstance::CreateNvmlInstance();
}
#endif
thread_ = std::thread(&WorkerThread::ThreadMain,
Expand All @@ -80,9 +80,6 @@ class WorkerThread {

inline ~WorkerThread() {
Shutdown();
#if NVML_ENABLED
nvml::Shutdown();
#endif
}

/*
Expand Down Expand Up @@ -236,6 +233,9 @@ class WorkerThread {
std::queue<string> errors_;

Barrier barrier_;
#if NVML_ENABLED
nvml::NvmlInstance nvml_handle_;
#endif
};

} // namespace dali
Expand Down
39 changes: 39 additions & 0 deletions dali/util/nvml.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,45 @@ inline void Shutdown() {
CUDA_CALL(nvmlShutdown());
}


/**
 * @brief RAII wrapper for the NVML library lifetime.
 *
 * An instance created via CreateNvmlInstance() calls Init() on construction
 * and guarantees a single matching Shutdown() call when the owning instance
 * is destroyed. The class is move-only, so ownership of the Init/Shutdown
 * pairing can be transferred but never duplicated.
 */
class NvmlInstance {
 public:
  /// Creates an instance that owns an initialized NVML session.
  static NvmlInstance CreateNvmlInstance() {
    return NvmlInstance(true);
  }

  /// @param init when true, initializes NVML and takes ownership of the session
  explicit NvmlInstance(bool init = false) {
    if (init) {
      Init();
      is_created_ = true;
    }
  }

  NvmlInstance(const NvmlInstance &) = delete;

  NvmlInstance &operator=(const NvmlInstance &) = delete;

  // Move transfers ownership of the session; the source is left empty.
  inline NvmlInstance(NvmlInstance &&other) noexcept
      : is_created_(std::exchange(other.is_created_, false)) {}

  // Releases our own session (if any), then takes over the other one.
  // NOTE(review): the previous implementation swapped the flags and then
  // explicitly invoked other.~NvmlInstance(), which ran the destructor twice
  // on `other` (once explicitly, once at end of its scope) and dropped the
  // session on self-move-assignment. Not marked noexcept because Shutdown()
  // may report errors through CUDA_CALL.
  inline NvmlInstance &operator=(NvmlInstance &&other) {
    if (this != &other) {
      if (is_created_) {
        Shutdown();
      }
      is_created_ = std::exchange(other.is_created_, false);
    }
    return *this;
  }

  ~NvmlInstance() {
    if (is_created_) {
      Shutdown();
      is_created_ = false;
    }
  }

 private:
  // True when this instance owns a successful Init() call that must be
  // balanced by Shutdown().
  bool is_created_ = false;
};

/**
* Checks, whether CUDA11-proper NVML functions have been successfully loaded
*/
Expand Down
1 change: 1 addition & 0 deletions qa/leak.sup
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ leak:dali::kernels::DynamicScratchpad::~DynamicScratchpad
leak:dali::kernels::DynamicScratchpad::AllocImpl
leak:dali::mm::GPUHog::init()
leak:dali::mm::detail::fixed_size_allocator
leak:nvmlInitWithFlags
# no idea how to suppress them other than as below but they are not caused by DALI
# still there is some danger that any of the below functions appear in a valid leak
leak:std::string::_Rep::_S_create
Expand Down
Loading