Fix bugs in C API and refactor tests (NVIDIA#3350)
Fix `daliDeserializeDefault`: the batch size map was never
allocated, so when the pipeline handle was correctly cleaned
up, it tried to deallocate memory through an invalid pointer.
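
For illustration, this is roughly the sequence that used to
crash; a minimal sketch against the public C API, assuming
`serialized` holds a pipeline serialized elsewhere (it is not
a verbatim test from the repo):

    #include "dali/c_api.h"

    void Repro(const char *serialized, int length) {
      daliPipelineHandle handle;
      daliDeserializeDefault(&handle, serialized, length);
      // Before the fix, daliDeserializeDefault never allocated
      // handle.batch_size_map, so the `delete` of that pointer inside
      // daliDeletePipeline operated on garbage and could crash.
      daliDeletePipeline(&handle);
    }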

Add the missing `daliDeletePipeline` call to all C API tests,
so the tests no longer leak the created pipelines.
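
The reworked tests pair every creation with a deletion; a
hypothetical test shape (the literal pipeline parameters and
`serialized` are placeholders, not taken from the repo):

    TEST(CApiTest, CreateAndDestroy) {
      daliPipelineHandle handle;
      // Arguments follow daliCreatePipeline's signature in this version:
      // serialized pipeline and its length, max batch size, num threads,
      // device id, separated execution off, prefetch depths, stats off.
      daliCreatePipeline(&handle, serialized.c_str(), serialized.size(),
                         8, 4, 0, 0, 2, 2, 2, 0);
      daliDeletePipeline(&handle);  // the call that was missing before
    }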

Some tests (daliOutputCopySamples) used to fill the prefetch
queue and schedule additional runs while accessing only one
output iteration. The never-freed pipeline could still be
actively using that memory when the memory resources were
cleared on shutdown, and the whole test process could crash.
Remove the additional unused iteration in such cases.
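
Concretely, the problematic tests looked roughly like this
(a simplified sketch; `handle` and the preallocated `dsts`
buffers are assumed to be set up earlier):

    daliPrefetchUniform(&handle, 2);  // schedule 2 iterations up front
    daliShareOutput(&handle);         // take iteration 1
    daliOutputCopySamples(&handle, dsts, 0, CPU, 0, 0);  // copy its samples
    daliOutputRelease(&handle);
    daliRun(&handle);  // schedule one more iteration that is never consumed
    // Without daliDeletePipeline, the extra iteration may still be running
    // while global memory resources are torn down at process exit.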

Rework the tests so they no longer access the underlying
contiguous buffer of a TensorList.
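
The direction of that rework, sketched; the per-sample
accessor names are assumptions about this DALI version's
TensorList interface, not verbatim test code:

    // Before: tests cast the TensorList data to one contiguous allocation:
    //   const auto *base = static_cast<const uint8_t *>(tl.raw_data());
    // After: inspect each sample through its own pointer.
    for (int i = 0; i < tl.shape().num_samples(); i++) {
      const void *sample = tl.raw_tensor(i);
      // ... validate sample i without assuming contiguity ...
    }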

Signed-off-by: Krzysztof Lecki <klecki@nvidia.com>
klecki authored and cyyever committed Jan 23, 2022
1 parent 6d26f82 commit 48132ec
Showing 3 changed files with 126 additions and 79 deletions.
dali/c_api/c_api.cc: 20 changes (11 additions & 9 deletions)
@@ -41,7 +41,7 @@ bool dali_initialized = false;
  * Typically, this operator will be BatchSizeProvider.
  * Negative values denote max batch size (default state).
  * Typical usage:
- *   auto batch_sizes_map = reinterpret_cast<batch_size_map_t*>(handle->batch_sizes_map);
+ *   auto *batch_size_map = reinterpret_cast<batch_size_map_t *>(handle->batch_size_map);
  */
 using batch_size_map_t = std::unordered_map<std::string /* op_name */, int /* batch_size */>;

@@ -80,7 +80,7 @@ void SetExternalInput(daliPipelineHandle *pipe_handle, const char *name, const v
                       dali_data_type_t data_type, const int64_t *shapes, int sample_dim,
                       const char *layout_str, cudaStream_t stream = 0, unsigned int flags = 0) {
   dali::Pipeline *pipeline = reinterpret_cast<dali::Pipeline *>(pipe_handle->pipe);
-  auto bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_sizes_map);
+  auto *bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_size_map);
   auto curr_batch_size = PopCurrBatchSize(bs_map, pipeline->max_batch_size(), name);
   std::vector<int64_t> shapes_tmp(shapes, shapes + sample_dim * curr_batch_size);
   dali::TensorListShape<> tl_shape(std::move(shapes_tmp), curr_batch_size, sample_dim);
@@ -111,7 +111,7 @@ void SetExternalInputTensors(daliPipelineHandle *pipe_handle, const char *name,
                              const int64_t *shapes, int64_t sample_dim, const char *layout_str,
                              cudaStream_t stream = 0, unsigned int flags = 0) {
   dali::Pipeline *pipeline = reinterpret_cast<dali::Pipeline *>(pipe_handle->pipe);
-  auto bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_sizes_map);
+  auto *bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_size_map);
   auto curr_batch_size = PopCurrBatchSize(bs_map, pipeline->max_batch_size(), name);
   std::vector<int64_t> shapes_tmp(shapes, shapes + sample_dim * curr_batch_size);
   dali::TensorListShape<> tl_shape(std::move(shapes_tmp), curr_batch_size, sample_dim);
@@ -177,12 +177,12 @@ void daliCreatePipeline(daliPipelineHandle *pipe_handle, const char *serialized_
   if (pipeline->device_id() >= 0) {
     stream = dali::CUDAStream::Create(true);
   }
+  auto bs_map = std::make_unique<batch_size_map_t>();
 
   pipe_handle->ws = ws.release();
   pipe_handle->copy_stream = stream.release();
   pipe_handle->pipe = pipeline.release();
 
-  auto bs_map = std::make_unique<batch_size_map_t>();
-  pipe_handle->batch_sizes_map = bs_map.release();
+  pipe_handle->batch_size_map = bs_map.release();
 }


@@ -195,9 +195,11 @@ void daliDeserializeDefault(daliPipelineHandle *pipe_handle, const char *seriali
     stream = dali::CUDAStream::Create(true);
   }
   auto ws = std::make_unique<dali::DeviceWorkspace>();
+  auto bs_map = std::make_unique<batch_size_map_t>();
   pipe_handle->ws = ws.release();
   pipe_handle->copy_stream = stream.release();
   pipe_handle->pipe = pipeline.release();
+  pipe_handle->batch_size_map = bs_map.release();
 }


@@ -225,7 +227,7 @@ void daliPrefetchSeparate(daliPipelineHandle *pipe_handle,
 
 void daliSetExternalInputBatchSize(daliPipelineHandle *pipe_handle, const char *name,
                                    int batch_size) {
-  auto *bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_sizes_map);
+  auto *bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_size_map);
   (*bs_map)[name] = batch_size;
 }

@@ -541,7 +543,7 @@ void daliCopyTensorListNTo(daliPipelineHandle *pipe_handle, void *dst, int outpu
 void daliDeletePipeline(daliPipelineHandle* pipe_handle) {
   dali::Pipeline *pipeline = reinterpret_cast<dali::Pipeline *>(pipe_handle->pipe);
   dali::DeviceWorkspace *ws = reinterpret_cast<dali::DeviceWorkspace *>(pipe_handle->ws);
-  auto *bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_sizes_map);
+  auto *bs_map = reinterpret_cast<batch_size_map_t *>(pipe_handle->batch_size_map);
   DALI_ENFORCE(pipeline != nullptr && ws != nullptr, "Pipeline already deleted");
   if (pipe_handle->copy_stream) {
     CUDA_CALL(cudaStreamDestroy(pipe_handle->copy_stream));
@@ -552,7 +554,7 @@ void daliDeletePipeline(daliPipelineHandle* pipe_handle) {
   delete bs_map;
   pipe_handle->ws = nullptr;
   pipe_handle->pipe = nullptr;
-  pipe_handle->batch_sizes_map = nullptr;
+  pipe_handle->batch_size_map = nullptr;
 }
 
 void daliLoadLibrary(const char* lib_path) {
