General performance changes alongside WebDataset (WDS) performance work (NVIDIA#3363)

* moved requested changes from webdataset_perf_1

Signed-off-by: Bartłomiej Cieślar <bcieslar2001@gmail.com>

* bug fixes

Signed-off-by: Bartłomiej Cieślar <bcieslar2001@gmail.com>
cyyever · Jan 23, 2022 · eded5df · eded5df
1 parent 057d74f
commit eded5df
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 19 deletions.
diff --git a/dali/pipeline/data/buffer.h b/dali/pipeline/data/buffer.h
@@ -175,13 +175,6 @@ class DLL_PUBLIC Buffer {
     return num_bytes_;
   }
 
-  /**
-   * @brief Returns the padding value of allocations caused by Resize() call
-   */
-  static inline size_t padding() {
-    return kPadding;
-  }
-
   /**
    * @brief Returns the id of the datatype of the underlying storage.
    */
@@ -394,13 +387,12 @@ class DLL_PUBLIC Buffer {
 
     if (new_num_bytes > num_bytes_) {
       size_t grow = num_bytes_ * growth_factor_;
-      grow = (grow + kPadding) & ~(kPadding - 1);
       if (grow > new_num_bytes) new_num_bytes = grow;
       reserve(new_num_bytes);
-    } else if (!is_pinned() && align_up(new_num_bytes, kPadding) < num_bytes_ * shrink_threshold_) {
+    } else if (!is_pinned() && new_num_bytes < num_bytes_ * shrink_threshold_) {
       data_.reset();
       num_bytes_ = 0;
-      reserve(align_up(new_num_bytes, kPadding));
+      reserve(new_num_bytes);
     }
   }
 
@@ -419,8 +411,6 @@ class DLL_PUBLIC Buffer {
 
   static double growth_factor_;
   static double shrink_threshold_;
-  // round to 1kB
-  static constexpr size_t kPadding = 1024;
 
   TypeInfo type_ = {};               // Data type of underlying storage
   shared_ptr<void> data_ = nullptr;  // Pointer to underlying storage
@@ -433,11 +423,11 @@ class DLL_PUBLIC Buffer {
 };
 
 template <typename Backend>
-DLL_PUBLIC double Buffer<Backend>::growth_factor_ = 1.0;
+DLL_PUBLIC double Buffer<Backend>::growth_factor_ = 1.1;
 
 template <typename Backend>
 DLL_PUBLIC double Buffer<Backend>::shrink_threshold_ =
-  std::is_same<Backend, CPUBackend>::value ? 0.9 : 0;
+  std::is_same<Backend, CPUBackend>::value ? 0.5 : 0;
 
 template <typename Backend>
 DLL_PUBLIC constexpr double Buffer<Backend>::kMaxGrowthFactor;

diff --git a/dali/pipeline/executor/executor.cc b/dali/pipeline/executor/executor.cc
@@ -318,11 +318,9 @@ void Executor<WorkspacePolicy, QueuePolicy>::RunHelper(OpNode &op_node, Workspac
     for (int i = 0; i < ws.NumOutput(); i++) {
       auto &desc = output_desc[i];
       if (ws.template OutputIsType<CPUBackend>(i)) {
-        ws.template OutputRef<CPUBackend>(i).Resize(desc.shape);
-        ws.template OutputRef<CPUBackend>(i).set_type(desc.type);
+        ws.template OutputRef<CPUBackend>(i).Resize(desc.shape, desc.type);
       } else {
-        ws.template OutputRef<GPUBackend>(i).Resize(desc.shape);
-        ws.template OutputRef<GPUBackend>(i).set_type(desc.type);
+        ws.template OutputRef<GPUBackend>(i).Resize(desc.shape, desc.type);
       }
     }
   } else {

diff --git a/dali/pipeline/pipeline_test.cc b/dali/pipeline/pipeline_test.cc
@@ -474,7 +474,7 @@ TEST_F(PipelineTestOnce, TestPresize) {
   CUDA_CALL(cudaMemcpy(&tmp, ws.Output<GPUBackend>(2).tensor<size_t>(0),
             sizeof(size_t) * 2, cudaMemcpyDefault));
   ASSERT_EQ(tmp[0], presize_val_Mixed);
-  ASSERT_EQ(tmp[1], std::max(Buffer<CPUBackend>::padding(), 2 * sizeof(size_t)));
+  ASSERT_EQ(tmp[1], 2 * sizeof(size_t));
 
   CUDA_CALL(cudaMemcpy(&tmp, ws.Output<GPUBackend>(3).tensor<size_t>(0),
             sizeof(size_t) * 2, cudaMemcpyDefault));