dmlc · mfbalin · Jul 16, 2024 · Jul 15, 2024 · Jul 16, 2024 · Jul 16, 2024
diff --git a/graphbolt/include/graphbolt/async.h b/graphbolt/include/graphbolt/async.h
@@ -0,0 +1,112 @@
+/**
+ *   Copyright (c) 2024, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
+ *   All rights reserved.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ *
+ * @file graphbolt/async.h
+ * @brief Provides asynchronous task utilities for GraphBolt.
+ */
+#ifndef GRAPHBOLT_ASYNC_H_
+#define GRAPHBOLT_ASYNC_H_
+
+#include <ATen/Parallel.h>
+#include <torch/script.h>
+
+#include <exception>
+#include <future>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+namespace graphbolt {
+
+/**
+ * @brief Handle to the result of an asynchronous task.
+ *
+ * Wraps a std::future<T> in a torch::CustomClassHolder subclass so that the
+ * result can be held through c10::intrusive_ptr and registered as a torch
+ * custom class.
+ *
+ * @tparam T Type of the value produced by the task; may be void.
+ */
+template <typename T>
+class Future : public torch::CustomClassHolder {
+ public:
+  /** @brief Takes over the shared state of `future`. */
+  Future(std::future<T>&& future) : future_(std::move(future)) {}
+
+  /** @brief Creates a Future without a shared state; Wait() on it throws. */
+  Future() = default;
+
+  /**
+   * @brief Blocks until the task has finished and returns its result.
+   *
+   * The result can be retrieved only once: std::future::get() releases the
+   * shared state, so later calls find the future invalid.
+   *
+   * @return The value produced by the task (nothing when T is void).
+   * @throws std::future_error (std::future_errc::no_state) if the Future was
+   * default constructed or Wait() was already called. An exception stored in
+   * the shared state is rethrown as is.
+   */
+  T Wait() {
+    // std::future::get() has undefined behavior on an invalid future, so
+    // fail deterministically instead.
+    if (!future_.valid()) {
+      throw std::future_error(std::future_errc::no_state);
+    }
+    return future_.get();
+  }
+
+ private:
+  std::future<T> future_;
+};
+
+/**
+ * @brief Runs `function` asynchronously through at::launch.
+ *
+ * @tparam F Type of a callable that takes no arguments.
+ * @param function The task to run; it is moved into the launched closure.
+ *
+ * @return c10::intrusive_ptr<Future<T>> with T = decltype(function()). Its
+ * Wait() returns the value produced by `function`, or rethrows the exception
+ * that `function` threw.
+ */
+template <typename F>
+auto async(F function) {
+  using T = decltype(function());
+  // std::promise is move-only; sharing it through a shared_ptr keeps the
+  // closure below copyable.
+  auto promise = std::make_shared<std::promise<T>>();
+  auto future = promise->get_future();
+  at::launch([promise, function = std::move(function)]() {
+    try {
+      if constexpr (std::is_void_v<T>) {
+        function();
+        promise->set_value();
+      } else {
+        promise->set_value(function());
+      }
+    } catch (...) {
+      // Forward the failure to the waiter. Without this the exception never
+      // reaches the promise and Wait() cannot report the real error.
+      promise->set_exception(std::current_exception());
+    }
+  });
+  return c10::make_intrusive<Future<T>>(std::move(future));
+}
+
+}  // namespace graphbolt
+
+#endif  // GRAPHBOLT_ASYNC_H_
diff --git a/graphbolt/src/cnumpy.cc b/graphbolt/src/cnumpy.cc
@@ -125,8 +125,15 @@ c10::intrusive_ptr<Future<torch::Tensor>> OnDiskNpyArray::IndexSelect(
 }
 
 #ifdef HAVE_LIBRARY_LIBURING
-void OnDiskNpyArray::IndexSelectIOUringImpl(
-    torch::Tensor index, torch::Tensor result) {
+torch::Tensor OnDiskNpyArray::IndexSelectIOUringImpl(torch::Tensor index) {
+  std::vector<int64_t> shape(index.sizes().begin(), index.sizes().end());
+  shape.insert(shape.end(), feature_dim_.begin() + 1, feature_dim_.end());
+  auto result = torch::empty(
+      shape, index.options()
+                 .dtype(dtype_)
+                 .layout(torch::kStrided)
+                 .pinned_memory(index.is_pinned())
+                 .requires_grad(false));
   auto result_buffer = reinterpret_cast<char *>(result.data_ptr());
 
   // Indicator for index error.
@@ -209,24 +216,12 @@ void OnDiskNpyArray::IndexSelectIOUringImpl(
     throw std::runtime_error("IndexError: Index out of range.");
   }
 
-  // return result; the input result parameter is the return value of this func.
+  return result;
 }
 
 c10::intrusive_ptr<Future<torch::Tensor>> OnDiskNpyArray::IndexSelectIOUring(
     torch::Tensor index) {
-  std::vector<int64_t> shape;
-  shape.push_back(index.numel());
-  shape.insert(shape.end(), feature_dim_.begin() + 1, feature_dim_.end());
-  auto result = torch::empty(
-      shape, index.options()
-                 .dtype(dtype_)
-                 .layout(torch::kStrided)
-                 .requires_grad(false));
-
-  auto future = at::intraop_launch_future(
-      [=]() { IndexSelectIOUringImpl(index, result); });
-
-  return c10::make_intrusive<Future<torch::Tensor>>(future, result);
+  return async([=] { return IndexSelectIOUringImpl(index); });
 }
 
 #endif  // HAVE_LIBRARY_LIBURING

diff --git a/graphbolt/src/cnumpy.h b/graphbolt/src/cnumpy.h
@@ -6,6 +6,7 @@
  */
 
 #include <fcntl.h>
+#include <graphbolt/async.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <torch/script.h>
@@ -26,24 +27,6 @@
 namespace graphbolt {
 namespace storage {
 
-template <typename T>
-class Future : public torch::CustomClassHolder {
- public:
-  Future(c10::intrusive_ptr<c10::ivalue::Future> future, T value)
-      : future_(future), value_(value) {}
-
-  Future() = default;
-
-  T Wait() {
-    future_->waitAndThrow();
-    return value_;
-  }
-
- private:
-  c10::intrusive_ptr<c10::ivalue::Future> future_;
-  T value_;
-};
-
 /**
  * @brief Disk Numpy Fetecher class.
  */
@@ -100,7 +83,7 @@ class OnDiskNpyArray : public torch::CustomClassHolder {
   c10::intrusive_ptr<Future<torch::Tensor>> IndexSelectIOUring(
       torch::Tensor index);
 
-  void IndexSelectIOUringImpl(torch::Tensor index, torch::Tensor result);
+  torch::Tensor IndexSelectIOUringImpl(torch::Tensor index);
 
 #endif  // HAVE_LIBRARY_LIBURING
  private:

diff --git a/graphbolt/src/python_binding.cc b/graphbolt/src/python_binding.cc
@@ -42,8 +42,11 @@ TORCH_LIBRARY(graphbolt, m) {
           "original_edge_ids", &FusedSampledSubgraph::original_edge_ids)
       .def_readwrite("type_per_edge", &FusedSampledSubgraph::type_per_edge)
       .def_readwrite("etype_offsets", &FusedSampledSubgraph::etype_offsets);
-  m.class_<storage::Future<torch::Tensor>>("TensorFuture")
-      .def("wait", &storage::Future<torch::Tensor>::Wait);
+  m.class_<Future<void>>("VoidFuture").def("wait", &Future<void>::Wait);
+  m.class_<Future<torch::Tensor>>("TensorFuture")
+      .def("wait", &Future<torch::Tensor>::Wait);
+  m.class_<Future<std::vector<torch::Tensor>>>("TensorListFuture")
+      .def("wait", &Future<std::vector<torch::Tensor>>::Wait);
   m.class_<storage::OnDiskNpyArray>("OnDiskNpyArray")
       .def("index_select", &storage::OnDiskNpyArray::IndexSelect);
   m.class_<FusedCSCSamplingGraph>("FusedCSCSamplingGraph")