From c9945a9dc3ce8b347c3d101146696fea1bef283c Mon Sep 17 00:00:00 2001
From: Muhammed Fatih Balin
Date: Sat, 27 Jul 2024 21:44:53 -0400
Subject: [PATCH] stop using `PinMemory`.

---
 python/dgl/graphbolt/dataloader.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/python/dgl/graphbolt/dataloader.py b/python/dgl/graphbolt/dataloader.py
index 516f85070ae8..9aba0b2d30f8 100644
--- a/python/dgl/graphbolt/dataloader.py
+++ b/python/dgl/graphbolt/dataloader.py
@@ -224,10 +224,10 @@ def __init__(
                     ),
                 )
 
-        # (4) Cut datapipe at CopyTo and wrap with PinMemory before CopyTo. This
-        # enables enables non_blocking copies to the device. PinMemory already
-        # is a PrefetcherIterDataPipe so the data pipeline up to the CopyTo will
-        # run in a separate thread.
+        # (4) Cut datapipe at CopyTo and wrap with pinning and prefetching
+        # before CopyTo. This enables non_blocking copies to the device.
+        # Prefetching enables the data pipeline up to the CopyTo to run in a
+        # separate thread.
         if torch.cuda.is_available():
             copiers = dp_utils.find_dps(datapipe_graph, CopyTo)
             for copier in copiers:
@@ -235,12 +235,11 @@ def __init__(
                 datapipe_graph = dp_utils.replace_dp(
                     datapipe_graph,
                     copier,
-                    # Prefetcher is inside this datapipe already.
-                    dp.iter.PinMemory(
-                        copier.datapipe,
-                        pin_memory_fn=lambda x, _: x.pin_memory(),
-                    ).copy_to(copier.device, non_blocking=True),
-                    # After the data gets pinned, we copy non_blocking.
+                    copier.datapipe.transform(
+                        lambda x: x.pin_memory()
+                    ).prefetch(2)
+                    # After the data gets pinned, we can copy non_blocking.
+                    .copy_to(copier.device, non_blocking=True),
                 )
 
         # The stages after feature fetching is still done in the main process.
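
Note (reviewer sketch, not part of the patch): the replacement chain pins each batch with an ordinary transform, prefetches so the pinning stage runs ahead of the consumer in a separate thread, and then issues a non_blocking copy to the device. The stand-alone Python below illustrates the same pattern under stated assumptions: it uses synthetic tensor batches and torchdata's IterableWrapper/.map()/.prefetch() as approximations for graphbolt's .transform()/.prefetch()/.copy_to(); it is not the dgl.graphbolt pipeline itself.

# Hedged sketch of pin -> prefetch -> non_blocking copy with plain torchdata
# datapipes. Synthetic batches; .map() stands in for graphbolt stages.
import torch
from torchdata.datapipes.iter import IterableWrapper

def pin(batch):
    # Page-lock the host tensor so the later non_blocking copy can overlap
    # with GPU work instead of falling back to a synchronous pageable copy.
    return batch.pin_memory()

if torch.cuda.is_available():
    device = torch.device("cuda")
    batches = [torch.randn(4, 8) for _ in range(10)]
    datapipe = (
        IterableWrapper(batches)
        # Stands in for copier.datapipe.transform(lambda x: x.pin_memory()).
        .map(pin)
        # Runs the pinning stage ahead of the consumer in a separate thread,
        # keeping up to 2 pinned batches buffered.
        .prefetch(2)
        # Stands in for .copy_to(copier.device, non_blocking=True).
        .map(lambda b: b.to(device, non_blocking=True))
    )
    for batch in datapipe:
        assert batch.is_cuda  # a training step would consume the device batch here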