Remove dependency on torchdata. #7638

Merged · 10 commits · Aug 6, 2024
Changes from 9 commits
28 changes: 16 additions & 12 deletions python/dgl/graphbolt/dataloader.py
@@ -4,14 +4,18 @@

 import torch
 import torch.utils.data as torch_data
-import torchdata.dataloader2.graph as dp_utils

 from .base import CopyTo, get_host_to_device_uva_stream
 from .feature_fetcher import FeatureFetcher, FeatureFetcherStartMarker
 from .impl.gpu_graph_cache import GPUGraphCache
 from .impl.neighbor_sampler import SamplePerLayer

-from .internal import datapipe_graph_to_adjlist
+from .internal import (
+    datapipe_graph_to_adjlist,
+    find_dps,
+    replace_dp,
+    traverse_dps,
+)
 from .item_sampler import ItemSampler

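For context on the hunk above: the three graph utilities previously imported from `torchdata.dataloader2.graph` now come from graphbolt's own `.internal` module. A minimal sketch of what such datapipe-graph helpers can look like is below. It is illustrative only, not the vendored implementation: it assumes torchdata's graph shape `{id(dp): (dp, graph_of_parents)}` and that parent pipes are stored directly as instance attributes (which misses parents kept in containers, e.g. a Zipper's tuple of sources).

```python
from typing import Dict, List, Tuple, Type

from torch.utils.data import IterDataPipe

# torchdata-style graph shape: {id(dp): (dp, graph_of_its_parents)}.
DataPipeGraph = Dict[int, Tuple[IterDataPipe, "DataPipeGraph"]]


def traverse_dps(datapipe: IterDataPipe) -> DataPipeGraph:
    """Build the graph of ``datapipe`` and all of its ancestors."""
    parents: DataPipeGraph = {}
    for value in vars(datapipe).values():
        if isinstance(value, IterDataPipe):  # parent stored as an attribute
            parents.update(traverse_dps(value))
    return {id(datapipe): (datapipe, parents)}


def find_dps(graph: DataPipeGraph, target: Type) -> List[IterDataPipe]:
    """Collect every datapipe in ``graph`` that is exactly of type ``target``."""
    found: List[IterDataPipe] = []
    for pipe, parents in graph.values():
        if type(pipe) is target:
            found.append(pipe)
        found += find_dps(parents, target)  # may revisit shared parents
    return found


def replace_dp(
    graph: DataPipeGraph, old: IterDataPipe, new: IterDataPipe
) -> DataPipeGraph:
    """Point every consumer of ``old`` at ``new`` and return the new graph.

    Simplified: assumes ``old`` is not itself a sink of ``graph``.
    """
    for pipe, parents in graph.values():
        for name, value in vars(pipe).items():
            if value is old:
                setattr(pipe, name, new)  # rewire the consumer in place
        replace_dp(parents, old, new)
    # Re-traverse from the sinks so the returned graph reflects the rewiring.
    return {
        dp_id: entry
        for sink, _ in graph.values()
        for dp_id, entry in traverse_dps(sink).items()
    }
```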
@@ -47,7 +51,7 @@ def construct_gpu_graph_cache(

 def _find_and_wrap_parent(datapipe_graph, target_datapipe, wrapper, **kwargs):
     """Find parent of target_datapipe and wrap it with ``wrapper``."""
-    datapipes = dp_utils.find_dps(
+    datapipes = find_dps(
         datapipe_graph,
         target_datapipe,
     )
@@ -56,7 +60,7 @@ def _find_and_wrap_parent(datapipe_graph, target_datapipe, wrapper, **kwargs):
         datapipe_id = id(datapipe)
         for parent_datapipe_id in datapipe_adjlist[datapipe_id][1]:
             parent_datapipe, _ = datapipe_adjlist[parent_datapipe_id]
-            datapipe_graph = dp_utils.replace_dp(
+            datapipe_graph = replace_dp(
                 datapipe_graph,
                 parent_datapipe,
                 wrapper(parent_datapipe, **kwargs),
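`_find_and_wrap_parent` combines these helpers: locate the targets, look up each target's parents in the adjacency list, and splice a wrapper between them. A toy run with stock `torch.utils.data` datapipes shows the splice, assuming the `.internal` helpers behave like their torchdata counterparts (the names `source`/`target`/`wrapper` here are illustrative):

```python
from dgl.graphbolt.internal import find_dps, replace_dp, traverse_dps
from torch.utils.data.datapipes.iter import IterableWrapper

source = IterableWrapper(range(4))      # the parent datapipe
target = source.map(lambda x: x * 10)   # the "target" datapipe
graph = traverse_dps(target)

parent = find_dps(graph, IterableWrapper)[0]
wrapper = parent.map(lambda x: x + 1)   # stand-in for a real wrapper
graph = replace_dp(graph, parent, wrapper)

print(list(target))  # [10, 20, 30, 40]: the +1 now runs before the *10
```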
@@ -157,18 +161,18 @@ def __init__(
         # of the FeatureFetcher with a multiprocessing PyTorch DataLoader.

         datapipe = datapipe.mark_end()
-        datapipe_graph = dp_utils.traverse_dps(datapipe)
+        datapipe_graph = traverse_dps(datapipe)

         # (1) Insert minibatch distribution.
         # TODO(BarclayII): Currently I'm using sharding_filter() as a
         # concept demonstration. Later on minibatch distribution should be
         # merged into ItemSampler to maximize efficiency.
-        item_samplers = dp_utils.find_dps(
+        item_samplers = find_dps(
             datapipe_graph,
             ItemSampler,
         )
         for item_sampler in item_samplers:
-            datapipe_graph = dp_utils.replace_dp(
+            datapipe_graph = replace_dp(
                 datapipe_graph,
                 item_sampler,
                 item_sampler.sharding_filter(),
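The `sharding_filter()` spliced in above is the stock `torch.utils.data` sharding point: when a multiprocessing DataLoader drives the pipeline, each worker keeps only its own slice of the stream instead of replaying a full copy. A minimal standalone illustration (plain datapipes, nothing DGL-specific):

```python
from torch.utils.data import DataLoader
from torch.utils.data.datapipes.iter import IterableWrapper

if __name__ == "__main__":  # guard required for multiprocessing workers
    pipe = IterableWrapper(range(8)).sharding_filter()
    loader = DataLoader(pipe, num_workers=2, batch_size=None)
    # Each element appears exactly once; without sharding_filter every
    # worker would replay the full stream and each element would show twice.
    print(sorted(loader))  # [0, 1, 2, 3, 4, 5, 6, 7]
```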
@@ -186,7 +190,7 @@ def __init__(
         # (3) Limit the number of UVA threads used if the feature_fetcher has
         # overlapping optimization enabled.
         if num_workers == 0 and torch.cuda.is_available():
-            feature_fetchers = dp_utils.find_dps(
+            feature_fetchers = find_dps(
                 datapipe_graph,
                 FeatureFetcher,
             )
@@ -200,7 +204,7 @@ def __init__(
                 and torch.cuda.is_available()
             ):
                 torch.ops.graphbolt.set_max_uva_threads(max_uva_threads)
-        samplers = dp_utils.find_dps(
+        samplers = find_dps(
             datapipe_graph,
             SamplePerLayer,
         )
@@ -210,7 +214,7 @@ def __init__(
                 gpu_graph_cache = construct_gpu_graph_cache(
                     sampler, num_gpu_cached_edges, gpu_cache_threshold
                 )
-            datapipe_graph = dp_utils.replace_dp(
+            datapipe_graph = replace_dp(
                 datapipe_graph,
                 sampler,
                 sampler.fetch_and_sample(
@@ -225,10 +229,10 @@ def __init__(
         # Prefetching enables the data pipeline up to the CopyTo to run in a
         # separate thread.
         if torch.cuda.is_available():
-            copiers = dp_utils.find_dps(datapipe_graph, CopyTo)
+            copiers = find_dps(datapipe_graph, CopyTo)
             for copier in copiers:
                 if copier.device.type == "cuda":
-                    datapipe_graph = dp_utils.replace_dp(
+                    datapipe_graph = replace_dp(
                         datapipe_graph,
                         copier,
                         # Add prefetch so that CPU and GPU can run concurrently.
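The prefetch inserted before a CUDA `CopyTo` is what lets the CPU side of the pipeline run ahead while the GPU consumes. Conceptually it amounts to the following simplified, thread-based sketch (not graphbolt's implementation, and without error propagation):

```python
import queue
import threading
from typing import Iterable, Iterator


def prefetch(source: Iterable, buffer_size: int = 2) -> Iterator:
    """Run ``source`` in a background thread, keeping up to
    ``buffer_size`` items ready so producer and consumer overlap."""
    buf: queue.Queue = queue.Queue(maxsize=buffer_size)
    end = object()  # sentinel marking exhaustion

    def producer() -> None:
        for item in source:
            buf.put(item)  # blocks while the buffer is full
        buf.put(end)

    threading.Thread(target=producer, daemon=True).start()
    while (item := buf.get()) is not end:
        yield item
```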