Skip to content

Commit

Permalink
[GraphBolt][CUDA] Use better memory allocation algorithm to avoid OOM. (
Browse files Browse the repository at this point in the history
  • Loading branch information
mfbalin committed Aug 2, 2024
1 parent f724ec0 commit 56a1e64
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
37 changes: 36 additions & 1 deletion python/dgl/graphbolt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,42 @@
import os
import sys

from .internal_utils import *

# Shown once whenever we modify `PYTORCH_CUDA_ALLOC_CONF` on the user's
# behalf, so they know the feature is on and how to opt out.
CUDA_ALLOCATOR_ENV_WARNING_STR = """
An experimental feature for CUDA allocations is turned on for better allocation
pattern resulting in better memory usage for minibatch GNN training workloads.
See https://pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf,
and set the environment variable `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False`
if you want to disable it.
"""
cuda_allocator_env = os.getenv("PYTORCH_CUDA_ALLOC_CONF")
if cuda_allocator_env is None:
    # No user-provided allocator config: enable expandable segments outright.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
    gb_warning(CUDA_ALLOCATOR_ENV_WARNING_STR)
else:
    # Parse the comma-separated `key:value` list. Split on the *first* ":"
    # only, so a value that itself contains a ":" is preserved intact, and
    # skip malformed entries without a ":" (e.g. from a trailing comma)
    # instead of crashing the import with an IndexError.
    configs = {}
    for kv_pair in cuda_allocator_env.split(","):
        key, sep, value = kv_pair.partition(":")
        if sep:
            configs[key] = value
    if "expandable_segments" in configs:
        if configs["expandable_segments"] != "True":
            # The user explicitly configured it; respect their choice but
            # point them at the documentation for the recommended setting.
            gb_warning(
                "You should consider `expandable_segments:True` in the"
                " environment variable `PYTORCH_CUDA_ALLOC_CONF` for lower"
                " memory usage. See "
                "https://pytorch.org/docs/stable/notes/cuda.html"
                "#optimizing-memory-usage-with-pytorch-cuda-alloc-conf"
            )
    else:
        # Append our setting while preserving all existing user options.
        configs["expandable_segments"] = "True"
        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = ",".join(
            f"{k}:{v}" for k, v in configs.items()
        )
        gb_warning(CUDA_ALLOCATOR_ENV_WARNING_STR)


# pylint: disable=wrong-import-position, wrong-import-order
import torch

### FROM DGL @todo
Expand Down Expand Up @@ -47,7 +83,6 @@ def load_graphbolt():
from .itemset import *
from .item_sampler import *
from .minibatch_transformer import *
from .internal_utils import *
from .negative_sampler import *
from .sampled_subgraph import *
from .subgraph_sampler import *
Expand Down
8 changes: 8 additions & 0 deletions tests/python/pytorch/graphbolt/test_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import re
import unittest
from collections.abc import Iterable, Mapping
Expand All @@ -12,6 +13,13 @@
from . import gb_test_utils


def test_pytorch_cuda_allocator_conf():
env = os.getenv("PYTORCH_CUDA_ALLOC_CONF")
assert env is not None
config_list = env.split(",")
assert "expandable_segments:True" in config_list


@unittest.skipIf(F._default_context_str != "gpu", "CopyTo needs GPU to test")
@pytest.mark.parametrize("non_blocking", [False, True])
def test_CopyTo(non_blocking):
Expand Down

0 comments on commit 56a1e64

Please sign in to comment.