diff --git a/tests/python/pytorch/graphbolt/impl/test_gpu_cached_feature.py b/tests/python/pytorch/graphbolt/impl/test_gpu_cached_feature.py
index 2a2c82fc7101..f00f27373196 100644
--- a/tests/python/pytorch/graphbolt/impl/test_gpu_cached_feature.py
+++ b/tests/python/pytorch/graphbolt/impl/test_gpu_cached_feature.py
@@ -101,3 +101,42 @@ def test_gpu_cached_feature(dtype, cache_size_a, cache_size_b):
     # Test with different dimensionality
     feat_store_a.update(b)
     assert torch.equal(feat_store_a.read(), b.to("cuda"))
+
+
+@unittest.skipIf(
+    F._default_context_str != "gpu"
+    or torch.cuda.get_device_capability()[0] < 7,
+    reason="GPUCachedFeature requires a Volta or later generation NVIDIA GPU.",
+)
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        torch.bool,
+        torch.uint8,
+        torch.int8,
+        torch.int16,
+        torch.int32,
+        torch.int64,
+        torch.float16,
+        torch.bfloat16,
+        torch.float32,
+        torch.float64,
+    ],
+)
+@pytest.mark.parametrize("pin_memory", [False, True])
+def test_gpu_cached_feature_read_async(dtype, pin_memory):
+    a = torch.randint(0, 2, [1000, 13], dtype=dtype, pin_memory=pin_memory)
+    a_cuda = a.to(F.ctx())
+
+    cache_size = 256 * a[:1].nbytes
+
+    feat_store = gb.GPUCachedFeature(gb.TorchBasedFeature(a), cache_size)
+
+    # Test read with ids.
+    ids1 = torch.tensor([0, 15, 71, 101], device=F.ctx())
+    ids2 = torch.tensor([71, 101, 202, 303], device=F.ctx())
+    for ids in [ids1, ids2]:
+        reader = feat_store.read_async(ids)
+        for _ in range(feat_store.read_async_num_stages(ids.device)):
+            values = next(reader)
+        assert torch.equal(values.wait(), a_cuda[ids])
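
For reference, the protocol this test exercises is: `read_async(ids)` returns a generator, the caller advances it exactly `read_async_num_stages(ids.device)` times (one `next()` per pipeline stage), and the object yielded by the final stage carries a future whose `.wait()` returns the feature tensor. Below is a minimal sketch of how calling code might wrap that pattern; the helper name `read_all_stages`, the `dgl.graphbolt` import alias, and the driver at the bottom are illustrative assumptions, not part of the graphbolt API or this PR.

```python
import torch
import dgl.graphbolt as gb  # assumed alias; the test imports graphbolt as `gb`


def read_all_stages(feature, ids):
    """Hypothetical helper: drive a staged async read to completion.

    Wraps the read_async()/read_async_num_stages() protocol shown in
    the test above; not part of the graphbolt API.
    """
    reader = feature.read_async(ids)
    values = None
    # Each next() advances one pipeline stage; only the final stage's
    # yielded object holds the future for the completed result.
    for _ in range(feature.read_async_num_stages(ids.device)):
        values = next(reader)
    return values.wait()


if __name__ == "__main__":
    a = torch.randint(0, 2, [1000, 13], dtype=torch.float32, pin_memory=True)
    cache_size = 256 * a[:1].nbytes  # cache capacity in bytes, as in the test
    feat_store = gb.GPUCachedFeature(gb.TorchBasedFeature(a), cache_size)
    ids = torch.tensor([0, 15, 71, 101], device="cuda")
    assert torch.equal(read_all_stages(feat_store, ids), a.to("cuda")[ids])
```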