Skip to content

Commit

Permalink
Use current device in _as_gpu (NVIDIA#3586)
Browse files Browse the repository at this point in the history
In the internal `Tensor[List]CPU._as_gpu` used in debug mode,
we used a stream tied to the device of the newly created `Tensor[List]GPU`,
which was not initialized to the current GPU.
Use the current GPU device for the returned `Tensor[List]GPU` and
do the copy & wait on the stream associated with that device.

Signed-off-by: Krzysztof Lecki <klecki@nvidia.com>
  • Loading branch information
klecki authored and cyyever committed Jan 23, 2022
1 parent ab94be3 commit 92a1c1b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
12 changes: 8 additions & 4 deletions dali/python/backend_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,11 @@ void ExposeTensor(py::module &m) {
})
.def("_as_gpu", [](Tensor<CPUBackend> &t) -> Tensor<GPUBackend>* {
// Debug-mode transfer: copy a CPU tensor into a new GPU tensor that is
// bound to the CUDA device current on the calling thread.
auto ret = std::make_unique<Tensor<GPUBackend>>();
// NOTE(review): this line and the identical declaration four lines below
// appear to be the removed/added pair of a flattened diff view, not a
// real duplicate in the source file — confirm against the repository.
UserStream * us = UserStream::Get();
// Query the thread's current device and pin the result tensor to it, so
// the stream obtained below belongs to an already-initialized device.
int dev = -1;
CUDA_CALL(cudaGetDevice(&dev));
ret->set_device_id(dev);
UserStream *us = UserStream::Get();
cudaStream_t s = us->GetStream(*ret);
// Run the copy with the result's device made active for this scope.
DeviceGuard g((*ret).device_id());
ret->Copy(t, s);
// Wait on the stream so the caller can use the data immediately.
us->Wait(*ret);
return ret.release();
Expand Down Expand Up @@ -675,9 +677,11 @@ void ExposeTensorList(py::module &m) {
)code")
.def("_as_gpu", [](TensorList<CPUBackend> &t) {
// Debug-mode transfer: copy a CPU TensorList into a new GPU TensorList
// bound to the CUDA device current on the calling thread.
auto ret = std::make_shared<TensorList<GPUBackend>>();
// NOTE(review): this line and the identical declaration four lines below
// appear to be the removed/added pair of a flattened diff view, not a
// real duplicate in the source file — confirm against the repository.
UserStream * us = UserStream::Get();
// Pin the result to the thread's current device before fetching a
// stream, so the stream belongs to an already-initialized device.
int dev = -1;
CUDA_CALL(cudaGetDevice(&dev));
ret->set_device_id(dev);
UserStream *us = UserStream::Get();
cudaStream_t s = us->GetStream(*ret);
// Run the copy with the result's device made active for this scope.
DeviceGuard g((*ret).device_id());
ret->Copy(t, s);
// Wait on the stream so the returned list is ready for immediate use.
us->Wait(*ret);
return ret;
Expand Down
14 changes: 14 additions & 0 deletions dali/test/python/test_backend_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,20 @@ def test_array_interface_tensor_cpu():
assert np.array_equal(tensorlist[0].__array_interface__['shape'], tensorlist[0].shape())
assert tensorlist[0].__array_interface__['typestr'] == tensorlist[0].dtype()

def check_transfer(dali_type):
    """Round-trip a random array CPU -> GPU -> CPU and verify it is unchanged.

    ``dali_type`` is the CPU-side wrapper class (``TensorCPU`` or
    ``TensorListCPU``) whose ``_as_gpu``/``as_cpu`` transfer path is checked.
    """
    source = np.random.rand(3, 5, 6)
    cpu_data = dali_type(source)
    recovered = cpu_data._as_gpu().as_cpu()
    # TensorListCPU exposes its contents via as_array(); a plain TensorCPU
    # is converted through the numpy array interface instead.
    if dali_type is TensorListCPU:
        result = recovered.as_array()
    else:
        result = np.array(recovered)
    np.testing.assert_array_equal(source, result)

def test_transfer_cpu_gpu():
    # Generator-style test (nose convention): yield one round-trip check
    # for each CPU-side wrapper type.
    for cpu_type in (TensorCPU, TensorListCPU):
        yield check_transfer, cpu_type

def check_array_types(t):
arr = np.array([[-0.39, 1.5], [-1.5, 0.33]], dtype=t)
tensor = TensorCPU(arr, "NHWC")
Expand Down

0 comments on commit 92a1c1b

Please sign in to comment.