From 6c4a2f6dd949ae37635d6908f1cd22b2b372a32f Mon Sep 17 00:00:00 2001
From: zhouwei25
Date: Thu, 21 Jul 2022 09:09:18 +0000
Subject: [PATCH 1/2] fix behavior of device_id=None in Tensor.cuda

---
 .../fluid/dygraph/varbase_patch_methods.py    | 19 ++++++++-----
 .../fluid/tests/unittests/test_var_base.py    | 28 +++++++++----------
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index 48497f4b9092f..d24d0c8577897 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -866,15 +866,20 @@ def cpu(self):
             return res
 
     @framework.dygraph_only
-    def cuda(self, device_id=0, blocking=True):
-        if device_id is None:
-            device_id = 0
-        if not isinstance(device_id, int):
-            raise ValueError("\'device_id\' must be a positive integer")
-        if self.place.is_gpu_place():
+    def cuda(self, device=None, blocking=True):
+        if device is None:
+            res_place = framework._current_expected_place()
+        elif isinstance(device, int):
+            res_place = core.CUDAPlace(device_id)
+        elif isinstance(device, core.CUDAPlace()):
+            res_place = device
+        else:
+            raise TypeError("device must be CUDAPlace device id or CUDAPlace")
+
+        if self.place._equals(res_place):
             return self
         else:
-            res = self._copy_to(core.CUDAPlace(device_id), True)
+            res = self._copy_to(res_place, True)
             res.stop_gradient = self.stop_gradient
             res.persistable = self.persistable
             return res
diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py
index e66f310eb977d..227d05c37b60f 100644
--- a/python/paddle/fluid/tests/unittests/test_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_var_base.py
@@ -34,7 +34,7 @@ def setUp(self):
 
     def func_test_to_tensor(self):
 
-        def _test_place(place):
+        def check_with_place(place):
             with fluid.dygraph.guard():
                 paddle.set_default_dtype('float32')
                 # set_default_dtype should not take effect on int
@@ -76,17 +76,19 @@ def _test_place(place):
                 y = x.cpu()
                 self.assertEqual(y.place.__repr__(), "Place(cpu)")
                 if core.is_compiled_with_cuda():
+                    res_place = paddle.framework._current_expected_place()
                     y = x.pin_memory()
                     self.assertEqual(y.place.__repr__(), "Place(gpu_pinned)")
                     y = x.cuda()
+                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
                     y = x.cuda(None)
-                    self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
+                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
                     y = x.cuda(device_id=0)
                     self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                     y = x.cuda(blocking=False)
-                    self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
+                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
                     y = x.cuda(blocking=True)
-                    self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
+                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
                     with self.assertRaises(ValueError):
                         y = x.cuda("test")
 
@@ -266,20 +268,18 @@ def _test_place(place):
                 with self.assertRaises(ValueError):
                     paddle.to_tensor([[1], [2, 3]], place=1)
 
-        _test_place(core.CPUPlace())
-        _test_place("cpu")
+        check_with_place(core.CPUPlace())
+        check_with_place("cpu")
         if core.is_compiled_with_cuda():
-            _test_place(core.CUDAPinnedPlace())
-            _test_place("gpu_pinned")
-            _test_place(core.CUDAPlace(0))
-            _test_place("gpu:0")
+            check_with_place(core.CUDAPinnedPlace())
+            check_with_place("gpu_pinned")
+            check_with_place(core.CUDAPlace(0))
+            check_with_place("gpu:0")
         if core.is_compiled_with_npu():
-            _test_place(core.NPUPlace(0))
-            _test_place("npu:0")
+            check_with_place(core.NPUPlace(0))
+            check_with_place("npu:0")
 
     def test_to_tensor(self):
-        with _test_eager_guard():
-            self.func_test_to_tensor()
         self.func_test_to_tensor()
 
     def func_test_to_tensor_not_change_input_stop_gradient(self):
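The test changes above capture the intent of the first patch: with no argument, Tensor.cuda() should follow framework._current_expected_place() instead of always copying to GPU 0. A minimal usage sketch of that behavior in dygraph mode, assuming a CUDA build of Paddle with GPU 0 selected (the follow-up patch below reworks the argument handling, so only the no-argument case is shown):

    import paddle

    paddle.set_device("gpu:0")          # the current expected place is now GPU 0
    x = paddle.to_tensor([1.0, 2.0], place=paddle.CPUPlace())

    y = x.cuda()                        # no device id: follow the expected place
    print(y.place)                      # Place(gpu:0)

    y = x.cuda(None)                    # passing None behaves the same way
    print(y.place)                      # Place(gpu:0)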
From 3f67446a1968cbdd00eef8fb8b064c99246d994a Mon Sep 17 00:00:00 2001
From: zhouwei25
Date: Mon, 25 Jul 2022 09:10:23 +0000
Subject: [PATCH 2/2] fix CI

---
 .../sparse/gpu/fused_attention_grad_kernel.cu         |  2 +-
 .../phi/kernels/sparse/gpu/fused_attention_kernel.cu  |  2 +-
 python/paddle/fluid/dygraph/varbase_patch_methods.py  | 12 ++++++------
 python/paddle/fluid/tests/unittests/test_var_base.py  | 11 ++++++-----
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
index 4d31ad96cdd3b..70203836d4412 100644
--- a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
@@ -75,7 +75,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
 #if CUDA_VERSION >= 11070
   /* Step1: Forward: softmax{CSR} * value{Dense} -> out{Dense}, reuse */
   SparseCsrTensor dsoftmax;
-  CsrDenseMatmulGradKernel(
+  MatmulCsrDenseGradKernel(
       dev_ctx, softmax, value, dout, &dsoftmax, dvalue);
 
   /* Step2: Calculate grad of sdd_result, manualy not reuse */
diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
index 46412d57f16c7..b1e30f3b654a4 100644
--- a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
@@ -263,7 +263,7 @@ void FusedAttentionCsrKernel(
 
   /* Step3: DSD Matmul, reuse */
   softmax->set_dims(phi::make_ddim({q_dim[0], q_dim[1], q_dim[2], q_dim[2]}));
-  CsrDenseMatmulKernel(dev_ctx, *softmax, value, out);
+  MatmulCsrDenseKernel(dev_ctx, *softmax, value, out);
 #else
   PADDLE_THROW(
       phi::errors::Unimplemented("forward of 'sparse.nn.functional.attention' "
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index d24d0c8577897..5b0aba7a9dabb 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -866,15 +866,15 @@ def cpu(self):
             return res
 
     @framework.dygraph_only
-    def cuda(self, device=None, blocking=True):
-        if device is None:
+    def cuda(self, device_id=None, blocking=True):
+        if device_id is None:
             res_place = framework._current_expected_place()
-        elif isinstance(device, int):
+            if not isinstance(res_place, core.CUDAPlace):
+                res_place = core.CUDAPlace(0)
+        elif isinstance(device_id, int):
             res_place = core.CUDAPlace(device_id)
-        elif isinstance(device, core.CUDAPlace()):
-            res_place = device
         else:
-            raise TypeError("device must be CUDAPlace device id or CUDAPlace")
+            raise ValueError("device_id must be int|None")
 
         if self.place._equals(res_place):
             return self
         else:
diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py
index 1b32d05c08487..c16238486df94 100644
--- a/python/paddle/fluid/tests/unittests/test_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_var_base.py
@@ -76,19 +76,18 @@ def check_with_place(place):
                 y = x.cpu()
                 self.assertEqual(y.place.__repr__(), "Place(cpu)")
                 if core.is_compiled_with_cuda():
-                    res_place = paddle.framework._current_expected_place()
                     y = x.pin_memory()
                     self.assertEqual(y.place.__repr__(), "Place(gpu_pinned)")
                     y = x.cuda()
-                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
+                    self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                     y = x.cuda(None)
-                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
+                    self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                     y = x.cuda(device_id=0)
                     self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                     y = x.cuda(blocking=False)
-                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
+                    self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                     y = x.cuda(blocking=True)
-                    self.assertEqual(y.place.__repr__(), res_place.__repr__())
+                    self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                     with self.assertRaises(ValueError):
                         y = x.cuda("test")
 
@@ -280,6 +279,8 @@ def check_with_place(place):
             check_with_place("npu:0")
 
     def test_to_tensor(self):
+        with _test_eager_guard():
+            self.func_test_to_tensor()
         self.func_test_to_tensor()
 
     def func_test_to_tensor_not_change_input_stop_gradient(self):
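Taken together, the two patches settle on the following dispatch for cuda(): device_id=None resolves to the current expected place, falling back to GPU 0 when that place is not a CUDAPlace; an int selects that GPU; anything else raises ValueError. A standalone sketch of the same logic, assuming a CUDA build of Paddle (resolve_cuda_place is a hypothetical helper name used only for illustration, not an API introduced by the patch):

    from paddle.fluid import core, framework

    def resolve_cuda_place(device_id=None):
        # Mirrors the dispatch in the final version of cuda() above.
        if device_id is None:
            place = framework._current_expected_place()
            # Fall back to GPU 0 when the expected place is not a CUDA place (e.g. CPU).
            if not isinstance(place, core.CUDAPlace):
                place = core.CUDAPlace(0)
            return place
        if isinstance(device_id, int):
            return core.CUDAPlace(device_id)
        raise ValueError("device_id must be int|None")

Because the second patch restores the device_id parameter name, keyword calls such as x.cuda(device_id=0) keep working, which is what the updated test_var_base.py asserts.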