Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bugs of pipeline on ascend. #32737

Merged
merged 2 commits into from
May 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion paddle/fluid/framework/device_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ class PSGPUWorker : public HogwildWorker {
#endif

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
defined(PADDLE_WITH_ASCEND_CL)
class SectionWorker : public DeviceWorker {
public:
SectionWorker() {}
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/device_worker_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ REGISTER_DEVICE_WORKER_CLASS(PSGPUWorker);
#endif

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
defined(PADDLE_WITH_ASCEND_CL)
REGISTER_DEVICE_WORKER_CLASS(SectionWorker);
#endif
} // namespace framework
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/pipeline_trainer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h"
Expand All @@ -37,7 +37,7 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
int place_id = section_config.place_id();
#if (defined PADDLE_WITH_NCCL)
place_ = platform::CUDAPlace(place_id);
#elif (defined WITH_ASCEND_CL) // NOLINT
#elif (defined PADDLE_WITH_ASCEND_CL) // NOLINT
place_ = platform::NPUPlace(place_id);
#endif
worker_ = DeviceWorkerFactory::CreateDeviceWorker(
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/section_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
defined(PADDLE_WITH_ASCEND_CL)
#include <float.h>
#include "paddle/fluid/framework/device_worker.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/trainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ class PSGPUTrainer : public TrainerBase {
#endif

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(WITH_ASCEND_CL)
defined(PADDLE_WITH_ASCEND_CL)
class PipelineTrainer : public TrainerBase {
public:
PipelineTrainer() {}
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/trainer_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ REGISTER_TRAINER_CLASS(HeterBoxTrainer);
(defined PADDLE_WITH_PSLIB)
REGISTER_TRAINER_CLASS(PSGPUTrainer);
#endif
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \
defined(PADDLE_WITH_ASCEND_CL)
REGISTER_TRAINER_CLASS(PipelineTrainer);
#endif
} // namespace framework
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/operators/collective/c_allreduce_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel<T> {
int64_t numel = in->numel();

void* sendbuff = reinterpret_cast<void*>(const_cast<T*>(in->data<T>()));
out->mutable_data<T>(in->dims(), ctx.GetPlace());
void* recvbuff = reinterpret_cast<void*>(out->data<T>());

int ring_id = ctx.Attr<int>("ring_id");
Expand Down
4 changes: 2 additions & 2 deletions python/paddle/fluid/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -6124,9 +6124,9 @@ def device_guard(device=None):
device, index = device.split(':')
if device == 'cpu':
raise ValueError("Should not set device id for cpu.")
if device not in ['cpu', 'gpu', '', None]:
if device not in ['cpu', 'gpu', 'npu', '', None]:
raise ValueError(
"The Attr(device) should be 'cpu' or 'gpu', and it can also be empty string or None "
"The Attr(device) should be 'cpu' 'npu' or 'gpu', and it can also be empty string or None "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'cpu' 'npu',这里中间是不是应该有个逗号 (Shouldn't there be a comma between 'cpu' and 'npu' here?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I will add it later.

"when there is no need to specify device. But received %s" % device)
if index:
device = ":".join([device, index])
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/fluid/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4116,7 +4116,7 @@ def _get_op_device_attr(self, op):
device = op.attr(self._op_device_key) \
if op.has_attr(self._op_device_key) else None
if device:
assert device[0:3] == 'gpu', "Now, only gpu devices are " \
assert device[0:3] == 'gpu' or dev_type == 'npu', "Now, only gpu and npu devices are " \
"supported in pipeline parallemism."
return device

Expand Down