[code format check upgrade] step 2: clang-format #42840

Merged 1 commit on Jun 5, 2022

4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -28,6 +28,10 @@ repos:
        entry: bash ./tools/codestyle/clang_format.hook -i
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps)$
        exclude: |
            (?x)^(
                paddle/fluid/distributed/ps/thirdparty/round_robin.h
            )$
-   repo: local
    hooks:
    -   id: cpplint-cpp-source
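
The new exclude: entry uses a verbose-mode regex ((?x)), which tells the matcher to ignore the whitespace inside the pattern so that each excluded path can sit on its own line. A minimal sketch of how further vendored files could be appended to the same hook later — the second path below is a purely illustrative placeholder, not part of this PR:

        exclude: |
            (?x)^(
                paddle/fluid/distributed/ps/thirdparty/round_robin.h|
                some/other/thirdparty/vendored_header.h
            )$

Each alternative is joined with | inside the group, and the ^( ... )$ anchors keep the match restricted to exact repository-relative paths.
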
1 change: 1 addition & 0 deletions paddle/fluid/distributed/collective/HCCLTools.cc
@@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/distributed/collective/HCCLTools.h"

#include "paddle/fluid/distributed/collective/Types.h"

namespace paddle {
1 change: 1 addition & 0 deletions paddle/fluid/distributed/collective/HCCLTools.h
@@ -15,6 +15,7 @@
#pragma once

#include <error.h>

#include <string>

#include "boost/variant.hpp"
1 change: 1 addition & 0 deletions paddle/fluid/distributed/collective/NCCLTools.cc
@@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/distributed/collective/NCCLTools.h"

#include "paddle/fluid/distributed/collective/Types.h"

namespace paddle {
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/collective/NCCLTools.h
@@ -16,18 +16,18 @@

#include <cuda_runtime.h>
#include <error.h>

#include <string>

#include "boost/variant.hpp"
#include "paddle/fluid/distributed/collective/Types.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h"

#include "paddle/fluid/distributed/collective/Types.h"

namespace paddle {
namespace distributed {

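
The pattern running through these header and source hunks — a blank line inserted after a file's own include or after the system headers, and Types.h folded back into the sorted block of project includes — is what clang-format produces when include sorting and regrouping are enabled. A rough sketch of the kind of .clang-format options involved; the values below are assumptions for illustration, not a copy of Paddle's actual configuration:

    # Hypothetical sketch; Paddle's real .clang-format is not shown in this diff.
    BasedOnStyle: Google
    Standard: Cpp11
    SortIncludes: true
    IncludeBlocks: Regroup   # merge adjacent include blocks, then re-split by category

With IncludeBlocks: Regroup, the main include (for example NCCLTools.h inside NCCLTools.cc), system headers such as <error.h>, and project headers each land in their own block separated by a blank line, which matches the shape of the changes above.
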
1 change: 0 additions & 1 deletion paddle/fluid/distributed/collective/ProcessGroup.h
@@ -21,7 +21,6 @@

#include "paddle/fluid/distributed/collective/Types.h"
#include "paddle/fluid/eager/api/utils/tensor_utils.h"

#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/enforce.h"
6 changes: 4 additions & 2 deletions paddle/fluid/distributed/collective/ProcessGroupGloo.cc
@@ -27,6 +27,7 @@
#include <gloo/broadcast.h>
#include <gloo/reduce.h>
#include <gloo/scatter.h>

#include "paddle/fluid/distributed/collective/Common.h"
#include "paddle/fluid/distributed/collective/ProcessGroupGloo.h"
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
@@ -485,8 +486,9 @@ std::shared_ptr<::gloo::transport::Device>
ProcessGroupGloo::createDefaultDevice() {
std::array<char, HOST_NAME_MAX> hostname{};
auto ret = ::gethostname(hostname.data(), HOST_NAME_MAX);
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::Fatal(
"Get hostname error for createDefaultDevice."));
PADDLE_ENFORCE_EQ(
ret, 0,
platform::errors::Fatal("Get hostname error for createDefaultDevice."));
::addrinfo* result;
result = tcputils::get_addr_info(hostname.data(), "", 0, AF_UNSPEC);
::addrinfo* cur;
20 changes: 11 additions & 9 deletions paddle/fluid/distributed/collective/ProcessGroupHCCL.cc
@@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/distributed/collective/ProcessGroupHCCL.h"

#include "paddle/fluid/distributed/collective/Common.h"
#include "paddle/fluid/distributed/collective/HCCLTools.h"
#include "paddle/fluid/memory/malloc.h"
@@ -216,15 +217,16 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHCCL::AllReduce(
std::vector<phi::DenseTensor>& in_tensors, // NOLINT
std::vector<phi::DenseTensor>& out_tensors, // NOLINT
const AllreduceOptions& opts) {
return Collective(in_tensors, out_tensors,
[&](phi::DenseTensor& input, phi::DenseTensor& output,
HcclComm comm, const aclrtStream& stream) {
return platform::dynload::HcclAllReduce(
input.data(), output.data(), input.numel(),
platform::ToHCCLDataType(input.dtype()),
ToHCCLRedType(opts.reduce_op), comm, stream);
},
CommType::ALLREDUCE);
return Collective(
in_tensors, out_tensors,
[&](phi::DenseTensor& input, phi::DenseTensor& output, HcclComm comm,
const aclrtStream& stream) {
return platform::dynload::HcclAllReduce(
input.data(), output.data(), input.numel(),
platform::ToHCCLDataType(input.dtype()),
ToHCCLRedType(opts.reduce_op), comm, stream);
},
CommType::ALLREDUCE);
}

std::shared_ptr<ProcessGroup::Task> ProcessGroupHCCL::Broadcast(
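
The re-wrapped PADDLE_ENFORCE_EQ(...) and Collective(...) calls in this file and the ones below show the other visible effect of the clang-format pass: when a call whose arguments include a lambda no longer fits in the column limit, the newer formatter breaks immediately after the opening parenthesis and indents every argument by the continuation width, rather than aligning arguments under the opening parenthesis as before. A sketch of the options that influence this wrapping; the option names are real clang-format options, but the values are illustrative assumptions rather than the repository's verified settings:

    # Hypothetical sketch; values are assumptions, not Paddle's checked-in config.
    ColumnLimit: 80
    ContinuationIndentWidth: 4
    BinPackArguments: true
    AllowAllArgumentsOnNextLine: true

The functional content of each call is unchanged; only line breaks and indentation differ between the old and new formatter output.
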
5 changes: 2 additions & 3 deletions paddle/fluid/distributed/collective/ProcessGroupHCCL.h
@@ -21,12 +21,11 @@
#include <unordered_map>
#include <vector>

#include "paddle/fluid/distributed/collective/HCCLTools.h"
#include "paddle/fluid/distributed/collective/ProcessGroup.h"
#include "paddle/fluid/distributed/store/store.h"
#include "paddle/fluid/platform/device/npu/npu_stream.h"
#include "paddle/fluid/platform/device_context.h"

#include "paddle/fluid/distributed/collective/HCCLTools.h"
#include "paddle/fluid/distributed/store/store.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gen_comm_id_helper.h"
#include "paddle/fluid/platform/place.h"
27 changes: 17 additions & 10 deletions paddle/fluid/distributed/collective/ProcessGroupHeter.cc
@@ -13,7 +13,9 @@
// limitations under the License.

#include "paddle/fluid/distributed/collective/ProcessGroupHeter.h"

#include <chrono>

#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/api/include/api.h"
@@ -129,8 +131,9 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::AllReduce(
gid_, {dense_cpu_tensor.name()}, send_size, dense_cpu_tensor.data(),
dense_cpu_tensor.numel() *
framework::DataTypeSize(dense_cpu_tensor.dtype()));
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
"Send to the switch module error."));
PADDLE_ENFORCE_EQ(ret, 0,
platform::errors::PreconditionNotMet(
"Send to the switch module error."));
phi::DenseTensor cpu_tensor2;
cpu_tensor2.AllocateFrom(
std::make_unique<paddle::experimental::DefaultAllocator>(
@@ -140,8 +143,9 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::AllReduce(
ret = client_->Recv(
gid_, {dense_cpu_tensor.name()}, cpu_tensor2.data(),
cpu_tensor2.numel() * framework::DataTypeSize(cpu_tensor2.dtype()));
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
"Recv from the switch module error."));
PADDLE_ENFORCE_EQ(ret, 0,
platform::errors::PreconditionNotMet(
"Recv from the switch module error."));

switch (dense_cpu_tensor.dtype()) {
case DataType::FLOAT32:
@@ -226,8 +230,9 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Broadcast(
dense_cpu_tensor.data(),
dense_cpu_tensor.numel() *
framework::DataTypeSize(dense_cpu_tensor.dtype()));
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
"Send to the switch module error."));
PADDLE_ENFORCE_EQ(ret, 0,
platform::errors::PreconditionNotMet(
"Send to the switch module error."));
} else {
int ret = client_->Recv(
gid_, {dense_cpu_tensor.name()}, dense_cpu_tensor.data(),
@@ -286,8 +291,9 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Send(
VLOG(2) << "tensor_name:" << tensor_name;
int ret = client_->Send(gid_, {tensor_name}, send_size, cpu_tensor.data(),
tensor_size);
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
"Send to the switch module error."));
PADDLE_ENFORCE_EQ(
ret, 0,
platform::errors::PreconditionNotMet("Send to the switch module error."));
return CreateTask(rank_, CommType::SEND, in_tensors);
}

@@ -319,8 +325,9 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Recv(
int ret = client_->Recv(
gid_, {tensor_name}, cpu_tensor.data(),
cpu_tensor.numel() * framework::DataTypeSize(cpu_tensor.dtype()));
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
"receive to the switch module error."));
PADDLE_ENFORCE_EQ(ret, 0,
platform::errors::PreconditionNotMet(
"receive to the switch module error."));
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end - start;
double goodput = cpu_tensor.numel() *
132 changes: 67 additions & 65 deletions paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
@@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"

#include "paddle/fluid/distributed/collective/Common.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
@@ -320,15 +321,16 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::AllReduce(
PADDLE_ENFORCE_EQ(
CheckTensorsInCudaPlace(in_tensors), true,
platform::errors::InvalidArgument("All inputs should be in CudaPlace."));
return Collective(in_tensors, out_tensors,
[&](const phi::DenseTensor& input, phi::DenseTensor& output,
ncclComm_t comm, const gpuStream_t& stream) {
return platform::dynload::ncclAllReduce(
input.data(), output.data(), input.numel(),
platform::ToNCCLDataType(input.type()),
ToNCCLRedType(opts.reduce_op), comm, stream);
},
CommType::ALLREDUCE);
return Collective(
in_tensors, out_tensors,
[&](const phi::DenseTensor& input, phi::DenseTensor& output,
ncclComm_t comm, const gpuStream_t& stream) {
return platform::dynload::ncclAllReduce(
input.data(), output.data(), input.numel(),
platform::ToNCCLDataType(input.type()),
ToNCCLRedType(opts.reduce_op), comm, stream);
},
CommType::ALLREDUCE);
}

std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Broadcast(
@@ -338,17 +340,17 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Broadcast(
CheckTensorsInCudaPlace(in_tensors), true,
platform::errors::InvalidArgument("All inputs should be in CudaPlace."));

return Collective(in_tensors, out_tensors,
[&](phi::DenseTensor& input, phi::DenseTensor& output,
ncclComm_t comm, const gpuStream_t& stream) {
const auto root = opts.source_rank * in_tensors.size() +
opts.source_root;
return platform::dynload::ncclBroadcast(
input.data(), output.data(), input.numel(),
platform::ToNCCLDataType(input.type()), root, comm,
stream);
},
CommType::BROADCAST);
return Collective(
in_tensors, out_tensors,
[&](phi::DenseTensor& input, phi::DenseTensor& output, ncclComm_t comm,
const gpuStream_t& stream) {
const auto root =
opts.source_rank * in_tensors.size() + opts.source_root;
return platform::dynload::ncclBroadcast(
input.data(), output.data(), input.numel(),
platform::ToNCCLDataType(input.type()), root, comm, stream);
},
CommType::BROADCAST);
}

std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Barrier(
@@ -400,31 +402,31 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Send(
std::vector<phi::DenseTensor>& tensors, int dst_rank) {
CheckTensorsInDifferentDevices(tensors, static_cast<size_t>(GetSize()));

auto task = PointToPoint(tensors,
[&](phi::DenseTensor& input, ncclComm_t comm,
const gpuStream_t& stream, int dst_rank) {
return platform::dynload::ncclSend(
input.data(), input.numel(),
platform::ToNCCLDataType(input.dtype()),
dst_rank, comm, stream);
},
dst_rank, CommType::SEND);
auto task = PointToPoint(
tensors,
[&](phi::DenseTensor& input, ncclComm_t comm, const gpuStream_t& stream,
int dst_rank) {
return platform::dynload::ncclSend(
input.data(), input.numel(),
platform::ToNCCLDataType(input.dtype()), dst_rank, comm, stream);
},
dst_rank, CommType::SEND);
return task;
}

std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Recv(
std::vector<phi::DenseTensor>& tensors, int src_rank) {
CheckTensorsInDifferentDevices(tensors, static_cast<size_t>(GetSize()));

auto task = PointToPoint(tensors,
[&](phi::DenseTensor& output, ncclComm_t comm,
const gpuStream_t& stream, int src_rank) {
return platform::dynload::ncclRecv(
output.data(), output.numel(),
platform::ToNCCLDataType(output.dtype()),
src_rank, comm, stream);
},
src_rank, CommType::RECV);
auto task = PointToPoint(
tensors,
[&](phi::DenseTensor& output, ncclComm_t comm, const gpuStream_t& stream,
int src_rank) {
return platform::dynload::ncclRecv(
output.data(), output.numel(),
platform::ToNCCLDataType(output.dtype()), src_rank, comm, stream);
},
src_rank, CommType::RECV);
return task;
}

@@ -440,15 +442,15 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Send_Partial(
std::vector<phi::DenseTensor> shared_tensors;
shared_tensors.push_back(shared_input);

auto task = PointToPoint(shared_tensors,
[&](phi::DenseTensor& input, ncclComm_t comm,
const gpuStream_t& stream, int dst_rank) {
return platform::dynload::ncclSend(
input.data(), input.numel(),
platform::ToNCCLDataType(input.dtype()),
dst_rank, comm, stream);
},
dst_rank, CommType::SEND);
auto task = PointToPoint(
shared_tensors,
[&](phi::DenseTensor& input, ncclComm_t comm, const gpuStream_t& stream,
int dst_rank) {
return platform::dynload::ncclSend(
input.data(), input.numel(),
platform::ToNCCLDataType(input.dtype()), dst_rank, comm, stream);
},
dst_rank, CommType::SEND);
return task;
}

@@ -463,15 +465,15 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Recv_Partial(
std::vector<phi::DenseTensor> shared_tensors;
shared_tensors.push_back(shared_input);

auto task = PointToPoint(shared_tensors,
[&](phi::DenseTensor& output, ncclComm_t comm,
const gpuStream_t& stream, int src_rank) {
return platform::dynload::ncclRecv(
output.data(), output.numel(),
platform::ToNCCLDataType(output.dtype()),
src_rank, comm, stream);
},
src_rank, CommType::RECV);
auto task = PointToPoint(
shared_tensors,
[&](phi::DenseTensor& output, ncclComm_t comm, const gpuStream_t& stream,
int src_rank) {
return platform::dynload::ncclRecv(
output.data(), output.numel(),
platform::ToNCCLDataType(output.dtype()), src_rank, comm, stream);
},
src_rank, CommType::RECV);
return task;
}

@@ -484,15 +486,15 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::AllGather(
PADDLE_ENFORCE_EQ(
CheckTensorsInCudaPlace(out_tensors), true,
platform::errors::InvalidArgument("All outputs should be in CudaPlace."));
return Collective(in_tensors, out_tensors,
[&](const phi::DenseTensor& input, phi::DenseTensor& output,
ncclComm_t comm, const gpuStream_t& stream) {
return platform::dynload::ncclAllGather(
input.data(), output.data(), input.numel(),
platform::ToNCCLDataType(input.dtype()), comm,
stream);
},
CommType::ALLGATHER);
return Collective(
in_tensors, out_tensors,
[&](const phi::DenseTensor& input, phi::DenseTensor& output,
ncclComm_t comm, const gpuStream_t& stream) {
return platform::dynload::ncclAllGather(
input.data(), output.data(), input.numel(),
platform::ToNCCLDataType(input.dtype()), comm, stream);
},
CommType::ALLGATHER);
}

void* GetPointerByOffset(void* raw_pointer, size_t offset,