Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into support…
Browse files Browse the repository at this point in the history
…_while_quant_post
  • Loading branch information
yghstill committed Dec 9, 2021
2 parents 9536678 + 033ebe7 commit 51d466a
Show file tree
Hide file tree
Showing 26 changed files with 795 additions and 571 deletions.
16 changes: 16 additions & 0 deletions paddle/fluid/eager/accumulation/gradient_accumulation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,22 @@ class TensorAddFunctor : public boost::static_visitor<> {
}
#endif

#ifdef PADDLE_WITH_IPU
void operator()(const paddle::platform::IPUPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}
#else
void operator()(const paddle::platform::IPUPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}
#endif

void operator()(const paddle::platform::NPUPinnedPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/framework/dlpack_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ struct DLDeviceVisitor : public boost::static_visitor<::DLDevice> {
return device;
}

inline ::DLDevice operator()(const platform::IPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::IPUPlace is not supported"));
}

inline ::DLDevice operator()(const platform::XPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::XPUPlace is not supported"));
Expand Down
8 changes: 8 additions & 0 deletions paddle/fluid/framework/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,14 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
PADDLE_THROW(
platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
#endif
} else if (platform::is_ipu_place(place_)) {
#ifdef PADDLE_WITH_IPU
gc.reset(new IPUGarbageCollector(
BOOST_GET_CONST(platform::IPUPlace, place_), max_memory_size));
#else
PADDLE_THROW(
platform::errors::Unimplemented("No IPU gc found in CPU/IPU paddle"));
#endif
} else if (platform::is_npu_place(place_)) {
#ifdef PADDLE_WITH_ASCEND_CL
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/framework/op_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)

#define REGISTER_OP_IPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, IPU, ::paddle::platform::IPUPlace, __VA_ARGS__)

#define REGISTER_OP_XPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)

Expand Down
51 changes: 46 additions & 5 deletions paddle/fluid/framework/tensor_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) &&
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else if (platform::is_ipu_place(src_place) &&
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
}
#endif

#ifdef PADDLE_WITH_XPU
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
Expand Down Expand Up @@ -386,25 +402,42 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_IPU
else if (platform::is_ipu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_ipu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
#ifdef PADDLE_WITH_XPU
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
} else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
}
else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
} else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
}
else if (platform::is_xpu_place(src_place) && // NOLINT
platform::is_xpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
} else { // NOLINT
}
else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
Expand Down Expand Up @@ -571,6 +604,11 @@ class AnyVisitor : public boost::static_visitor<bool> {
platform::errors::Unimplemented("Not supported on place (%s) ", npu));
// return GetResultHelper(out, npu);
}
bool GetResult(const framework::Tensor& out,
const platform::IPUPlace& ipu) const {
PADDLE_THROW(
platform::errors::Unimplemented("Not supported on place (%s) ", ipu));
}

bool GetResult(const framework::Tensor& out,
const platform::NPUPinnedPlace& cpu) const {
Expand Down Expand Up @@ -762,6 +800,9 @@ struct BothFalseVisitor : public boost::static_visitor<> {
void VisitorImpl(const platform::XPUPlace& xpu) const {
PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported"));
}
void VisitorImpl(const platform::IPUPlace& ipu) const {
PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported"));
}

void VisitorImpl(const platform::CUDAPlace& gpu) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/imperative/gradient_accumulator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ class TensorAddFunctor : public boost::static_visitor<> {
"is not supported in imperative mode",
place));
}
// there is NO support in IPUPlace
void operator()(const platform::IPUPlace& place) {
PADDLE_THROW(platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode",
place));
}

private:
int64_t numel_;
Expand Down
28 changes: 28 additions & 0 deletions paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,34 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
return GetCPUBuddyAllocator()->Used();
}

// For Graphcore IPU
template <>
void *Alloc<platform::IPUPlace>(const platform::IPUPlace &place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
VLOG(10) << "IPUPlace, Allocate on cpu.";

void *p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size);
}
VLOG(10) << " pointer=" << p;
return p;
}
template <>
void Free<platform::IPUPlace>(const platform::IPUPlace &place, void *p,
size_t size) {
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p);
}
template <>
uint64_t Release<platform::IPUPlace>(const platform::IPUPlace &place) {
return GetCPUBuddyAllocator()->Release();
}
template <>
size_t Used<platform::IPUPlace>(const platform::IPUPlace &place) {
return GetCPUBuddyAllocator()->Used();
}

// For kunlun XPU
template <>
void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
Expand Down
41 changes: 0 additions & 41 deletions paddle/fluid/operators/flip_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,6 @@ namespace operators {
using Tensor = framework::Tensor;
using CUDADeviceContext = paddle::platform::CUDADeviceContext;

template <typename T>
__global__ void kernel_pointwise_flip_apply(const int N, const T* in_data,
T* out_data, int dim0, int stride0,
int dim1, int flip_dim) {
for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < N;
idx += gridDim.x * blockDim.x) {
int dst_offset = 0;
if (flip_dim == 0) {
// flip 1st dim
dst_offset = (dim0 - 1 - idx / stride0) * stride0 + idx % stride0;
} else {
// flip last dim
dst_offset = idx / stride0 * stride0 + (dim1 - 1 - idx % stride0);
}
out_data[dst_offset] = in_data[idx];
}
}

template <typename T>
__global__ void flip_cuda_kernel(const int N, const T* in_data, T* out_data,
int64_t* x_shape, int64_t* x_stride,
Expand Down Expand Up @@ -103,29 +85,6 @@ class FlipKernel<platform::CUDADeviceContext, T>
std::vector<int64_t> x_dims_v = framework::vectorize(x_dims);
std::vector<int64_t> x_stride_v = framework::vectorize(x_stride);

// wrap high-dims to 2-dims
if (flip_dims_size == 1 &&
(flip_dims[0] == 0 || flip_dims[0] == total_dims - 1)) {
int dim0 = 1, dim1 = 1;
int stride0 = 1;
if (flip_dims[0] == 0) {
dim0 = x_dims_v[0];
stride0 = x_stride_v[0];
for (size_t i = 1; i < total_dims; ++i) {
dim1 *= x_dims_v[i];
}
} else {
dim1 = x_dims_v[total_dims - 1];
for (size_t i = 0; i < total_dims - 1; ++i) {
dim0 *= x_dims_v[i];
}
stride0 *= x_dims_v[total_dims - 1];
}
kernel_pointwise_flip_apply<
T><<<dim_grid, dim_block, 0, ctx.cuda_device_context().stream()>>>(
N, in_data, out_data, dim0, stride0, dim1, flip_dims[0]);
}

int bytes = total_dims * sizeof(int64_t);
auto x_strides_array_tmp = memory::Alloc(dev_ctx, bytes);
int64_t* x_strides_array_gpu =
Expand Down
62 changes: 62 additions & 0 deletions paddle/fluid/operators/ipu_runtime_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/ipu_runtime_op.h"

namespace paddle {
namespace operators {

class IpuRuntimeOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {}

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::proto::VarType::Type(ctx.Attr<int>("dtype")),
ctx.device_context());
}
};

class IpuRuntimeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("FeedList", "FeedList of Graph").AsDuplicable();
AddOutput("FetchList", "FetchList of Graph").AsDuplicable();
AddAttr<int>("dtype",
"(int, default 5 (FP32)) "
"Output data type")
.SetDefault(framework::proto::VarType::FP32);
AddComment(R"DOC(
Run graph by PopART runtime.
)DOC");
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(ipu_runtime, ops::IpuRuntimeOp, ops::IpuRuntimeOpMaker);

REGISTER_OP_IPU_KERNEL(ipu_runtime, ops::IpuRuntimeKernel<float>,
ops::IpuRuntimeKernel<double>,
ops::IpuRuntimeKernel<int>,
ops::IpuRuntimeKernel<int64_t>,
ops::IpuRuntimeKernel<bool>,
ops::IpuRuntimeKernel<int8_t>,
ops::IpuRuntimeKernel<paddle::platform::float16>);
69 changes: 69 additions & 0 deletions paddle/fluid/operators/ipu_runtime_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/framework/ipu/ipu_backend.h"
#include "paddle/fluid/framework/tensor.h"
#endif

namespace paddle {
namespace operators {

template <typename T>
class IpuRuntimeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef PADDLE_WITH_IPU
auto ipu_backend = framework::ipu::IpuBackend::GetInstance();
if (!ipu_backend->DeviceIsAttached()) {
const platform::IPUDeviceContext& ipu_ctx =
reinterpret_cast<const platform::IPUDeviceContext&>(
ctx.device_context());
ipu_backend->AttachDevice(ipu_ctx.DeviceId());
}

auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
auto output_names = ctx.OutputNames("FetchList");
VLOG(4) << "IpuRuntime Kernel, begin to run graph";
ipu_backend->Run(inputs, outputs, ctx);

// post-run
// resize tensor when tensor.dims() is empty
for (size_t i = 0; i < outputs.size(); ++i) {
auto* out = outputs[i];
if (out->dims().size() == 0) {
auto tensor_dtype = out->type();
auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
int64_t dim = out->memory_size() / sizeof_dtype;
out->Resize({dim});
VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
<< " dims from () to: "
<< "(" << dim << ")";
}
}
#else
PADDLE_THROW(platform::errors::PreconditionNotMet(
"Please compile WITH_IPU option to enable ipu_runtime op"));
#endif
}
};

} // namespace operators
} // namespace paddle
Loading

1 comment on commit 51d466a

@paddle-bot-old
Copy link

@paddle-bot-old paddle-bot-old bot commented on 51d466a Dec 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🕵️ CI failures summary

🔍 PR: #37498 Commit ID: 51d466a contains failed CI.

🔹 Failed: PR-CI-Windows-Inference

Unknown Failed
2021-12-10 09:59:55   File "C:\Users\Administrator\Downloads\workspace\cd8c52a4-e8a7-4d1b-8e61-49986b5d8e71\Paddle\paddle\scripts\installation_validate.py", line 15, in 
2021-12-10 09:59:55 import paddle.fluid as fluid
2021-12-10 09:59:55 File "C:\Users\Administrator\AppData\Roaming\Python\Python37\site-packages\paddle_init_.py", line 25, in
2021-12-10 09:59:55 from .fluid import monkey_patch_variable
2021-12-10 09:59:55 File "C:\Users\Administrator\AppData\Roaming\Python\Python37\site-packages\paddle\fluid_init_.py", line 223, in
2021-12-10 09:59:55 monkey_patch_eagertensor()
2021-12-10 09:59:55 File "C:\Users\Administrator\AppData\Roaming\Python\Python37\site-packages\paddle\fluid\eager\eager_tensor_patch_methods.py", line 23, in monkey_patch_eagertensor
2021-12-10 09:59:55 setattr(core.eager.EagerTensor, "str", str)
2021-12-10 09:59:55 AttributeError: module 'paddle.fluid.core_avx' has no attribute 'eager'
2021-12-10 09:59:55 C:\Users\Administrator\Downloads\workspace\cd8c52a4-e8a7-4d1b-8e61-49986b5d8e71\Paddle\build>goto:eof
2021-12-10 09:59:55 C:\Users\Administrator\Downloads\workspace\cd8c52a4-e8a7-4d1b-8e61-49986b5d8e71\Paddle\build>echo Test import paddle failed, will exit!
2021-12-10 09:59:55 Test import paddle failed, will exit
2021-12-10 09:59:55 C:\Users\Administrator\Downloads\workspace\cd8c52a4-e8a7-4d1b-8e61-49986b5d8e71\Paddle\build>exit /b 1
2021-12-10 09:59:55 C:\Users\Administrator\Downloads\workspace\cd8c52a4-e8a7-4d1b-8e61-49986b5d8e71\Paddle>set EXCODE=1
2021-12-10 09:59:55 C:\Users\Administrator\Downloads\workspace\cd8c52a4-e8a7-4d1b-8e61-49986b5d8e71\Paddle>cd C:\Users\administrator\Downloads
2021-12-10 09:59:55 C:\Users\Administrator\Downloads>rmdir C:\Users\administrator\Downloads\workspace /s/q 2>NUL
2021-12-10 10:00:01 C:\Users\Administrator\Downloads>echo EXCODE: 1
2021-12-10 10:00:01 EXCODE: 1
2021-12-10 10:00:01 C:\Users\Administrator\Downloads>exit /b 1

Please sign in to comment.