Skip to content

Commit

Permalink
[PTEN] Add xpu context. (#39098)
Browse files Browse the repository at this point in the history
  • Loading branch information
jiweibo committed Jan 25, 2022
1 parent b2a7261 commit c1e5a39
Show file tree
Hide file tree
Showing 28 changed files with 958 additions and 448 deletions.
7 changes: 7 additions & 0 deletions paddle/fluid/framework/pten_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,12 @@ struct ConvertToPtenContext<platform::CPUDeviceContext> {
using TYPE = pten::CPUContext;
};

#ifdef PADDLE_WITH_XPU
// Maps the fluid platform::XPUDeviceContext onto its pten counterpart so
// framework code can obtain the pten context type generically via
// ConvertToPtenContext<DeviceContext>::TYPE (mirrors the CPU specialization
// above).
template <>
struct ConvertToPtenContext<platform::XPUDeviceContext> {
using TYPE = pten::XPUContext;
};
#endif

} // namespace framework
} // namespace paddle
4 changes: 2 additions & 2 deletions paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,11 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
inverse_scale = 0.0;
}

paddle::platform::XPUVersion version = dev_ctx.xpu_version();
auto version = dev_ctx.xpu_version();
framework::Tensor float_x;
framework::Tensor float_out;
if (std::is_same<T, paddle::platform::float16>::value &&
(version == paddle::platform::XPUVersion::XPU1)) {
(version == pten::backends::xpu::XPUVersion::XPU1)) {
float_x.mutable_data<MPDType>(dev_ctx.GetPlace(),
x->numel() * sizeof(MPDType));
float_out.mutable_data<MPDType>(dev_ctx.GetPlace(),
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/operators/dropout_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
return;
}

paddle::platform::XPUVersion version = dev_ctx.xpu_version();
if (version == paddle::platform::XPUVersion::XPU1) {
auto version = dev_ctx.xpu_version();
if (version == pten::backends::xpu::XPUVersion::XPU1) {
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUType* mask_new = RAII_GUARD.alloc_l3_or_gm<XPUType>(mask->numel());
float scale =
Expand Down
10 changes: 7 additions & 3 deletions paddle/fluid/operators/reshape_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,8 @@ class ReshapeKernel {
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeKernel(dev_ctx, *pt_x.get(), pt_scalar_shape, pt_out);
pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx),
*pt_x.get(), pt_scalar_shape, pt_out);
}
#endif
// non-inplace need move all result from pt_out to out, inplace need set
Expand Down Expand Up @@ -485,7 +486,8 @@ class ReshapeGradKernel {
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeGradKernel(dev_ctx, *pt_d_out.get(), pt_d_x.get());
pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
*pt_d_out.get(), pt_d_x.get());
}
#endif
}
Expand Down Expand Up @@ -516,7 +518,9 @@ class ReshapeDoubleGradKernel {
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeDoubleGradKernel(dev_ctx, *pt_dd_x.get(), pt_dd_out.get());
pten::ReshapeDoubleGradKernel(
static_cast<const pten::XPUContext &>(dev_ctx), *pt_dd_x.get(),
pt_dd_out.get());
}
#endif
}
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/operators/softmax_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ class SoftmaxXPUKernel : public framework::OpKernel<T> {
auto& dev_ctx = context.template device_context<DeviceContext>();

int r = XPU_SUCCESS;
paddle::platform::XPUVersion version = dev_ctx.xpu_version();
if (version == paddle::platform::XPUVersion::XPU1) {
auto version = dev_ctx.xpu_version();
if (version == pten::backends::xpu::XPUVersion::XPU1) {
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUType* clip_x_data_l3 = RAII_GUARD.alloc_l3_or_gm<XPUType>(x->numel());
r = xpu::clip_v2(dev_ctx.x_context(),
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/platform/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost)
cc_library(device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc xxhash ${STREAM_CALLBACK_DEPS}
place pten_place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS} ${MLU_CTX_DEPS} cpu_context)
if(WITH_XPU)
target_link_libraries(device_context xpu_context)
endif()

cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce)
if(WITH_ASCEND_CL)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/platform/device/xpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ endif()

set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl)

cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place)
cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place pten_xpu_info)
cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context)

add_subdirectory(tests)
157 changes: 8 additions & 149 deletions paddle/fluid/platform/device/xpu/enforce_xpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,177 +15,36 @@ limitations under the License. */
#pragma once

#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/enforce.h"
#include "xpu/bkcl.h"

#include "paddle/pten/backends/xpu/enforce_xpu.h"

namespace paddle {
namespace platform {

// Note: XPU runtime api return int, not XPUError_t
// Translates an XPU runtime status code into a human-readable string.
// Note: XPU runtime APIs return int, not XPUError_t.
//
// The error-code table was moved into the pten backend
// (paddle/pten/backends/xpu/enforce_xpu.h, included above); the hand-rolled
// switch that used to live here duplicated it and made the delegating
// return unreachable, so this now simply forwards.
//
// @param stat  status code returned by an XPU runtime API call.
// @return      static description string; never null.
inline const char* xpuGetErrorString(int stat) {
  return pten::backends::xpu::xpuGetErrorString(stat);
}

// Translates a BKCL (XPU collective communication library) status code into
// a human-readable string by delegating to the pten backend implementation;
// the local switch over BKCLResult_t duplicated that table and left the
// delegating return unreachable.
//
// @param stat  BKCL result code.
// @return      static description string; never null.
inline const char* bkclGetErrorString(BKCLResult_t stat) {
  return pten::backends::xpu::bkclGetErrorString(stat);
}

// Translates an XDNN (xpu::Error_t) status code into a human-readable
// string, delegating to the pten backend implementation; the local switch
// duplicated that table and made the delegating return unreachable.
//
// @param stat  XDNN status code (xpu::Error_t values, passed as int).
// @return      static description string; never null.
inline const char* xdnnGetErrorString(int stat) {
  return pten::backends::xpu::xdnnGetErrorString(stat);
}

// Builds the full error message for a failed XPU runtime call. The string
// formatting now lives in the pten backend; the local "XPU Error <N>, ..."
// concatenation returned before the delegating call could ever run, so only
// the delegation is kept.
//
// @param stat  XPU runtime status code.
// @return      formatted message including code and description.
inline std::string build_xpu_error_msg(int stat) {
  return pten::backends::xpu::build_xpu_error_msg(stat);
}

// Builds the full error message for a failed BKCL call, delegating to the
// pten backend; the local "BKCL Error, ..." concatenation returned before
// the delegating call could ever run, so only the delegation is kept.
//
// @param stat  BKCL result code.
// @return      formatted message including the code's description.
inline std::string build_xpu_error_msg(BKCLResult_t stat) {
  return pten::backends::xpu::build_xpu_error_msg(stat);
}

// Builds the full error message for a failed XDNN call, delegating to the
// pten backend; the local concatenation returned before the delegating call
// could ever run, so only the delegation is kept.
//
// @param stat  XDNN status code.
// @param msg   caller-supplied context prefix (e.g. the failing op name).
// @return      formatted message combining msg, the code, and its description.
inline std::string build_xpu_xdnn_error_msg(int stat, std::string msg) {
  return pten::backends::xpu::build_xpu_xdnn_error_msg(stat, msg);
}

namespace details {

// Maps an XPU status type to its "success" sentinel so the enforce macros
// below can compare uniformly. Only the explicit specializations generated
// by DEFINE_EXTERNAL_API_TYPE are usable; the primary template is empty on
// purpose so unsupported status types fail at compile time.
template <typename T>
struct ExternalApiType {};

// Generates an ExternalApiType specialization binding `type` to its
// `success_value` constant.
#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \
template <> \
struct ExternalApiType<type> { \
using Type = type; \
static constexpr Type kSuccess = success_value; \
}

// XPU runtime APIs return int; BKCL returns BKCLResult_t.
DEFINE_EXTERNAL_API_TYPE(int, XPU_SUCCESS);
DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS);

#undef DEFINE_EXTERNAL_API_TYPE

} // namespace details

// Evaluates COND (an XPU runtime or BKCL call) once and throws a
// paddle::platform::errors::External with a formatted message if the result
// is not the success value for its status type (looked up through
// details::ExternalApiType).
#define PADDLE_ENFORCE_XPU_SUCCESS(COND) \
do { \
auto __cond__ = (COND); \
using __XPU_STATUS_TYPE__ = decltype(__cond__); \
constexpr auto __success_type__ = \
::paddle::platform::details::ExternalApiType< \
__XPU_STATUS_TYPE__>::kSuccess; \
if (UNLIKELY(__cond__ != __success_type__)) { \
auto __summary__ = paddle::platform::errors::External( \
::paddle::platform::build_xpu_error_msg(__cond__)); \
__THROW_ERROR_INTERNAL__(__summary__); \
} \
} while (0)

// Evaluates COND (an XDNN call returning xpu::Error_t) once and throws an
// External error if it is not xpu::Error_t::SUCCESS, prefixing the message
// with the caller-supplied MSG for context.
#define PADDLE_ENFORCE_XDNN_SUCCESS(COND, MSG) \
do { \
auto __cond__ = (COND); \
if (UNLIKELY(__cond__ != xpu::Error_t::SUCCESS)) { \
auto __summary__ = paddle::platform::errors::External( \
::paddle::platform::build_xpu_xdnn_error_msg(__cond__, MSG)); \
__THROW_ERROR_INTERNAL__(__summary__); \
} \
} while (0)

} // namespace platform
} // namespace paddle
39 changes: 1 addition & 38 deletions paddle/fluid/platform/device/xpu/xpu_header.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,42 +15,5 @@ limitations under the License. */
#pragma once

#ifdef PADDLE_WITH_XPU
#include <map>
#include <string>
#include <unordered_map>

#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"

#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#include "xpu/xdnn.h"

namespace xpu = baidu::xpu::api;

// Human-readable descriptions for XPU API (XDNN) status codes, keyed by the
// xpu::Error_t value; used when formatting messages for failed XPU API calls.
static std::map<int, std::string> XPUAPIErrorMsg = {
{xpu::Error_t::SUCCESS, "xpu api success"},
{xpu::Error_t::INVALID_PARAM, "xpu api invalid param"},
{xpu::Error_t::RUNTIME_ERROR, "xpu api runtime error"},
{xpu::Error_t::NO_ENOUGH_WORKSPACE, "xpu api no enough workspace"}};

// Maps a host-side element type onto the element type consumed by XPU
// (Kunlun) device kernels. This unspecialized template is the identity
// mapping: the host type is used on the device unchanged.
template <class T>
class XPUTypeTrait {
 public:
  using Type = T;
};

// Specialization: paddle::platform::float16 is represented on the XPU device
// by the XPU runtime's float16 type.
template <>
class XPUTypeTrait<paddle::platform::float16> {
public:
using Type = float16;
};

// Specialization: paddle::platform::bfloat16 is represented on the XPU device
// by the XPU runtime's bfloat16 type.
template <>
class XPUTypeTrait<paddle::platform::bfloat16> {
public:
using Type = bfloat16;
};

#include "paddle/pten/backends/xpu/xpu_header.h"
#endif
Loading

0 comments on commit c1e5a39

Please sign in to comment.