Skip to content

Commit

Permalink
[Unify Tensors PR #3]Port framework::Tensor members & interfaces to p…
Browse files Browse the repository at this point in the history
…ten::DenseTensor, test=allcases (PaddlePaddle#38473)

* Added shared_ptr<Allocation> member & corresponding interfaces to Storage

* Removed original pten::Allocation from Storage and adjusted the interfaces accordingly

* Fixed issues with storage offset

* Used place to malloc allocation for TensorStorage

* [Unify Tensors PR #3]Ported framework::Tensor interfaces to pten::DenseTensor

* Fixed issues with place

* Added comments

* Moved mutable_data with stream argument to DenseTensor

* Added set_offset interface

* Fixed CI issues,test=allcases

* [Unify Tensors PR #4] Port LoDTensor interfaces to DenseTensor

* Reverted changes to pten_layout() interface

* Removed friend classes
  • Loading branch information
jim19930609 committed Jan 4, 2022
1 parent a7b13d3 commit dfdc996
Show file tree
Hide file tree
Showing 11 changed files with 721 additions and 25 deletions.
8 changes: 4 additions & 4 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
if(WITH_GPU)
if (WIN32)
windows_symbolic(tensor_util SRCS tensor_util.cu)
nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context dense_tensor)
add_dependencies(tensor tensor_util)
else()
nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
endif(WIN32)
elseif(WITH_ROCM)
hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler)
hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
else()
cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler)
cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler dense_tensor)
endif()

cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ class Tensor {
explicit Tensor(const proto::VarType::Type&);

/*! Return a pointer to mutable memory block. */
const void* data() const;

template <typename T>
T* data();

Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/framework/tensor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ inline T* Tensor::data() {
offset_);
}

// Returns a read-only pointer to the tensor's memory block, offset by
// offset_ bytes into the underlying allocation (holder_ may be shared
// by several tensors via slicing).
inline const void* Tensor::data() const {
  check_memory_size();
  // Cast to const void* directly: this is a const member, so the pointer
  // must not lose const-ness even transiently via reinterpret_cast<void*>.
  return reinterpret_cast<const void*>(
      reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}

template <typename T>
inline T* Tensor::mutable_data(const DDim& dims, const platform::Place& place,
size_t requested_size) {
Expand Down
40 changes: 31 additions & 9 deletions paddle/fluid/framework/tensor_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,22 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/profiler.h"

#include "paddle/pten/core/dense_tensor.h"

#ifdef PADDLE_WITH_MKLDNN
#include "dnnl_debug.h" // NOLINT
#endif

namespace paddle {
namespace framework {

void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) {
template <typename TENSOR>
void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, TENSOR* dst) {
if (&src == dst) {
auto src_copy = src;
TensorCopy(src_copy, dst_place, ctx, dst);
TensorCopyImpl(src_copy, dst_place, ctx, dst);
return;
}

Expand All @@ -45,7 +49,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data<void>();
auto src_ptr = src.data();
#ifdef PADDLE_WITH_MKLDNN
dst->set_format(src.format());
// oneDNN tensors due to padding may be of bigger size
Expand Down Expand Up @@ -389,16 +393,34 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
#endif
}

void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
template <typename TENSOR>
void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
                    TENSOR* dst) {
  // No explicit device context was supplied: pick one from the global pool.
  // For GPU/NPU destinations the copy runs on the destination's context;
  // otherwise the source place's context is used.
  auto& pool = platform::DeviceContextPool::Instance();
  const bool use_dst_ctx =
      platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place);
  const platform::DeviceContext* dev_ctx =
      use_dst_ctx ? pool.Get(dst_place) : pool.Get(src.place());
  TensorCopyImpl(src, dst_place, *dev_ctx, dst);
}

// Copies a framework::Tensor to dst_place; the device context is chosen
// automatically from the global pool.
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                Tensor* dst) {
  TensorCopyImpl(src, dst_place, dst);
}
// Copies a pten::DenseTensor to dst_place; the device context is chosen
// automatically from the global pool.
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
                pten::DenseTensor* dst) {
  TensorCopyImpl(src, dst_place, dst);
}
// Copies a framework::Tensor to dst_place using the caller-supplied
// device context.
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                const platform::DeviceContext& ctx, Tensor* dst) {
  TensorCopyImpl(src, dst_place, ctx, dst);
}
// Copies a pten::DenseTensor to dst_place using the caller-supplied
// device context.
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
                const platform::DeviceContext& ctx, pten::DenseTensor* dst) {
  TensorCopyImpl(src, dst_place, ctx, dst);
}

void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Expand All @@ -418,7 +440,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
dst->set_format(src.format());
#endif
auto src_place = src.place();
auto src_ptr = src.data<void>();
auto src_ptr = src.data();
auto dst_ptr = dst->mutable_data(dst_place, src.type());

if (src_ptr == dst_ptr && src_place == dst_place) {
Expand Down Expand Up @@ -971,7 +993,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
{ // the 3rd field, tensor data
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());

auto* data_ptr = tensor.data<void>();
auto* data_ptr = tensor.data();
PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
platform::errors::ResourceExhausted(
"tensor size %d overflow when writing tensor", size));
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/framework/tensor_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device/mlu/device_context.h"
#endif

#include "paddle/pten/core/dense_tensor.h"

namespace paddle {
namespace framework {

Expand Down Expand Up @@ -75,6 +77,8 @@ class Tensor;

void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst);
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, pten::DenseTensor* dst);

// NOTE(zcd): If the src.place() and dst_place are two different GPU,
// the copy operation is carried out on the dst_place's stream. This is
Expand All @@ -85,6 +89,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
// not completed.
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst);
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
pten::DenseTensor* dst);

void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst);
Expand Down
7 changes: 6 additions & 1 deletion paddle/pten/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ endif()
cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)
cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)

cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
cc_library(dense_tensor SRCS dense_tensor.cc DEPS convert_utils tensor_meta tensor_base)

cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base)
# Will remove once we implemented MKLDNN_Tensor
if(WITH_MKLDNN)
add_dependencies(dense_tensor mkldnn)
endif()
Loading

0 comments on commit dfdc996

Please sign in to comment.