diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index e422774bf9cf0..02eaa79fc9b28 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -83,7 +83,7 @@ void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, SizeOf(dense_tensor->dtype()); auto dense_out = std::make_shared( pten::make_intrusive( - paddle::memory::Alloc(place, bytes_size), 0), + paddle::memory::Alloc(place, bytes_size)), std::move(tensor_meta)); // Handle Device Context const paddle::platform::Place& expected_kernel_place = diff --git a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc index e292844c8ee58..1fef0905b4cc5 100644 --- a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc +++ b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc @@ -41,7 +41,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype()); auto ret_dense = std::make_shared( pten::make_intrusive( - paddle::memory::Alloc(place, bytes_size), 0), + paddle::memory::Alloc(place, bytes_size)), std::move(ret_meta)); float* t_ptr = t_dense->mutable_data(); diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc index 32b28d8efd21b..4ec49bfa56676 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc @@ -42,7 +42,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype()); auto ret_dense = std::make_shared( pten::make_intrusive( - paddle::memory::Alloc(place, bytes_size), 0), + paddle::memory::Alloc(place, bytes_size)), std::move(ret_meta)); float* t_ptr = t_dense->mutable_data(); diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index 572e36be9b776..785973e041a0d 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, "Tensors.", vec_true_outs.size(), outs.size())); for (size_t j = 0; j < vec_true_outs.size(); ++j) { - experimental::MovesSharedStorage( + experimental::SharesStorage( std::dynamic_pointer_cast(outs.at(j).impl()) .get(), vec_true_outs.at(j)); } } else { auto* true_out = ctx.Output(out_name); - experimental::MovesSharedStorage( + experimental::SharesStorage( std::dynamic_pointer_cast(outs.at(i).impl()) .get(), true_out); diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index a146f57174412..4f54ce33c14f1 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -273,6 +273,7 @@ class Tensor { const std::shared_ptr& Holder() const { return holder_; } size_t offset() const { return offset_; } + void set_offset(size_t offset) { offset_ = offset; } std::shared_ptr MoveMemoryHolder() { return std::move(holder_); diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 856f4020cfcf6..f50ae53e3fea0 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -456,8 +456,7 @@ class ReshapeKernel { // non-inplace need move all result from pt_out to out, inplace need set // result dims. if (in != out) { - paddle::experimental::MovesSharedStorage(pt_out, - static_cast(out)); + paddle::experimental::SharesStorage(pt_out, static_cast(out)); } else { out->Resize(pt_out->dims()); } diff --git a/paddle/pten/api/include/tensor.h b/paddle/pten/api/include/tensor.h index 4a8a593561ad4..b22d2d65a439c 100644 --- a/paddle/pten/api/include/tensor.h +++ b/paddle/pten/api/include/tensor.h @@ -304,7 +304,7 @@ class PADDLE_API Tensor final { * The index number begins from begin_idx + 1. * @return Tensor */ - Tensor slice(const int64_t begin_idx, const int64_t end_idx) const; + Tensor slice(int64_t begin_idx, int64_t end_idx) const; /** * @brief Return the implemention of current Tensor. diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc index 74451d00e5546..e5dd1ca5f870d 100644 --- a/paddle/pten/api/lib/tensor.cc +++ b/paddle/pten/api/lib/tensor.cc @@ -253,11 +253,11 @@ template PADDLE_API paddle::platform::float16 * Tensor::data(); // TODO(chenweihang): replace slice impl by API -Tensor Tensor::slice(const int64_t begin_idx, const int64_t end_idx) const { +Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const { if (is_dense_tensor()) { return Tensor(std::make_shared( std::move(pten::CompatibleDenseTensorUtils::Slice( - std::dynamic_pointer_cast(impl_).get(), + *(std::dynamic_pointer_cast(impl_).get()), begin_idx, end_idx)))); } else { diff --git a/paddle/pten/api/lib/utils/storage.h b/paddle/pten/api/lib/utils/storage.h index 793ddfcee3a63..41b0f4744d12a 100644 --- a/paddle/pten/api/lib/utils/storage.h +++ b/paddle/pten/api/lib/utils/storage.h @@ -37,7 +37,6 @@ class ExternalStorage : public pten::Storage { void Clear() override { data_ = nullptr; size_ = 0; - offset_ = 0; } size_t size() const noexcept override { return size_; } @@ -57,13 +56,11 @@ class ExternalStorage : public pten::Storage { class SharedStorage : public pten::Storage { public: explicit SharedStorage( - const std::shared_ptr& allocation, - size_t offset) + const std::shared_ptr& allocation) : Storage(allocation) { CHECK(allocation); place_ = allocation->place(); size_ = allocation->size(); - offset_ = offset; } // In order to be compatible with the original Tensor design and execution @@ -84,7 +81,6 @@ class SharedStorage : public pten::Storage { void Clear() override { data_ = nullptr; size_ = 0; - offset_ = 0; } size_t size() const noexcept override { return size_; } @@ -96,12 +92,10 @@ class SharedStorage : public pten::Storage { } // Temporary method: For compatible with fluid Tensor and improve performance - void ResetAllocation(std::shared_ptr allocation, - size_t offset) { + void ResetAllocation(std::shared_ptr allocation) { data_ = allocation; size_ = allocation->size(); place_ = allocation->place(); - offset_ = offset; } // Temporary method: For compatible with fluid Tensor and improve performance diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 4ce63c7b821b4..e02dd9e78d6ef 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -33,39 +33,35 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) { std::unique_ptr MakePtenDenseTensor( const paddle::framework::Tensor& src) { + VLOG(3) << "MakePtenDenseTensor based Tensor."; pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), src.dims(), - pten::TransToPtenDataLayout(src.layout())}; - auto shared_storage = - pten::make_intrusive(src.Holder(), src.offset()); + pten::TransToPtenDataLayout(src.layout()), + src.offset()}; + auto shared_storage = pten::make_intrusive(src.Holder()); return std::make_unique(std::move(shared_storage), std::move(meta)); } std::unique_ptr MakePtenDenseTensor( const paddle::framework::LoDTensor& src) { - pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), - src.dims(), - pten::TransToPtenDataLayout(src.layout())}; - SetLoD(&meta.lod, src.lod()); - auto shared_storage = - pten::make_intrusive(src.Holder(), src.offset()); - - return std::make_unique(std::move(shared_storage), - std::move(meta)); + auto out = + MakePtenDenseTensor(static_cast(src)); + SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod), + src.lod()); + return std::move(out); } std::unique_ptr MakePtenDenseTensor( - const paddle::framework::Tensor& tensor, - const pten::TensorArgDef& arg_def) { + const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) { pten::DenseTensorMeta meta{arg_def.dtype, - tensor.dims(), - pten::TransToPtenDataLayout(tensor.layout())}; + src.dims(), + pten::TransToPtenDataLayout(src.layout()), + src.offset()}; - if (tensor.IsInitialized() && - tensor.place() == pten::TransToFluidPlace(arg_def.backend)) { - auto shared_storage = - pten::make_intrusive(tensor.Holder(), tensor.offset()); + if (src.IsInitialized() && + src.place() == pten::TransToFluidPlace(arg_def.backend)) { + auto shared_storage = pten::make_intrusive(src.Holder()); return std::make_unique(std::move(shared_storage), std::move(meta)); } else { @@ -77,25 +73,13 @@ std::unique_ptr MakePtenDenseTensor( } std::unique_ptr MakePtenDenseTensor( - const paddle::framework::LoDTensor& tensor, + const paddle::framework::LoDTensor& src, const pten::TensorArgDef& arg_def) { - pten::DenseTensorMeta meta{arg_def.dtype, - tensor.dims(), - pten::TransToPtenDataLayout(tensor.layout()), - pten::TransToPtenLoD(tensor.lod())}; - - if (tensor.IsInitialized() && - tensor.place() == pten::TransToFluidPlace(arg_def.backend)) { - auto shared_storage = - pten::make_intrusive(tensor.Holder(), tensor.offset()); - return std::make_unique(std::move(shared_storage), - std::move(meta)); - } else { - return std::make_unique( - std::move(pten::make_intrusive( - pten::TransToFluidPlace(arg_def.backend))), - std::move(meta)); - } + auto out = MakePtenDenseTensor( + static_cast(src), arg_def); + SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod), + src.lod()); + return std::move(out); } pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src) { @@ -328,23 +312,15 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { std::shared_ptr holder( new TensorStorage(std::move(storage))); dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype())); + dst->set_offset(src->meta().offset); } void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { - PADDLE_ENFORCE_NOT_NULL( - src, - platform::errors::InvalidArgument( - "The source DenseTensor is nullptr when move storage.")); - PADDLE_ENFORCE_NOT_NULL( - dst, - platform::errors::InvalidArgument( - "The destination LoDTensor is nullptr when move storage.")); - SetLoD(dst->mutable_lod(), src->lod()); MovesStorage(src, static_cast(dst)); + SetLoD(dst->mutable_lod(), src->lod()); } -void MovesSharedStorage(pten::DenseTensor* src, - paddle::framework::Tensor* dst) { +void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { PADDLE_ENFORCE_NOT_NULL( src, platform::errors::InvalidArgument( @@ -358,24 +334,22 @@ void MovesSharedStorage(pten::DenseTensor* src, pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(src)); dst->ResetHolderWithType(storage->GetAllocation(), pten::TransToProtoVarType(src->dtype())); + dst->set_offset(src->meta().offset); } -void MovesSharedStorage(pten::DenseTensor* src, - paddle::framework::LoDTensor* dst) { - MovesSharedStorage(src, static_cast(dst)); +void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { + SharesStorage(src, static_cast(dst)); SetLoD(dst->mutable_lod(), src->lod()); } void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, - const pten::TensorArgDef& arg_def, pten::DenseTensor* dst) { + VLOG(3) << "ReMakePtenDenseTensor based Tensor."; auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->dtype) = arg_def.dtype; - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->layout) = - pten::TransToPtenDataLayout(src.layout()); + meta->dtype = pten::TransToPtenDataType(src.type()); + meta->layout = pten::TransToPtenDataLayout(src.layout()); + meta->offset = src.offset(); auto* shared_storage = static_cast( pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); @@ -384,42 +358,30 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, platform::errors::NotFound( "Target DenseTensor's shared storage is nullptr.")); - if (src.IsInitialized()) { - shared_storage->ResetAllocation(src.Holder(), src.offset()); - } + PADDLE_ENFORCE_EQ(src.IsInitialized(), + true, + paddle::platform::errors::InvalidArgument( + "Source Tensor is not initialized.")); + shared_storage->ResetAllocation(src.Holder()); } void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, pten::DenseTensor* dst) { auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); - meta->dims = src.dims(); - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->dtype) = pten::TransToPtenDataType(src.type()); - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->layout) = - pten::TransToPtenDataLayout(src.layout()); - - auto* shared_storage = static_cast( - pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); - PADDLE_ENFORCE_NOT_NULL( - shared_storage, - platform::errors::NotFound( - "Target DenseTensor's shared storage is nullptr.")); - - if (src.IsInitialized()) { - shared_storage->ResetAllocation(src.Holder(), src.offset()); - } + SetLoD(&meta->lod, src.lod()); + ReMakePtenDenseTensor(static_cast(src), + dst); } -void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, - pten::DenseTensor* dst) { +void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, + const pten::TensorArgDef& arg_def, + pten::DenseTensor* dst) { + VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef."; auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->dtype) = pten::TransToPtenDataType(src.type()); - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->layout) = - pten::TransToPtenDataLayout(src.layout()); + meta->dtype = arg_def.dtype; + meta->layout = pten::TransToPtenDataLayout(src.layout()); + meta->offset = src.offset(); auto* shared_storage = static_cast( pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); @@ -428,38 +390,24 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, platform::errors::NotFound( "Target DenseTensor's shared storage is nullptr.")); - if (src.IsInitialized()) { - shared_storage->ResetAllocation(src.Holder(), src.offset()); - } -} - -void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, - const pten::TensorArgDef& arg_def, - pten::DenseTensor* dst) { - auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); - meta->dims = src.dims(); - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->dtype) = arg_def.dtype; - // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->layout) = - pten::TransToPtenDataLayout(src.layout()); - SetLoD(&(meta->lod), src.lod()); - - auto* shared_storage = static_cast( - pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); - PADDLE_ENFORCE_NOT_NULL( - shared_storage, - platform::errors::NotFound( - "Target DenseTensor's shared storage is nullptr.")); if (src.IsInitialized() && src.place() == pten::TransToFluidPlace(arg_def.backend)) { - shared_storage->ResetAllocation(src.Holder(), src.offset()); + shared_storage->ResetAllocation(src.Holder()); } else { shared_storage->ResetAllocationPlace( pten::TransToFluidPlace(arg_def.backend)); } } +void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src, + const pten::TensorArgDef& arg_def, + pten::DenseTensor* dst) { + auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); + SetLoD(&meta->lod, src.lod()); + ReMakePtenDenseTensorByArgDef( + static_cast(src), arg_def, dst); +} + void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, const pten::TensorArgDef& arg_def, pten::DenseTensor* dst) { @@ -475,9 +423,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, if (!platform::is_same_place(tensor.place(), expected_place)) { framework::LoDTensor tmp_tensor; framework::TensorCopySync(tensor, expected_place, &tmp_tensor); - ReMakePtenDenseTensor(tmp_tensor, arg_def, dst); + ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst); } else { - ReMakePtenDenseTensor(tensor, arg_def, dst); + ReMakePtenDenseTensorByArgDef(tensor, arg_def, dst); } } else if (variable.IsType()) { // TODO(chenweihang): now we don't deal with row and height @@ -492,9 +440,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, framework::Tensor tmp_tensor; TensorCopySync(tensor.value(), expected_place, &tmp_tensor); // TODO(chenweihang): adapt SelectedRows by xiaowei's design - ReMakePtenDenseTensor(tmp_tensor, arg_def, dst); + ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst); } else { - ReMakePtenDenseTensor(tensor.value(), arg_def, dst); + ReMakePtenDenseTensorByArgDef(tensor.value(), arg_def, dst); } } else { PADDLE_THROW(platform::errors::Unimplemented( @@ -510,12 +458,12 @@ void ReMakePtenDenseTensorFromVar(framework::Variable* variable, // KernelContext to original tensor if (variable->template IsType()) { auto* tensor = variable->template GetMutable(); - ReMakePtenDenseTensor(*tensor, arg_def, dst); + ReMakePtenDenseTensorByArgDef(*tensor, arg_def, dst); } else if (variable->template IsType()) { auto* tensor = variable->template GetMutable(); // TODO(chenweihang): adapt SelectedRows by xiaowei's design, // here the row and height will lost in output! - ReMakePtenDenseTensor(tensor->value(), arg_def, dst); + ReMakePtenDenseTensorByArgDef(tensor->value(), arg_def, dst); } else { PADDLE_THROW(platform::errors::Unimplemented( "Unsupported shared output `%s` type now when call pt kernel.", diff --git a/paddle/pten/api/lib/utils/tensor_utils.h b/paddle/pten/api/lib/utils/tensor_utils.h index 838a63e1a8d5f..06edb4a7516b0 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.h +++ b/paddle/pten/api/lib/utils/tensor_utils.h @@ -58,10 +58,9 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); -void MovesSharedStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); +void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); -void MovesSharedStorage(pten::DenseTensor* src, - paddle::framework::LoDTensor* dst); +void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); /** * In order to improve the compatibility state performance, some tricky tool @@ -72,20 +71,20 @@ void MovesSharedStorage(pten::DenseTensor* src, * the overhead caused by frequent construction and destruction of the * DenseTensor. */ -void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, - pten::DenseTensor* dst); - -void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, - pten::DenseTensor* dst); - void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, - const pten::TensorArgDef& arg_def, pten::DenseTensor* dst); void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, - const pten::TensorArgDef& arg_def, pten::DenseTensor* dst); +void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src, + const pten::TensorArgDef& arg_def, + pten::DenseTensor* dst); + +void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src, + const pten::TensorArgDef& arg_def, + pten::DenseTensor* dst); + void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, const pten::TensorArgDef& arg_def, pten::DenseTensor* dst); diff --git a/paddle/pten/core/compat_utils.h b/paddle/pten/core/compat_utils.h index c61b96546ec63..0bd82080ddebc 100644 --- a/paddle/pten/core/compat_utils.h +++ b/paddle/pten/core/compat_utils.h @@ -48,16 +48,16 @@ class CompatibleDenseTensorUtils { } } - static DenseTensor Slice(DenseTensor* tensor, + static DenseTensor Slice(const DenseTensor& tensor, int64_t begin_idx, int64_t end_idx) { - size_t bytes = tensor->numel() * SizeOf(tensor->dtype()); - PADDLE_ENFORCE_GE(tensor->capacity(), + size_t bytes = tensor.numel() * SizeOf(tensor.dtype()); + PADDLE_ENFORCE_GE(tensor.capacity(), bytes, paddle::platform::errors::InvalidArgument( "The memory size %d should be enough to meet the " "volume required by metadata %d.", - tensor->capacity(), + tensor.capacity(), bytes)); PADDLE_ENFORCE_GE(begin_idx, 0, @@ -66,7 +66,7 @@ class CompatibleDenseTensorUtils { "But received the start index is d%.", begin_idx)); PADDLE_ENFORCE_LE(end_idx, - tensor->dims()[0], + tensor.dims()[0], paddle::platform::errors::OutOfRange( "The end row index is out of bound.")); PADDLE_ENFORCE_LT( @@ -77,13 +77,12 @@ class CompatibleDenseTensorUtils { "But received the start index = %d, the end index = %d.", begin_idx, end_idx)); - DenseTensor ret = - DenseTensor(copy_intrusive(tensor->storage_), tensor->meta_); - if (tensor->dims()[0] != 1) { + DenseTensor ret(tensor); + if (tensor.dims()[0] != 1) { ret.meta_.dims[0] = end_idx - begin_idx; - ret.meta_.offset = tensor->meta_.offset + - begin_idx * (tensor->numel() / tensor->dims()[0]) * - paddle::experimental::SizeOf(tensor->dtype()); + ret.meta_.offset = tensor.meta_.offset + + begin_idx * (tensor.numel() / tensor.dims()[0]) * + paddle::experimental::SizeOf(tensor.dtype()); } return ret; } diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 4298906df0b06..6fe85610612ac 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -72,12 +72,14 @@ void* DenseTensor::mutable_data(size_t request_bytes) { bytes)); bytes = request_bytes; } - if (storage_->size() < bytes || storage_->size() == 0) { + if (storage_->size() < bytes + meta_.offset || storage_->size() == 0) { VLOG(10) << "mutbale data realloc, original size: " << storage_->size() << ", new size: " << bytes; storage_->Realloc(bytes); + meta_.offset = 0; } - return storage_->data(); + return reinterpret_cast(reinterpret_cast(storage_->data()) + + meta_.offset); } template @@ -116,7 +118,8 @@ const void* DenseTensor::data() const { storage_, paddle::platform::errors::PreconditionNotMet( "The storage must be valid when call the mutable data function.")); - return storage_->data(); + return reinterpret_cast(reinterpret_cast(storage_->data()) + + meta_.offset); } void DenseTensor::set_meta(DenseTensorMeta&& meta) { diff --git a/paddle/pten/core/storage.h b/paddle/pten/core/storage.h index bc652f52c1f3b..7d4b6a28be22d 100644 --- a/paddle/pten/core/storage.h +++ b/paddle/pten/core/storage.h @@ -47,7 +47,7 @@ class Storage : public intrusive_ref_counter { void* data() const { return data_ ? reinterpret_cast( - reinterpret_cast(data_->ptr()) + offset_) + reinterpret_cast(data_->ptr())) : nullptr; } @@ -71,7 +71,6 @@ class Storage : public intrusive_ref_counter { virtual void Realloc(size_t n) = 0; protected: - size_t offset_{0}; std::shared_ptr data_; }; @@ -89,7 +88,6 @@ class TensorStorage : public Storage { void Clear() override { data_ = nullptr; size_ = 0; - offset_ = 0; } void Realloc(size_t size) override; diff --git a/paddle/pten/core/tensor_meta.cc b/paddle/pten/core/tensor_meta.cc index 3343527e8cd41..844387bec5c58 100644 --- a/paddle/pten/core/tensor_meta.cc +++ b/paddle/pten/core/tensor_meta.cc @@ -21,14 +21,16 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, - DataLayout layout) - : dims(dims), dtype(dtype), layout(layout) {} + DataLayout layout, + size_t offset) + : dims(dims), dtype(dtype), layout(layout), offset(offset) {} DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout, - const LoD& lod) - : dims(dims), dtype(dtype), layout(layout), lod(lod) {} + const LoD& lod, + size_t offset) + : dims(dims), dtype(dtype), layout(layout), lod(lod), offset(offset) {} bool DenseTensorMeta::valid() const noexcept { bool valid{true}; diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index 083ef2c5d39a5..2df6b48b674a7 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -41,18 +41,20 @@ struct DenseTensorMeta { DenseTensorMeta() = default; DenseTensorMeta(DataType dtype, const DDim& dims); - DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout); DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout, - const LoD& lod); + size_t offset = 0); + DenseTensorMeta(DataType dtype, + const DDim& dims, + DataLayout layout, + const LoD& lod, + size_t offset = 0); /// \brief Test whether the metadata is valid. Does not throw exceptions. /// \return Whether the metadata is valid. bool valid() const noexcept; - /// During the entire life cycle of a DenseTensor, the following attributes - /// marked with `const` are expected to remain unchanged. bool is_scalar{false}; DDim dims; DataType dtype{DataType::UNDEFINED}; diff --git a/python/paddle/fluid/tests/custom_op/custom_simple_slice_op.cc b/python/paddle/fluid/tests/custom_op/custom_simple_slice_op.cc new file mode 100644 index 0000000000000..783e0cd96fdd9 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/custom_simple_slice_op.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "paddle/extension.h" + +#define CHECK_INPUT(x) \ + PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.") + +std::vector SimpleSliceFunction(const paddle::Tensor& x, + int64_t begin_index, + int64_t end_index) { + return {x.slice(begin_index, end_index)}; +} + +std::vector> SimpleSliceInferShape( + const std::vector& x_shape, + int64_t begin_index, + int64_t end_index) { + PD_CHECK(begin_index > 0, "The begin index is out of bound."); + PD_CHECK(end_index > 0, "The end index must is out of bound."); + PD_CHECK(begin_index < end_index, + "The begin index is greater than end index."); + auto out_shape = x_shape; + out_shape[0] = end_index - begin_index; + return {out_shape}; +} + +PD_BUILD_OP(custom_simple_slice) + .Inputs({"X"}) + .Outputs({"Out"}) + .Attrs({"begin_index: int64_t", "end_index: int64_t"}) + .SetKernelFn(PD_KERNEL(SimpleSliceFunction)) + .SetInferShapeFn(PD_INFER_SHAPE(SimpleSliceInferShape)); diff --git a/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py b/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py new file mode 100644 index 0000000000000..c60bac4060b64 --- /dev/null +++ b/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py @@ -0,0 +1,53 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtaina copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import unittest +import numpy as np + +import paddle +from paddle.utils.cpp_extension import load, get_build_directory +from paddle.utils.cpp_extension.extension_utils import run_cmd +from utils import paddle_includes, extra_cc_args, extra_nvcc_args + +# Because Windows don't use docker, the shared lib already exists in the +# cache dir, it will not be compiled again unless the shared lib is removed. +file = '{}\\custom_simple_slice\\custom_simple_slice.pyd'.format( + get_build_directory()) +if os.name == 'nt' and os.path.isfile(file): + cmd = 'del {}'.format(file) + run_cmd(cmd, True) + +custom_ops = load( + name='custom_simple_slice_jit', + sources=['custom_simple_slice_op.cc'], + extra_include_paths=paddle_includes, # add for Coverage CI + extra_cxx_cflags=extra_cc_args, # test for cc flags + extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags + verbose=True) + + +class TestCustomSimpleSliceJit(unittest.TestCase): + def test_slice_output(self): + np_x = np.random.random((5, 2)).astype("float32") + x = paddle.to_tensor(np_x) + custom_op_out = custom_ops.custom_simple_slice(x, 2, 3) + np_out = np_x[2:3] + self.assertTrue( + np.array_equal(custom_op_out, np_out), + "custom op: {},\n numpy: {}".format(np_out, custom_op_out.numpy())) + + +if __name__ == "__main__": + unittest.main()