Add GetBasePtr interface in paddle::memory (#39145)
From00 committed Jan 25, 2022
1 parent 529f142 commit b2a7261
Showing 8 changed files with 47 additions and 27 deletions.
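At a glance, this commit exposes GetBasePtr() as a public API in paddle/fluid/memory/malloc.h, implements it behind AllocatorFacade, and reworks the base-pointer unit test to use it. A minimal usage sketch, assuming a GPU build with FLAGS_allocator_strategy=auto_growth (the only configuration the new checks accept); the function name and the 1024-byte size are illustrative:

```cpp
// Sketch of the new public API added by this commit. Under any other
// allocator strategy, or for a non-GPU place, the facade raises an
// Unimplemented error instead of returning a pointer.
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/place.h"

void BasePtrDemo() {
  paddle::platform::CUDAPlace place(/*device_id=*/0);

  // AllocShared() returns a std::shared_ptr<pten::Allocation>.
  auto allocation = paddle::memory::AllocShared(place, /*size=*/1024);

  // ptr() is the user-visible address; GetBasePtr() returns the start of the
  // larger chunk the auto_growth allocator carved this block from.
  void* data = allocation->ptr();
  void* base = paddle::memory::GetBasePtr(allocation);

  // For a sub-allocation made by the auto_growth allocator, base <= data.
  (void)data;
  (void)base;
}
```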
7 changes: 7 additions & 0 deletions paddle/fluid/memory/CMakeLists.txt
@@ -34,6 +34,13 @@ if (WITH_ROCM)
DEPS device_context malloc)
endif()

if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(get_base_ptr_test PROPERTIES
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()

#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()
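The test only runs on GPU builds and is pinned to FLAGS_allocator_strategy=auto_growth with the stream-safe CUDA allocator, because base pointers are only tracked in that configuration. A hedged sketch of the equivalent runtime check; the header path and namespace for GetAllocatorStrategy() are assumptions inferred from the allocator_facade.cc change further down:

```cpp
// Sketch: guard base-pointer queries on the active allocator strategy.
// GetAllocatorStrategy() and AllocatorStrategy::kAutoGrowth are referenced
// by this commit; the header path and namespace here are assumed.
#include "paddle/fluid/memory/allocation/allocator_strategy.h"

bool BasePtrQueriesSupported() {
  using paddle::memory::allocation::AllocatorStrategy;
  using paddle::memory::allocation::GetAllocatorStrategy;
  return GetAllocatorStrategy() == AllocatorStrategy::kAutoGrowth;
}
```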
7 changes: 0 additions & 7 deletions paddle/fluid/memory/allocation/CMakeLists.txt
@@ -125,10 +125,3 @@ if(NOT WIN32)
cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator)
cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator)
endif(NOT WIN32)

if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(base_ptr_test PROPERTIES
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
9 changes: 1 addition & 8 deletions paddle/fluid/memory/allocation/allocator.h
@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation {
const platform::Place& place)
: pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}

- void* base_ptr() const {
- PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
- paddle::platform::errors::Unimplemented(
- "base_ptr() is only implemented for auto_growth "
- "strategy, not support %s strategy",
- FLAGS_allocator_strategy));
- return base_ptr_;
- }
+ void* base_ptr() const { return base_ptr_; }

private:
inline void RegisterDecoratedAllocator(Allocator* allocator) {
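With this change, base_ptr() becomes a plain accessor; the strategy check that used to live here moves up into AllocatorFacade::GetBasePtr() in the next file. A small illustrative sketch of that refactor pattern, using generic names rather than Paddle's actual classes:

```cpp
// Illustrative only: validation moves from the data object's accessor to the
// facade that owns the policy, so the accessor stays cheap and unconditional.
#include <cassert>
#include <memory>

struct Block {                               // stands in for Allocation
  void* base = nullptr;
  void* base_ptr() const { return base; }    // plain getter, no policy checks
};

struct MemoryFacade {                        // stands in for AllocatorFacade
  bool auto_growth = true;
  void* GetBasePtr(const std::shared_ptr<Block>& b) const {
    // Enforce the precondition once, at the public boundary.
    assert(auto_growth && "base pointers are only tracked under auto_growth");
    return b->base_ptr();
  }
};
```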
19 changes: 19 additions & 0 deletions paddle/fluid/memory/allocation/allocator_facade.cc
@@ -282,6 +282,10 @@ class AllocatorFacadePrivate {
return iter->second;
}

void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
return static_cast<Allocation*>(allocation.get())->base_ptr();
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool HasCUDAAllocator(const platform::CUDAPlace& place,
const gpuStream_t& stream) {
@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
}

void* AllocatorFacade::GetBasePtr(
const std::shared_ptr<pten::Allocation>& allocation) {
PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth,
paddle::platform::errors::Unimplemented(
"GetBasePtr() is only implemented for auto_growth "
"strategy, not support allocator strategy: %d",
static_cast<int>(GetAllocatorStrategy())));
PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true,
paddle::platform::errors::Unimplemented(
"GetBasePtr() is only implemented for CUDAPlace(), not "
"suppot place: %s",
allocation->place()));
return m_->GetBasePtr(allocation);
}

std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size) {
return std::shared_ptr<pten::Allocation>(Alloc(place, size));
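AllocatorFacade::GetBasePtr() validates the allocator strategy and the place before AllocatorFacadePrivate downcasts to the fluid Allocation wrapper, so a misconfigured caller fails fast instead of receiving a meaningless pointer. A hedged caller-side sketch; that the enforce failure surfaces as paddle::platform::EnforceNotMet is an assumption not shown in this diff:

```cpp
// Sketch: tolerate unsupported configurations at the call site. Assumes
// PADDLE_ENFORCE_EQ failures are reported as paddle::platform::EnforceNotMet.
#include <memory>

#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/enforce.h"

void* TryGetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
  try {
    return paddle::memory::GetBasePtr(allocation);
  } catch (const paddle::platform::EnforceNotMet&) {
    // Non-auto_growth strategy or a non-CUDA place: no base pointer available.
    return nullptr;
  }
}
```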
2 changes: 2 additions & 0 deletions paddle/fluid/memory/allocation/allocator_facade.h
@@ -51,6 +51,8 @@ class AllocatorFacade {

const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place);

void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);

// Allocate a shared allocation.
std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
size_t size);
24 changes: 12 additions & 12 deletions paddle/fluid/memory/allocation/base_ptr_test.cu → paddle/fluid/memory/get_base_ptr_test.cu
@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
void OneByOneAllocTest() {
for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
- AllocationPtr allocation = Alloc(place_, size);
+ auto allocation = AllocShared(place_, size);

- void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+ void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
@@ -47,21 +47,21 @@ }
}

void BatchByBatchAllocTest() {
- std::vector<AllocationPtr> allocations;
+ std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(batch_size_);
size_t batch_num = alloc_times_ / batch_size_;

for (size_t i = 0; i < batch_num; ++i) {
for (size_t j = 0; j < batch_size_; ++j) {
size_t size = dis_(random_engine_);
- AllocationPtr allocation = Alloc(place_, size);
+ auto allocation = AllocShared(place_, size);

- void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+ void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);

- allocations.emplace_back(std::move(allocation));
+ allocations.emplace_back(allocation);
}
allocations.clear();
}
@@ -70,28 +70,28 @@ }
}

void ContinuousAllocTest() {
- std::vector<AllocationPtr> allocations;
+ std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(alloc_times_);

for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
- AllocationPtr allocation = Alloc(place_, size);
+ auto allocation = AllocShared(place_, size);

- void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+ void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);

- allocations.emplace_back(std::move(allocation));
+ allocations.emplace_back(allocation);
}

allocations.clear();
Release(place_);
}

void ZeroSizeAllocTest() {
- AllocationPtr allocation = Alloc(place_, 0);
- void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+ auto allocation = AllocShared(place_, 0);
+ void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
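Every variant of the test asserts the same invariant: the pointer returned by GetBasePtr() must equal the device allocation base reported by platform::GetGpuBasePtr(). One plausible way to obtain such a system-level base pointer is the CUDA driver API, sketched below; whether GetGpuBasePtr() is actually implemented this way is an assumption, not something this diff shows:

```cpp
// Sketch: query the base address of the physical device allocation that
// contains dev_ptr, using the CUDA driver API.
#include <cstddef>

#include <cuda.h>

void* QueryDeviceBasePtr(void* dev_ptr) {
  CUdeviceptr base = 0;
  size_t size = 0;
  if (cuMemGetAddressRange(&base, &size,
                           reinterpret_cast<CUdeviceptr>(dev_ptr)) !=
      CUDA_SUCCESS) {
    return nullptr;
  }
  return reinterpret_cast<void*>(base);  // start of the containing allocation
}
```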
4 changes: 4 additions & 0 deletions paddle/fluid/memory/malloc.cc
@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<Allocation>& allocation,
stream);
}

void* GetBasePtr(const std::shared_ptr<Allocation>& allocation) {
return allocation::AllocatorFacade::Instance().GetBasePtr(allocation);
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream) {
2 changes: 2 additions & 0 deletions paddle/fluid/memory/malloc.h
@@ -44,6 +44,8 @@ extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
extern bool InSameStream(const std::shared_ptr<Allocation>& allocation,
const platform::Stream& stream);

extern void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream);