Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[refactor] Renamed allocate_memory_ndarray to allocate_memory_on_device #8240

Merged
Merged 8 commits on Jul 11, 2023.
4 changes: 2 additions & 2 deletions taichi/program/ndarray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ Ndarray::Ndarray(Program *prog,
"Ndarray index might be out of int32 boundary but int64 indexing is "
"not supported yet.");
}
ndarray_alloc_ = prog->allocate_memory_ndarray(nelement_ * element_size_,
prog->result_buffer);
ndarray_alloc_ = prog->allocate_memory_on_device(nelement_ * element_size_,
prog->result_buffer);
}

Ndarray::Ndarray(DeviceAllocation &devalloc,
Expand Down
6 changes: 3 additions & 3 deletions taichi/program/program.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ class TI_DLL_EXPORT Program {
}

// TODO: do we still need result_buffer?
DeviceAllocation allocate_memory_ndarray(std::size_t alloc_size,
uint64 *result_buffer) {
return program_impl_->allocate_memory_ndarray(alloc_size, result_buffer);
DeviceAllocation allocate_memory_on_device(std::size_t alloc_size,
uint64 *result_buffer) {
return program_impl_->allocate_memory_on_device(alloc_size, result_buffer);
}
DeviceAllocation allocate_texture(const ImageParams &params) {
return program_impl_->allocate_texture(params);
Expand Down
4 changes: 2 additions & 2 deletions taichi/program/program_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ class ProgramImpl {
return kDeviceNullPtr;
}

virtual DeviceAllocation allocate_memory_ndarray(std::size_t alloc_size,
uint64 *result_buffer) {
virtual DeviceAllocation allocate_memory_on_device(std::size_t alloc_size,
uint64 *result_buffer) {
return kDeviceNullAllocation;
}

Expand Down
4 changes: 2 additions & 2 deletions taichi/runtime/amdgpu/kernel_launcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void KernelLauncher::launch_llvm_kernel(Handle handle,
if (on_amdgpu_device(data_ptr)) {
device_ptrs[data_ptr_idx] = data_ptr;
} else {
DeviceAllocation devalloc = executor->allocate_memory_ndarray(
DeviceAllocation devalloc = executor->allocate_memory_on_device(
arr_sz, (uint64 *)device_result_buffer);
device_ptrs[data_ptr_idx] =
executor->get_ndarray_alloc_info_ptr(devalloc);
Expand Down Expand Up @@ -128,7 +128,7 @@ void KernelLauncher::launch_llvm_kernel(Handle handle,
AMDGPUDriver::get_instance().memcpy_device_to_host(
itr->second.first, (void *)device_ptrs[idx],
ctx.array_runtime_sizes[arg_id]);
executor->deallocate_memory_ndarray(itr->second.second);
executor->deallocate_memory_on_device(itr->second.second);
}
}
}
Expand Down
9 changes: 5 additions & 4 deletions taichi/runtime/cuda/kernel_launcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ void KernelLauncher::launch_llvm_kernel(Handle handle,
device_ptrs[data_ptr_idx] = data_ptr;
device_ptrs[grad_ptr_idx] = grad_ptr;
} else {
DeviceAllocation devalloc = executor->allocate_memory_ndarray(
DeviceAllocation devalloc = executor->allocate_memory_on_device(
arr_sz, (uint64 *)device_result_buffer);
device_ptrs[data_ptr_idx] =
executor->get_ndarray_alloc_info_ptr(devalloc);
Expand All @@ -89,8 +89,9 @@ void KernelLauncher::launch_llvm_kernel(Handle handle,
CUDADriver::get_instance().memcpy_host_to_device(
(void *)device_ptrs[data_ptr_idx], data_ptr, arr_sz);
if (grad_ptr != nullptr) {
DeviceAllocation grad_devalloc = executor->allocate_memory_ndarray(
arr_sz, (uint64 *)device_result_buffer);
DeviceAllocation grad_devalloc =
executor->allocate_memory_on_device(
arr_sz, (uint64 *)device_result_buffer);
device_ptrs[grad_ptr_idx] =
executor->get_ndarray_alloc_info_ptr(grad_devalloc);
transfers[grad_ptr_idx] = {grad_ptr, grad_devalloc};
Expand Down Expand Up @@ -165,7 +166,7 @@ void KernelLauncher::launch_llvm_kernel(Handle handle,
CUDADriver::get_instance().memcpy_device_to_host(
itr->second.first, (void *)device_ptrs[idx],
ctx.array_runtime_sizes[arg_id]);
executor->deallocate_memory_ndarray(itr->second.second);
executor->deallocate_memory_on_device(itr->second.second);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions taichi/runtime/llvm/llvm_runtime_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ LlvmDevice *LlvmRuntimeExecutor::llvm_device() {
return static_cast<LlvmDevice *>(device_.get());
}

DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray(
DeviceAllocation LlvmRuntimeExecutor::allocate_memory_on_device(
std::size_t alloc_size,
uint64 *result_buffer) {
auto devalloc = llvm_device()->allocate_memory_runtime(
Expand All @@ -490,7 +490,7 @@ DeviceAllocation LlvmRuntimeExecutor::allocate_memory_ndarray(
return devalloc;
}

void LlvmRuntimeExecutor::deallocate_memory_ndarray(DeviceAllocation handle) {
void LlvmRuntimeExecutor::deallocate_memory_on_device(DeviceAllocation handle) {
TI_ASSERT(allocated_runtime_memory_allocs_.find(handle.alloc_id) !=
allocated_runtime_memory_allocs_.end());
llvm_device()->dealloc_memory(handle);
Expand Down Expand Up @@ -562,7 +562,7 @@ void LlvmRuntimeExecutor::finalize() {
if (ptr == nullptr)
continue;

deallocate_memory_ndarray(iter.second);
deallocate_memory_on_device(iter.second);
}
allocated_runtime_memory_allocs_.clear();

Expand Down
8 changes: 4 additions & 4 deletions taichi/runtime/llvm/llvm_runtime_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ class LlvmRuntimeExecutor {
const LlvmOfflineCache::FieldCacheData &field_cache_data,
uint64 *result_buffer);

// Ndarray Allocation
DeviceAllocation allocate_memory_ndarray(std::size_t alloc_size,
uint64 *result_buffer);
// Ndarray and ArgPack Allocation
DeviceAllocation allocate_memory_on_device(std::size_t alloc_size,
uint64 *result_buffer);

void deallocate_memory_ndarray(DeviceAllocation handle);
void deallocate_memory_on_device(DeviceAllocation handle);

void check_runtime_error(uint64 *result_buffer);

Expand Down
4 changes: 2 additions & 2 deletions taichi/runtime/llvm/snode_tree_buffer_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ SNodeTreeBufferManager::SNodeTreeBufferManager(
Ptr SNodeTreeBufferManager::allocate(std::size_t size,
const int snode_tree_id,
uint64 *result_buffer) {
auto devalloc = runtime_exec_->allocate_memory_ndarray(size, result_buffer);
auto devalloc = runtime_exec_->allocate_memory_on_device(size, result_buffer);
snode_tree_id_to_device_alloc_[snode_tree_id] = devalloc;
return (Ptr)runtime_exec_->get_ndarray_alloc_info_ptr(devalloc);
}

void SNodeTreeBufferManager::destroy(SNodeTree *snode_tree) {
auto devalloc = snode_tree_id_to_device_alloc_[snode_tree->id()];
runtime_exec_->deallocate_memory_ndarray(devalloc);
runtime_exec_->deallocate_memory_on_device(devalloc);
snode_tree_id_to_device_alloc_.erase(snode_tree->id());
}

Expand Down
3 changes: 2 additions & 1 deletion taichi/runtime/program_impls/gfx/gfx_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ std::unique_ptr<AotModuleBuilder> GfxProgramImpl::make_aot_module_builder(
}
}

DeviceAllocation GfxProgramImpl::allocate_memory_ndarray(
DeviceAllocation GfxProgramImpl::allocate_memory_on_device(
std::size_t alloc_size,
uint64 *result_buffer) {
DeviceAllocation alloc;
Expand All @@ -51,6 +51,7 @@ DeviceAllocation GfxProgramImpl::allocate_memory_ndarray(
TI_ASSERT(res == RhiResult::success);
return alloc;
}

DeviceAllocation GfxProgramImpl::allocate_texture(const ImageParams &params) {
return runtime_->create_image(params);
}
Expand Down
4 changes: 2 additions & 2 deletions taichi/runtime/program_impls/gfx/gfx_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ class GfxProgramImpl : public ProgramImpl {
snode_tree_mgr_->destroy_snode_tree(snode_tree);
}

DeviceAllocation allocate_memory_ndarray(std::size_t alloc_size,
uint64 *result_buffer) override;
DeviceAllocation allocate_memory_on_device(std::size_t alloc_size,
uint64 *result_buffer) override;

bool used_in_kernel(DeviceAllocationId id) override {
return runtime_->used_in_kernel(id);
Expand Down
6 changes: 3 additions & 3 deletions taichi/runtime/program_impls/llvm/llvm_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ class LlvmProgramImpl : public ProgramImpl {
return runtime_exec_->fill_ndarray(alloc, size, data);
}

DeviceAllocation allocate_memory_ndarray(std::size_t alloc_size,
uint64 *result_buffer) override {
return runtime_exec_->allocate_memory_ndarray(alloc_size, result_buffer);
DeviceAllocation allocate_memory_on_device(std::size_t alloc_size,
uint64 *result_buffer) override {
return runtime_exec_->allocate_memory_on_device(alloc_size, result_buffer);
}

Device *get_compute_device() override {
Expand Down
8 changes: 4 additions & 4 deletions tests/cpp/aot/llvm/graph_aot_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ TEST(LlvmCGraph, RunGraphCpu) {
constexpr int ArrLength = 100;
constexpr int kArrBytes_arr = ArrLength * 1 * sizeof(int32_t);
auto devalloc_arr_0 =
exec.allocate_memory_ndarray(kArrBytes_arr, result_buffer);
exec.allocate_memory_on_device(kArrBytes_arr, result_buffer);
auto devalloc_arr_1 =
exec.allocate_memory_ndarray(kArrBytes_arr, result_buffer);
exec.allocate_memory_on_device(kArrBytes_arr, result_buffer);

/* Test with Graph */
// Prepare & Run "init" Graph
Expand Down Expand Up @@ -115,10 +115,10 @@ TEST(LlvmCGraph, RunGraphCuda) {
constexpr int ArrLength = 100;
constexpr int kArrBytes_arr = ArrLength * 1 * sizeof(int32_t);
auto devalloc_arr_0 =
exec.allocate_memory_ndarray(kArrBytes_arr, result_buffer);
exec.allocate_memory_on_device(kArrBytes_arr, result_buffer);

auto devalloc_arr_1 =
exec.allocate_memory_ndarray(kArrBytes_arr, result_buffer);
exec.allocate_memory_on_device(kArrBytes_arr, result_buffer);

/* Test with Graph */
// Prepare & Run "init" Graph
Expand Down
5 changes: 3 additions & 2 deletions tests/cpp/aot/llvm/kernel_aot_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ TEST(LlvmAotTest, CpuKernel) {

constexpr int kArrLen = 32;
constexpr int kArrBytes = kArrLen * sizeof(int32_t);
auto arr_devalloc = exec.allocate_memory_ndarray(kArrBytes, result_buffer);
auto arr_devalloc = exec.allocate_memory_on_device(kArrBytes, result_buffer);
Ndarray arr = Ndarray(arr_devalloc, PrimitiveType::i32, {kArrLen});

LLVM::AotModuleParams aot_params;
Expand Down Expand Up @@ -80,7 +80,8 @@ TEST(LlvmAotTest, CudaKernel) {

constexpr int kArrLen = 32;
constexpr int kArrBytes = kArrLen * sizeof(int32_t);
auto arr_devalloc = exec.allocate_memory_ndarray(kArrBytes, result_buffer);
auto arr_devalloc =
exec.allocate_memory_on_device(kArrBytes, result_buffer);
Ndarray arr = Ndarray(arr_devalloc, PrimitiveType::i32, {kArrLen});

LLVM::AotModuleParams aot_params;
Expand Down
32 changes: 18 additions & 14 deletions tests/cpp/aot/llvm/mpm88_graph_aot_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ TEST(LlvmCGraph, Mpm88Cpu) {

/* Prepare arguments */
constexpr int kArrBytes_x = NR_PARTICLES * 2 * sizeof(float);
auto devalloc_x = exec.allocate_memory_ndarray(kArrBytes_x, result_buffer);
auto devalloc_x = exec.allocate_memory_on_device(kArrBytes_x, result_buffer);
auto x = taichi::lang::Ndarray(devalloc_x, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES}, {2});

constexpr int kArrBytes_v = NR_PARTICLES * 2 * sizeof(float);
auto devalloc_v = exec.allocate_memory_ndarray(kArrBytes_v, result_buffer);
auto devalloc_v = exec.allocate_memory_on_device(kArrBytes_v, result_buffer);
auto v = taichi::lang::Ndarray(devalloc_v, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES}, {2});

constexpr int kArrBytes_J = NR_PARTICLES * sizeof(float);
auto devalloc_J = exec.allocate_memory_ndarray(kArrBytes_J, result_buffer);
auto devalloc_J = exec.allocate_memory_on_device(kArrBytes_J, result_buffer);
auto J = taichi::lang::Ndarray(devalloc_J, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES});

Expand All @@ -78,24 +78,24 @@ TEST(LlvmCGraph, Mpm88Cpu) {

constexpr int kArrBytes_grid_v = N_GRID * N_GRID * 2 * sizeof(float);
auto devalloc_grid_v =
exec.allocate_memory_ndarray(kArrBytes_grid_v, result_buffer);
exec.allocate_memory_on_device(kArrBytes_grid_v, result_buffer);
auto grid_v = taichi::lang::Ndarray(
devalloc_grid_v, taichi::lang::PrimitiveType::f32, {N_GRID, N_GRID}, {2});

constexpr int kArrBytes_grid_m = N_GRID * N_GRID * sizeof(float);
auto devalloc_grid_m =
exec.allocate_memory_ndarray(kArrBytes_grid_m, result_buffer);
exec.allocate_memory_on_device(kArrBytes_grid_m, result_buffer);
auto grid_m = taichi::lang::Ndarray(
devalloc_grid_m, taichi::lang::PrimitiveType::f32, {N_GRID, N_GRID});

constexpr int kArrBytes_pos = NR_PARTICLES * 3 * sizeof(float);
auto devalloc_pos =
exec.allocate_memory_ndarray(kArrBytes_pos, result_buffer);
exec.allocate_memory_on_device(kArrBytes_pos, result_buffer);
auto pos = taichi::lang::Ndarray(
devalloc_pos, taichi::lang::PrimitiveType::f32, {NR_PARTICLES}, {3});

constexpr int kArrBytes_C = NR_PARTICLES * sizeof(float) * 2 * 2;
auto devalloc_C = exec.allocate_memory_ndarray(kArrBytes_C, result_buffer);
auto devalloc_C = exec.allocate_memory_on_device(kArrBytes_C, result_buffer);
auto C = taichi::lang::Ndarray(devalloc_C, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES}, {2, 2});

Expand Down Expand Up @@ -136,17 +136,20 @@ TEST(LlvmCGraph, Mpm88Cuda) {

/* Prepare arguments */
constexpr int kArrBytes_x = NR_PARTICLES * 2 * sizeof(float);
auto devalloc_x = exec.allocate_memory_ndarray(kArrBytes_x, result_buffer);
auto devalloc_x =
exec.allocate_memory_on_device(kArrBytes_x, result_buffer);
auto x = taichi::lang::Ndarray(devalloc_x, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES}, {2});

constexpr int kArrBytes_v = NR_PARTICLES * 2 * sizeof(float);
auto devalloc_v = exec.allocate_memory_ndarray(kArrBytes_v, result_buffer);
auto devalloc_v =
exec.allocate_memory_on_device(kArrBytes_v, result_buffer);
auto v = taichi::lang::Ndarray(devalloc_v, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES}, {2});

constexpr int kArrBytes_J = NR_PARTICLES * sizeof(float);
auto devalloc_J = exec.allocate_memory_ndarray(kArrBytes_J, result_buffer);
auto devalloc_J =
exec.allocate_memory_on_device(kArrBytes_J, result_buffer);
auto J = taichi::lang::Ndarray(devalloc_J, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES});

Expand All @@ -163,25 +166,26 @@ TEST(LlvmCGraph, Mpm88Cuda) {

constexpr int kArrBytes_grid_v = N_GRID * N_GRID * 2 * sizeof(float);
auto devalloc_grid_v =
exec.allocate_memory_ndarray(kArrBytes_grid_v, result_buffer);
exec.allocate_memory_on_device(kArrBytes_grid_v, result_buffer);
auto grid_v =
taichi::lang::Ndarray(devalloc_grid_v, taichi::lang::PrimitiveType::f32,
{N_GRID, N_GRID}, {2});

constexpr int kArrBytes_grid_m = N_GRID * N_GRID * sizeof(float);
auto devalloc_grid_m =
exec.allocate_memory_ndarray(kArrBytes_grid_m, result_buffer);
exec.allocate_memory_on_device(kArrBytes_grid_m, result_buffer);
auto grid_m = taichi::lang::Ndarray(
devalloc_grid_m, taichi::lang::PrimitiveType::f32, {N_GRID, N_GRID});

constexpr int kArrBytes_pos = NR_PARTICLES * 3 * sizeof(float);
auto devalloc_pos =
exec.allocate_memory_ndarray(kArrBytes_pos, result_buffer);
exec.allocate_memory_on_device(kArrBytes_pos, result_buffer);
auto pos = taichi::lang::Ndarray(
devalloc_pos, taichi::lang::PrimitiveType::f32, {NR_PARTICLES}, {3});

constexpr int kArrBytes_C = NR_PARTICLES * sizeof(float) * 2 * 2;
auto devalloc_C = exec.allocate_memory_ndarray(kArrBytes_C, result_buffer);
auto devalloc_C =
exec.allocate_memory_on_device(kArrBytes_C, result_buffer);
auto C = taichi::lang::Ndarray(devalloc_C, taichi::lang::PrimitiveType::f32,
{NR_PARTICLES}, {2, 2});

Expand Down
Loading