From 99b8158d3b10a2424728dec1bc575dd2bf1e74b2 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Mon, 31 Jan 2022 05:00:05 +0900 Subject: [PATCH 1/4] Support OpenCL in Autoscheduler tuning --- apps/topi_recipe/gemm/cuda_gemm_square.py | 21 --------------------- src/auto_scheduler/search_task.cc | 23 +++++++++++++++++++++-- src/runtime/opencl/opencl_device_api.cc | 4 +++- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/apps/topi_recipe/gemm/cuda_gemm_square.py b/apps/topi_recipe/gemm/cuda_gemm_square.py index f9b10bd495c6..be55d158fcbc 100644 --- a/apps/topi_recipe/gemm/cuda_gemm_square.py +++ b/apps/topi_recipe/gemm/cuda_gemm_square.py @@ -27,27 +27,6 @@ USE_MANUAL_CODE = False -@tvm.register_func("tvm_callback_cuda_compile", override=True) -def tvm_callback_cuda_compile(code): - ptx = nvcc.compile_cuda(code, target_format="ptx") - return ptx - - -def write_code(code, fname): - with open(fname, "w") as f: - f.write(code) - - -@tvm.register_func -def tvm_callback_cuda_postproc(code): - if not os.path.exists("perf"): - os.mkdir("perf") - write_code(code, "perf/%s_generated.cu" % TASK) - if USE_MANUAL_CODE: - code = open("perf/%s_manual.cu" % TASK).read() - return code - - def test_gemm(): # graph nn = 2048 diff --git a/src/auto_scheduler/search_task.cc b/src/auto_scheduler/search_task.cc index 03d880e7769e..861a37abdac3 100755 --- a/src/auto_scheduler/search_task.cc +++ b/src/auto_scheduler/search_task.cc @@ -104,8 +104,27 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target max_threads_per_block, max_vthread_extent, warp_size); } else { // add other opencl target - auto target_device = target->GetAttr("device", ""); - LOG(FATAL) << "No default hardware parameters for opencl target device: " << target_device; + auto dev = Device{static_cast(device_type), 0}; + auto device_name = "device_api.opencl"; + auto func = tvm::runtime::Registry::Get(device_name); + ICHECK(func != nullptr) << "Cannot find OpenCL device_api in registry"; + auto device_api = static_cast(((*func)()).operator void*()); + + tvm::runtime::TVMRetValue ret; + device_api->GetAttr(dev, tvm::runtime::DeviceAttrKind::kMaxSharedMemoryPerBlock, &ret); + int max_shared_memory_per_block = ret; + + int max_local_memory_per_block = INT32_MAX; + + device_api->GetAttr(dev, tvm::runtime::DeviceAttrKind::kMaxThreadsPerBlock, &ret); + int max_threads_per_block = ret; + + device_api->GetAttr(dev, tvm::runtime::DeviceAttrKind::kWarpSize, &ret); + int warp_size = ret; + + int max_vthread_extent = warp_size / 4; + return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, + max_threads_per_block, max_vthread_extent, warp_size); } } else if (device_type == kDLVulkan) { auto dev = Device{static_cast(device_type), 0}; diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index f12a143ab0cc..5274d7713441 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -20,6 +20,7 @@ /*! * \file opencl_device_api.cc */ +#include #include #include @@ -122,7 +123,8 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) corresponding to the number of SIMD entries the heardware configures. We need to figure out a way to query this information from the hardware. */ - *rv = 1; + const int warp_size = dmlc::GetEnv("TVM_OPENCL_WARP_SIZE", 1); + *rv = warp_size; break; } case kMaxSharedMemoryPerBlock: { From 66621972fe53020eaebcd0ba2db0e7c46c618d7a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Mon, 31 Jan 2022 05:32:10 +0900 Subject: [PATCH 2/4] add warning --- src/auto_scheduler/search_task.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/auto_scheduler/search_task.cc b/src/auto_scheduler/search_task.cc index 861a37abdac3..39e53efc1dc7 100755 --- a/src/auto_scheduler/search_task.cc +++ b/src/auto_scheduler/search_task.cc @@ -122,6 +122,10 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target device_api->GetAttr(dev, tvm::runtime::DeviceAttrKind::kWarpSize, &ret); int warp_size = ret; + if (warp_size == 1) { + LOG(WARNING) << "Th warp size is 1, tuning might crash or stuck."; + } + int max_vthread_extent = warp_size / 4; return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, max_threads_per_block, max_vthread_extent, warp_size); From 7a078d3b81885a367e533f4fea504d769a49e12a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 1 Feb 2022 04:36:41 +0900 Subject: [PATCH 3/4] Update src/auto_scheduler/search_task.cc Co-authored-by: Cody Yu --- src/auto_scheduler/search_task.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_scheduler/search_task.cc b/src/auto_scheduler/search_task.cc index 39e53efc1dc7..a3ac2cdd7d09 100755 --- a/src/auto_scheduler/search_task.cc +++ b/src/auto_scheduler/search_task.cc @@ -123,7 +123,7 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target int warp_size = ret; if (warp_size == 1) { - LOG(WARNING) << "Th warp size is 1, tuning might crash or stuck."; + LOG(WARNING) << "Warp size 1 is not recommended for OpenCL devices. Tuning might crash or stuck"; } int max_vthread_extent = warp_size / 4; From 493b1bba904d53e79c6c38aa060c8d793807b833 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 1 Feb 2022 04:41:52 +0900 Subject: [PATCH 4/4] fix lint --- src/auto_scheduler/search_task.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/auto_scheduler/search_task.cc b/src/auto_scheduler/search_task.cc index a3ac2cdd7d09..cc18de25ee9e 100755 --- a/src/auto_scheduler/search_task.cc +++ b/src/auto_scheduler/search_task.cc @@ -123,7 +123,8 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target int warp_size = ret; if (warp_size == 1) { - LOG(WARNING) << "Warp size 1 is not recommended for OpenCL devices. Tuning might crash or stuck"; + LOG(WARNING) + << "Warp size 1 is not recommended for OpenCL devices. Tuning might crash or stuck"; } int max_vthread_extent = warp_size / 4;