diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 0a4b71a773..67b1b26e7f 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -38,7 +38,7 @@ ur_result_t adapterStateTeardown() { // Print the balance of various create/destroy native calls. // The idea is to verify if the number of create(+) and destroy(-) calls are // matched. - if (ZeCallCount && (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) != 0) { + if (ZeCallCount && (UrL0LeaksDebug) != 0) { // clang-format off // // The format of this table is such that each row accounts for a @@ -79,8 +79,7 @@ ur_result_t adapterStateTeardown() { // // clang-format on - fprintf(stderr, "ZE_DEBUG=%d: check balance of create/destroy calls\n", - UR_L0_DEBUG_CALL_COUNT); + fprintf(stderr, "Check balance of create/destroy calls\n"); fprintf(stderr, "----------------------------------------------------------\n"); for (const auto &Row : CreateDestroySet) { diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index eb0f34307c..a249996b91 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -149,7 +149,7 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName, const char *ZeArgs, bool TraceError) { urPrint("ZE ---> %s%s\n", ZeName, ZeArgs); - if (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) { + if (UrL0LeaksDebug) { ++(*ZeCallCount)[ZeName]; } diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index 7c2ac7f8be..5c363a5984 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -187,7 +187,6 @@ enum UrDebugLevel { UR_L0_DEBUG_NONE = 0x0, UR_L0_DEBUG_BASIC = 0x1, UR_L0_DEBUG_VALIDATION = 0x2, - UR_L0_DEBUG_CALL_COUNT = 0x4, UR_L0_DEBUG_ALL = -1 }; @@ -203,6 +202,13 @@ const int UrL0Debug = [] { return DebugMode; }(); +const int UrL0LeaksDebug = [] { + const char *UrRet = std::getenv("UR_L0_LEAKS_DEBUG"); + if (!UrRet) + return 0; + return std::atoi(UrRet); +}(); + // Controls Level Zero calls serialization to w/a Level Zero driver being not MT // ready. Recognized values (can be used as a bit mask): enum { diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 308b6909eb..b7680b1638 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -30,7 +30,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( static std::once_flag ZeCallCountInitialized; try { std::call_once(ZeCallCountInitialized, []() { - if (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) { + if (UrL0LeaksDebug) { ZeCallCount = new std::map; } }); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 121a991cbd..a4506bed3c 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -11,58 +11,118 @@ #include "command_buffer.hpp" #include "common.hpp" -/// Stub implementations of UR experimental feature command-buffers - UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_device_handle_t hDevice, + ur_context_handle_t hContext, ur_device_handle_t hDevice, [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - [[maybe_unused]] ur_exp_command_buffer_handle_t *phCommandBuffer) { + ur_exp_command_buffer_handle_t *phCommandBuffer) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_queue_handle_t Queue = nullptr; + UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue)); + + cl_context CLContext = cl_adapter::cast(hContext); + cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, + cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR); + + if (!clCreateCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + auto CLCommandBuffer = clCreateCommandBufferKHR( + 1, cl_adapter::cast(&Queue), nullptr, &Res); + CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); + + try { + auto URCommandBuffer = std::make_unique( + Queue, hContext, CLCommandBuffer); + *phCommandBuffer = URCommandBuffer.release(); + } catch (...) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } + + CL_RETURN_ON_FAILURE(Res); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + UR_RETURN_ON_FAILURE(urQueueRetain(hCommandBuffer->hInternalQueue)); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, + cl_ext::RetainCommandBufferName, &clRetainCommandBuffer); + + if (!clRetainCommandBuffer || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + UR_RETURN_ON_FAILURE(urQueueRelease(hCommandBuffer->hInternalQueue)); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, + cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); + + if (!clReleaseCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE( + clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer) { +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache, + cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR); - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + if (!clFinalizeCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE( + clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_kernel_handle_t hKernel, - [[maybe_unused]] uint32_t workDim, - [[maybe_unused]] const size_t *pGlobalWorkOffset, - [[maybe_unused]] const size_t *pGlobalWorkSize, - [[maybe_unused]] const size_t *pLocalWorkSize, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, + cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR); + + if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, + cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( @@ -73,44 +133,76 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemcpyUSMExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_mem_handle_t hSrcMem, - [[maybe_unused]] ur_mem_handle_t hDstMem, [[maybe_unused]] size_t srcOffset, - [[maybe_unused]] size_t dstOffset, [[maybe_unused]] size_t size, + [[maybe_unused]] void *pMemory, [[maybe_unused]] const void *pPattern, + [[maybe_unused]] size_t patternSize, [[maybe_unused]] size_t size, [[maybe_unused]] uint32_t numSyncPointsInWaitList, [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, + cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR); + + if (!clCommandCopyBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( + hCommandBuffer->CLCommandBuffer, nullptr, + cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_mem_handle_t hSrcMem, - [[maybe_unused]] ur_mem_handle_t hDstMem, - [[maybe_unused]] ur_rect_offset_t srcOrigin, - [[maybe_unused]] ur_rect_offset_t dstOrigin, - [[maybe_unused]] ur_rect_region_t region, - [[maybe_unused]] size_t srcRowPitch, [[maybe_unused]] size_t srcSlicePitch, - [[maybe_unused]] size_t dstRowPitch, [[maybe_unused]] size_t dstSlicePitch, - [[maybe_unused]] uint32_t numSyncPointsInWaitList, - [[maybe_unused]] const ur_exp_command_buffer_sync_point_t - *pSyncPointWaitList, - [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + size_t OpenCLOriginRect[3]{srcOrigin.x, srcOrigin.y, srcOrigin.z}; + size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; + size_t OpenCLRegion[3]{region.width, region.height, region.depth}; + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache, + cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR); + + if (!clCommandCopyBufferRectKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( + hCommandBuffer->CLCommandBuffer, nullptr, + cl_adapter::cast(hSrcMem), cl_adapter::cast(hDstMem), + OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, + pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; } UR_APIEXPORT @@ -122,9 +214,6 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -137,9 +226,6 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -158,9 +244,6 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferWriteRectExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -179,20 +262,55 @@ ur_result_t UR_APICALL urCommandBufferAppendMembufferReadRectExp( [[maybe_unused]] const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, [[maybe_unused]] ur_exp_command_buffer_sync_point_t *pSyncPoint) { - - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; + cl_int Res = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, + cl_ext::CommandFillBufferName, &clCommandFillBufferKHR); + + if (!clCommandFillBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( + hCommandBuffer->CLCommandBuffer, nullptr, + cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - [[maybe_unused]] ur_exp_command_buffer_handle_t hCommandBuffer, - [[maybe_unused]] ur_queue_handle_t hQueue, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { - cl_adapter::die("Experimental Command-buffer feature is not " - "implemented for OpenCL adapter."); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; + cl_int Res = + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache, + cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR); + + if (!clEnqueueCommandBufferKHR || Res != CL_SUCCESS) + return UR_RESULT_ERROR_INVALID_OPERATION; + + const uint32_t NumberOfQueues = 1; + + CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR( + NumberOfQueues, cl_adapter::cast(&hQueue), + hCommandBuffer->CLCommandBuffer, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/command_buffer.hpp b/source/adapters/opencl/command_buffer.hpp index 7ab145c53d..d80f29594b 100644 --- a/source/adapters/opencl/command_buffer.hpp +++ b/source/adapters/opencl/command_buffer.hpp @@ -8,8 +8,17 @@ // //===----------------------------------------------------------------------===// +#include #include -/// Stub implementation of command-buffers for OpenCL +struct ur_exp_command_buffer_handle_t_ { + ur_queue_handle_t hInternalQueue; + ur_context_handle_t hContext; + cl_command_buffer_khr CLCommandBuffer; -struct ur_exp_command_buffer_handle_t_ {}; + ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue, + ur_context_handle_t hContext, + cl_command_buffer_khr CLCommandBuffer) + : hInternalQueue(hQueue), hContext(hContext), + CLCommandBuffer(CLCommandBuffer) {} +}; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 77a51694dd..4fe8bed408 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -77,6 +77,10 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_PROGRAM_LINK_FAILURE; case CL_INVALID_ARG_INDEX: return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; + case CL_INVALID_COMMAND_BUFFER_KHR: + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP; + case CL_INVALID_SYNC_POINT_WAIT_LIST_KHR: + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 95105b552d..767dc02fda 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -192,6 +192,16 @@ CONSTFIX char EnqueueReadGlobalVariableName[] = // Names of host pipe functions queried from OpenCL CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; +// Names of command buffer functions queried from OpenCL +CONSTFIX char CreateCommandBufferName[] = "clCreateCommandBufferKHR"; +CONSTFIX char RetainCommandBufferName[] = "clRetainCommandBufferKHR"; +CONSTFIX char ReleaseCommandBufferName[] = "clReleaseCommandBufferKHR"; +CONSTFIX char FinalizeCommandBufferName[] = "clFinalizeCommandBufferKHR"; +CONSTFIX char CommandNRRangeKernelName[] = "clCommandNDRangeKernelKHR"; +CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; +CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; +CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; +CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; #undef CONSTFIX @@ -226,6 +236,58 @@ cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program, cl_uint num_events_in_waitlist, const cl_event *events_waitlist, cl_event *event); +using clCreateCommandBufferKHR_fn = CL_API_ENTRY cl_command_buffer_khr( + CL_API_CALL *)(cl_uint num_queues, const cl_command_queue *queues, + const cl_command_buffer_properties_khr *properties, + cl_int *errcode_ret); + +using clRetainCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clReleaseCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clFinalizeCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer); + +using clCommandNDRangeKernelKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr *properties, + cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, + const size_t *global_work_size, const size_t *local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandCopyBufferRectKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, + const size_t *dst_origin, const size_t *region, size_t src_row_pitch, + size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clCommandFillBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, + size_t size, cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); + +using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY +cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + template struct FuncPtrCache { std::map Map; std::mutex Mutex; @@ -255,6 +317,15 @@ struct ExtFuncPtrCacheT { FuncPtrCache clEnqueueWriteHostPipeINTELCache; FuncPtrCache clSetProgramSpecializationConstantCache; + FuncPtrCache clCreateCommandBufferKHRCache; + FuncPtrCache clRetainCommandBufferKHRCache; + FuncPtrCache clReleaseCommandBufferKHRCache; + FuncPtrCache clFinalizeCommandBufferKHRCache; + FuncPtrCache clCommandNDRangeKernelKHRCache; + FuncPtrCache clCommandCopyBufferKHRCache; + FuncPtrCache clCommandCopyBufferRectKHRCache; + FuncPtrCache clCommandFillBufferKHRCache; + FuncPtrCache clEnqueueCommandBufferKHRCache; }; // A raw pointer is used here since the lifetime of this map has to be tied to // piTeardown to avoid issues with static destruction order (a user application diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 710ebcfb88..27577eab39 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -886,7 +886,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_PROFILE: case UR_DEVICE_INFO_VERSION: case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: - case UR_DEVICE_INFO_EXTENSIONS: case UR_DEVICE_INFO_BUILT_IN_KERNELS: case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: @@ -908,6 +907,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_EXTENSIONS: { + cl_device_id Dev = cl_adapter::cast(hDevice); + size_t ExtSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + + std::string ExtStr(ExtSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, + ExtStr.data(), nullptr)); + + std::string SupportedExtensions(ExtStr.c_str()); + if (ExtStr.find("cl_khr_command_buffer") != std::string::npos) { + SupportedExtensions += " ur_exp_command_buffer"; + } + return ReturnValue(SupportedExtensions.c_str()); + } /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU * EU device-specific information extensions. Some of the queries are * enabled by cl_intel_device_attribute_query extension, but it's not yet in