diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml index 88b0877c27..fa23c38248 100644 --- a/.github/workflows/build-hw-reusable.yml +++ b/.github/workflows/build-hw-reusable.yml @@ -18,6 +18,10 @@ on: required: false type: string default: OFF + static_adapter: + required: false + type: string + default: OFF permissions: contents: read @@ -36,7 +40,7 @@ jobs: strategy: matrix: adapter: [ - {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}"}, + {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}", static_adapter: "${{inputs.static_adapter}}"}, ] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] @@ -49,6 +53,10 @@ jobs: build_type: Release - adapter: {static_Loader: ON} compiler: {c: clang, cxx: clang++} + - adapter: {static_adapter: ON} + build_type: Release + - adapter: {static_adapter: ON} + compiler: {c: clang, cxx: clang++} runs-on: ${{inputs.runner_name}} steps: @@ -76,6 +84,7 @@ jobs: -DUR_BUILD_TESTS=ON -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}} + -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}} -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index cd5c91854c..f76326c1c4 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -155,6 +155,7 @@ jobs: adapter_name: L0 runner_name: L0 static_loader: ON + static_adapter: ON opencl: name: OpenCL @@ -216,7 +217,8 @@ jobs: os: ['windows-2019', 'windows-2022'] adapter: [ {name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}, - {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} + {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}, + {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} ] # TODO: building level zero loader on windows-2019 and clang-cl is currently broken @@ -225,16 +227,25 @@ jobs: adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} - os: 'windows-2019' adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} + - os: 'windows-2019' + adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} compiler: {c: clang-cl, cxx: clang-cl} - adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} compiler: {c: clang-cl, cxx: clang-cl} + - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} + compiler: {c: clang-cl, cxx: clang-cl} build_type: [Debug, Release] compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}] include: - compiler: {c: clang-cl, cxx: clang-cl} toolset: "-T ClangCL" + - os: 'windows-2022' + adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} + build_type: 'Release' + compiler: {c: cl, cxx: cl} + runs-on: ${{matrix.os}} steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index f54cbd1067..a908a22d80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF) option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF) option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF) option(UR_BUILD_ADAPTER_L0_V2
"Build the (experimental) Level-Zero v2 adapter" OFF) +option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF) option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF) diff --git a/include/ur_api.h b/include/ur_api.h index 082890e73d..a707d40a3f 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1569,8 +1569,7 @@ typedef enum ur_device_info_t { ///< ::urDevicePartition UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress - UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of sub group sizes supported on Intel - ///< device + UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = 82, ///< [uint32_t[]] return an array of supported sub group sizes UR_DEVICE_INFO_USM_HOST_SUPPORT = 83, ///< [::ur_device_usm_access_capability_flags_t] support USM host memory ///< access UR_DEVICE_INFO_USM_DEVICE_SUPPORT = 84, ///< [::ur_device_usm_access_capability_flags_t] support USM device memory diff --git a/scripts/core/INTRO.rst b/scripts/core/INTRO.rst index 448e3569e2..898d4ce5f3 100644 --- a/scripts/core/INTRO.rst +++ b/scripts/core/INTRO.rst @@ -396,6 +396,14 @@ Specific environment variables can be set to control the behavior of unified run See the Layers_ section for details of the layers currently included in the runtime. +.. envvar:: UR_LOADER_PRELOAD_FILTER + + If set, the loader will read `ONEAPI_DEVICE_SELECTOR` before loading the UR Adapters to determine which backends should be loaded. + + .. note:: + + This environment variable is default enabled on Linux, but default disabled on Windows. 
+ Service identifiers --------------------- diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 23c0233ef7..c063466b22 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -365,7 +365,7 @@ etors: - name: SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS desc: "[$x_bool_t] support sub group independent forward progress" - name: SUB_GROUP_SIZES_INTEL - desc: "[uint32_t[]] return an array of sub group sizes supported on Intel device" + desc: "[uint32_t[]] return an array of supported sub group sizes" - name: USM_HOST_SUPPORT desc: "[$x_device_usm_access_capability_flags_t] support USM host memory access" - name: USM_DEVICE_SUPPORT diff --git a/scripts/generate_code.py b/scripts/generate_code.py index 0e58c7a43c..0c7476ab42 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -400,6 +400,32 @@ def generate_loader(path, section, namespace, tags, version, specs, meta): ) print("Generated %s lines of code.\n"%loc) +""" + generates c/c++ files from the specification documents +""" +def _mako_interface_loader_api(path, adapter, ext, namespace, tags, version, specs, meta): + dstpath = os.path.join(path, adapter) + os.makedirs(dstpath, exist_ok=True) + + template = f"ur_interface_loader.{ext}.mako" + fin = os.path.join(templates_dir, template) + + name = f"ur_interface_loader" + + filename = f"{name}.{ext}" + fout = os.path.join(dstpath, filename) + + print("Generating %s..."%fout) + return util.makoWrite( + fin, fout, + name=name, + adapter=adapter, + ver=version, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta,) + """ Entry-point: generates adapter for unified_runtime @@ -416,6 +442,10 @@ def generate_adapters(path, section, namespace, tags, version, specs, meta): loc += _mako_linker_scripts( dstpath, "adapter", "def", namespace, tags, version, specs, meta ) + + loc += _mako_interface_loader_api(dstpath, "level_zero", "cpp", namespace, tags, version, specs, meta) + loc += _mako_interface_loader_api(dstpath, "level_zero", "hpp", namespace, tags, version, specs, meta) + print("Generated %s lines of code.\n"%loc) """ diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 44631cc360..9c797a0ec3 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -365,6 +365,10 @@ ${tbl['export']['name']}( // Load the device-platform DDI tables for( auto& platform : ur_loader::getContext()->platforms ) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) + continue; + if(platform.initStatus != ${X}_RESULT_SUCCESS) continue; auto getTable = reinterpret_cast<${tbl['pfn']}>( diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako index f941c7ba03..fcfa89d258 100644 --- a/scripts/templates/queue_api.cpp.mako +++ b/scripts/templates/queue_api.cpp.mako @@ -24,8 +24,9 @@ from templates import helper as th ur_queue_handle_t_::~ur_queue_handle_t_() {} ## FUNCTION ################################################################### +namespace ${x}::level_zero { %for obj in th.get_queue_related_functions(specs, n, tags): -${X}_APIEXPORT ${x}_result_t ${X}_APICALL +${x}_result_t ${th.make_func_name(n, tags, obj)}( %for line in th.make_param_lines(n, tags, obj, format=["name", "type", "delim"]): ${line} @@ -35,3 +36,4 @@ ${th.make_func_name(n, tags, obj)}( return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])}; } %endfor +} \ No newline at end of file diff --git 
a/scripts/templates/ur_interface_loader.cpp.mako b/scripts/templates/ur_interface_loader.cpp.mako new file mode 100644 index 0000000000..3298b5bcae --- /dev/null +++ b/scripts/templates/ur_interface_loader.cpp.mako @@ -0,0 +1,88 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.cpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +#include "ur_interface_loader.hpp" + +static ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + // Pre 1.0 we enforce loader and adapter must have same version. + // Post 1.0 only major version match should be required. + if (version != UR_API_VERSION_CURRENT) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ${n}::${adapter} { +#elif defined(__cplusplus) +extern "C" { +#endif + +%for tbl in th.get_pfntables(specs, meta, n, tags): +${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}( + %for line in th.make_param_lines(n, tags, tbl['export'], format=["type", "name", "delim"]): + ${line} + %endfor + ) +{ + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + %for obj in tbl['functions']: + pDdiTable->${th.append_ws(th.make_pfn_name(n, tags, obj), 43)} = ${n}::${adapter}::${th.make_func_name(n, tags, obj)}; + %endfor + + return result; +} + +%endfor + +#ifdef UR_STATIC_ADAPTER_${Adapter} +} // namespace ur::${adapter} +#elif defined(__cplusplus) +} // extern "C" +#endif + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ur::${adapter} { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + +%for tbl in th.get_pfntables(specs, meta, n, tags): + result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); + if (result != UR_RESULT_SUCCESS) + return result; +%endfor + + return result; +} +} +#endif diff --git a/scripts/templates/ur_interface_loader.hpp.mako b/scripts/templates/ur_interface_loader.hpp.mako new file mode 100644 index 0000000000..e2902f93c8 --- /dev/null +++ b/scripts/templates/ur_interface_loader.hpp.mako @@ -0,0 +1,38 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +namespace ${n}::${adapter} { +%for s in specs: +%for obj in th.filter_items(s['objects'], 'type', 'function'): +%if not th.obj_traits.is_loader_only(obj): +${x}_result_t ${th.make_func_name(n, tags, obj)}( + %for line in th.make_param_lines(n, tags, obj, format=["type", "name", "delim"]): + ${line} + %endfor + ); +%endif +%endfor +%endfor +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index b7a32e8cdb..653bf4d118 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -73,27 +73,33 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) ${level-zero-loader_SOURCE_DIR}/include CACHE PATH "Path to Level Zero Headers") endif() -add_library (LevelZeroLoader INTERFACE) +add_library(LevelZeroLoader INTERFACE) # The MSVC linker does not like / at the start of a path, so to work around this # we split it into a link library and a library path, where the path is allowed # to have leading /. get_filename_component(LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}" DIRECTORY) get_filename_component(LEVEL_ZERO_LIB_NAME "${LEVEL_ZERO_LIBRARY}" NAME) target_link_directories(LevelZeroLoader - INTERFACE "${LEVEL_ZERO_LIBRARY_SRC}" + INTERFACE "$" + "$" ) target_link_libraries(LevelZeroLoader INTERFACE "${LEVEL_ZERO_LIB_NAME}" ) -add_library (LevelZeroLoader-Headers INTERFACE) +add_library(LevelZeroLoader-Headers INTERFACE) target_include_directories(LevelZeroLoader-Headers - INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" + INTERFACE "$" + "$" ) if(UR_BUILD_ADAPTER_L0) - add_ur_adapter(ur_adapter_level_zero - SHARED + set(ADAPTER_LIB_TYPE SHARED) + if(UR_STATIC_ADAPTER_L0) + set(ADAPTER_LIB_TYPE STATIC) + endif() + + add_ur_adapter(ur_adapter_level_zero ${ADAPTER_LIB_TYPE} ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp @@ -109,7 +115,6 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.hpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp ${CMAKE_CURRENT_SOURCE_DIR}/program.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.hpp @@ -127,7 +132,6 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/physical_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/queue_api.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp @@ -135,6 +139,20 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ) + if(UR_STATIC_ADAPTER_L0) + target_compile_definitions(ur_adapter_level_zero PUBLIC UR_STATIC_ADAPTER_LEVEL_ZERO) + + # 'utils' target from 'level-zero-loader' includes path which is prefixed + # in the source directory, this breaks the installation of 'utils' target. 
+ set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "") + install(TARGETS ur_adapter_level_zero ur_umf LevelZeroLoader LevelZeroLoader-Headers ze_loader utils + EXPORT ${PROJECT_NAME}-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + endif() + if(NOT WIN32) target_sources(ur_adapter_level_zero PRIVATE @@ -154,7 +172,7 @@ if(UR_BUILD_ADAPTER_L0) if (WIN32) # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero PUBLIC /DEPENDENTLOADFLAG:0x800) + target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero PRIVATE diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index ed52254ec3..eaabb70a29 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -289,7 +289,8 @@ ur_result_t adapterStateTeardown() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( +namespace ur::level_zero { +ur_result_t urAdapterGet( uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phAdapters. If phAdapters is not NULL, then ///< NumEntries should be greater than zero, otherwise @@ -330,7 +331,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { +ur_result_t urAdapterRelease(ur_adapter_handle_t) { // Check first if the Adapter pointer is valid if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; @@ -342,7 +343,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { +ur_result_t urAdapterRetain(ur_adapter_handle_t) { if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; GlobalAdapter->RefCount++; @@ -351,7 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( +ur_result_t urAdapterGetLastError( ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. 
@@ -364,11 +365,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( return ErrorMessageCode; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, - ur_adapter_info_t PropName, - size_t PropSize, - void *PropValue, - size_t *PropSizeRet) { +ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -382,3 +381,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index e507730888..1bf4f26716 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -10,6 +10,7 @@ #include "command_buffer.hpp" #include "helpers/kernel_helpers.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" /* L0 Command-buffer Extension Doc see: @@ -297,16 +298,16 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( IsUpdatable(Desc ? Desc->isUpdatable : false), IsProfilingEnabled(Desc ? Desc->enableProfiling : false), IsInOrderCmdList(IsInOrderCmdList) { - urContextRetain(Context); - urDeviceRetain(Device); + ur::level_zero::urContextRetain(Context); + ur::level_zero::urDeviceRetain(Device); } void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { // Release the memory allocated to the Context stored in the command_buffer - urContextRelease(Context); + ur::level_zero::urContextRelease(Context); // Release the device - urDeviceRelease(Device); + ur::level_zero::urDeviceRelease(Device); // Release the memory allocated to the CommandList stored in the // command_buffer @@ -376,7 +377,7 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { for (auto &AssociatedKernel : KernelsList) { ReleaseIndirectMem(AssociatedKernel); - urKernelRelease(AssociatedKernel); + ur::level_zero::urKernelRelease(AssociatedKernel); } } @@ -387,16 +388,16 @@ ur_exp_command_buffer_command_handle_t_:: ur_kernel_handle_t Kernel = nullptr) : CommandBuffer(CommandBuffer), CommandId(CommandId), WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize), Kernel(Kernel) { - urCommandBufferRetainExp(CommandBuffer); + ur::level_zero::urCommandBufferRetainExp(CommandBuffer); if (Kernel) - urKernelRetain(Kernel); + ur::level_zero::urKernelRetain(Kernel); } ur_exp_command_buffer_command_handle_t_:: ~ur_exp_command_buffer_command_handle_t_() { - urCommandBufferReleaseExp(CommandBuffer); + ur::level_zero::urCommandBufferReleaseExp(CommandBuffer); if (Kernel) - urKernelRelease(Kernel); + ur::level_zero::urKernelRelease(Kernel); } void ur_exp_command_buffer_handle_t_::registerSyncPoint( @@ -433,7 +434,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::getFenceForQueue( return UR_RESULT_SUCCESS; } -namespace { +namespace ur::level_zero { /** * Creates a L0 command list @@ -493,9 +494,8 @@ bool canBeInOrder(ur_context_handle_t Context, ? (CommandBufferDesc ? 
CommandBufferDesc->isInOrder : false) : false; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, const ur_exp_command_buffer_desc_t *CommandBufferDesc, ur_exp_command_buffer_handle_t *CommandBuffer) { @@ -567,13 +567,13 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferRetainExp(ur_exp_command_buffer_handle_t CommandBuffer) { CommandBuffer->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { if (!CommandBuffer->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -583,7 +583,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { UR_ASSERT(CommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); // It is not allowed to append to command list from multiple threads. @@ -627,8 +627,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -namespace { - /** * Sets the global offset for a kernel command that will be appended to the * command buffer. @@ -730,9 +728,8 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( +ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, uint32_t WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, @@ -769,7 +766,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( // is in use. Once the event has been signaled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. 
- UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); if (Command && CommandBuffer->IsUpdatable) { UR_CALL(createCommandHandle(CommandBuffer, Kernel, WorkDim, LocalWorkSize, @@ -790,7 +787,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -812,7 +809,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +ur_result_t urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -842,7 +839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, @@ -875,7 +872,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, const void *Src, uint32_t NumSyncPointsInWaitList, @@ -897,7 +894,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -922,7 +919,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -942,7 +939,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -966,7 +963,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( 
+ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1005,7 +1002,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( +ur_result_t urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1067,7 +1064,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( +ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1088,7 +1085,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( +ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Ptr, const void *Pattern, size_t PatternSize, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1102,8 +1099,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -namespace { - /** * Gets an L0 command queue that supports the chosen engine. * @param[in] Queue The UR queue used to submit the command buffer. @@ -1112,8 +1107,7 @@ namespace { * @param[out] ZeCommandQueue The L0 command queue. 
* @return UR_RESULT_SUCCESS or an error code on failure */ -ur_result_t getZeCommandQueue(ur_queue_handle_legacy_t Queue, - bool UseCopyEngine, +ur_result_t getZeCommandQueue(ur_queue_handle_t Queue, bool UseCopyEngine, ze_command_queue_handle_t &ZeCommandQueue) { auto &QGroup = Queue->getQueueGroup(UseCopyEngine); uint32_t QueueGroupOrdinal; @@ -1130,7 +1124,7 @@ ur_result_t getZeCommandQueue(ur_queue_handle_legacy_t Queue, * @return UR_RESULT_SUCCESS or an error code on failure */ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, - ur_queue_handle_legacy_t Queue, + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { const bool UseCopyEngine = false; @@ -1182,7 +1176,7 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, * @return UR_RESULT_SUCCESS or an error code on failure */ ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, - ur_queue_handle_legacy_t Queue, + ur_queue_handle_t Queue, ur_command_list_ptr_t SignalCommandList, ur_event_handle_t *Event) { // Execution event for this enqueue of the UR command-buffer @@ -1226,13 +1220,12 @@ ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue, - uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, - ur_event_handle_t *Event) { - auto Queue = Legacy(UrQueue); +ur_result_t +urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList, + ur_event_handle_t *Event) { std::scoped_lock Lock(Queue->Mutex); ze_command_queue_handle_t ZeCommandQueue; @@ -1294,13 +1287,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( +ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t Command) { Command->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( +ur_result_t urCommandBufferReleaseCommandExp( ur_exp_command_buffer_command_handle_t Command) { if (!Command->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1309,8 +1302,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return UR_RESULT_SUCCESS; } -namespace { - /** * Validates contents of the update command description. * @param[in] Command The command which is being updated. 
@@ -1620,9 +1611,8 @@ ur_result_t updateKernelCommand( return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( +ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t Command, const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) { UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -1653,10 +1643,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { @@ -1669,10 +1660,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( - ur_exp_command_buffer_command_handle_t Command, - ur_exp_command_buffer_command_info_t PropName, size_t PropSize, - void *PropValue, size_t *PropSizeRet) { +ur_result_t +urCommandBufferCommandGetInfoExp(ur_exp_command_buffer_command_handle_t Command, + ur_exp_command_buffer_command_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -1684,3 +1676,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index b7d0a4a913..6dd8a614c5 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 452189d038..de75dc6126 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -18,7 +18,9 @@ #include "queue.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( +namespace ur::level_zero { + +ur_result_t urContextCreate( uint32_t DeviceCount, ///< [in] the number of devices given in phDevices const ur_device_handle_t *Devices, ///< [in][range(0, DeviceCount)] array of handle of devices. @@ -53,7 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( +ur_result_t urContextRetain( ur_context_handle_t Context ///< [in] handle of the context to get a reference of. ) { @@ -61,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRelease( +ur_result_t urContextRelease( ur_context_handle_t Context ///< [in] handle of the context to release. 
) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -85,7 +87,7 @@ static const bool UseMemcpy2DOperations = [] { return std::atoi(UseMemcpy2DOperationsFlag) > 0; }(); -UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( +ur_result_t urContextGetInfo( ur_context_handle_t Context, ///< [in] handle of the context ur_context_info_t ContextInfoType, ///< [in] type of the info to retrieve size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( +ur_result_t urContextGetNativeHandle( ur_context_handle_t Context, ///< [in] handle of the context. ur_native_handle_t *NativeContext ///< [out] a pointer to the native ///< handle of the context. @@ -142,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( +ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t NativeContext, ///< [in] the native handle of the context. ur_adapter_handle_t, uint32_t NumDevices, const ur_device_handle_t *Devices, @@ -166,7 +168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( +ur_result_t urContextSetExtendedDeleter( ur_context_handle_t Context, ///< [in] handle of the context. ur_context_extended_deleter_t Deleter, ///< [in] Function pointer to extended deleter. @@ -180,6 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero ur_result_t ur_context_handle_t_::initialize() { @@ -576,8 +579,8 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) { std::scoped_lock Lock(EventCacheMutex); ur_device_handle_t Device = nullptr; - if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) { - Device = Legacy(Event->UrQueue)->Device; + if (!Event->IsMultiDevice && Event->UrQueue) { + Device = Event->UrQueue->Device; } auto Cache = getEventCache(Event->isHostVisible(), @@ -598,10 +601,10 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { ze_device_handle_t ZeDevice = nullptr; bool UsingImmediateCommandlists = - !Legacy(Event->UrQueue) || Legacy(Event->UrQueue)->UsingImmCmdLists; + !Event->UrQueue || Event->UrQueue->UsingImmCmdLists; - if (!Event->IsMultiDevice && Legacy(Event->UrQueue)) { - ZeDevice = Legacy(Event->UrQueue)->Device->ZeDevice; + if (!Event->IsMultiDevice && Event->UrQueue) { + ZeDevice = Event->UrQueue->Device->ZeDevice; } std::list *ZePoolCache = getZeEventPoolCache( @@ -644,7 +647,7 @@ static const size_t CmdListsCleanupThreshold = [] { // Retrieve an available command list to be used in a PI call. 
ur_result_t ur_context_handle_t_::getAvailableCommandList( - ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList, + ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, bool AllowBatching, ze_command_queue_handle_t *ForcedCmdQueue) { @@ -767,9 +770,11 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( CommandList = Queue->CommandListMap .emplace(ZeCommandList, - ur_command_list_info_t(ZeFence, true, false, - ZeCommandQueue, ZeQueueDesc, - Queue->useCompletionBatching())) + ur_command_list_info_t( + ZeFence, true, false, ZeCommandQueue, ZeQueueDesc, + Queue->useCompletionBatching(), true, + ZeCommandListIt->second.InOrderList, + ZeCommandListIt->second.IsImmediate)) .first; } ZeCommandListCache.erase(ZeCommandListIt); diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index a1212f0698..c2fbba633f 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -297,7 +297,7 @@ struct ur_context_handle_t_ : _ur_object { // for executing on this device. Immediate commandlists are created only // once for each SYCL Queue and after that they are reused. ur_result_t getAvailableCommandList( - ur_queue_handle_legacy_t Queue, ur_command_list_ptr_t &CommandList, + ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, bool AllowBatching = false, ze_command_queue_handle_t *ForcedCmdQueue = nullptr); diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index de2bee3789..e6cb650420 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -10,13 +10,59 @@ #include "device.hpp" #include "adapter.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" #include #include #include -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( +// UR_L0_USE_COPY_ENGINE can be set to an integer value, or +// a pair of integer values of the form "lower_index:upper_index". +// Here, the indices point to copy engines in a list of all available copy +// engines. +// This functions returns this pair of indices. +// If the user specifies only a single integer, a value of 0 indicates that +// the copy engines will not be used at all. A value of 1 indicates that all +// available copy engines can be used. +const std::pair +getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { + const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + // If the environment variable is not set, no copy engines are used when + // immediate commandlists are being used. For standard commandlists all are + // used. + if (!EnvVar) { + if (Device->ImmCommandListUsed) + return std::pair(0, 0); // Only main copy engine will be used. + return std::pair(0, INT_MAX); // All copy engines will be used. 
+ } + std::string CopyEngineRange = EnvVar; + // Environment variable can be a single integer or a pair of integers + // separated by ":" + auto pos = CopyEngineRange.find(":"); + if (pos == std::string::npos) { + bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); + if (UseCopyEngine) + return std::pair(0, INT_MAX); // All copy engines can be used. + return std::pair(-1, -1); // No copy engines will be used. + } + int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); + int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); + if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || + (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { + logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " + "default set."); + LowerCopyEngineIndex = 0; + UpperCopyEngineIndex = INT_MAX; + } + return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); +} + +namespace ur::level_zero { + +ur_result_t urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to @@ -143,7 +189,7 @@ uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { return Device->ZeGlobalMemSize.operator->()->value; } -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( +ur_result_t urDeviceGetInfo( ur_device_handle_t Device, ///< [in] handle of the device instance ur_device_info_t ParamName, ///< [in] type of the info to retrieve size_t propSize, ///< [in] the number of bytes pointed to by ParamValue. @@ -1068,158 +1114,353 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_SUCCESS; } -// UR_L0_USE_COPY_ENGINE can be set to an integer value, or -// a pair of integer values of the form "lower_index:upper_index". -// Here, the indices point to copy engines in a list of all available copy -// engines. -// This functions returns this pair of indices. -// If the user specifies only a single integer, a value of 0 indicates that -// the copy engines will not be used at all. A value of 1 indicates that all -// available copy engines can be used. -const std::pair -getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { - const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); - const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); - static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); - // If the environment variable is not set, no copy engines are used when - // immediate commandlists are being used. For standard commandlists all are - // used. - if (!EnvVar) { - if (Device->ImmCommandListUsed) - return std::pair(0, 0); // Only main copy engine will be used. - return std::pair(0, INT_MAX); // All copy engines will be used. - } - std::string CopyEngineRange = EnvVar; - // Environment variable can be a single integer or a pair of integers - // separated by ":" - auto pos = CopyEngineRange.find(":"); - if (pos == std::string::npos) { - bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); - if (UseCopyEngine) - return std::pair(0, INT_MAX); // All copy engines can be used. - return std::pair(-1, -1); // No copy engines will be used. 
- } - int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); - int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); - if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || - (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { - logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " - "default set."); - LowerCopyEngineIndex = 0; - UpperCopyEngineIndex = INT_MAX; - } - return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); -} - bool CopyEngineRequested(const ur_device_handle_t &Device) { int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).first; int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).second; return ((LowerCopyQueueIndex != -1) || (UpperCopyQueueIndex != -1)); } -// Whether immediate commandlists will be used for kernel launches and copies. -// The default is standard commandlists. Setting 1 or 2 specifies use of -// immediate commandlists. Note: when immediate commandlists are used then -// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. -// (See env var UR_L0_DEVICE_SCOPE_EVENTS). - -// Get value of immediate commandlists env var setting or -1 if unset -ur_device_handle_t_::ImmCmdlistMode -ur_device_handle_t_::useImmediateCommandLists() { - // If immediate commandlist setting is not explicitly set, then use the device - // default. - // TODO: confirm this is good once make_queue revert is added - static const int ImmediateCommandlistsSetting = [] { - const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); - const char *PiRet = - std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); - const char *ImmediateCommandlistsSettingStr = - UrRet ? UrRet : (PiRet ? PiRet : nullptr); - if (!ImmediateCommandlistsSettingStr) - return -1; - return std::atoi(ImmediateCommandlistsSettingStr); - }(); - - if (ImmediateCommandlistsSetting == -1) { - bool isDG2SupportedDriver = - this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820); - if ((isDG2SupportedDriver && isDG2()) || isPVC()) { - return PerQueue; - } else { - return NotUsed; +ur_result_t urDevicePartition( + ur_device_handle_t Device, ///< [in] handle of the device to partition. + const ur_device_partition_properties_t + *Properties, ///< [in] Device partition properties. + uint32_t NumDevices, ///< [in] the number of sub-devices. + ur_device_handle_t + *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle + ///< of devices. If NumDevices is less than the number of + ///< sub-devices available, then the function shall only + ///< retrieve that number of sub-devices. + uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of + ///< sub-devices the device can be partitioned into + ///< according to the partitioning property. 
+) { + // Other partitioning ways are not supported by Level Zero + UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + if ((Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && + Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { + return UR_RESULT_ERROR_INVALID_VALUE; } + } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + if (Properties->pProperties->value.affinity_domain != 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_INVALID_VALUE; } - switch (ImmediateCommandlistsSetting) { - case 0: - return NotUsed; - case 1: - return PerQueue; - case 2: - return PerThreadPerQueue; - default: - return NotUsed; - } -} -bool ur_device_handle_t_::useRelaxedAllocationLimits() { - static const bool EnableRelaxedAllocationLimits = [] { - auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); - const bool RetVal = UrRet ? std::stoi(*UrRet) : 0; - return RetVal; - }(); + // Devices cache is normally created in piDevicesGet but still make + // sure that cache is populated. + // + auto Res = Device->Platform->populateDeviceCacheIfNeeded(); + if (Res != UR_RESULT_SUCCESS) { + return Res; + } - return EnableRelaxedAllocationLimits; -} + auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { + if (Device->SubDevices.size() == 0) + return 0; -bool ur_device_handle_t_::useDriverInOrderLists() { - // Use in-order lists implementation from L0 driver instead - // of adapter's implementation. + // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. + // However, if + // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that + // still expose CSlices in partitioning by affinity domain for compatibility + // reasons. + if (Properties->pProperties->type == + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && + !ExposeCSliceInAffinityPartitioning) { + if (Device->isSubDevice()) { + return 0; + } + } + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + // Not a CSlice-based partitioning. + if (!Device->SubDevices[0]->isCCS()) { + return 0; + } + } - static const bool UseDriverInOrderLists = [&] { - const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); - bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar( - 1, 3, L0_DRIVER_INORDER_MIN_VERSION); - if (!UrRet) - return CompatibleDriver; - return std::atoi(UrRet) != 0; + return Device->SubDevices.size(); }(); - return UseDriverInOrderLists; -} - -ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, - int SubSubDeviceIndex) { - // Maintain various device properties cache. - // Note that we just describe here how to compute the data. - // The real initialization is upon first access. + // TODO: Consider support for partitioning to <= total sub-devices. + // Currently supported partitioning (by affinity domain/numa) would always + // partition to all sub-devices. 
// - auto ZeDevice = this->ZeDevice; - ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); - }; - - ZeDeviceComputeProperties.Compute = - [ZeDevice](ze_device_compute_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); - }; + if (NumDevices != 0) + UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - ZeDeviceIpVersionExt.Compute = - [ZeDevice](ze_device_ip_version_ext_t &Properties) { - ze_device_properties_t P; - P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; - P.pNext = (void *)&Properties; - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); - }; + for (uint32_t I = 0; I < NumDevices; I++) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + // In case the value is NEXT_PARTITIONABLE, we need to change it to the + // chosen domain. This will always be NUMA since that's the only domain + // supported by level zero. + prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; - ZeDeviceImageProperties.Compute = - [ZeDevice](ze_device_image_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); - }; + OutDevices[I] = Device->SubDevices[I]; + // reusing the same pi_device needs to increment the reference count + ur::level_zero::urDeviceRetain(OutDevices[I]); + } - ZeDeviceModuleProperties.Compute = - [ZeDevice](ze_device_module_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); - }; + if (NumDevicesRet) { + *NumDevicesRet = EffectiveNumDevices; + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceSelectBinary( + ur_device_handle_t + Device, ///< [in] handle of the device to select binary for. + const ur_device_binary_t + *Binaries, ///< [in] the array of binaries to select from. + uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. + ///< Must greater than or equal to zero otherwise + ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. + uint32_t + *SelectedBinary ///< [out] the index of the selected binary in the input + ///< array of binaries. If a suitable binary was not + ///< found the function returns ${X}_INVALID_BINARY. +) { + std::ignore = Device; + // TODO: this is a bare-bones implementation for choosing a device image + // that would be compatible with the targeted device. An AOT-compiled + // image is preferred over SPIR-V for known devices (i.e. Intel devices) + // The implementation makes no effort to differentiate between multiple images + // for the given device, and simply picks the first one compatible. + // + // Real implementation will use the same mechanism OpenCL ICD dispatcher + // uses. Something like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. + + // Look for GEN binary, which we known can only be handled by Level-Zero now. 
+ const char *BinaryTarget = + UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + + uint32_t *SelectedBinaryInd = SelectedBinary; + + // Find the appropriate device image, fallback to spirv if not found + constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); + uint32_t Spirv = InvalidInd; + + for (uint32_t i = 0; i < NumBinaries; ++i) { + if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { + *SelectedBinaryInd = i; + return UR_RESULT_SUCCESS; + } + if (strcmp(Binaries[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) + Spirv = i; + } + // Points to a spirv image, if such indeed was found + if ((*SelectedBinaryInd = Spirv) != InvalidInd) + return UR_RESULT_SUCCESS; + + // No image can be loaded for the given device + return UR_RESULT_ERROR_INVALID_BINARY; +} + +ur_result_t urDeviceGetNativeHandle( + ur_device_handle_t Device, ///< [in] handle of the device. + ur_native_handle_t + *NativeDevice ///< [out] a pointer to the native handle of the device. +) { + *NativeDevice = reinterpret_cast(Device->ZeDevice); + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceCreateWithNativeHandle( + ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. + [[maybe_unused]] ur_adapter_handle_t + Adapter, ///< [in] handle of the platform instance + [[maybe_unused]] const ur_device_native_properties_t + *Properties, ///< [in][optional] pointer to native device properties + ///< struct. + ur_device_handle_t + *Device ///< [out] pointer to the handle of the device object created. +) { + auto ZeDevice = ur_cast(NativeDevice); + + // The SYCL spec requires that the set of devices must remain fixed for the + // duration of the application's execution. We assume that we found all of the + // Level Zero devices when we initialized the platforms/devices cache, so the + // "NativeHandle" must already be in the cache. If it is not, this must not be + // a valid Level Zero device. + + ur_device_handle_t Dev = nullptr; + if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { + for (const auto &p : *platforms) { + Dev = p->getDeviceFromNativeHandle(ZeDevice); + } + } else { + return GlobalAdapter->PlatformCache->get_error(); + } + + if (Dev == nullptr) + return UR_RESULT_ERROR_INVALID_VALUE; + + *Device = Dev; + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceGetGlobalTimestamps( + ur_device_handle_t Device, ///< [in] handle of the device instance + uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's + ///< global timestamp that correlates with the + ///< Host's global timestamp value + uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global + ///< timestamp that correlates with the Device's + ///< global timestamp value +) { + const uint64_t &ZeTimerResolution = + Device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = Device->getTimestampMask(); + uint64_t DeviceClockCount, Dummy; + + ZE2UR_CALL(zeDeviceGetGlobalTimestamps, + (Device->ZeDevice, + HostTimestamp == nullptr ? &Dummy : HostTimestamp, + &DeviceClockCount)); + + if (DeviceTimestamp != nullptr) { + *DeviceTimestamp = + (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRetain(ur_device_handle_t Device) { + // The root-device ref-count remains unchanged (always 1). 
+ if (Device->isSubDevice()) { + Device->RefCount.increment(); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRelease(ur_device_handle_t Device) { + // Root devices are destroyed during the piTearDown process. + if (Device->isSubDevice()) { + if (Device->RefCount.decrementAndTest()) { + delete Device; + } + } + + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero + +// Whether immediate commandlists will be used for kernel launches and copies. +// The default is standard commandlists. Setting 1 or 2 specifies use of +// immediate commandlists. Note: when immediate commandlists are used then +// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. +// (See env var UR_L0_DEVICE_SCOPE_EVENTS). + +// Get value of immediate commandlists env var setting or -1 if unset +ur_device_handle_t_::ImmCmdlistMode +ur_device_handle_t_::useImmediateCommandLists() { + // If immediate commandlist setting is not explicitly set, then use the device + // default. + // TODO: confirm this is good once make_queue revert is added + static const int ImmediateCommandlistsSetting = [] { + const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); + const char *PiRet = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); + const char *ImmediateCommandlistsSettingStr = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (!ImmediateCommandlistsSettingStr) + return -1; + return std::atoi(ImmediateCommandlistsSettingStr); + }(); + + if (ImmediateCommandlistsSetting == -1) { + bool isDG2SupportedDriver = + this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820); + if ((isDG2SupportedDriver && isDG2()) || isPVC()) { + return PerQueue; + } else { + return NotUsed; + } + } + switch (ImmediateCommandlistsSetting) { + case 0: + return NotUsed; + case 1: + return PerQueue; + case 2: + return PerThreadPerQueue; + default: + return NotUsed; + } +} + +bool ur_device_handle_t_::useRelaxedAllocationLimits() { + static const bool EnableRelaxedAllocationLimits = [] { + auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); + const bool RetVal = UrRet ? std::stoi(*UrRet) : 0; + return RetVal; + }(); + + return EnableRelaxedAllocationLimits; +} + +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + + static const bool UseDriverInOrderLists = [&] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar( + 1, 3, L0_DRIVER_INORDER_MIN_VERSION); + if (!UrRet) + return CompatibleDriver; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + +ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, + int SubSubDeviceIndex) { + // Maintain various device properties cache. + // Note that we just describe here how to compute the data. + // The real initialization is upon first access. 
+ // + auto ZeDevice = this->ZeDevice; + ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceComputeProperties.Compute = + [ZeDevice](ze_device_compute_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceIpVersionExt.Compute = + [ZeDevice](ze_device_ip_version_ext_t &Properties) { + ze_device_properties_t P; + P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + P.pNext = (void *)&Properties; + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); + }; + + ZeDeviceImageProperties.Compute = + [ZeDevice](ze_device_image_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceModuleProperties.Compute = + [ZeDevice](ze_device_module_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); + }; ZeDeviceMemoryProperties.Compute = [ZeDevice]( @@ -1314,7 +1555,7 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_ERROR_UNKNOWN; } - if (CopyEngineRequested((ur_device_handle_t)this)) { + if (ur::level_zero::CopyEngineRequested((ur_device_handle_t)this)) { for (uint32_t i = 0; i < numQueueGroups; i++) { if (((QueueGroupProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && @@ -1355,26 +1596,6 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_SUCCESS; } -ur_result_t urDeviceRetain(ur_device_handle_t Device) { - - // The root-device ref-count remains unchanged (always 1). - if (Device->isSubDevice()) { - Device->RefCount.increment(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urDeviceRelease(ur_device_handle_t Device) { - // Root devices are destroyed during the piTearDown process. - if (Device->isSubDevice()) { - if (Device->RefCount.decrementAndTest()) { - delete Device; - } - } - - return UR_RESULT_SUCCESS; -} - void ZeDriverVersionStringExtension::setZeDriverVersionString( ur_platform_handle_t_ *Platform) { // Check if Intel Driver Version String is available. If yes, save the API @@ -1442,221 +1663,3 @@ void ZeUSMImportExtension::doZeUSMRelease(ze_driver_handle_t DriverHandle, void *HostPtr) { ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (DriverHandle, HostPtr)); } - -UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( - ur_device_handle_t Device, ///< [in] handle of the device to partition. - const ur_device_partition_properties_t - *Properties, ///< [in] Device partition properties. - uint32_t NumDevices, ///< [in] the number of sub-devices. - ur_device_handle_t - *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle - ///< of devices. If NumDevices is less than the number of - ///< sub-devices available, then the function shall only - ///< retrieve that number of sub-devices. - uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of - ///< sub-devices the device can be partitioned into - ///< according to the partitioning property. 
-) { - // Other partitioning ways are not supported by Level Zero - UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - if ((Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && - Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - if (Properties->pProperties->value.affinity_domain != 0) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else { - return UR_RESULT_ERROR_INVALID_VALUE; - } - - // Devices cache is normally created in piDevicesGet but still make - // sure that cache is populated. - // - auto Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != UR_RESULT_SUCCESS) { - return Res; - } - - auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { - if (Device->SubDevices.size() == 0) - return 0; - - // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. - // However, if - // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that - // still expose CSlices in partitioning by affinity domain for compatibility - // reasons. - if (Properties->pProperties->type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && - !ExposeCSliceInAffinityPartitioning) { - if (Device->isSubDevice()) { - return 0; - } - } - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - // Not a CSlice-based partitioning. - if (!Device->SubDevices[0]->isCCS()) { - return 0; - } - } - - return Device->SubDevices.size(); - }(); - - // TODO: Consider support for partitioning to <= total sub-devices. - // Currently supported partitioning (by affinity domain/numa) would always - // partition to all sub-devices. - // - if (NumDevices != 0) - UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - - for (uint32_t I = 0; I < NumDevices; I++) { - auto prop = Properties->pProperties[0]; - if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - // In case the value is NEXT_PARTITIONABLE, we need to change it to the - // chosen domain. This will always be NUMA since that's the only domain - // supported by level zero. - prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; - } - Device->SubDevices[I]->SubDeviceCreationProperty = prop; - - OutDevices[I] = Device->SubDevices[I]; - // reusing the same pi_device needs to increment the reference count - urDeviceRetain(OutDevices[I]); - } - - if (NumDevicesRet) { - *NumDevicesRet = EffectiveNumDevices; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( - ur_device_handle_t - Device, ///< [in] handle of the device to select binary for. - const ur_device_binary_t - *Binaries, ///< [in] the array of binaries to select from. - uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. - ///< Must greater than or equal to zero otherwise - ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. - uint32_t - *SelectedBinary ///< [out] the index of the selected binary in the input - ///< array of binaries. If a suitable binary was not - ///< found the function returns ${X}_INVALID_BINARY. -) { - std::ignore = Device; - // TODO: this is a bare-bones implementation for choosing a device image - // that would be compatible with the targeted device. An AOT-compiled - // image is preferred over SPIR-V for known devices (i.e. 
Intel devices) - // The implementation makes no effort to differentiate between multiple images - // for the given device, and simply picks the first one compatible. - // - // Real implementation will use the same mechanism OpenCL ICD dispatcher - // uses. Something like: - // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); - // return context->dispatch->piextDeviceSelectIR( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by PI - // plugin for platform/device the ctx was created for. - - // Look for GEN binary, which we known can only be handled by Level-Zero now. - const char *BinaryTarget = - UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; - - uint32_t *SelectedBinaryInd = SelectedBinary; - - // Find the appropriate device image, fallback to spirv if not found - constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); - uint32_t Spirv = InvalidInd; - - for (uint32_t i = 0; i < NumBinaries; ++i) { - if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { - *SelectedBinaryInd = i; - return UR_RESULT_SUCCESS; - } - if (strcmp(Binaries[i].pDeviceTargetSpec, - UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) - Spirv = i; - } - // Points to a spirv image, if such indeed was found - if ((*SelectedBinaryInd = Spirv) != InvalidInd) - return UR_RESULT_SUCCESS; - - // No image can be loaded for the given device - return UR_RESULT_ERROR_INVALID_BINARY; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( - ur_device_handle_t Device, ///< [in] handle of the device. - ur_native_handle_t - *NativeDevice ///< [out] a pointer to the native handle of the device. -) { - *NativeDevice = reinterpret_cast(Device->ZeDevice); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. - [[maybe_unused]] ur_adapter_handle_t - Adapter, ///< [in] handle of the platform instance - [[maybe_unused]] const ur_device_native_properties_t - *Properties, ///< [in][optional] pointer to native device properties - ///< struct. - ur_device_handle_t - *Device ///< [out] pointer to the handle of the device object created. -) { - auto ZeDevice = ur_cast(NativeDevice); - - // The SYCL spec requires that the set of devices must remain fixed for the - // duration of the application's execution. We assume that we found all of the - // Level Zero devices when we initialized the platforms/devices cache, so the - // "NativeHandle" must already be in the cache. If it is not, this must not be - // a valid Level Zero device. 
- - ur_device_handle_t Dev = nullptr; - if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { - for (const auto &p : *platforms) { - Dev = p->getDeviceFromNativeHandle(ZeDevice); - } - } else { - return GlobalAdapter->PlatformCache->get_error(); - } - - if (Dev == nullptr) - return UR_RESULT_ERROR_INVALID_VALUE; - - *Device = Dev; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( - ur_device_handle_t Device, ///< [in] handle of the device instance - uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's - ///< global timestamp that correlates with the - ///< Host's global timestamp value - uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global - ///< timestamp that correlates with the Device's - ///< global timestamp value -) { - const uint64_t &ZeTimerResolution = - Device->ZeDeviceProperties->timerResolution; - const uint64_t TimestampMaxCount = Device->getTimestampMask(); - uint64_t DeviceClockCount, Dummy; - - ZE2UR_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, - HostTimestamp == nullptr ? &Dummy : HostTimestamp, - &DeviceClockCount)); - - if (DeviceTimestamp != nullptr) { - *DeviceTimestamp = - (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; - } - - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 898edff779..a8b8098819 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/enqueue_native.cpp b/source/adapters/level_zero/enqueue_native.cpp index b67cccc4f1..7c3a1da988 100644 --- a/source/adapters/level_zero/enqueue_native.cpp +++ b/source/adapters/level_zero/enqueue_native.cpp @@ -8,13 +8,30 @@ // //===----------------------------------------------------------------------===// +#include #include +#include -#include "queue.hpp" +namespace ur::level_zero { + +ur_result_t urEnqueueNativeCommandExp( + ur_queue_handle_t hQueue, + ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, + uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, + const ur_exp_enqueue_native_command_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pfnNativeEnqueue; + std::ignore = data; + std::ignore = numMemsInMemList; + std::ignore = phMemList; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; -ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp( - ur_exp_enqueue_native_command_function_t, void *, uint32_t, - const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index f4dee0d661..84a7c0b159 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -18,6 +18,7 @@ #include "common.hpp" #include "event.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" void printZeEventList(const _ur_ze_event_list_t &UrZeEventList) { @@ -46,21 +47,23 @@ static const bool UseMultipleCmdlistBarriers = [] { }(); bool 
WaitListEmptyOrAllEventsFromSameQueue( - ur_queue_handle_legacy_t Queue, uint32_t NumEventsInWaitList, + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { if (!NumEventsInWaitList) return true; for (uint32_t i = 0; i < NumEventsInWaitList; ++i) { - if (Queue != Legacy(EventWaitList[i]->UrQueue)) + if (Queue != EventWaitList[i]->UrQueue) return false; } return true; } -ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of - ///< the queue object +namespace ur::level_zero { + +ur_result_t urEnqueueEventsWait( + ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -72,7 +75,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueEventsWait( ///< [in] handle of *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; if (EventWaitList) { bool UseCopyEngine = false; @@ -152,9 +154,8 @@ static const bool InOrderBarrierBySignal = [] { return (UrRet ? std::atoi(UrRet) : true); }(); -ur_result_t -ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the - ///< queue object +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -166,8 +167,6 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; - // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -234,7 +233,7 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, EventWaitList) && Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { - UR_CALL(urEventRetain(Queue->LastCommandEvent)); + UR_CALL(ur::level_zero::urEventRetain(Queue->LastCommandEvent)); *Event = Queue->LastCommandEvent; return UR_RESULT_SUCCESS; } @@ -304,8 +303,8 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the for (auto &QueueMap : {Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID}) for (auto &QueueGroup : QueueMap) { - bool UseCopyEngine = QueueGroup.second.Type != - ur_queue_handle_legacy_t_::queue_type::Compute; + bool UseCopyEngine = + QueueGroup.second.Type != ur_queue_handle_t_::queue_type::Compute; if (Queue->UsingImmCmdLists) { // If immediate command lists are being used, each will act as their own // queue, so we must insert a barrier into each. @@ -374,8 +373,8 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the // Execute each command list so the barriers can be encountered. for (ur_command_list_ptr_t &CmdList : CmdLists) { - bool IsCopy = CmdList->second.isCopy( - reinterpret_cast(Queue)); + bool IsCopy = + CmdList->second.isCopy(reinterpret_cast(Queue)); const auto &CommandBatch = (IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch; // Only batch if the matching CmdList is already open. 
@@ -390,7 +389,7 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( +ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_event_info_t PropName, ///< [in] the name of the event property to query size_t PropValueSize, ///< [in] size in bytes of the event property value @@ -419,7 +418,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( // possible that this is trying to query some event's status that // is part of the batch. This isn't strictly required, but it seems // like a reasonable thing to do. - auto UrQueue = Legacy(Event->UrQueue); + auto UrQueue = Event->UrQueue; if (UrQueue) { // Lock automatically releases when this goes out of scope. std::unique_lock Lock(UrQueue->Mutex, std::try_to_lock); @@ -473,7 +472,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( +ur_result_t urEventGetProfilingInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_profiling_info_t PropName, ///< [in] the name of the profiling property to query @@ -491,9 +490,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; } - ur_device_handle_t Device = Legacy(Event->UrQueue) - ? Legacy(Event->UrQueue)->Device - : Event->Context->Devices[0]; + ur_device_handle_t Device = + Event->UrQueue ? Event->UrQueue->Device : Event->Context->Devices[0]; uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; const uint64_t TimestampMaxValue = Device->getTimestampMask(); @@ -517,10 +515,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return ReturnValue(Event->RecordEventEndTimestamp); // Otherwise we need to collect it from the queue. - auto Entry = Legacy(Event->UrQueue)->EndTimeRecordings.find(Event); + auto Entry = Event->UrQueue->EndTimeRecordings.find(Event); // Unexpected state if there is no end-time record. - if (Entry == Legacy(Event->UrQueue)->EndTimeRecordings.end()) + if (Entry == Event->UrQueue->EndTimeRecordings.end()) return UR_RESULT_ERROR_UNKNOWN; auto &EndTimeRecording = Entry->second; @@ -545,7 +543,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( // anymore, so we cache it on the event and evict the record from the // queue. Event->RecordEventEndTimestamp = ContextEndTime; - Legacy(Event->UrQueue)->EndTimeRecordings.erase(Entry); + Event->UrQueue->EndTimeRecordings.erase(Entry); return ReturnValue(ContextEndTime); } @@ -663,7 +661,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( +ur_result_t urEnqueueTimestampRecordingExp( + ur_queue_handle_t Queue, ///< [in] handle of the queue object bool Blocking, ///< [in] blocking or non-blocking enqueue uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -677,7 +676,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( *OutEvent ///< [in,out] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; // Lock automatically releases when this goes out of scope. 
std::scoped_lock lock(Queue->Mutex); @@ -701,12 +699,13 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( (*OutEvent)->WaitList = TmpWaitList; uint64_t DeviceStartTimestamp = 0; - UR_CALL(urDeviceGetGlobalTimestamps(Device, &DeviceStartTimestamp, nullptr)); + UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps( + Device, &DeviceStartTimestamp, nullptr)); (*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp; // Create a new entry in the queue's recordings. Queue->EndTimeRecordings[*OutEvent] = - ur_queue_handle_legacy_t_::end_time_recording{}; + ur_queue_handle_t_::end_time_recording{}; ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, (CommandList->first, @@ -720,64 +719,15 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( - ze_event_handle_t &ZeHostVisibleEvent) { - auto UrQueue = Legacy(this->UrQueue); - - std::scoped_lock Lock(UrQueue->Mutex, - this->Mutex); - - if (!HostVisibleEvent) { - this->IsCreatingHostProxyEvent = true; - if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) - die("getOrCreateHostVisibleEvent: missing host-visible event"); - - // Submit the command(s) signalling the proxy event to the queue. - // We have to first submit a wait for the device-only event for which this - // proxy is created. - // - // Get a new command list to be used on this call - - // We want to batch these commands to avoid extra submissions (costly) - bool OkToBatch = true; - - ur_command_list_ptr_t CommandList{}; - UR_CALL(UrQueue->Context->getAvailableCommandList( - UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) - - // Create a "proxy" host-visible event. - UR_CALL(createEventAndAssociateQueue( - UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, - /* IsInternal */ false, /* IsMultiDevice */ false, - /* HostVisible */ true)); - - if (this->IsInnerBatchedEvent) { - ZE2UR_CALL(zeCommandListAppendBarrier, - (CommandList->first, ZeEvent, 0, nullptr)); - } else { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (CommandList->first, 1, &ZeEvent)); - } - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CommandList->first, HostVisibleEvent->ZeEvent)); - - UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) - this->IsCreatingHostProxyEvent = false; - } - - ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventWait( - uint32_t NumEvents, ///< [in] number of events in the event list - const ur_event_handle_t - *EventWaitList ///< [in][range(0, numEvents)] pointer to a list of - ///< events to wait for completion +ur_result_t +urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list + const ur_event_handle_t + *EventWaitList ///< [in][range(0, numEvents)] pointer to a list + ///< of events to wait for completion ) { for (uint32_t I = 0; I < NumEvents; I++) { auto e = EventWaitList[I]; - auto UrQueue = Legacy(e->UrQueue); + auto UrQueue = e->UrQueue; if (UrQueue && UrQueue->ZeEventsScope == OnDemandHostVisibleProxy) { // Make sure to add all host-visible "proxy" event signals if needed. 
// This ensures that all signalling commands are submitted below and @@ -795,7 +745,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( // Submit dependent open command lists for execution, if any for (uint32_t I = 0; I < NumEvents; I++) { ur_event_handle_t_ *Event = ur_cast(EventWaitList[I]); - auto UrQueue = Legacy(Event->UrQueue); + auto UrQueue = Event->UrQueue; if (UrQueue) { // Lock automatically releases when this goes out of scope. std::scoped_lock lock(UrQueue->Mutex); @@ -803,7 +753,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( UR_CALL(UrQueue->executeAllOpenCommandLists()); } } - std::unordered_set Queues; + std::unordered_set Queues; for (uint32_t I = 0; I < NumEvents; I++) { { ur_event_handle_t_ *Event = @@ -830,13 +780,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( Event->Completed = true; } } - if (auto Q = Legacy(Event->UrQueue)) { + if (auto Q = Event->UrQueue) { if (Q->UsingImmCmdLists && Q->isInOrderQueue()) // Use information about waited event to cleanup completed events in // the in-order queue. CleanupEventsInImmCmdLists( - Legacy(Event->UrQueue), false /* QueueLocked */, - false /* QueueSynced */, + Event->UrQueue, false /* QueueLocked */, false /* QueueSynced */, reinterpret_cast(Event)); else { // NOTE: we are cleaning up after the event here to free resources @@ -861,8 +810,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRetain(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal++; Event->RefCount.increment(); @@ -870,8 +819,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRelease(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal--; UR_CALL(urEventReleaseInternal(Event)); @@ -879,7 +828,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( +ur_result_t urEventGetNativeHandle( ur_event_handle_t Event, ///< [in] handle of the event. ur_native_handle_t *NativeEvent ///< [out] a pointer to the native handle of the event. @@ -892,7 +841,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( // Event can potentially be in an open command-list, make sure that // it is submitted for execution to avoid potential deadlock if // interop app is going to wait for it. - auto Queue = Legacy(Event->UrQueue); + auto Queue = Event->UrQueue; if (Queue) { std::scoped_lock lock(Queue->Mutex); const auto &OpenCommandList = Queue->eventOpenCommandList(Event); @@ -904,7 +853,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( +ur_result_t urExtEventCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_event_handle_t *Event ///< [out] pointer to the handle of the event object created. @@ -917,7 +866,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( +ur_result_t urEventCreateWithNativeHandle( ur_native_handle_t NativeEvent, ///< [in] the native handle of the event. 
ur_context_handle_t Context, ///< [in] handle of the context object const ur_event_native_properties_t *Properties, @@ -967,7 +916,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( +ur_result_t urEventSetCallback( ur_event_handle_t Event, ///< [in] handle of the event object ur_execution_info_t ExecStatus, ///< [in] execution status of the event ur_event_callback_t Notify, ///< [in] execution status of the event @@ -983,6 +932,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero + +ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( + ze_event_handle_t &ZeHostVisibleEvent) { + auto UrQueue = this->UrQueue; + + std::scoped_lock Lock(UrQueue->Mutex, + this->Mutex); + + if (!HostVisibleEvent) { + this->IsCreatingHostProxyEvent = true; + if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) + die("getOrCreateHostVisibleEvent: missing host-visible event"); + + // Submit the command(s) signalling the proxy event to the queue. + // We have to first submit a wait for the device-only event for which this + // proxy is created. + // + // Get a new command list to be used on this call + + // We want to batch these commands to avoid extra submissions (costly) + bool OkToBatch = true; + + ur_command_list_ptr_t CommandList{}; + UR_CALL(UrQueue->Context->getAvailableCommandList( + UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) + + // Create a "proxy" host-visible event. + UR_CALL(createEventAndAssociateQueue( + UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, + /* IsInternal */ false, /* IsMultiDevice */ false, + /* HostVisible */ true)); + + if (this->IsInnerBatchedEvent) { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandList->first, ZeEvent, 0, nullptr)); + } else { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandList->first, 1, &ZeEvent)); + } + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandList->first, HostVisibleEvent->ZeEvent)); + + UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) + this->IsCreatingHostProxyEvent = false; + } + + ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; + return UR_RESULT_SUCCESS; +} + ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (!Event->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1022,7 +1022,7 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { } // Save pointer to the queue before deleting/resetting event. - auto Queue = Legacy(Event->UrQueue); + auto Queue = Event->UrQueue; // If the event was a timestamp recording, we try to evict its entry in the // queue. @@ -1099,7 +1099,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, ur_kernel_handle_t AssociatedKernel = nullptr; // List of dependent events. std::list EventsToBeReleased; - ur_queue_handle_legacy_t AssociatedQueue = nullptr; + ur_queue_handle_t AssociatedQueue = nullptr; { // If the Event is already locked, then continue with the cleanup, otherwise // block on locking the event. @@ -1113,7 +1113,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, if (Event->CleanedUp) return UR_RESULT_SUCCESS; - AssociatedQueue = Legacy(Event->UrQueue); + AssociatedQueue = Event->UrQueue; // Remember the kernel associated with this event if there is one. We are // going to release it later. 
@@ -1158,7 +1158,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // We've reset event data members above, now cleanup resources. if (AssociatedKernel) { ReleaseIndirectMem(AssociatedKernel); - UR_CALL(urKernelRelease(AssociatedKernel)); + UR_CALL(ur::level_zero::urKernelRelease(AssociatedKernel)); } if (AssociatedQueue) { @@ -1217,7 +1217,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, } if (DepEventKernel) { ReleaseIndirectMem(DepEventKernel); - UR_CALL(urKernelRelease(DepEventKernel)); + UR_CALL(ur::level_zero::urKernelRelease(DepEventKernel)); } UR_CALL(urEventReleaseInternal(DepEvent)); } @@ -1230,9 +1230,9 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // The "HostVisible" argument specifies if event needs to be allocated from // a host-visible pool. // -ur_result_t EventCreate(ur_context_handle_t Context, - ur_queue_handle_legacy_t Queue, bool IsMultiDevice, - bool HostVisible, ur_event_handle_t *RetEvent, +ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, + bool IsMultiDevice, bool HostVisible, + ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled, bool ForceDisableProfiling) { bool ProfilingEnabled = @@ -1319,7 +1319,7 @@ ur_result_t ur_event_handle_t_::reset() { ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( uint32_t EventListLength, const ur_event_handle_t *EventList, - ur_queue_handle_legacy_t CurQueue, bool UseCopyEngine) { + ur_queue_handle_t CurQueue, bool UseCopyEngine) { this->Length = 0; this->ZeEventList = nullptr; this->UrEventList = nullptr; @@ -1435,7 +1435,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( } } - auto Queue = Legacy(EventList[I]->UrQueue); + auto Queue = EventList[I]->UrQueue; auto CurQueueDevice = CurQueue->Device; std::optional> QueueLock = @@ -1636,7 +1636,7 @@ ur_result_t _ur_ze_event_list_t::collectEventsForReleaseAndDestroyUrZeEventList( // Tells if this event is with profiling capabilities. bool ur_event_handle_t_::isProfilingEnabled() const { return !UrQueue || // tentatively assume user events are profiling enabled - (Legacy(UrQueue)->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; + (UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0; } // Tells if this event was created as a timestamp event, allowing profiling diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index e99df2a272..7dd64acdaa 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include @@ -29,9 +29,9 @@ extern "C" { ur_result_t urEventReleaseInternal(ur_event_handle_t Event); -ur_result_t EventCreate(ur_context_handle_t Context, - ur_queue_handle_legacy_t Queue, bool IsMultiDevice, - bool HostVisible, ur_event_handle_t *RetEvent, +ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, + bool IsMultiDevice, bool HostVisible, + ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled = false, bool ForceDisableProfiling = false); } // extern "C" @@ -89,7 +89,7 @@ struct _ur_ze_event_list_t { // command-lists. 
ur_result_t createAndRetainUrZeEventList(uint32_t EventListLength, const ur_event_handle_t *EventList, - ur_queue_handle_legacy_t CurQueue, + ur_queue_handle_t CurQueue, bool UseCopyEngine); // Add all the events in this object's UrEventList to the end diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index f68b2d93be..fc623e7e74 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -14,6 +14,7 @@ #include "event.hpp" #include "logger/ur_logger.hpp" #include "sampler.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" typedef ze_result_t(ZE_APICALL *zeImageGetDeviceOffsetExp_pfn)( @@ -631,11 +632,14 @@ getImageFormatTypeAndSize(const ur_image_format_t *ImageFormat) { return {ZeImageFormatType, ZeImageFormatTypeSize}; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { +namespace ur::level_zero { + +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch) { std::shared_lock Lock(hContext->Mutex); UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -668,13 +672,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( *pResultPitch = RowPitch; size_t Size = height * RowPitch; - UR_CALL(urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, Size, ppMem)); + UR_CALL(ur::level_zero::urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, + Size, ppMem)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesUnsampledImageHandleDestroyExp( +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { UR_ASSERT(hContext && hDevice && hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -691,17 +695,16 @@ urBindlessImagesUnsampledImageHandleDestroyExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesSampledImageHandleDestroyExp( +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { // Sampled image is a combination of unsampled image and sampler. // Sampler is released in urSamplerRelease. 
- return urBindlessImagesUnsampledImageHandleDestroyExp(hContext, hDevice, - hImage); + return ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp( + hContext, hDevice, hImage); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( +ur_result_t urBindlessImagesImageAllocateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -730,16 +733,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem) { +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem) { std::ignore = hContext; std::ignore = hDevice; - UR_CALL(urMemRelease(reinterpret_cast(hImageMem))); + UR_CALL(ur::level_zero::urMemRelease( + reinterpret_cast(hImageMem))); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( +ur_result_t urBindlessImagesUnsampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -749,7 +754,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( +ur_result_t urBindlessImagesSampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -759,8 +764,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( - [[maybe_unused]] const void *pSrc, [[maybe_unused]] void *pDst, +ur_result_t urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, [[maybe_unused]] const void *pSrc, + [[maybe_unused]] void *pDst, [[maybe_unused]] const ur_image_desc_t *pSrcImageDesc, [[maybe_unused]] const ur_image_desc_t *pDstImageDesc, [[maybe_unused]] const ur_image_format_t *pSrcImageFormat, @@ -770,7 +776,6 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { - auto hQueue = this; std::scoped_lock Lock(hQueue->Mutex); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -920,7 +925,7 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( +ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hImageMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -970,7 +975,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( } } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( +ur_result_t urBindlessImagesMipmapGetLevelExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, 
ur_exp_image_mem_native_handle_t *phImageMem) { @@ -984,13 +989,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hMem) { - return urBindlessImagesImageFreeExp(hContext, hDevice, hMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem) { + return ur::level_zero::urBindlessImagesImageFreeExp(hContext, hDevice, hMem); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( +ur_result_t urBindlessImagesImportExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, ur_exp_external_mem_type_t memHandleType, ur_exp_external_mem_desc_t *pExternalMemDesc, @@ -1050,7 +1056,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( +ur_result_t urBindlessImagesMapExternalArrayExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_external_mem_handle_t hExternalMem, @@ -1085,7 +1091,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **phRetMem) { std::ignore = hContext; @@ -1099,7 +1105,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( +ur_result_t urBindlessImagesReleaseExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_mem_handle_t hExternalMem) { @@ -1109,7 +1115,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( struct ur_ze_external_memory_data *externalMemoryData = reinterpret_cast(hExternalMem); - UR_CALL(urMemRelease(externalMemoryData->urMemoryHandle)); + UR_CALL(ur::level_zero::urMemRelease(externalMemoryData->urMemoryHandle)); switch (externalMemoryData->type) { case UR_ZE_EXTERNAL_OPAQUE_FD: @@ -1129,7 +1135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( +ur_result_t urBindlessImagesImportExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_type_t semHandleType, ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, @@ -1144,7 +1150,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_handle_t hExternalSemaphore) { std::ignore = hContext; @@ -1155,10 +1161,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue, - uint64_t waitValue, uint32_t numEventsInWaitList, +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasValue, uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = waitValue; @@ -1170,10 +1177,11 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue, - uint64_t signalValue, uint32_t numEventsInWaitList, +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasValue, uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = hQueue; std::ignore = hSemaphore; std::ignore = hasValue; std::ignore = signalValue; @@ -1184,3 +1192,5 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesSignalExternalSemaphoreExp( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/image.hpp b/source/adapters/level_zero/image.hpp index 618258601d..43f37fa757 100644 --- a/source/adapters/level_zero/image.hpp +++ b/source/adapters/level_zero/image.hpp @@ -10,7 +10,7 @@ #pragma once #include -#include +#include #include #include diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 3469620b71..8e627f3ade 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -11,11 +11,29 @@ #include "kernel.hpp" #include "logger/ur_logger.hpp" #include "ur_api.h" -#include "ur_level_zero.hpp" +#include "ur_interface_loader.hpp" #include "helpers/kernel_helpers.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( +ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, + ze_kernel_handle_t *phZeKernel) { + if (hKernel->ZeKernelMap.empty()) { + *phZeKernel = hKernel->ZeKernel; + } else { + auto It = hKernel->ZeKernelMap.find(hDevice); + if (It == hKernel->ZeKernelMap.end()) { + /* kernel and queue don't match */ + return UR_RESULT_ERROR_INVALID_QUEUE; + } + *phZeKernel = It->second; + } + + return UR_RESULT_SUCCESS; +} + +namespace ur::level_zero { + +ur_result_t urKernelGetSuggestedLocalWorkSize( ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim, [[maybe_unused]] const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, size_t *pSuggestedLocalWorkSize) { @@ -29,32 +47,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( std::copy(pGlobalWorkSize, pGlobalWorkSize + workDim, GlobalWorkSize3D); ze_kernel_handle_t ZeKernel{}; - UR_CALL(getZeKernel(Legacy(hQueue)->Device->ZeDevice, hKernel, &ZeKernel)); + UR_CALL(getZeKernel(hQueue->Device->ZeDevice, hKernel, &ZeKernel)); - UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue)->Device, ZeKernel, - GlobalWorkSize3D, LocalWorkSize)); + UR_CALL(getSuggestedLocalWorkSize(hQueue->Device, ZeKernel, 
GlobalWorkSize3D, + LocalWorkSize)); std::copy(LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize); return UR_RESULT_SUCCESS; } -ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, - ze_kernel_handle_t *phZeKernel) { - if (hKernel->ZeKernelMap.empty()) { - *phZeKernel = hKernel->ZeKernel; - } else { - auto It = hKernel->ZeKernelMap.find(hDevice); - if (It == hKernel->ZeKernelMap.end()) { - /* kernel and queue don't match */ - return UR_RESULT_ERROR_INVALID_QUEUE; - } - *phZeKernel = It->second; - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( +ur_result_t urEnqueueKernelLaunch( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group work-items @@ -86,7 +89,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - auto Queue = this; ze_kernel_handle_t ZeKernel{}; UR_CALL(getZeKernel(Queue->Device->ZeDevice, Kernel, &ZeKernel)); @@ -158,7 +160,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -204,7 +206,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( +ur_result_t urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify ///< the global and work-group work-items @@ -236,7 +239,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( UR_ASSERT(WorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); UR_ASSERT(WorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - auto Queue = this; auto ZeDevice = Queue->Device->ZeDevice; ze_kernel_handle_t ZeKernel{}; @@ -422,7 +424,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -468,7 +470,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( +ur_result_t urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. ur_program_handle_t Program, ///< [in] handle of the program containing the ///< device global variable. 
const char @@ -489,7 +492,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = this; std::scoped_lock lock(Queue->Mutex); // Find global variable pointer @@ -522,29 +524,28 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( EventWaitList, Event, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( - ur_program_handle_t Program, ///< [in] handle of the program containing - ///< the device global variable. - const char *Name, ///< [in] the unique identifier for the device global - ///< variable. +ur_result_t urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. + ur_program_handle_t Program, ///< [in] handle of the program containing the + ///< device global variable. + const char + *Name, ///< [in] the unique identifier for the device global variable. bool BlockingRead, ///< [in] indicates if this operation should block. size_t Count, ///< [in] the number of bytes to copy. - size_t Offset, ///< [in] the byte offset into the device global variable - ///< to start copying. - void *Dst, ///< [in] pointer to where the data must be copied to. + size_t Offset, ///< [in] the byte offset into the device global variable to + ///< start copying. + void *Dst, ///< [in] pointer to where the data must be copied to. uint32_t NumEventsInWaitList, ///< [in] size of the event wait list. const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be - ///< complete before the kernel execution. If - ///< nullptr, the numEventsInWaitList must be 0, - ///< indicating that no wait event. + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that no + ///< wait event. ur_event_handle_t - *Event ///< [in,out][optional] return an event object that - ///< identifies this particular kernel execution instance. + *Event ///< [in,out][optional] return an event object that identifies + ///< this particular kernel execution instance. ) { - auto Queue = this; - std::scoped_lock lock(Queue->Mutex); // Find global variable pointer @@ -577,7 +578,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( EventWaitList, Event, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( +ur_result_t urKernelCreate( ur_program_handle_t Program, ///< [in] handle of the program instance const char *KernelName, ///< [in] pointer to null-terminated string. 
ur_kernel_handle_t @@ -640,7 +641,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( +ur_result_t urKernelSetArgValue( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of argument type @@ -690,7 +691,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( +ur_result_t urKernelSetArgLocal( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of the local buffer to be allocated by the @@ -700,12 +701,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( ) { std::ignore = Properties; - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, ArgSize, nullptr, nullptr)); + UR_CALL(ur::level_zero::urKernelSetArgValue(Kernel, ArgIndex, ArgSize, + nullptr, nullptr)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( +ur_result_t urKernelGetInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_kernel_info_t ParamName, ///< [in] name of the Kernel property to query size_t PropSize, ///< [in] the size of the Kernel property value. @@ -767,7 +769,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( +ur_result_t urKernelGetGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_group_info_t @@ -848,7 +850,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( +ur_result_t urKernelGetSubGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_sub_group_info_t @@ -879,7 +881,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( +ur_result_t urKernelRetain( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to retain ) { Kernel->RefCount.increment(); @@ -887,7 +889,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( +ur_result_t urKernelRelease( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to release ) { if (!Kernel->RefCount.decrementAndTest()) @@ -904,7 +906,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( } Kernel->ZeKernelMap.clear(); if (IndirectAccessTrackingEnabled) { - UR_CALL(urContextRelease(KernelProgram->Context)); + UR_CALL(ur::level_zero::urContextRelease(KernelProgram->Context)); } // do a release on the program this kernel was part of without delete of the // program handle @@ -915,7 +917,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( +ur_result_t urKernelSetArgPointer( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_pointer_properties_t @@ -927,12 +929,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( std::ignore = Properties; // 
KernelSetArgValue is expecting a pointer to the argument - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, sizeof(const void *), nullptr, - &ArgValue)); + UR_CALL(ur::level_zero::urKernelSetArgValue( + Kernel, ArgIndex, sizeof(const void *), nullptr, &ArgValue)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( +ur_result_t urKernelSetExecInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object ur_kernel_exec_info_t PropName, ///< [in] name of the execution attribute size_t PropSize, ///< [in] size in byte the attribute value @@ -978,7 +980,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( +ur_result_t urKernelSetArgSampler( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_sampler_properties_t @@ -996,7 +998,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( +ur_result_t urKernelSetArgMemObj( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_mem_obj_properties_t @@ -1038,7 +1040,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( +ur_result_t urKernelGetNativeHandle( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel. ur_native_handle_t *NativeKernel ///< [out] a pointer to the native handle of the kernel. @@ -1049,7 +1051,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) { (void)localWorkSize; @@ -1062,7 +1064,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( +ur_result_t urKernelCreateWithNativeHandle( ur_native_handle_t NativeKernel, ///< [in] the native handle of the kernel. ur_context_handle_t Context, ///< [in] handle of the context object ur_program_handle_t Program, @@ -1098,13 +1100,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in] array of specialization constant value + ///< descriptions +) { + std::ignore = Kernel; + std::ignore = Count; + std::ignore = SpecConstants; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + ur_result_t ur_kernel_handle_t_::initialize() { // Retain the program and context to show it's used by this kernel. 
- UR_CALL(urProgramRetain(Program)); + UR_CALL(ur::level_zero::urProgramRetain(Program)); if (IndirectAccessTrackingEnabled) // TODO: do piContextRetain without the guard - UR_CALL(urContextRetain(Program->Context)); + UR_CALL(ur::level_zero::urContextRetain(Program->Context)); // Set up how to obtain kernel properties when needed. ZeKernelProperties.Compute = [this](ze_kernel_properties_t &Properties) { @@ -1123,36 +1142,3 @@ ur_result_t ur_kernel_handle_t_::initialize() { return UR_RESULT_SUCCESS; } - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants( - ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in] array of specialization constant value - ///< descriptions -) { - std::ignore = Kernel; - std::ignore = Count; - std::ignore = SpecConstants; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunchCustomExp( - ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, - const ur_exp_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - std::ignore = hKernel; - std::ignore = workDim; - std::ignore = pGlobalWorkSize; - std::ignore = pLocalWorkSize; - std::ignore = numPropsInLaunchPropList; - std::ignore = launchPropList; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 585a10ef4f..9786092073 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -18,6 +18,7 @@ #include "image.hpp" #include "logger/ur_logger.hpp" #include "queue.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" // Default to using compute engine for fill operation, but allow to @@ -59,7 +60,7 @@ bool IsSharedPointer(ur_context_handle_t Context, const void *Ptr) { // PI interfaces must have queue's and destination buffer's mutexes locked for // exclusive use and source buffer's mutex locked for shared use on entry. ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, - ur_queue_handle_legacy_t Queue, void *Dst, + ur_queue_handle_t Queue, void *Dst, ur_bool_t BlockingWrite, size_t Size, const void *Src, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, @@ -112,13 +113,12 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, // PI interfaces must have queue's and destination buffer's mutexes locked for // exclusive use and source buffer's mutex locked for shared use on entry. 
ur_result_t enqueueMemCopyRectHelper( - ur_command_t CommandType, ur_queue_handle_legacy_t Queue, - const void *SrcBuffer, void *DstBuffer, ur_rect_offset_t SrcOrigin, - ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, - size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch, - ur_bool_t Blocking, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent, - bool PreferCopyEngine) { + ur_command_t CommandType, ur_queue_handle_t Queue, const void *SrcBuffer, + void *DstBuffer, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, + ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch, + size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent, bool PreferCopyEngine) { bool UseCopyEngine = Queue->useCopyEngine(PreferCopyEngine); _ur_ze_event_list_t TmpWaitList; @@ -198,9 +198,9 @@ ur_result_t enqueueMemCopyRectHelper( // PI interfaces must have queue's and buffer's mutexes locked on entry. static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, - ur_queue_handle_legacy_t Queue, - void *Ptr, const void *Pattern, - size_t PatternSize, size_t Size, + ur_queue_handle_t Queue, void *Ptr, + const void *Pattern, size_t PatternSize, + size_t Size, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { @@ -315,7 +315,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. - UR_CALL(urContextRetain(UrContext)); + UR_CALL(ur::level_zero::urContextRetain(UrContext)); } ZeStruct ZeDesc; @@ -337,7 +337,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr, // PI interfaces must have queue's and destination image's mutexes locked for // exclusive use and source image's mutex locked for shared use on entry. static ur_result_t enqueueMemImageCommandHelper( - ur_command_t CommandType, ur_queue_handle_legacy_t Queue, + ur_command_t CommandType, ur_queue_handle_t Queue, const void *Src, // image or ptr void *Dst, // image or ptr ur_bool_t IsBlocking, ur_rect_offset_t *SrcOrigin, @@ -474,7 +474,10 @@ static ur_result_t enqueueMemImageCommandHelper( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( +namespace ur::level_zero { + +ur_result_t urEnqueueMemBufferRead( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) size_t offset, ///< [in] offset in bytes in the buffer object @@ -492,7 +495,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = this; ur_mem_handle_t_ *Src = ur_cast(hBuffer); std::shared_lock SrcLock(Src->Mutex, std::defer_lock); @@ -508,7 +510,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferRead( true /* PreferCopyEngine */); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( +ur_result_t urEnqueueMemBufferWrite( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) @@ -528,7 +531,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -545,7 +547,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWrite( true /* PreferCopyEngine */); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( +ur_result_t urEnqueueMemBufferReadRect( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t bufferOffset, ///< [in] 3D offset in the buffer @@ -573,7 +576,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::shared_lock SrcLock(Buffer->Mutex, std::defer_lock); @@ -590,7 +592,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferReadRect( phEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( +ur_result_t urEnqueueMemBufferWriteRect( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) @@ -620,7 +623,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( *phEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; ur_mem_handle_t_ *Buffer = ur_cast(hBuffer); std::scoped_lock Lock(Queue->Mutex, @@ -637,7 +639,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferWriteRect( phEventWaitList, phEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( +ur_result_t urEnqueueMemBufferCopy( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the src buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object size_t SrcOffset, ///< [in] offset into hBufferSrc to begin copying from @@ -655,7 +658,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -688,9 +690,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferCopy( OutEvent, PreferCopyEngine); } -ur_result_t -ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the - ///< queue object +ur_result_t urEnqueueMemBufferCopyRect( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the source buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object ur_rect_offset_t SrcOrigin, ///< [in] 3D offset in the source buffer @@ -717,7 +718,6 @@ ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; _ur_buffer *SrcBuffer = ur_cast<_ur_buffer *>(BufferSrc); _ur_buffer *DstBuffer = ur_cast<_ur_buffer *>(BufferDst); @@ -748,11 +748,12 @@ ur_queue_handle_legacy_t_::enqueueMemBufferCopyRect( ///< [in] handle of the NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( - ur_mem_handle_t Buffer, ///< [in] handle of the buffer object - const void *Pattern, ///< [in] pointer to the fill pattern - size_t PatternSize, ///< [in] size in bytes of the pattern - size_t Offset, ///< [in] offset into the buffer +ur_result_t urEnqueueMemBufferFill( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Buffer, ///< [in] handle of the buffer object + const void *Pattern, ///< [in] pointer to the fill pattern + size_t PatternSize, ///< [in] size in bytes of the pattern + size_t Offset, ///< [in] offset into the buffer size_t Size, ///< [in] fill size in bytes, must be a multiple of patternSize uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -766,7 +767,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Buffer->Mutex); @@ -781,8 +781,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferFill( Size, NumEventsInWaitList, EventWaitList, OutEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t urEnqueueMemImageRead( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingRead, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in ///< the 1D, 2D, or 3D image @@ -803,7 +804,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -812,8 +812,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageRead( EventWaitList, OutEvent); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( - ur_mem_handle_t Image, ///< [in] handle of the image object +ur_result_t urEnqueueMemImageWrite( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingWrite, ///< [in] indicates blocking (true), non-blocking (false) ur_rect_offset_t Origin, ///< [in] defines the (x,y,z) offset in pixels in @@ -835,7 +836,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::scoped_lock Lock(Queue->Mutex, Image->Mutex); return enqueueMemImageCommandHelper( @@ -844,9 +844,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemImageWrite( EventWaitList, OutEvent); } -ur_result_t -ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of - ///< the queue object +ur_result_t urEnqueueMemImageCopy( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t ImageSrc, ///< [in] handle of the src image object ur_mem_handle_t ImageDst, ///< [in] handle of the dest image object ur_rect_offset_t SrcOrigin, ///< [in] defines the (x,y,z) offset in pixels @@ -867,7 +866,6 @@ ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::shared_lock SrcLock(ImageSrc->Mutex, std::defer_lock); std::scoped_lock, ur_shared_mutex, ur_shared_mutex> @@ -885,8 +883,9 @@ ur_queue_handle_legacy_t_::enqueueMemImageCopy( ///< [in] handle of NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( - ur_mem_handle_t Buf, ///< [in] handle of the buffer object +ur_result_t urEnqueueMemBufferMap( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + ur_mem_handle_t Buf, ///< [in] handle of the buffer object bool BlockingMap, ///< [in] indicates blocking (true), non-blocking (false) ur_map_flags_t MapFlags, ///< [in] flags for read, write, readwrite mapping size_t Offset, ///< [in] offset in bytes of the buffer region being mapped @@ -905,7 +904,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( void **RetMap ///< [in,out] return mapped pointer. TODO: move it before ///< numEventsInWaitList? ) { - auto Queue = this; auto Buffer = ur_cast<_ur_buffer *>(Buf); UR_ASSERT(!Buffer->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); @@ -964,10 +962,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); // Lock automatically releases when this goes out of scope. 
std::scoped_lock Guard(Buffer->Mutex); @@ -1053,7 +1051,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( +ur_result_t urEnqueueMemUnmap( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t Mem, ///< [in] handle of the memory (buffer or image) object void *MappedPtr, ///< [in] mapped host address uint32_t NumEventsInWaitList, ///< [in] size of the event wait list @@ -1068,7 +1067,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; UR_ASSERT(!Mem->isImage(), UR_RESULT_ERROR_INVALID_MEM_OBJECT); auto Buffer = ur_cast<_ur_buffer *>(Mem); @@ -1120,10 +1118,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); char *ZeHandleDst; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -1146,8 +1144,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - reinterpret_cast(Queue), CommandList, - UseCopyEngine, NumEventsInWaitList, EventWaitList)); + reinterpret_cast(Queue), CommandList, UseCopyEngine, + NumEventsInWaitList, EventWaitList)); CommandList->second.append(reinterpret_cast(*Event)); (*Event)->RefCount.increment(); @@ -1180,8 +1178,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( - bool Blocking, ///< [in] blocking or non-blocking copy +ur_result_t urEnqueueUSMMemcpy( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + bool Blocking, ///< [in] blocking or non-blocking copy void *Dst, ///< [in] pointer to the destination USM memory object const void *Src, ///< [in] pointer to the source USM memory object size_t Size, ///< [in] size in bytes to be copied @@ -1197,7 +1196,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::scoped_lock lock(Queue->Mutex); // Device to Device copies are found to execute slower on copy engine @@ -1219,7 +1217,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( +ur_result_t urEnqueueUSMPrefetch( + ur_queue_handle_t Queue, ///< [in] handle of the queue object const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be fetched ur_usm_migration_flags_t Flags, ///< [in] USM prefetch flags @@ -1235,7 +1234,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; std::ignore = Flags; // Lock automatically releases when this goes out of scope. 
std::scoped_lock lock(Queue->Mutex); @@ -1287,7 +1285,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMPrefetch( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( +ur_result_t urEnqueueUSMAdvise( + ur_queue_handle_t Queue, ///< [in] handle of the queue object const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be advised ur_usm_advice_flags_t Advice, ///< [in] USM memory advice @@ -1295,7 +1294,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - auto Queue = this; // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1345,8 +1343,9 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMAdvise( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( - void *Mem, ///< [in] pointer to memory to be filled. +ur_result_t urEnqueueUSMFill2D( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. + void *Mem, ///< [in] pointer to memory to be filled. size_t Pitch, ///< [in] the total width of the destination memory including ///< padding. size_t PatternSize, ///< [in] the size in bytes of the pattern. @@ -1364,6 +1363,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { + std::ignore = Queue; std::ignore = Mem; std::ignore = Pitch; std::ignore = PatternSize; @@ -1378,7 +1378,8 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill2D( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D( +ur_result_t urEnqueueUSMMemcpy2D( + ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. bool Blocking, ///< [in] indicates if this operation should block the host. void *Dst, ///< [in] pointer to memory where data will be copied. size_t DstPitch, ///< [in] the total width of the source memory including @@ -1399,7 +1400,6 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueUSMMemcpy2D( *Event ///< [in,out][optional] return an event object that identifies ///< this particular kernel execution instance. ) { - auto Queue = this; ur_rect_offset_t ZeroOffset{0, 0, 0}; ur_rect_region_t Region{Width, Height, 0}; @@ -1500,7 +1500,7 @@ static ur_result_t ur2zeImageDesc(const ur_image_format_t *ImageFormat, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( +ur_result_t urMemImageCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags const ur_image_format_t @@ -1549,7 +1549,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( +ur_result_t urMemImageCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. ur_context_handle_t Context, ///< [in] handle of the context object. 
[[maybe_unused]] const ur_image_format_t @@ -1577,7 +1577,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( +ur_result_t urMemBufferCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags size_t Size, ///< [in] size in bytes of the memory object to be allocated @@ -1671,14 +1671,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRetain( +ur_result_t urMemRetain( ur_mem_handle_t Mem ///< [in] handle of the memory object to get access ) { Mem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( +ur_result_t urMemRelease( ur_mem_handle_t Mem ///< [in] handle of the memory object to release ) { if (!Mem->RefCount.decrementAndTest()) @@ -1704,7 +1704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( +ur_result_t urMemBufferPartition( ur_mem_handle_t Buffer, ///< [in] handle of the buffer object to allocate from ur_mem_flags_t Flags, ///< [in] allocation and usage information flags @@ -1740,7 +1740,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( +ur_result_t urMemGetNativeHandle( ur_mem_handle_t Mem, ///< [in] handle of the mem. ur_device_handle_t, ///< [in] handle of the device. ur_native_handle_t @@ -1754,7 +1754,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( +ur_result_t urMemBufferCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. ur_context_handle_t Context, ///< [in] handle of the context object. const ur_mem_native_properties_t @@ -1821,7 +1821,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ContextsLock.lock(); // Retain context to be sure that it is released after all memory // allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); Context->MemAllocs.emplace(std::piecewise_construct, std::forward_as_tuple(Ptr), @@ -1857,7 +1857,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( +ur_result_t urMemGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the memory object being queried. ur_mem_info_t MemInfoType, ///< [in] type of the info to retrieve. size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -1893,7 +1893,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( +ur_result_t urMemImageGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the image object being queried. ur_image_info_t ImgInfoType, ///< [in] type of image info to retrieve. 
size_t PropSize, ///< [in] the number of bytes of memory pointer to by @@ -1916,6 +1916,79 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ur_result_t urEnqueueUSMFill( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + void *Ptr, ///< [in] pointer to USM memory object + size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a + ///< power of 2 and less than or equal to width. + const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. + size_t Size, ///< [in] size in bytes to be set. Must be a multiple of + ///< patternSize. + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t *Event ///< [out][optional] return an event object that + ///< identifies this particular command instance. +) { + std::scoped_lock Lock(Queue->Mutex); + + return enqueueMemFillHelper( + // TODO: do we need a new command type for USM memset? + UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr, + Pattern, // It will be interpreted as an 8-bit value, + PatternSize, // which is indicated with this pattern_size==1 + Size, NumEventsInWaitList, EventWaitList, Event); +} + +/// Host Pipes +ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pDst; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pSrc; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + // If indirect access tracking is enabled then performs reference counting, // otherwise just calls zeMemAllocDevice. static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, @@ -1935,7 +2008,7 @@ static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. 
- UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } ze_device_mem_alloc_desc_t ZeDesc = {}; @@ -1995,8 +2068,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMHostAlloc( + UrContext, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2054,8 +2128,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMDeviceAlloc(UrContext, Device, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMDeviceAlloc( + UrContext, Device, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { Allocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeDeviceMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2118,8 +2193,8 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL( - urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, &ZeHandleHost)); + UR_CALL(ur::level_zero::urUSMHostAlloc(UrContext, &USMDesc, Pool, + Size, &ZeHandleHost)); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(&ZeHandleHost, UrContext, Size)); @@ -2301,66 +2376,3 @@ size_t _ur_buffer::getAlignment() const { Alignment = 1UL; return Alignment; } - -ur_result_t ur_queue_handle_legacy_t_::enqueueUSMFill( - void *Ptr, ///< [in] pointer to USM memory object - size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a - ///< power of 2 and less than or equal to width. - const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. - size_t Size, ///< [in] size in bytes to be set. Must be a multiple of - ///< patternSize. - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t * - EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that this - ///< command does not wait on any event to complete. - ur_event_handle_t *Event ///< [out][optional] return an event object that - ///< identifies this particular command instance. -) { - auto Queue = this; - std::scoped_lock Lock(Queue->Mutex); - - return enqueueMemFillHelper( - // TODO: do we need a new command type for USM memset? 
- UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr, - Pattern, // It will be interpreted as an 8-bit value, - PatternSize, // which is indicated with this pattern_size==1 - Size, NumEventsInWaitList, EventWaitList, Event); -} - -/// Host Pipes -ur_result_t ur_queue_handle_legacy_t_::enqueueReadHostPipe( - ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, - void *pDst, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hProgram; - std::ignore = pipe_symbol; - std::ignore = blocking; - std::ignore = pDst; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t ur_queue_handle_legacy_t_::enqueueWriteHostPipe( - ur_program_handle_t hProgram, const char *pipe_symbol, bool blocking, - void *pSrc, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hProgram; - std::ignore = pipe_symbol; - std::ignore = blocking; - std::ignore = pSrc; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 43d548f16b..71d102e9dd 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -20,15 +20,12 @@ #include #include -#include +#include #include #include #include "ur_level_zero.hpp" -struct ur_queue_handle_legacy_t_; -using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *; - struct ur_device_handle_t_; bool IsDevicePointer(ur_context_handle_t Context, const void *Ptr); @@ -48,7 +45,7 @@ const bool UseCopyEngineForD2DCopy = [] { // PI interfaces must have queue's and destination buffer's mutexes locked for // exclusive use and source buffer's mutex locked for shared use on entry. 
ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, - ur_queue_handle_legacy_t Queue, void *Dst, + ur_queue_handle_t Queue, void *Dst, ur_bool_t BlockingWrite, size_t Size, const void *Src, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, @@ -56,13 +53,12 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, bool PreferCopyEngine); ur_result_t enqueueMemCopyRectHelper( - ur_command_t CommandType, ur_queue_handle_legacy_t Queue, - const void *SrcBuffer, void *DstBuffer, ur_rect_offset_t SrcOrigin, - ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, - size_t DstRowPitch, size_t SrcSlicePitch, size_t DstSlicePitch, - ur_bool_t Blocking, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent, - bool PreferCopyEngine = false); + ur_command_t CommandType, ur_queue_handle_t Queue, const void *SrcBuffer, + void *DstBuffer, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, + ur_rect_region_t Region, size_t SrcRowPitch, size_t DstRowPitch, + size_t SrcSlicePitch, size_t DstSlicePitch, ur_bool_t Blocking, + uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent, bool PreferCopyEngine = false); struct ur_mem_handle_t_ : _ur_object { // Keeps the PI context of this memory handle. diff --git a/source/adapters/level_zero/physical_mem.cpp b/source/adapters/level_zero/physical_mem.cpp index d4d9792f24..e7bb498859 100644 --- a/source/adapters/level_zero/physical_mem.cpp +++ b/source/adapters/level_zero/physical_mem.cpp @@ -14,7 +14,9 @@ #include "device.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( +namespace ur::level_zero { + +ur_result_t urPhysicalMemCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, [[maybe_unused]] const ur_physical_mem_properties_t *pProperties, ur_physical_mem_handle_t *phPhysicalMem) { @@ -35,14 +37,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { hPhysicalMem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { if (!hPhysicalMem->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -52,3 +52,4 @@ urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 68aebf97c7..721db3c359 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -12,7 +12,9 @@ #include "adapter.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( +namespace ur::level_zero { + +ur_result_t urPlatformGet( ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phPlatforms. 
If phPlatforms is not NULL, then @@ -47,7 +49,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( +ur_result_t urPlatformGetInfo( ur_platform_handle_t Platform, ///< [in] handle of the platform ur_platform_info_t ParamName, ///< [in] type of the info to retrieve size_t Size, ///< [in] the number of bytes pointed to by pPlatformInfo. @@ -101,7 +103,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( +ur_result_t urPlatformGetApiVersion( ur_platform_handle_t Driver, ///< [in] handle of the platform ur_api_version_t *Version ///< [out] api version ) { @@ -110,7 +112,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( +ur_result_t urPlatformGetNativeHandle( ur_platform_handle_t Platform, ///< [in] handle of the platform. ur_native_handle_t *NativePlatform ///< [out] a pointer to the native ///< handle of the platform. @@ -120,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( +ur_result_t urPlatformCreateWithNativeHandle( ur_native_handle_t NativePlatform, ///< [in] the native handle of the platform. ur_adapter_handle_t, @@ -135,12 +137,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( uint32_t NumPlatforms = 0; ur_adapter_handle_t AdapterHandle = GlobalAdapter; - UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, 0, nullptr, + &NumPlatforms)); if (NumPlatforms) { std::vector Platforms(NumPlatforms); - UR_CALL(urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), - nullptr)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumPlatforms, + Platforms.data(), nullptr)); // The SYCL spec requires that the set of platforms must remain fixed for // the duration of the application's execution. We assume that we found all @@ -158,6 +161,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( return UR_RESULT_ERROR_INVALID_VALUE; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-ze-opt-disable' for frontend_option = -O0. +// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. +// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for +// frontend_option=-ftarget-compile-fast. +ur_result_t urPlatformGetBackendOption( + ur_platform_handle_t Platform, ///< [in] handle of the platform instance. + const char *FrontendOption, ///< [in] string containing the frontend option. + const char * + *PlatformOption ///< [out] returns the correct platform specific + ///< compiler option based on the frontend option. 
+) { + std::ignore = Platform; + using namespace std::literals; + if (FrontendOption == nullptr) { + return UR_RESULT_SUCCESS; + } + if (FrontendOption == ""sv) { + *PlatformOption = ""; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O0"sv) { + *PlatformOption = "-ze-opt-disable"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || + FrontendOption == "-O3"sv) { + *PlatformOption = "-ze-opt-level=2"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-ftarget-compile-fast"sv) { + *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; +} + +} // namespace ur::level_zero + ur_result_t ur_platform_handle_t_::initialize() { ZE2UR_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion)); ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." + @@ -513,41 +556,3 @@ ur_device_handle_t ur_platform_handle_t_::getDeviceById(DeviceId id) { } return nullptr; } - -// Returns plugin specific backend option. -// Current support is only for optimization options. -// Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. -// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for -// frontend_option=-ftarget-compile-fast. -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( - ur_platform_handle_t Platform, ///< [in] handle of the platform instance. - const char *FrontendOption, ///< [in] string containing the frontend option. - const char * - *PlatformOption ///< [out] returns the correct platform specific - ///< compiler option based on the frontend option. -) { - std::ignore = Platform; - using namespace std::literals; - if (FrontendOption == nullptr) { - return UR_RESULT_SUCCESS; - } - if (FrontendOption == ""sv) { - *PlatformOption = ""; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O0"sv) { - *PlatformOption = "-ze-opt-disable"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || - FrontendOption == "-O3"sv) { - *PlatformOption = "-ze-opt-level=2"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-ftarget-compile-fast"sv) { - *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; - return UR_RESULT_SUCCESS; - } - return UR_RESULT_ERROR_INVALID_VALUE; -} diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index a6d34ccb23..02aef2d058 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -11,6 +11,7 @@ #include "program.hpp" #include "device.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #ifdef UR_ADAPTER_LEVEL_ZERO_V2 #include "v2/context.hpp" @@ -54,7 +55,9 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule, } } // extern "C" -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( +namespace ur::level_zero { + +ur_result_t urProgramCreateWithIL( ur_context_handle_t Context, ///< [in] handle of the context instance const void *IL, ///< [in] pointer to IL binary. size_t Length, ///< [in] length of `pIL` in bytes. 
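
A minimal caller-side sketch of the frontend-to-backend option mapping documented in the hunk above. This is illustrative only, not part of the patch: checkO0Mapping and the Platform argument are hypothetical, and it assumes, as the includes added elsewhere in this patch suggest, that ur_interface_loader.hpp declares the ur::level_zero entry points.

#include <cassert>
#include <string_view>

#include "ur_api.h"
#include "ur_interface_loader.hpp" // assumed to declare ur::level_zero::*

// Hypothetical helper: checks the documented mapping of "-O0" to the
// Level Zero compiler option "-ze-opt-disable".
static void checkO0Mapping(ur_platform_handle_t Platform) {
  const char *BackendOption = nullptr;
  ur_result_t Res = ur::level_zero::urPlatformGetBackendOption(
      Platform, "-O0", &BackendOption);
  assert(Res == UR_RESULT_SUCCESS);
  assert(std::string_view(BackendOption) == "-ze-opt-disable");
}
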
@@ -79,7 +82,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( +ur_result_t urProgramCreateWithBinary( ur_context_handle_t Context, ///< [in] handle of the context instance ur_device_handle_t Device, ///< [in] handle to device associated with binary. @@ -115,17 +118,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( +ur_result_t urProgramBuild( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in] Handle of the program to build. const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. ) { std::vector Devices = Context->getDevices(); - return urProgramBuildExp(Program, Devices.size(), Devices.data(), Options); + return ur::level_zero::urProgramBuildExp(Program, Devices.size(), + Devices.data(), Options); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( +ur_result_t urProgramBuildExp( ur_program_handle_t hProgram, ///< [in] Handle of the program to build. uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -228,7 +232,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( +ur_result_t urProgramCompileExp( ur_program_handle_t hProgram, ///< [in][out] handle of the program to compile. uint32_t numDevices, ///< [in] number of devices @@ -239,10 +243,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( ) { std::ignore = numDevices; std::ignore = phDevices; - return urProgramCompile(hProgram->Context, hProgram, pOptions); + return ur::level_zero::urProgramCompile(hProgram->Context, hProgram, + pOptions); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( +ur_result_t urProgramCompile( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in][out] handle of the program to compile. @@ -281,7 +286,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( +ur_result_t urProgramLink( ur_context_handle_t Context, ///< [in] handle of the context instance. uint32_t Count, ///< [in] number of program handles in `phPrograms`. const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to @@ -292,11 +297,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( *Program ///< [out] pointer to handle of program object created. ) { std::vector Devices = Context->getDevices(); - return urProgramLinkExp(Context, Devices.size(), Devices.data(), Count, - Programs, Options, Program); + return ur::level_zero::urProgramLinkExp(Context, Devices.size(), + Devices.data(), Count, Programs, + Options, Program); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( +ur_result_t urProgramLinkExp( ur_context_handle_t hContext, ///< [in] handle of the context instance. 
uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -482,14 +488,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( return UrResult; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain( +ur_result_t urProgramRetain( ur_program_handle_t Program ///< [in] handle for the Program to retain ) { Program->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease( +ur_result_t urProgramRelease( ur_program_handle_t Program ///< [in] handle for the Program to release ) { if (!Program->RefCount.decrementAndTest()) @@ -526,7 +532,7 @@ static bool is_in_separated_string(const std::string &str, char delimiter, return false; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( +ur_result_t urProgramGetFunctionPointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve pointer for. ur_program_handle_t @@ -566,12 +572,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( if (ZeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) { size_t Size; *FunctionPointerRet = 0; - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, - &Size)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size)); std::string ClResult(Size, ' '); - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, - ClResult.size(), &ClResult[0], nullptr)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, ClResult.size(), &ClResult[0], + nullptr)); // Get rid of the null terminator and search for kernel_name // If function can be found return error code to indicate it @@ -591,7 +598,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( +ur_result_t urProgramGetGlobalVariablePointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve the pointer for. ur_program_handle_t @@ -626,7 +633,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( +ur_result_t urProgramGetInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_program_info_t PropName, ///< [in] name of the Program property to query size_t PropSize, ///< [in] the size of the Program property. 
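
The urProgramGetFunctionPointer fallback above uses the usual two-step size/data query against UR_PROGRAM_INFO_KERNEL_NAMES. A minimal sketch of that pattern follows; getKernelNames is a hypothetical helper, error handling is reduced to early returns, and the same assumption is made that ur_interface_loader.hpp provides the namespaced declarations.

#include <string>

#include "ur_api.h"
#include "ur_interface_loader.hpp" // assumed to declare ur::level_zero::*

// Hypothetical helper: fetch the semicolon-separated kernel-name list for a
// program via a size query followed by a data query, mirroring the fallback
// path in urProgramGetFunctionPointer above.
static std::string getKernelNames(ur_program_handle_t Program) {
  size_t Size = 0;
  if (ur::level_zero::urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES,
                                       0, nullptr, &Size) != UR_RESULT_SUCCESS)
    return {};
  std::string Names(Size, ' ');
  if (ur::level_zero::urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES,
                                       Names.size(), &Names[0],
                                       nullptr) != UR_RESULT_SUCCESS)
    return {};
  if (!Names.empty())
    Names.pop_back(); // drop the trailing null terminator, as noted above
  return Names;
}
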
@@ -818,7 +825,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( +ur_result_t urProgramGetBuildInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_device_handle_t Device, ///< [in] handle of the Device object ur_program_build_info_t @@ -898,7 +905,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( +ur_result_t urProgramSetSpecializationConstant( ur_program_handle_t Program, ///< [in] handle of the Program object uint32_t SpecId, ///< [in] specification constant Id size_t SpecSize, ///< [in] size of the specialization constant value @@ -913,7 +920,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( +ur_result_t urProgramGetNativeHandle( ur_program_handle_t Program, ///< [in] handle of the program. ur_native_handle_t *NativeProgram ///< [out] a pointer to the native ///< handle of the program. @@ -934,7 +941,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( +ur_result_t urProgramCreateWithNativeHandle( ur_native_handle_t NativeProgram, ///< [in] the native handle of the program. ur_context_handle_t Context, ///< [in] handle of the context instance @@ -966,6 +973,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t Program, ///< [in] handle of the Program object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in][range(0, count)] array of specialization + ///< constant value descriptions +) { + std::scoped_lock Guard(Program->Mutex); + + // Remember the value of this specialization constant until the program is + // built. Note that we only save the pointer to the buffer that contains the + // value. The caller is responsible for maintaining storage for this buffer. + // + // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by + // SpecID. + for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) { + uint32_t SpecId = SpecConstants[SpecIt].id; + Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue; + } + return UR_RESULT_SUCCESS; +} + +} // namespace ur::level_zero + ur_program_handle_t_::~ur_program_handle_t_() { if (!resourcesReleased) { ur_release_program_resources(true); @@ -1000,25 +1031,3 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) { resourcesReleased = true; } } - -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( - ur_program_handle_t Program, ///< [in] handle of the Program object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in][range(0, count)] array of specialization - ///< constant value descriptions -) { - std::scoped_lock Guard(Program->Mutex); - - // Remember the value of this specialization constant until the program is - // built. Note that we only save the pointer to the buffer that contains the - // value. The caller is responsible for maintaining storage for this buffer. 
- // - // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by - // SpecID. - for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) { - uint32_t SpecId = SpecConstants[SpecIt].id; - Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue; - } - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 2845120113..9757dad74f 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -19,7 +19,7 @@ #include "common.hpp" #include "event.hpp" #include "queue.hpp" -#include "ur_api.h" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" #include "ze_api.h" @@ -99,7 +99,7 @@ bool ur_completion_batch::checkComplete() { return st == COMPLETED; } -ur_result_t ur_completion_batch::seal(ur_queue_handle_legacy_t queue, +ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist) { assert(st == ACCUMULATING); @@ -187,7 +187,7 @@ ur_completion_batches::ur_completion_batches() { } ur_result_t ur_completion_batches::tryCleanup( - ur_queue_handle_legacy_t queue, ze_command_list_handle_t cmdlist, + ur_queue_handle_t queue, ze_command_list_handle_t cmdlist, std::vector &events, std::vector &EventListToCleanup) { cleanup(events, EventListToCleanup); @@ -229,7 +229,7 @@ void ur_completion_batches::forceReset() { /// the call, in case of in-order queue it allows to cleanup all preceding /// events. /// @return PI_SUCCESS if successful, PI error code otherwise. -ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, +ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked, bool QueueSynced, ur_event_handle_t CompletedEvent) { // Handle only immediate command lists here. @@ -303,7 +303,7 @@ ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, /// @param Queue Queue where we look for signalled command lists and cleanup /// events. /// @return PI_SUCCESS if successful, PI error code otherwise. -ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) { +ur_result_t resetCommandLists(ur_queue_handle_t Queue) { // Handle immediate command lists here, they don't need to be reset and we // only need to cleanup events. if (Queue->UsingImmCmdLists) { @@ -342,7 +342,10 @@ ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue) { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueGetInfo( +namespace ur::level_zero { + +ur_result_t urQueueGetInfo( + ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_queue_info_t ParamName, ///< [in] name of the queue property to query size_t ParamValueSize, ///< [in] size in bytes of the queue property value ///< provided @@ -350,8 +353,6 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetInfo( size_t *ParamValueSizeRet ///< [out] size in bytes returned in queue ///< property value ) { - auto Queue = this; - std::shared_lock Lock(Queue->Mutex); UrReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); // TODO: consider support for queue properties and size @@ -467,7 +468,7 @@ static bool doEagerInit = [] { return EagerInit ? 
std::atoi(EagerInit) != 0 : false; }(); -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( +ur_result_t urQueueCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_queue_properties_t @@ -502,7 +503,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // Create placeholder queues in the compute queue group. // Actual L0 queues will be created at first use. std::vector ZeComputeCommandQueues( - Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::Compute] + Device->QueueGroup[ur_queue_handle_t_::queue_type::Compute] .ZeProperties.numQueues, nullptr); @@ -512,21 +513,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( size_t NumCopyGroups = 0; if (Device->hasMainCopyEngine()) { NumCopyGroups += - Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::MainCopy] + Device->QueueGroup[ur_queue_handle_t_::queue_type::MainCopy] .ZeProperties.numQueues; } if (Device->hasLinkCopyEngine()) { NumCopyGroups += - Device->QueueGroup[ur_queue_handle_legacy_t_::queue_type::LinkCopy] + Device->QueueGroup[ur_queue_handle_t_::queue_type::LinkCopy] .ZeProperties.numQueues; } std::vector ZeCopyCommandQueues(NumCopyGroups, nullptr); try { - *Queue = new ur_queue_handle_legacy_t_(ZeComputeCommandQueues, - ZeCopyCommandQueues, Context, Device, - true, Flags, ForceComputeIndex); + *Queue = + new ur_queue_handle_t_(ZeComputeCommandQueues, ZeCopyCommandQueues, + Context, Device, true, Flags, ForceComputeIndex); } catch (const std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -535,7 +536,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( // Do eager initialization of Level Zero handles on request. if (doEagerInit) { - ur_queue_handle_legacy_t Q = Legacy(*Queue); + auto Q = *Queue; // Creates said number of command-lists. auto warmupQueueGroup = [Q](bool UseCopyEngine, uint32_t RepeatCount) -> ur_result_t { @@ -576,9 +577,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueRetain() { - auto Queue = this; - +ur_result_t urQueueRetain( + ur_queue_handle_t Queue ///< [in] handle of the queue object to get access +) { { std::scoped_lock Lock(Queue->Mutex); Queue->RefCountExternal++; @@ -587,9 +588,9 @@ ur_result_t ur_queue_handle_legacy_t_::queueRetain() { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueRelease() { - auto Queue = this; - +ur_result_t urQueueRelease( + ur_queue_handle_t Queue ///< [in] handle of the queue object to release +) { std::vector EventListToCleanup; { std::scoped_lock Lock(Queue->Mutex); @@ -690,13 +691,12 @@ ur_result_t ur_queue_handle_legacy_t_::queueRelease() { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle( +ur_result_t urQueueGetNativeHandle( + ur_queue_handle_t Queue, ///< [in] handle of the queue. ur_queue_native_desc_t *Desc, ur_native_handle_t *NativeQueue ///< [out] a pointer to the native handle of the queue. ) { - auto Queue = this; - // Lock automatically releases when this goes out of scope. std::shared_lock lock(Queue->Mutex); @@ -728,24 +728,7 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle( return UR_RESULT_SUCCESS; } -void ur_queue_handle_legacy_t_::ur_queue_group_t::setImmCmdList( - ur_queue_handle_legacy_t queue, ze_command_list_handle_t ZeCommandList) { - // An immediate command list was given to us but we don't have the queue - // descriptor information. 
Create a dummy and note that it is not recycleable. - ZeStruct ZeQueueDesc; - - ImmCmdLists = std::vector( - 1, - Queue->CommandListMap - .insert(std::pair{ - ZeCommandList, - ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc, - queue->useCompletionBatching(), false, - false, true)}) - .first); -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( +ur_result_t urQueueCreateWithNativeHandle( ur_native_handle_t NativeQueue, ///< [in] the native handle of the queue. ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, /// @@ -785,12 +768,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( uint32_t NumEntries = 1; ur_platform_handle_t Platform{}; ur_adapter_handle_t AdapterHandle = GlobalAdapter; - UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumEntries, + &Platform, nullptr)); ur_device_handle_t UrDevice = Device; if (UrDevice == nullptr) { - UR_CALL(urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, NumEntries, &UrDevice, - nullptr)); + UR_CALL(ur::level_zero::urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, + NumEntries, &UrDevice, nullptr)); } // The NativeHandleDesc has value if if the native handle is an immediate @@ -800,7 +784,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector CopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { @@ -808,9 +792,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( } catch (...) { return UR_RESULT_ERROR_UNKNOWN; } - auto &InitialGroup = - Legacy(*RetQueue)->ComputeQueueGroupsByTID.begin()->second; - InitialGroup.setImmCmdList(Legacy(*RetQueue), + auto &InitialGroup = (*RetQueue)->ComputeQueueGroupsByTID.begin()->second; + InitialGroup.setImmCmdList(*RetQueue, ur_cast(NativeQueue)); } else { auto ZeQueue = ur_cast(NativeQueue); @@ -823,7 +806,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( std::vector ZeroCopyQueues; try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( + ur_queue_handle_t_ *Queue = new ur_queue_handle_t_( ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags); *RetQueue = reinterpret_cast(Queue); } catch (const std::bad_alloc &) { @@ -832,13 +815,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( return UR_RESULT_ERROR_UNKNOWN; } } - Legacy(*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1); + (*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1); return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueFinish() { - auto Queue = this; +ur_result_t urQueueFinish( + ur_queue_handle_t Queue ///< [in] handle of the queue to be finished. +) { if (Queue->UsingImmCmdLists) { // Lock automatically releases when this goes out of scope. std::scoped_lock Lock(Queue->Mutex); @@ -903,12 +887,38 @@ ur_result_t ur_queue_handle_legacy_t_::queueFinish() { return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::queueFlush() { - auto Queue = this; +ur_result_t urQueueFlush( + ur_queue_handle_t Queue ///< [in] handle of the queue to be flushed. 
+) { std::scoped_lock Lock(Queue->Mutex); return Queue->executeAllOpenCommandLists(); } +ur_result_t urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numPropsInLaunchPropList; + std::ignore = launchPropList; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + + logger::error("[UR][L0] {} function not implemented!", + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + // Configuration of the command-list batching. struct zeCommandListBatchConfig { // Default value of 0. This specifies to use dynamic batch size adjustment. @@ -1063,7 +1073,7 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] { return ZeCommandListBatchConfig(IsCopy{true}); }(); -ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( +ur_queue_handle_t_::ur_queue_handle_t_( std::vector &ComputeQueues, std::vector &CopyQueues, ur_context_handle_t Context, ur_device_handle_t Device, @@ -1089,8 +1099,8 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( // First, see if the queue's device allows for round-robin or it is // fixed to one particular compute CCS (it is so for sub-sub-devices). auto &ComputeQueueGroupInfo = Device->QueueGroup[queue_type::Compute]; - ur_queue_group_t ComputeQueueGroup{ - reinterpret_cast(this), queue_type::Compute}; + ur_queue_group_t ComputeQueueGroup{reinterpret_cast(this), + queue_type::Compute}; ComputeQueueGroup.ZeQueues = ComputeQueues; // Create space to hold immediate commandlists corresponding to the // ZeQueues @@ -1136,8 +1146,8 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( ComputeQueueGroupsByTID.set(ComputeQueueGroup); // Copy group initialization. - ur_queue_group_t CopyQueueGroup{ - reinterpret_cast(this), queue_type::MainCopy}; + ur_queue_group_t CopyQueueGroup{reinterpret_cast(this), + queue_type::MainCopy}; const auto &Range = getRangeOfAllowedCopyEngines((ur_device_handle_t)Device); if (Range.first < 0 || Range.second < 0) { // We are asked not to use copy engines, just do nothing. @@ -1182,7 +1192,7 @@ ur_queue_handle_legacy_t_::ur_queue_handle_legacy_t_( Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound; } -void ur_queue_handle_legacy_t_::adjustBatchSizeForFullBatch(bool IsCopy) { +void ur_queue_handle_t_::adjustBatchSizeForFullBatch(bool IsCopy) { auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch; auto &ZeCommandListBatchConfig = IsCopy ? ZeCommandListBatchCopyConfig : ZeCommandListBatchComputeConfig; @@ -1209,7 +1219,7 @@ void ur_queue_handle_legacy_t_::adjustBatchSizeForFullBatch(bool IsCopy) { } } -void ur_queue_handle_legacy_t_::adjustBatchSizeForPartialBatch(bool IsCopy) { +void ur_queue_handle_t_::adjustBatchSizeForPartialBatch(bool IsCopy) { auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch; auto &ZeCommandListBatchConfig = IsCopy ? 
ZeCommandListBatchCopyConfig : ZeCommandListBatchComputeConfig; @@ -1235,14 +1245,15 @@ void ur_queue_handle_legacy_t_::adjustBatchSizeForPartialBatch(bool IsCopy) { } } -ur_result_t ur_queue_handle_legacy_t_::executeCommandList( - ur_command_list_ptr_t CommandList, bool IsBlocking, bool OKToBatchCommand) { +ur_result_t +ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList, + bool IsBlocking, bool OKToBatchCommand) { // Do nothing if command list is already closed. if (CommandList->second.IsClosed) return UR_RESULT_SUCCESS; - bool UseCopyEngine = CommandList->second.isCopy( - reinterpret_cast(this)); + bool UseCopyEngine = + CommandList->second.isCopy(reinterpret_cast(this)); // If the current LastCommandEvent is the nullptr, then it means // either that no command has ever been issued to the queue @@ -1349,7 +1360,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList( // ur_event_handle_t HostVisibleEvent; auto Res = createEventAndAssociateQueue( - reinterpret_cast(this), &HostVisibleEvent, + reinterpret_cast(this), &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, /* IsInternal */ false, /* IsMultiDevice */ true, /* HostVisible */ true); @@ -1473,12 +1484,12 @@ ur_result_t ur_queue_handle_legacy_t_::executeCommandList( return UR_RESULT_SUCCESS; } -bool ur_queue_handle_legacy_t_::doReuseDiscardedEvents() { +bool ur_queue_handle_t_::doReuseDiscardedEvents() { return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents(); } -ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent( - ur_command_list_ptr_t CommandList) { +ur_result_t +ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) { if (LastCommandEvent && LastCommandEvent->IsDiscarded) { ZE2UR_CALL(zeCommandListAppendBarrier, (CommandList->first, nullptr, 1, &(LastCommandEvent->ZeEvent))); @@ -1511,8 +1522,7 @@ ur_result_t ur_queue_handle_legacy_t_::resetDiscardedEvent( return UR_RESULT_SUCCESS; } -ur_result_t -ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) { +ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) { if (!Event->IsMultiDevice) { auto EventCachesMap = Event->isHostVisible() ? 
&EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
@@ -1528,19 +1538,19 @@ ur_queue_handle_legacy_t_::addEventToQueueCache(ur_event_handle_t Event) {
return UR_RESULT_SUCCESS;
}
-void ur_queue_handle_legacy_t_::active_barriers::add(ur_event_handle_t &Event) {
+void ur_queue_handle_t_::active_barriers::add(ur_event_handle_t &Event) {
Event->RefCount.increment();
Events.push_back(Event);
}
-ur_result_t ur_queue_handle_legacy_t_::active_barriers::clear() {
+ur_result_t ur_queue_handle_t_::active_barriers::clear() {
for (const auto &Event : Events)
UR_CALL(urEventReleaseInternal(Event));
Events.clear();
return UR_RESULT_SUCCESS;
}
-void ur_queue_handle_legacy_t_::clearEndTimeRecordings() {
+void ur_queue_handle_t_::clearEndTimeRecordings() {
uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution;
const uint64_t TimestampMaxValue = Device->getTimestampMask();
@@ -1567,7 +1577,7 @@ void ur_queue_handle_legacy_t_::clearEndTimeRecordings() {
EndTimeRecordings.clear();
}
-ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) {
+ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
if (!Queue->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;
@@ -1606,33 +1616,33 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue) {
return UR_RESULT_SUCCESS;
}
-bool ur_queue_handle_legacy_t_::isBatchingAllowed(bool IsCopy) const {
+bool ur_queue_handle_t_::isBatchingAllowed(bool IsCopy) const {
auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch;
return (CommandBatch.QueueBatchSize > 0 &&
((UrL0Serialize & UrL0SerializeBlock) == 0));
}
-bool ur_queue_handle_legacy_t_::isDiscardEvents() const {
+bool ur_queue_handle_t_::isDiscardEvents() const {
return ((this->Properties & UR_QUEUE_FLAG_DISCARD_EVENTS) != 0);
}
-bool ur_queue_handle_legacy_t_::isPriorityLow() const {
+bool ur_queue_handle_t_::isPriorityLow() const {
return ((this->Properties & UR_QUEUE_FLAG_PRIORITY_LOW) != 0);
}
-bool ur_queue_handle_legacy_t_::isPriorityHigh() const {
+bool ur_queue_handle_t_::isPriorityHigh() const {
return ((this->Properties & UR_QUEUE_FLAG_PRIORITY_HIGH) != 0);
}
-bool ur_queue_handle_legacy_t_::isBatchedSubmission() const {
+bool ur_queue_handle_t_::isBatchedSubmission() const {
return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_BATCHED) != 0);
}
-bool ur_queue_handle_legacy_t_::isImmediateSubmission() const {
+bool ur_queue_handle_t_::isImmediateSubmission() const {
return ((this->Properties & UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE) != 0);
}
-bool ur_queue_handle_legacy_t_::isInOrderQueue() const {
+bool ur_queue_handle_t_::isInOrderQueue() const {
// If out-of-order queue property is not set, then this is an in-order queue.
return ((this->Properties & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) ==
0);
@@ -1662,11 +1672,11 @@ ur_result_t CleanupEventListFromResetCmdList(
// TODO: Event release in immediate commandlist mode is driven by the SYCL
// runtime. Need to investigate whether release can be done earlier, at sync
// points such as this, to reduce total number of active Events.
-ur_result_t ur_queue_handle_legacy_t_::synchronize() { +ur_result_t ur_queue_handle_t_::synchronize() { if (!Healthy) return UR_RESULT_SUCCESS; - auto syncImmCmdList = [](ur_queue_handle_legacy_t_ *Queue, + auto syncImmCmdList = [](ur_queue_handle_t_ *Queue, ur_command_list_ptr_t ImmCmdList) { if (ImmCmdList == Queue->CommandListMap.end()) return UR_RESULT_SUCCESS; @@ -1757,9 +1767,8 @@ ur_result_t ur_queue_handle_legacy_t_::synchronize() { return UR_RESULT_SUCCESS; } -ur_event_handle_t -ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice, - bool HostVisible) { +ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice, + bool HostVisible) { std::list *Cache; if (!IsMultiDevice) { @@ -1791,7 +1800,7 @@ ur_queue_handle_legacy_t_::getEventFromQueueCache(bool IsMultiDevice, // at the end of a command list batch. This will only be true if the event does // not have dependencies or the dependencies are not for events which exist in // this batch. -bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +bool eventCanBeBatched(ur_queue_handle_t Queue, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList) { auto &CommandBatch = @@ -1821,7 +1830,7 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // dependencies, then this command can be enqueued without a signal event set in // a command list batch. The signal event will be appended at the end of the // batch to be signalled at the end of the command list. -ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine, ze_event_handle_t *ZeEvent, ur_event_handle_t *Event, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, @@ -1852,7 +1861,7 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // visible pool. // \param HostVisible tells if the event must be created in the // host-visible pool. If not set then this function will decide. -ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, +ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType, ur_command_list_ptr_t CommandList, @@ -1908,12 +1917,12 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, // event will not be waited/released by SYCL RT, so it must be destroyed by // EventRelease in resetCommandList. if (!IsInternal) - UR_CALL(urEventRetain(*Event)); + UR_CALL(ur::level_zero::urEventRetain(*Event)); return UR_RESULT_SUCCESS; } -void ur_queue_handle_legacy_t_::CaptureIndirectAccesses() { +void ur_queue_handle_t_::CaptureIndirectAccesses() { for (auto &Kernel : KernelsToBeSubmitted) { if (!Kernel->hasIndirectAccess()) continue; @@ -1937,8 +1946,7 @@ void ur_queue_handle_legacy_t_::CaptureIndirectAccesses() { KernelsToBeSubmitted.clear(); } -ur_result_t -ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded( +ur_result_t ur_queue_handle_t_::signalEventFromCmdListIfLastEventDiscarded( ur_command_list_ptr_t CommandList) { // We signal new event at the end of command list only if we have queue with // discard_events property and the last command event is discarded. @@ -1952,7 +1960,7 @@ ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded( // from the host. 
ur_event_handle_t Event;
UR_CALL(createEventAndAssociateQueue(
- reinterpret_cast(this), &Event,
+ reinterpret_cast(this), &Event,
UR_EXT_COMMAND_TYPE_USER, CommandList,
/* IsInternal */ false, /* IsMultiDevice */ true,
/* HostVisible */ false));
@@ -1964,7 +1972,7 @@ ur_queue_handle_legacy_t_::signalEventFromCmdListIfLastEventDiscarded(
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::executeOpenCommandList(bool IsCopy) {
+ur_result_t ur_queue_handle_t_::executeOpenCommandList(bool IsCopy) {
auto &CommandBatch = IsCopy ? CopyCommandBatch : ComputeCommandBatch;
// If there are any commands still in the open command list for this
// queue, then close and execute that command list now.
@@ -1978,7 +1986,7 @@ ur_result_t ur_queue_handle_legacy_t_::executeOpenCommandList(bool IsCopy) {
return UR_RESULT_SUCCESS;
}
-ur_result_t ur_queue_handle_legacy_t_::resetCommandList(
+ur_result_t ur_queue_handle_t_::resetCommandList(
ur_command_list_ptr_t CommandList, bool MakeAvailable,
std::vector &EventListToCleanup, bool CheckStatus) {
bool UseCopyEngine = CommandList->second.isCopy(this);
@@ -2080,7 +2088,7 @@ ur_result_t ur_queue_handle_legacy_t_::resetCommandList(
return UR_RESULT_SUCCESS;
}
-bool ur_command_list_info_t::isCopy(ur_queue_handle_legacy_t Queue) const {
+bool ur_command_list_info_t::isCopy(ur_queue_handle_t Queue) const {
return ZeQueueDesc.ordinal !=
(uint32_t)Queue->Device
->QueueGroup
@@ -2096,7 +2104,7 @@ void ur_command_list_info_t::append(ur_event_handle_t Event) {
}
ur_command_list_ptr_t
-ur_queue_handle_legacy_t_::eventOpenCommandList(ur_event_handle_t Event) {
+ur_queue_handle_t_::eventOpenCommandList(ur_event_handle_t Event) {
using IsCopy = bool;
if (UsingImmCmdLists) {
@@ -2121,15 +2129,32 @@ ur_queue_handle_legacy_t_::eventOpenCommandList(ur_event_handle_t Event) {
return CommandListMap.end();
}
-ur_queue_handle_legacy_t_::ur_queue_group_t &
-ur_queue_handle_legacy_t_::getQueueGroup(bool UseCopyEngine) {
+void ur_queue_handle_t_::ur_queue_group_t::setImmCmdList(
+ ur_queue_handle_t queue, ze_command_list_handle_t ZeCommandList) {
+ // An immediate command list was given to us but we don't have the queue
+ // descriptor information. Create a dummy and note that it is not recyclable.
+ ZeStruct ZeQueueDesc;
+
+ ImmCmdLists = std::vector(
+ 1,
+ Queue->CommandListMap
+ .insert(std::pair{
+ ZeCommandList,
+ ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc,
+ queue->useCompletionBatching(), false,
+ false, true)})
+ .first);
+}
+
+ur_queue_handle_t_::ur_queue_group_t &
+ur_queue_handle_t_::getQueueGroup(bool UseCopyEngine) {
auto &Map = (UseCopyEngine ? CopyQueueGroupsByTID : ComputeQueueGroupsByTID);
return Map.get();
}
// Return the index of the next queue to use based on a
// round robin strategy and the queue group ordinal.
-uint32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getQueueIndex(
+uint32_t ur_queue_handle_t_::ur_queue_group_t::getQueueIndex(
uint32_t *QueueGroupOrdinal, uint32_t *QueueIndex, bool QueryOnly) {
auto CurrentIndex = NextIndex;
@@ -2163,8 +2188,7 @@ uint32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getQueueIndex(
// This function will return one of possibly multiple available native
// queues and the value of the queue group ordinal.
ze_command_queue_handle_t &
-ur_queue_handle_legacy_t_::ur_queue_group_t::getZeQueue(
- uint32_t *QueueGroupOrdinal) {
+ur_queue_handle_t_::ur_queue_group_t::getZeQueue(uint32_t *QueueGroupOrdinal) {
// QueueIndex is the proper L0 index.
// Index is the plugins concept of index, with main and link copy engines in @@ -2209,7 +2233,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getZeQueue( return ZeQueue; } -int32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getCmdQueueOrdinal( +int32_t ur_queue_handle_t_::ur_queue_group_t::getCmdQueueOrdinal( ze_command_queue_handle_t CmdQueue) { // Find out the right queue group ordinal (first queue might be "main" or // "link") @@ -2221,7 +2245,7 @@ int32_t ur_queue_handle_legacy_t_::ur_queue_group_t::getCmdQueueOrdinal( return Queue->Device->QueueGroup[QueueType].ZeOrdinal; } -bool ur_queue_handle_legacy_t_::useCompletionBatching() { +bool ur_queue_handle_t_::useCompletionBatching() { static bool enabled = getenv_tobool( "UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_EVENT_COMPLETIONS", false); return enabled && !isInOrderQueue() && UsingImmCmdLists; @@ -2231,7 +2255,7 @@ bool ur_queue_handle_legacy_t_::useCompletionBatching() { // fence tracking its completion. This command list & fence are added to the // map of command lists in this queue with ZeFenceInUse = false. // The caller must hold a lock of the queue already. -ur_result_t ur_queue_handle_legacy_t_::createCommandList( +ur_result_t ur_queue_handle_t_::createCommandList( bool UseCopyEngine, ur_command_list_ptr_t &CommandList, ze_command_queue_handle_t *ForcedCmdQueue) { @@ -2274,8 +2298,8 @@ ur_result_t ur_queue_handle_legacy_t_::createCommandList( } ur_result_t -ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, - bool UseCopyEngine) { +ur_queue_handle_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, + bool UseCopyEngine) { // Early exit if there are no active barriers. if (ActiveBarriers.empty()) return UR_RESULT_SUCCESS; @@ -2284,7 +2308,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, _ur_ze_event_list_t ActiveBarriersWaitList; UR_CALL(ActiveBarriersWaitList.createAndRetainUrZeEventList( ActiveBarriers.vector().size(), ActiveBarriers.vector().data(), - reinterpret_cast(this), UseCopyEngine)); + reinterpret_cast(this), UseCopyEngine)); // We can now replace active barriers with the ones in the wait list. UR_CALL(ActiveBarriers.clear()); @@ -2300,7 +2324,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, ur_event_handle_t Event = nullptr; if (auto Res = createEventAndAssociateQueue( - reinterpret_cast(this), &Event, + reinterpret_cast(this), &Event, UR_EXT_COMMAND_TYPE_USER, CmdList, /* IsInternal */ true, /* IsMultiDevice */ true)) return Res; @@ -2316,7 +2340,7 @@ ur_queue_handle_legacy_t_::insertActiveBarriers(ur_command_list_ptr_t &CmdList, return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::insertStartBarrierIfDiscardEventsMode( +ur_result_t ur_queue_handle_t_::insertStartBarrierIfDiscardEventsMode( ur_command_list_ptr_t &CmdList) { // If current command list is different from the last command list then insert // a barrier waiting for the last command event. 
@@ -2342,7 +2366,7 @@ static const bool UseCopyEngineForInOrderQueue = [] { (std::stoi(CopyEngineForInOrderQueue) != 0)); }(); -bool ur_queue_handle_legacy_t_::useCopyEngine(bool PreferCopyEngine) const { +bool ur_queue_handle_t_::useCopyEngine(bool PreferCopyEngine) const { auto InitialCopyGroup = CopyQueueGroupsByTID.begin()->second; return PreferCopyEngine && InitialCopyGroup.ZeQueues.size() > 0 && (!isInOrderQueue() || UseCopyEngineForInOrderQueue); @@ -2350,8 +2374,7 @@ bool ur_queue_handle_legacy_t_::useCopyEngine(bool PreferCopyEngine) const { // This function will return one of po6ssibly multiple available // immediate commandlists associated with this Queue. -ur_command_list_ptr_t & -ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { +ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { uint32_t QueueIndex, QueueOrdinal; auto Index = getQueueIndex(&QueueOrdinal, &QueueIndex); @@ -2455,7 +2478,7 @@ static const size_t ImmCmdListsEventCleanupThreshold = [] { return Threshold; }(); -size_t ur_queue_handle_legacy_t_::getImmdCmmdListsEventCleanupThreshold() { +size_t ur_queue_handle_t_::getImmdCmmdListsEventCleanupThreshold() { return useCompletionBatching() ? CompletionEventsPerBatch : ImmCmdListsEventCleanupThreshold; } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 97ddcf014c..699d7ec960 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -20,19 +20,15 @@ #include #include -#include +#include #include #include #include "common.hpp" #include "device.hpp" -#include "queue_api.hpp" - -struct ur_queue_handle_legacy_t_; -using ur_queue_handle_legacy_t = ur_queue_handle_legacy_t_ *; extern "C" { -ur_result_t urQueueReleaseInternal(ur_queue_handle_legacy_t Queue); +ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue); } // extern "C" struct ur_completion_batch; @@ -74,8 +70,7 @@ struct ur_completion_batch { // Seals the event batch and appends a barrier to the command list. // Adding any further events after this, but before reset, is undefined. - ur_result_t seal(ur_queue_handle_legacy_t queue, - ze_command_list_handle_t cmdlist); + ur_result_t seal(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist); // Resets a complete batch back to an empty state. Cleanups internal state // but keeps allocated resources for reuse. @@ -117,7 +112,7 @@ struct ur_completion_batches { // returned to indicate that there are no batches available. // This is safe, but will increase how many events are associated // with the active batch. - ur_result_t tryCleanup(ur_queue_handle_legacy_t queue, + ur_result_t tryCleanup(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist, std::vector &EventList, std::vector &EventListToCleanup); @@ -154,10 +149,10 @@ struct ur_completion_batches { ur_completion_batch_it active; }; -ur_result_t resetCommandLists(ur_queue_handle_legacy_t Queue); +ur_result_t resetCommandLists(ur_queue_handle_t Queue); ur_result_t -CleanupEventsInImmCmdLists(ur_queue_handle_legacy_t UrQueue, - bool QueueLocked = false, bool QueueSynced = false, +CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked = false, + bool QueueSynced = false, ur_event_handle_t CompletedEvent = nullptr); // Structure describing the specific use of a command-list in a queue. @@ -208,7 +203,7 @@ struct ur_command_list_info_t { bool IsImmediate; // Helper functions to tell if this is a copy command-list. 
- bool isCopy(ur_queue_handle_legacy_t Queue) const; + bool isCopy(ur_queue_handle_t Queue) const; // An optional event completion batching mechanism for out-of-order immediate // command lists. @@ -230,209 +225,23 @@ using ur_command_list_map_t = // The iterator pointing to a specific command-list in use. using ur_command_list_ptr_t = ur_command_list_map_t::iterator; -struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { - ur_queue_handle_legacy_t_( - std::vector &ComputeQueues, - std::vector &CopyQueues, - ur_context_handle_t Context, ur_device_handle_t Device, - bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0, - int ForceComputeIndex = -1); - - ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) override; - ur_result_t queueRetain() override; - ur_result_t queueRelease() override; - ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) override; - ur_result_t queueFinish() override; - ur_result_t queueFlush() override; - ur_result_t enqueueKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, - const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueEventsWait(uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite, - size_t offset, size_t size, - const void *pSrc, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferReadRect( - ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, - ur_rect_offset_t hostOrigin, ur_rect_region_t region, - size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, - size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferWriteRect( - ur_mem_handle_t hBuffer, bool blockingWrite, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferCopy(ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, - size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferCopyRect( - ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, - ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, - ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - 
ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferFill(ur_mem_handle_t hBuffer, - const void *pPattern, size_t patternSize, - size_t offset, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemImageRead(ur_mem_handle_t hImage, bool blockingRead, - ur_rect_offset_t origin, - ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pDst, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemImageWrite(ur_mem_handle_t hImage, bool blockingWrite, - ur_rect_offset_t origin, - ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pSrc, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueMemImageCopy(ur_mem_handle_t hImageSrc, ur_mem_handle_t hImageDst, - ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, - ur_rect_region_t region, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueMemBufferMap(ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, - void **ppRetMap) override; - ur_result_t enqueueMemUnmap(ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueUSMFill(void *pMem, size_t patternSize, - const void *pPattern, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, size_t, - size_t, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) override; - ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, size_t, - size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) override; - ur_result_t enqueueUSMPrefetch(const void *pMem, size_t size, - ur_usm_migration_flags_t flags, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueUSMAdvise(const void *pMem, size_t size, - ur_usm_advice_flags_t advice, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueDeviceGlobalVariableWrite( - ur_program_handle_t hProgram, const char *name, bool blockingWrite, - size_t count, size_t offset, const void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueDeviceGlobalVariableRead( - ur_program_handle_t hProgram, const char *name, bool blockingRead, - size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueReadHostPipe(ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, - void *pDst, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t 
enqueueWriteHostPipe(ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, - void *pSrc, size_t size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t bindlessImagesImageCopyExp( - const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, - const ur_image_desc_t *pDstImageDesc, - const ur_image_format_t *pSrcImageFormat, - const ur_image_format_t *pDstImageFormat, - ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t bindlessImagesWaitExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, - uint64_t waitValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t bindlessImagesSignalExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t hSemaphore, bool hasSignalValue, - uint64_t signalValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueCooperativeKernelLaunchExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueTimestampRecordingExp(bool blocking, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t enqueueKernelLaunchCustomExp( - ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t numPropsInLaunchPropList, - const ur_exp_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) override; - ur_result_t - enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, - uint32_t, const ur_mem_handle_t *, - const ur_exp_enqueue_native_command_properties_t *, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) override; +struct ur_queue_handle_t_ : _ur_object { + ur_queue_handle_t_(std::vector &ComputeQueues, + std::vector &CopyQueues, + ur_context_handle_t Context, ur_device_handle_t Device, + bool OwnZeCommandQueue, ur_queue_flags_t Properties = 0, + int ForceComputeIndex = -1); using queue_type = ur_device_handle_t_::queue_group_info_t::type; // PI queue is in general a one to many mapping to L0 native queues. struct ur_queue_group_t { - ur_queue_handle_legacy_t Queue; + ur_queue_handle_t Queue; ur_queue_group_t() = delete; // The Queue argument captures the enclosing PI queue. // The Type argument specifies the type of this queue group. // The actual ZeQueues are populated at PI queue construction. - ur_queue_group_t(ur_queue_handle_legacy_t Queue, queue_type Type) + ur_queue_group_t(ur_queue_handle_t Queue, queue_type Type) : Queue(Queue), Type(Type) {} // The type of the queue group. @@ -462,8 +271,7 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { ze_command_queue_handle_t &getZeQueue(uint32_t *QueueGroupOrdinal); // This function sets an immediate commandlist from the interop interface. 
- void setImmCmdList(ur_queue_handle_legacy_t queue, - ze_command_list_handle_t); + void setImmCmdList(ur_queue_handle_t queue, ze_command_list_handle_t); // This function returns the next immediate commandlist to use. ur_command_list_ptr_t &getImmCmdList(); @@ -530,15 +338,15 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { pi_queue_group_by_tid_t CopyQueueGroupsByTID; // Keeps the PI context to which this queue belongs. - // This field is only set at ur_queue_handle_legacy_t_ creation time, and + // This field is only set at ur_queue_handle_t_ creation time, and // cannot change. Therefore it can be accessed without holding a lock on this - // ur_queue_handle_legacy_t_. + // ur_queue_handle_t_. const ur_context_handle_t Context; // Keeps the PI device to which this queue belongs. - // This field is only set at ur_queue_handle_legacy_t_ creation time, and + // This field is only set at ur_queue_handle_t_ creation time, and // cannot change. Therefore it can be accessed without holding a lock on this - // ur_queue_handle_legacy_t_. + // ur_queue_handle_t_. const ur_device_handle_t Device; // A queue may use either standard or immediate commandlists. At queue @@ -881,21 +689,10 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ { // Threshold for cleaning up the EventList for immediate command lists. size_t getImmdCmmdListsEventCleanupThreshold(); -}; - -template QueueT GetQueue(ur_queue_handle_t Queue) { - if (!Queue) - return nullptr; - auto *Q = dynamic_cast(Queue); - if (!Q) { - throw UR_RESULT_ERROR_INVALID_QUEUE; - } - return Q; -} -static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { - return GetQueue(Queue); -} + // Pointer to the unified handle. + ur_queue_handle_t_ *UnifiedHandle; +}; // This helper function creates a ur_event_handle_t and associate a // ur_queue_handle_t. Note that the caller of this function must have acquired @@ -910,18 +707,16 @@ static inline ur_queue_handle_legacy_t Legacy(ur_queue_handle_t Queue) { // multiple devices. // \param ForceHostVisible tells if the event must be created in // the host-visible pool -ur_result_t -createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, - ur_event_handle_t *Event, ur_command_t CommandType, - ur_command_list_ptr_t CommandList, bool IsInternal, - bool IsMultiDevice, - std::optional HostVisible = std::nullopt); +ur_result_t createEventAndAssociateQueue( + ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType, + ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice, + std::optional HostVisible = std::nullopt); // This helper function checks to see if an event for a command can be included // at the end of a command list batch. This will only be true if the event does // not have dependencies or the dependencies are not for events which exist in // this batch. -bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +bool eventCanBeBatched(ur_queue_handle_t Queue, bool UseCopyEngine, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList); @@ -930,7 +725,7 @@ bool eventCanBeBatched(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, // dependencies, then this command can be enqueued without a signal event set in // a command list batch. The signal event will be appended at the end of the // batch to be signalled at the end of the command list. 
-ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine, +ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine, ze_event_handle_t *ZeEvent, ur_event_handle_t *Event, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, diff --git a/source/adapters/level_zero/queue_api.cpp b/source/adapters/level_zero/queue_api.cpp deleted file mode 100644 index 188f7c3102..0000000000 --- a/source/adapters/level_zero/queue_api.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/* - * - * Copyright (C) 2024 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. See LICENSE.TXT - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file queue_api.cpp - * - */ - -#include "queue_api.hpp" - -ur_queue_handle_t_::~ur_queue_handle_t_() {} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { - return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { - return hQueue->queueRetain(); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { - return hQueue->queueRelease(); -} -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { - return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - return hQueue->queueFinish(); -} -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - return hQueue->queueFlush(); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueKernelLaunch( - hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, - numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { 
- return hQueue->enqueueMemBufferWrite(hBuffer, blockingWrite, offset, size, - pSrc, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pDst, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferReadRect( - hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, - bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, - ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferWriteRect( - hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, - bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, - dstOffset, size, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferCopyRect( - hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch, - srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, - size, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, - ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueMemImageRead( - hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( - 
ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, - ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, - size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueMemImageWrite( - hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, - region, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { - return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset, - size, numEventsInWaitList, phEventWaitList, - phEvent, ppRetMap); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *pMem, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, - numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, - height, numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - 
ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, - width, height, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingWrite, size_t count, size_t offset, const void *pSrc, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueDeviceGlobalVariableWrite( - hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, - bool blockingRead, size_t count, size_t offset, void *pDst, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueDeviceGlobalVariableRead( - hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, - size, numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, - size, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( - ur_queue_handle_t hQueue, const void *pSrc, void *pDst, - const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, - const ur_image_format_t *pSrcImageFormat, - const ur_image_format_t *pDstImageFormat, - ur_exp_image_copy_region_t *pCopyRegion, - ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->bindlessImagesImageCopyExp( - pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, - pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, - bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->bindlessImagesWaitExternalSemaphoreExp( - hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, - phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( - ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, - bool hasSignalValue, uint64_t signalValue, uint32_t 
numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->bindlessImagesSignalExternalSemaphoreExp( - hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueCooperativeKernelLaunchExp( - hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - numEventsInWaitList, phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( - ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( - ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, - const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t numPropsInLaunchPropList, - const ur_exp_launch_property_t *launchPropList, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueKernelLaunchCustomExp( - hKernel, workDim, pGlobalWorkSize, pLocalWorkSize, - numPropsInLaunchPropList, launchPropList, numEventsInWaitList, - phEventWaitList, phEvent); -} -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( - ur_queue_handle_t hQueue, - ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, - uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, - const ur_exp_enqueue_native_command_properties_t *pProperties, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return hQueue->enqueueNativeCommandExp( - pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, - numEventsInWaitList, phEventWaitList, phEvent); -} diff --git a/source/adapters/level_zero/queue_api.hpp b/source/adapters/level_zero/queue_api.hpp deleted file mode 100644 index bc01596d2b..0000000000 --- a/source/adapters/level_zero/queue_api.hpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * - * Copyright (C) 2024 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
See LICENSE.TXT - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file queue_api.hpp - * - */ - -#pragma once - -#include - -struct ur_queue_handle_t_ { - virtual ~ur_queue_handle_t_(); - virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *, - size_t *) = 0; - virtual ur_result_t queueRetain() = 0; - virtual ur_result_t queueRelease() = 0; - virtual ur_result_t queueGetNativeHandle(ur_queue_native_desc_t *, - ur_native_handle_t *) = 0; - virtual ur_result_t queueFinish() = 0; - virtual ur_result_t queueFlush() = 0; - virtual ur_result_t enqueueKernelLaunch(ur_kernel_handle_t, uint32_t, - const size_t *, const size_t *, - const size_t *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueEventsWait(uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueEventsWaitWithBarrier(uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferRead(ur_mem_handle_t, bool, size_t, - size_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferWrite(ur_mem_handle_t, bool, size_t, - size_t, const void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueMemBufferReadRect(ur_mem_handle_t, bool, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueMemBufferWriteRect(ur_mem_handle_t, bool, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferCopy(ur_mem_handle_t, ur_mem_handle_t, - size_t, size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueMemBufferCopyRect(ur_mem_handle_t, ur_mem_handle_t, ur_rect_offset_t, - ur_rect_offset_t, ur_rect_region_t, size_t, size_t, - size_t, size_t, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferFill(ur_mem_handle_t, const void *, - size_t, size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemImageRead(ur_mem_handle_t, bool, - ur_rect_offset_t, ur_rect_region_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemImageWrite(ur_mem_handle_t, bool, - ur_rect_offset_t, ur_rect_region_t, - size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemImageCopy(ur_mem_handle_t, ur_mem_handle_t, - ur_rect_offset_t, ur_rect_offset_t, - ur_rect_region_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueMemBufferMap(ur_mem_handle_t, bool, ur_map_flags_t, - size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *, void **) = 0; - virtual ur_result_t enqueueMemUnmap(ur_mem_handle_t, void *, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMFill(void *, size_t, const void *, size_t, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMMemcpy(bool, void *, const void *, size_t, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t 
enqueueUSMPrefetch(const void *, size_t, - ur_usm_migration_flags_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMAdvise(const void *, size_t, - ur_usm_advice_flags_t, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMFill2D(void *, size_t, size_t, const void *, - size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueUSMMemcpy2D(bool, void *, size_t, const void *, - size_t, size_t, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueDeviceGlobalVariableWrite( - ur_program_handle_t, const char *, bool, size_t, size_t, const void *, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueDeviceGlobalVariableRead( - ur_program_handle_t, const char *, bool, size_t, size_t, void *, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueReadHostPipe(ur_program_handle_t, const char *, - bool, void *, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueWriteHostPipe(ur_program_handle_t, const char *, - bool, void *, size_t, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t bindlessImagesImageCopyExp( - const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, - const ur_image_format_t *, const ur_image_format_t *, - ur_exp_image_copy_region_t *, ur_exp_image_copy_flags_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t bindlessImagesWaitExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t bindlessImagesSignalExternalSemaphoreExp( - ur_exp_external_semaphore_handle_t, bool, uint64_t, uint32_t, - const ur_event_handle_t *, ur_event_handle_t *) = 0; - virtual ur_result_t enqueueCooperativeKernelLaunchExp( - ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, - const size_t *, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueTimestampRecordingExp(bool, uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t enqueueKernelLaunchCustomExp( - ur_kernel_handle_t, uint32_t, const size_t *, const size_t *, uint32_t, - const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; - virtual ur_result_t - enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, - uint32_t, const ur_mem_handle_t *, - const ur_exp_enqueue_native_command_properties_t *, - uint32_t, const ur_event_handle_t *, - ur_event_handle_t *) = 0; -}; diff --git a/source/adapters/level_zero/sampler.cpp b/source/adapters/level_zero/sampler.cpp index 54ca1b6672..d48e6aeede 100644 --- a/source/adapters/level_zero/sampler.cpp +++ b/source/adapters/level_zero/sampler.cpp @@ -12,7 +12,9 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( +namespace ur::level_zero { + +ur_result_t urSamplerCreate( ur_context_handle_t Context, ///< [in] handle of the context object const ur_sampler_desc_t *Props, ///< [in] specifies a list of sampler property names and their @@ -109,17 +111,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain( - ur_sampler_handle_t - Sampler ///< 
[in] handle of the sampler object to get access +ur_result_t +urSamplerRetain(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to get access ) { Sampler->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( - ur_sampler_handle_t - Sampler ///< [in] handle of the sampler object to release +ur_result_t +urSamplerRelease(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to release ) { if (!Sampler->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( +ur_result_t urSamplerGetInfo( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler object ur_sampler_info_t PropName, ///< [in] name of the sampler property to query size_t PropValueSize, ///< [in] size in bytes of the sampler property value @@ -152,7 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( +ur_result_t urSamplerGetNativeHandle( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler. ur_native_handle_t *NativeSampler ///< [out] a pointer to the native ///< handle of the sampler. @@ -164,7 +166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( +ur_result_t urSamplerCreateWithNativeHandle( ur_native_handle_t NativeSampler, ///< [in] the native handle of the sampler. ur_context_handle_t Context, ///< [in] handle of the context object @@ -182,3 +184,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 8941f756ea..9bdd672818 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -1,19 +1,19 @@ -//===--------- ur_interface_loader.cpp - Level Zero Adapter----------------===// +//===--------- ur_interface_loader.cpp - Level Zero Adapter ------------===// // -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2024 Intel Corporation // // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM // Exceptions. 
See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include #include -namespace { +#include "ur_interface_loader.hpp" -ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { +static ur_result_t validateProcInputs(ur_api_version_t version, + void *pDdiTable) { if (nullptr == pDdiTable) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -24,475 +24,592 @@ ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { } return UR_RESULT_SUCCESS; } -} // namespace -#if defined(__cplusplus) +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +#elif defined(__cplusplus) extern "C" { #endif -UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_global_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( + ur_api_version_t version, ur_global_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnAdapterGet = urAdapterGet; - pDdiTable->pfnAdapterRelease = urAdapterRelease; - pDdiTable->pfnAdapterRetain = urAdapterRetain; - pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; - pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; - return retVal; + pDdiTable->pfnAdapterGet = ur::level_zero::urAdapterGet; + pDdiTable->pfnAdapterRelease = ur::level_zero::urAdapterRelease; + pDdiTable->pfnAdapterRetain = ur::level_zero::urAdapterRetain; + pDdiTable->pfnAdapterGetLastError = ur::level_zero::urAdapterGetLastError; + pDdiTable->pfnAdapterGetInfo = ur::level_zero::urAdapterGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_context_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urContextCreate; - pDdiTable->pfnRetain = urContextRetain; - pDdiTable->pfnRelease = urContextRelease; - pDdiTable->pfnGetInfo = urContextGetInfo; - pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; - pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + pDdiTable->pfnUnsampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = + ur::level_zero::urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = ur::level_zero::urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + ur::level_zero::urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = + ur::level_zero::urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = 
ur::level_zero::urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = + ur::level_zero::urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = + ur::level_zero::urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = ur::level_zero::urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportExternalMemoryExp = + ur::level_zero::urBindlessImagesImportExternalMemoryExp; + pDdiTable->pfnMapExternalArrayExp = + ur::level_zero::urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnMapExternalLinearMemoryExp = + ur::level_zero::urBindlessImagesMapExternalLinearMemoryExp; + pDdiTable->pfnReleaseExternalMemoryExp = + ur::level_zero::urBindlessImagesReleaseExternalMemoryExp; + pDdiTable->pfnImportExternalSemaphoreExp = + ur::level_zero::urBindlessImagesImportExternalSemaphoreExp; + pDdiTable->pfnReleaseExternalSemaphoreExp = + ur::level_zero::urBindlessImagesReleaseExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + ur::level_zero::urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + ur::level_zero::urBindlessImagesSignalExternalSemaphoreExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_enqueue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( + ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; - pDdiTable->pfnEventsWait = urEnqueueEventsWait; - pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; - pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; - pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; - pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; - pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; - pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; - pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; - pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; - pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; - pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; - pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; - pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; - pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; - pDdiTable->pfnUSMFill = urEnqueueUSMFill; - pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; - pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; - pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; - pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; - pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; - pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; - pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; - - return retVal; + pDdiTable->pfnCreateExp = ur::level_zero::urCommandBufferCreateExp; + pDdiTable->pfnRetainExp = ur::level_zero::urCommandBufferRetainExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urCommandBufferReleaseExp; + pDdiTable->pfnFinalizeExp = ur::level_zero::urCommandBufferFinalizeExp; + pDdiTable->pfnAppendKernelLaunchExp = + ur::level_zero::urCommandBufferAppendKernelLaunchExp; + pDdiTable->pfnAppendUSMMemcpyExp = + 
ur::level_zero::urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = + ur::level_zero::urCommandBufferAppendUSMFillExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur::level_zero::urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur::level_zero::urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur::level_zero::urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur::level_zero::urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur::level_zero::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnEnqueueExp = ur::level_zero::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = + ur::level_zero::urCommandBufferRetainCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur::level_zero::urCommandBufferReleaseCommandExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur::level_zero::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = ur::level_zero::urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur::level_zero::urCommandBufferCommandGetInfoExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_event_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( + ur_api_version_t version, ur_context_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urEventGetInfo; - pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; - pDdiTable->pfnWait = urEventWait; - pDdiTable->pfnRetain = urEventRetain; - pDdiTable->pfnRelease = urEventRelease; - pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; - pDdiTable->pfnSetCallback = urEventSetCallback; - - return retVal; + + pDdiTable->pfnCreate = ur::level_zero::urContextCreate; + pDdiTable->pfnRetain = ur::level_zero::urContextRetain; + pDdiTable->pfnRelease = ur::level_zero::urContextRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urContextGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urContextGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urContextCreateWithNativeHandle; + pDdiTable->pfnSetExtendedDeleter = + ur::level_zero::urContextSetExtendedDeleter; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_kernel_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( + ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { + auto result = 
validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urKernelCreate; - pDdiTable->pfnGetInfo = urKernelGetInfo; - pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; - pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; - pDdiTable->pfnRetain = urKernelRetain; - pDdiTable->pfnRelease = urKernelRelease; - pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; - pDdiTable->pfnSetArgValue = urKernelSetArgValue; - pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; - pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; - pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; - pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; - pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; - pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants; - pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize; - return retVal; + + pDdiTable->pfnKernelLaunch = ur::level_zero::urEnqueueKernelLaunch; + pDdiTable->pfnEventsWait = ur::level_zero::urEnqueueEventsWait; + pDdiTable->pfnEventsWaitWithBarrier = + ur::level_zero::urEnqueueEventsWaitWithBarrier; + pDdiTable->pfnMemBufferRead = ur::level_zero::urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferWrite = ur::level_zero::urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferReadRect = ur::level_zero::urEnqueueMemBufferReadRect; + pDdiTable->pfnMemBufferWriteRect = + ur::level_zero::urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemBufferCopy = ur::level_zero::urEnqueueMemBufferCopy; + pDdiTable->pfnMemBufferCopyRect = ur::level_zero::urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = ur::level_zero::urEnqueueMemBufferFill; + pDdiTable->pfnMemImageRead = ur::level_zero::urEnqueueMemImageRead; + pDdiTable->pfnMemImageWrite = ur::level_zero::urEnqueueMemImageWrite; + pDdiTable->pfnMemImageCopy = ur::level_zero::urEnqueueMemImageCopy; + pDdiTable->pfnMemBufferMap = ur::level_zero::urEnqueueMemBufferMap; + pDdiTable->pfnMemUnmap = ur::level_zero::urEnqueueMemUnmap; + pDdiTable->pfnUSMFill = ur::level_zero::urEnqueueUSMFill; + pDdiTable->pfnUSMMemcpy = ur::level_zero::urEnqueueUSMMemcpy; + pDdiTable->pfnUSMPrefetch = ur::level_zero::urEnqueueUSMPrefetch; + pDdiTable->pfnUSMAdvise = ur::level_zero::urEnqueueUSMAdvise; + pDdiTable->pfnUSMFill2D = ur::level_zero::urEnqueueUSMFill2D; + pDdiTable->pfnUSMMemcpy2D = ur::level_zero::urEnqueueUSMMemcpy2D; + pDdiTable->pfnDeviceGlobalVariableWrite = + ur::level_zero::urEnqueueDeviceGlobalVariableWrite; + pDdiTable->pfnDeviceGlobalVariableRead = + ur::level_zero::urEnqueueDeviceGlobalVariableRead; + pDdiTable->pfnReadHostPipe = ur::level_zero::urEnqueueReadHostPipe; + pDdiTable->pfnWriteHostPipe = ur::level_zero::urEnqueueWriteHostPipe; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnImageCreate = urMemImageCreate; - pDdiTable->pfnBufferCreate = urMemBufferCreate; - pDdiTable->pfnRetain = 
urMemRetain; - pDdiTable->pfnRelease = urMemRelease; - pDdiTable->pfnBufferPartition = urMemBufferPartition; - pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; - pDdiTable->pfnBufferCreateWithNativeHandle = - urMemBufferCreateWithNativeHandle; - pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urMemGetInfo; - pDdiTable->pfnImageGetInfo = urMemImageGetInfo; - return retVal; + pDdiTable->pfnKernelLaunchCustomExp = + ur::level_zero::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur::level_zero::urEnqueueCooperativeKernelLaunchExp; + pDdiTable->pfnTimestampRecordingExp = + ur::level_zero::urEnqueueTimestampRecordingExp; + pDdiTable->pfnNativeCommandExp = ur::level_zero::urEnqueueNativeCommandExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_platform_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( + ur_api_version_t version, ur_event_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urPlatformGet; - pDdiTable->pfnGetInfo = urPlatformGetInfo; - pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; - pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; - pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; - - return retVal; -} -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { + pDdiTable->pfnGetInfo = ur::level_zero::urEventGetInfo; + pDdiTable->pfnGetProfilingInfo = ur::level_zero::urEventGetProfilingInfo; + pDdiTable->pfnWait = ur::level_zero::urEventWait; + pDdiTable->pfnRetain = ur::level_zero::urEventRetain; + pDdiTable->pfnRelease = ur::level_zero::urEventRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urEventGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urEventCreateWithNativeHandle; + pDdiTable->pfnSetCallback = ur::level_zero::urEventSetCallback; + + return result; +} - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( + ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; - pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; - pDdiTable->pfnBuild = urProgramBuild; - pDdiTable->pfnCompile = urProgramCompile; - pDdiTable->pfnLink = urProgramLink; - pDdiTable->pfnRetain = urProgramRetain; - pDdiTable->pfnRelease = urProgramRelease; - pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; - pDdiTable->pfnGetGlobalVariablePointer = urProgramGetGlobalVariablePointer; - pDdiTable->pfnGetInfo = urProgramGetInfo; - pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + + pDdiTable->pfnCreate = ur::level_zero::urKernelCreate; + pDdiTable->pfnGetInfo = ur::level_zero::urKernelGetInfo; + 
pDdiTable->pfnGetGroupInfo = ur::level_zero::urKernelGetGroupInfo; + pDdiTable->pfnGetSubGroupInfo = ur::level_zero::urKernelGetSubGroupInfo; + pDdiTable->pfnRetain = ur::level_zero::urKernelRetain; + pDdiTable->pfnRelease = ur::level_zero::urKernelRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urKernelGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urKernelCreateWithNativeHandle; + pDdiTable->pfnGetSuggestedLocalWorkSize = + ur::level_zero::urKernelGetSuggestedLocalWorkSize; + pDdiTable->pfnSetArgValue = ur::level_zero::urKernelSetArgValue; + pDdiTable->pfnSetArgLocal = ur::level_zero::urKernelSetArgLocal; + pDdiTable->pfnSetArgPointer = ur::level_zero::urKernelSetArgPointer; + pDdiTable->pfnSetExecInfo = ur::level_zero::urKernelSetExecInfo; + pDdiTable->pfnSetArgSampler = ur::level_zero::urKernelSetArgSampler; + pDdiTable->pfnSetArgMemObj = ur::level_zero::urKernelSetArgMemObj; pDdiTable->pfnSetSpecializationConstants = - urProgramSetSpecializationConstants; - pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + ur::level_zero::urKernelSetSpecializationConstants; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_queue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urQueueGetInfo; - pDdiTable->pfnCreate = urQueueCreate; - pDdiTable->pfnRetain = urQueueRetain; - pDdiTable->pfnRelease = urQueueRelease; - pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; - pDdiTable->pfnFinish = urQueueFinish; - pDdiTable->pfnFlush = urQueueFlush; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur::level_zero::urKernelSuggestMaxCooperativeGroupCountExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_sampler_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL +urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urSamplerCreate; - pDdiTable->pfnRetain = urSamplerRetain; - pDdiTable->pfnRelease = urSamplerRelease; - pDdiTable->pfnGetInfo = urSamplerGetInfo; - pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; - - return retVal; + + pDdiTable->pfnImageCreate = ur::level_zero::urMemImageCreate; + pDdiTable->pfnBufferCreate = ur::level_zero::urMemBufferCreate; + pDdiTable->pfnRetain = ur::level_zero::urMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urMemRelease; + pDdiTable->pfnBufferPartition = ur::level_zero::urMemBufferPartition; + 
pDdiTable->pfnGetNativeHandle = ur::level_zero::urMemGetNativeHandle; + pDdiTable->pfnBufferCreateWithNativeHandle = + ur::level_zero::urMemBufferCreateWithNativeHandle; + pDdiTable->pfnImageCreateWithNativeHandle = + ur::level_zero::urMemImageCreateWithNativeHandle; + pDdiTable->pfnGetInfo = ur::level_zero::urMemGetInfo; + pDdiTable->pfnImageGetInfo = ur::level_zero::urMemImageGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_usm_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( + ur_api_version_t version, ur_physical_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnHostAlloc = urUSMHostAlloc; - pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; - pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; - pDdiTable->pfnFree = urUSMFree; - pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; - pDdiTable->pfnPoolCreate = urUSMPoolCreate; - pDdiTable->pfnPoolRetain = urUSMPoolRetain; - pDdiTable->pfnPoolRelease = urUSMPoolRelease; - pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo; - - return retVal; + pDdiTable->pfnCreate = ur::level_zero::urPhysicalMemCreate; + pDdiTable->pfnRetain = ur::level_zero::urPhysicalMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urPhysicalMemRelease; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_device_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( + ur_api_version_t version, ur_platform_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urDeviceGet; - pDdiTable->pfnGetInfo = urDeviceGetInfo; - pDdiTable->pfnRetain = urDeviceRetain; - pDdiTable->pfnRelease = urDeviceRelease; - pDdiTable->pfnPartition = urDevicePartition; - pDdiTable->pfnSelectBinary = urDeviceSelectBinary; - pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; - pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; - - return retVal; + + pDdiTable->pfnGet = ur::level_zero::urPlatformGet; + pDdiTable->pfnGetInfo = ur::level_zero::urPlatformGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urPlatformGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urPlatformCreateWithNativeHandle; + pDdiTable->pfnGetApiVersion = ur::level_zero::urPlatformGetApiVersion; + pDdiTable->pfnGetBackendOption = ur::level_zero::urPlatformGetBackendOption; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_command_buffer_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL 
urGetProgramProcAddrTable( + ur_api_version_t version, ur_program_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreateExp = urCommandBufferCreateExp; - pDdiTable->pfnRetainExp = urCommandBufferRetainExp; - pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; - pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; - pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; - pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; - pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; - pDdiTable->pfnAppendMemBufferCopyRectExp = - urCommandBufferAppendMemBufferCopyRectExp; - pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; - pDdiTable->pfnAppendMemBufferReadRectExp = - urCommandBufferAppendMemBufferReadRectExp; - pDdiTable->pfnAppendMemBufferWriteExp = - urCommandBufferAppendMemBufferWriteExp; - pDdiTable->pfnAppendMemBufferWriteRectExp = - urCommandBufferAppendMemBufferWriteRectExp; - pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; - pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; - pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; - pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; - pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; - pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; - pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; - - return retVal; + + pDdiTable->pfnCreateWithIL = ur::level_zero::urProgramCreateWithIL; + pDdiTable->pfnCreateWithBinary = ur::level_zero::urProgramCreateWithBinary; + pDdiTable->pfnBuild = ur::level_zero::urProgramBuild; + pDdiTable->pfnCompile = ur::level_zero::urProgramCompile; + pDdiTable->pfnLink = ur::level_zero::urProgramLink; + pDdiTable->pfnRetain = ur::level_zero::urProgramRetain; + pDdiTable->pfnRelease = ur::level_zero::urProgramRelease; + pDdiTable->pfnGetFunctionPointer = + ur::level_zero::urProgramGetFunctionPointer; + pDdiTable->pfnGetGlobalVariablePointer = + ur::level_zero::urProgramGetGlobalVariablePointer; + pDdiTable->pfnGetInfo = ur::level_zero::urProgramGetInfo; + pDdiTable->pfnGetBuildInfo = ur::level_zero::urProgramGetBuildInfo; + pDdiTable->pfnSetSpecializationConstants = + ur::level_zero::urProgramSetSpecializationConstants; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urProgramGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urProgramCreateWithNativeHandle; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( - ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; - pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; - pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; - return retVal; 
+ pDdiTable->pfnBuildExp = ur::level_zero::urProgramBuildExp; + pDdiTable->pfnCompileExp = ur::level_zero::urProgramCompileExp; + pDdiTable->pfnLinkExp = ur::level_zero::urProgramLinkExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( - ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( + ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnUnsampledImageHandleDestroyExp = - urBindlessImagesUnsampledImageHandleDestroyExp; - pDdiTable->pfnSampledImageHandleDestroyExp = - urBindlessImagesSampledImageHandleDestroyExp; - pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; - pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; - pDdiTable->pfnUnsampledImageCreateExp = - urBindlessImagesUnsampledImageCreateExp; - pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; - pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; - pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; - pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; - pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; - pDdiTable->pfnImportExternalMemoryExp = - urBindlessImagesImportExternalMemoryExp; - pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; - pDdiTable->pfnMapExternalLinearMemoryExp = - urBindlessImagesMapExternalLinearMemoryExp; - pDdiTable->pfnReleaseExternalMemoryExp = - urBindlessImagesReleaseExternalMemoryExp; - pDdiTable->pfnImportExternalSemaphoreExp = - urBindlessImagesImportExternalSemaphoreExp; - pDdiTable->pfnReleaseExternalSemaphoreExp = - urBindlessImagesReleaseExternalSemaphoreExp; - pDdiTable->pfnWaitExternalSemaphoreExp = - urBindlessImagesWaitExternalSemaphoreExp; - pDdiTable->pfnSignalExternalSemaphoreExp = - urBindlessImagesSignalExternalSemaphoreExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnGetInfo = ur::level_zero::urQueueGetInfo; + pDdiTable->pfnCreate = ur::level_zero::urQueueCreate; + pDdiTable->pfnRetain = ur::level_zero::urQueueRetain; + pDdiTable->pfnRelease = ur::level_zero::urQueueRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urQueueGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urQueueCreateWithNativeHandle; + pDdiTable->pfnFinish = ur::level_zero::urQueueFinish; + pDdiTable->pfnFlush = ur::level_zero::urQueueFlush; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( - ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( + ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; - pDdiTable->pfnImportExp = urUSMImportExp; - pDdiTable->pfnReleaseExp = urUSMReleaseExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnCreate = ur::level_zero::urSamplerCreate; + pDdiTable->pfnRetain = ur::level_zero::urSamplerRetain; + pDdiTable->pfnRelease = ur::level_zero::urSamplerRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urSamplerGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urSamplerGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urSamplerCreateWithNativeHandle; + + return result; } 
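Each urGet*ProcAddrTable entry point above follows the same shape: validate the requested API version, then fill the table with the ur::level_zero::-qualified implementations. The sketch below is illustrative and not part of the patch; it shows how a caller could dispatch through one of these tables, assuming UR_STATIC_ADAPTER_LEVEL_ZERO is defined (so the entry points live in the ur::level_zero namespace and their adapter-internal declarations are visible) and using a made-up wrapper name, createSamplerViaDdi.

#include <ur_ddi.h>

// Illustrative only: fetch the sampler DDI table from the statically linked
// Level Zero adapter and dispatch urSamplerCreate through it.
ur_result_t createSamplerViaDdi(ur_context_handle_t hContext,
                                const ur_sampler_desc_t *pDesc,
                                ur_sampler_handle_t *phSampler) {
  ur_sampler_dditable_t table{};
  ur_result_t result = ur::level_zero::urGetSamplerProcAddrTable(
      UR_API_VERSION_CURRENT, &table);
  if (result != UR_RESULT_SUCCESS)
    return result;
  // pfnCreate now points at ur::level_zero::urSamplerCreate.
  return table.pfnCreate(hContext, pDesc, phSampler);
}
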
-UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_virtual_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnFree = urVirtualMemFree; - pDdiTable->pfnGetInfo = urVirtualMemGetInfo; - pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; - pDdiTable->pfnMap = urVirtualMemMap; - pDdiTable->pfnReserve = urVirtualMemReserve; - pDdiTable->pfnSetAccess = urVirtualMemSetAccess; - pDdiTable->pfnUnmap = urVirtualMemUnmap; - - return retVal; + pDdiTable->pfnHostAlloc = ur::level_zero::urUSMHostAlloc; + pDdiTable->pfnDeviceAlloc = ur::level_zero::urUSMDeviceAlloc; + pDdiTable->pfnSharedAlloc = ur::level_zero::urUSMSharedAlloc; + pDdiTable->pfnFree = ur::level_zero::urUSMFree; + pDdiTable->pfnGetMemAllocInfo = ur::level_zero::urUSMGetMemAllocInfo; + pDdiTable->pfnPoolCreate = ur::level_zero::urUSMPoolCreate; + pDdiTable->pfnPoolRetain = ur::level_zero::urUSMPoolRetain; + pDdiTable->pfnPoolRelease = ur::level_zero::urUSMPoolRelease; + pDdiTable->pfnPoolGetInfo = ur::level_zero::urUSMPoolGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_physical_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urPhysicalMemCreate; - pDdiTable->pfnRelease = urPhysicalMemRelease; - pDdiTable->pfnRetain = urPhysicalMemRetain; + pDdiTable->pfnPitchedAllocExp = ur::level_zero::urUSMPitchedAllocExp; + pDdiTable->pfnImportExp = ur::level_zero::urUSMImportExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urUSMReleaseExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( - ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnCooperativeKernelLaunchExp = - urEnqueueCooperativeKernelLaunchExp; - pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; - pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; + pDdiTable->pfnEnablePeerAccessExp = + ur::level_zero::urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = + ur::level_zero::urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = + ur::level_zero::urUsmP2PPeerAccessGetInfoExp; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( - ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL 
urGetVirtualMemProcAddrTable( + ur_api_version_t version, ur_virtual_mem_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = - urKernelSuggestMaxCooperativeGroupCountExp; + pDdiTable->pfnGranularityGetInfo = + ur::level_zero::urVirtualMemGranularityGetInfo; + pDdiTable->pfnReserve = ur::level_zero::urVirtualMemReserve; + pDdiTable->pfnFree = ur::level_zero::urVirtualMemFree; + pDdiTable->pfnMap = ur::level_zero::urVirtualMemMap; + pDdiTable->pfnUnmap = ur::level_zero::urVirtualMemUnmap; + pDdiTable->pfnSetAccess = ur::level_zero::urVirtualMemSetAccess; + pDdiTable->pfnGetInfo = ur::level_zero::urVirtualMemGetInfo; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( + ur_api_version_t version, ur_device_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnBuildExp = urProgramBuildExp; - pDdiTable->pfnCompileExp = urProgramCompileExp; - pDdiTable->pfnLinkExp = urProgramLinkExp; - - return UR_RESULT_SUCCESS; + pDdiTable->pfnGet = ur::level_zero::urDeviceGet; + pDdiTable->pfnGetInfo = ur::level_zero::urDeviceGetInfo; + pDdiTable->pfnRetain = ur::level_zero::urDeviceRetain; + pDdiTable->pfnRelease = ur::level_zero::urDeviceRelease; + pDdiTable->pfnPartition = ur::level_zero::urDevicePartition; + pDdiTable->pfnSelectBinary = ur::level_zero::urDeviceSelectBinary; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urDeviceGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urDeviceCreateWithNativeHandle; + pDdiTable->pfnGetGlobalTimestamps = + ur::level_zero::urDeviceGetGlobalTimestamps; + + return result; } -#if defined(__cplusplus) + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +} // namespace ur::level_zero +#elif defined(__cplusplus) } // extern "C" #endif + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + + result = ur::level_zero::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Global); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetBindlessImagesExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetCommandBufferExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->CommandBufferExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Context); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Enqueue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->EnqueueExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Event); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Kernel); + if (result != UR_RESULT_SUCCESS) + return result; 
+ result = ur::level_zero::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->KernelExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->PhysicalMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Platform); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Program); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->ProgramExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Queue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Sampler); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->USMExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->UsmP2PExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->VirtualMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Device); + if (result != UR_RESULT_SUCCESS) + return result; + + return result; +} +} // namespace ur::level_zero +#endif diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp new file mode 100644 index 0000000000..f95625dd5b --- /dev/null +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -0,0 +1,706 @@ +//===--------- ur_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +namespace ur::level_zero { +ur_result_t urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, + uint32_t *pNumAdapters); +ur_result_t urAdapterRelease(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterRetain(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterGetLastError(ur_adapter_handle_t hAdapter, + const char **ppMessage, int32_t *pError); +ur_result_t urAdapterGetInfo(ur_adapter_handle_t hAdapter, + ur_adapter_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGet(ur_adapter_handle_t *phAdapters, uint32_t NumAdapters, + uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, + uint32_t *pNumPlatforms); +ur_result_t urPlatformGetInfo(ur_platform_handle_t hPlatform, + ur_platform_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGetApiVersion(ur_platform_handle_t hPlatform, + ur_api_version_t *pVersion); +ur_result_t urPlatformGetNativeHandle(ur_platform_handle_t hPlatform, + ur_native_handle_t *phNativePlatform); +ur_result_t urPlatformCreateWithNativeHandle( + ur_native_handle_t hNativePlatform, ur_adapter_handle_t hAdapter, + const ur_platform_native_properties_t *pProperties, + ur_platform_handle_t *phPlatform); +ur_result_t urPlatformGetBackendOption(ur_platform_handle_t hPlatform, + const char *pFrontendOption, + const char **ppPlatformOption); +ur_result_t urDeviceGet(ur_platform_handle_t hPlatform, + ur_device_type_t DeviceType, uint32_t NumEntries, + ur_device_handle_t *phDevices, uint32_t *pNumDevices); +ur_result_t urDeviceGetInfo(ur_device_handle_t hDevice, + ur_device_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urDeviceRetain(ur_device_handle_t hDevice); +ur_result_t urDeviceRelease(ur_device_handle_t hDevice); +ur_result_t +urDevicePartition(ur_device_handle_t hDevice, + const ur_device_partition_properties_t *pProperties, + uint32_t NumDevices, ur_device_handle_t *phSubDevices, + uint32_t *pNumDevicesRet); +ur_result_t urDeviceSelectBinary(ur_device_handle_t hDevice, + const ur_device_binary_t *pBinaries, + uint32_t NumBinaries, + uint32_t *pSelectedBinary); +ur_result_t urDeviceGetNativeHandle(ur_device_handle_t hDevice, + ur_native_handle_t *phNativeDevice); +ur_result_t +urDeviceCreateWithNativeHandle(ur_native_handle_t hNativeDevice, + ur_adapter_handle_t hAdapter, + const ur_device_native_properties_t *pProperties, + ur_device_handle_t *phDevice); +ur_result_t urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, + uint64_t *pDeviceTimestamp, + uint64_t *pHostTimestamp); +ur_result_t urContextCreate(uint32_t DeviceCount, + const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext); +ur_result_t urContextRetain(ur_context_handle_t hContext); +ur_result_t urContextRelease(ur_context_handle_t hContext); +ur_result_t urContextGetInfo(ur_context_handle_t hContext, + ur_context_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext); +ur_result_t urContextCreateWithNativeHandle( + ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter, + uint32_t numDevices, const ur_device_handle_t *phDevices, + const ur_context_native_properties_t 
*pProperties, + ur_context_handle_t *phContext); +ur_result_t +urContextSetExtendedDeleter(ur_context_handle_t hContext, + ur_context_extended_deleter_t pfnDeleter, + void *pUserData); +ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, void *pHost, + ur_mem_handle_t *phMem); +ur_result_t urMemBufferCreate(ur_context_handle_t hContext, + ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, + ur_mem_handle_t *phBuffer); +ur_result_t urMemRetain(ur_mem_handle_t hMem); +ur_result_t urMemRelease(ur_mem_handle_t hMem); +ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, + const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem); +ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, + ur_device_handle_t hDevice, + ur_native_handle_t *phNativeMem); +ur_result_t urMemBufferCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemImageCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler); +ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler); +ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler); +ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, + ur_sampler_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler, + ur_native_handle_t *phNativeSampler); +ur_result_t urSamplerCreateWithNativeHandle( + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, + const ur_sampler_native_properties_t *pProperties, + ur_sampler_handle_t *phSampler); +ur_result_t urUSMHostAlloc(ur_context_handle_t hContext, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMDeviceAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMSharedAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMFree(ur_context_handle_t hContext, void *pMem); +ur_result_t urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, + ur_usm_alloc_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urUSMPoolCreate(ur_context_handle_t hContext, + ur_usm_pool_desc_t *pPoolDesc, + ur_usm_pool_handle_t *ppPool); +ur_result_t urUSMPoolRetain(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolGetInfo(ur_usm_pool_handle_t hPool, + ur_usm_pool_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); 
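The header above declares every entry point once inside namespace ur::level_zero; each adapter source file then defines the matching function in that namespace (as sampler.cpp does earlier in this patch) and ur_interface_loader.cpp takes its address when filling the DDI tables. A compressed, hypothetical sketch of that definition pattern follows; the body is a placeholder, not the adapter's real implementation.

#include "ur_interface_loader.hpp"

namespace ur::level_zero {

// Sketch only: a definition matching its declaration in ur_interface_loader.hpp.
// The real urUSMPoolRetain increments the pool's reference count.
ur_result_t urUSMPoolRetain(ur_usm_pool_handle_t pPool) {
  if (!pPool)
    return UR_RESULT_ERROR_INVALID_NULL_HANDLE;
  // ...reference-count bookkeeping elided...
  return UR_RESULT_SUCCESS;
}

} // namespace ur::level_zero
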
+ur_result_t urVirtualMemGranularityGetInfo( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_virtual_mem_granularity_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart); +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, size_t size, + const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem); +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, + size_t length, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramCreateWithBinary( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramBuild(ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions); +ur_result_t urProgramCompile(ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const char *pOptions); +ur_result_t urProgramLink(ur_context_handle_t hContext, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, ur_program_handle_t *phProgram); +ur_result_t urProgramRetain(ur_program_handle_t hProgram); +ur_result_t urProgramRelease(ur_program_handle_t hProgram); +ur_result_t urProgramGetFunctionPointer(ur_device_handle_t hDevice, + ur_program_handle_t hProgram, + const char *pFunctionName, + void **ppFunctionPointer); +ur_result_t urProgramGetGlobalVariablePointer( + ur_device_handle_t hDevice, ur_program_handle_t hProgram, + const char *pGlobalVariableName, size_t *pGlobalVariableSizeRet, + void **ppGlobalVariablePointerRet); +ur_result_t urProgramGetInfo(ur_program_handle_t hProgram, + ur_program_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urProgramGetBuildInfo(ur_program_handle_t hProgram, + ur_device_handle_t hDevice, + ur_program_build_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t hProgram, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urProgramGetNativeHandle(ur_program_handle_t hProgram, + ur_native_handle_t *phNativeProgram); +ur_result_t urProgramCreateWithNativeHandle( + ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, + const ur_program_native_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urKernelCreate(ur_program_handle_t hProgram, + 
const char *pKernelName, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelSetArgValue( + ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, + const ur_kernel_arg_value_properties_t *pProperties, const void *pArgValue); +ur_result_t +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, + const ur_kernel_arg_local_properties_t *pProperties); +ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelRetain(ur_kernel_handle_t hKernel); +ur_result_t urKernelRelease(ur_kernel_handle_t hKernel); +ur_result_t +urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_pointer_properties_t *pProperties, + const void *pArgValue); +ur_result_t +urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, + size_t propSize, + const ur_kernel_exec_info_properties_t *pProperties, + const void *pPropValue); +ur_result_t +urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_sampler_properties_t *pProperties, + ur_sampler_handle_t hArgValue); +ur_result_t +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *pProperties, + ur_mem_handle_t hArgValue); +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t hKernel, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, + ur_native_handle_t *phNativeKernel); +ur_result_t +urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, + ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel, + ur_queue_handle_t hQueue, + uint32_t numWorkDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + size_t *pSuggestedLocalWorkSize); +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueRetain(ur_queue_handle_t hQueue); +ur_result_t urQueueRelease(ur_queue_handle_t hQueue); +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue); +ur_result_t urQueueCreateWithNativeHandle( + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueFinish(ur_queue_handle_t hQueue); +ur_result_t urQueueFlush(ur_queue_handle_t hQueue); +ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent, + 
ur_profiling_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventWait(uint32_t numEvents, + const ur_event_handle_t *phEventWaitList); +ur_result_t urEventRetain(ur_event_handle_t hEvent); +ur_result_t urEventRelease(ur_event_handle_t hEvent); +ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, + ur_native_handle_t *phNativeEvent); +ur_result_t +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent); +ur_result_t urEventSetCallback(ur_event_handle_t hEvent, + ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData); +ur_result_t urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWriteRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t 
offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap); +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t 
urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch); +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesImageAllocateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem); +ur_result_t urBindlessImagesUnsampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesSampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, const void *pSrc, void *pDst, + const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, + const ur_image_format_t *pSrcImageFormat, + const ur_image_format_t *pDstImageFormat, + ur_exp_image_copy_region_t *pCopyRegion, + ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesImageGetInfoExp( + ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, + ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet); +ur_result_t urBindlessImagesMipmapGetLevelExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem); +ur_result_t urBindlessImagesImportExternalMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + ur_exp_external_mem_type_t memHandleType, + ur_exp_external_mem_desc_t *pExternalMemDesc, + ur_exp_external_mem_handle_t *phExternalMem); +ur_result_t urBindlessImagesMapExternalArrayExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + 
ur_exp_external_mem_handle_t hExternalMem, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, + uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem); +ur_result_t urBindlessImagesReleaseExternalMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_mem_handle_t hExternalMem); +ur_result_t urBindlessImagesImportExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_semaphore_type_t semHandleType, + ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, + ur_exp_external_semaphore_handle_t *phExternalSemaphore); +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_semaphore_handle_t hExternalSemaphore); +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer); +ur_result_t +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t urCommandBufferAppendKernelLaunchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand); +ur_result_t urCommandBufferAppendUSMMemcpyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, + const void *pPattern, size_t patternSize, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, const void *pSrc, + uint32_t 
numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferEnqueueExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch); +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, 
+ ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, size_t localWorkSize, + size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet); +ur_result_t urEnqueueTimestampRecordingExp( + ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urProgramBuildExp(ur_program_handle_t hProgram, uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramCompileExp(ur_program_handle_t hProgram, + uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramLinkExp(ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, + ur_program_handle_t *phProgram); +ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem, + size_t size); +ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem); +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEnqueueNativeCommandExp( + ur_queue_handle_t hQueue, + ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, + uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, + const ur_exp_enqueue_native_command_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index 096ae076f9..36965c5d58 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 1069ec78da..5296391794 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -17,6 +17,7 @@ #include "usm.hpp" #include 
"logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" @@ -296,7 +297,9 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( +namespace ur::level_zero { + +ur_result_t urUSMHostAlloc( ur_context_handle_t Context, ///< [in] handle of the context object const ur_usm_desc_t *USMDesc, ///< [in][optional] USM memory allocation descriptor @@ -335,7 +338,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -368,7 +371,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( +ur_result_t urUSMDeviceAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -410,7 +413,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -448,7 +451,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -513,7 +516,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } umf_memory_pool_handle_t hPoolInternal = nullptr; @@ -555,9 +558,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( - ur_context_handle_t Context, ///< [in] handle of the context object - void *Mem ///< [in] pointer to USM memory object +ur_result_t +urUSMFree(ur_context_handle_t Context, ///< [in] handle of the context object + void *Mem ///< [in] pointer to USM memory object ) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -567,7 +570,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( return USMFreeHelper(Context, Mem); } -UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( +ur_result_t urUSMGetMemAllocInfo( ur_context_handle_t Context, ///< [in] handle of the context object const void *Ptr, ///< [in] pointer to USM memory object ur_usm_alloc_info_t @@ -667,6 +670,103 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( return UR_RESULT_SUCCESS; } +ur_result_t urUSMPoolCreate( + ur_context_handle_t Context, ///< [in] handle of the context object + ur_usm_pool_desc_t + *PoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with
+                   ///< ::ur_usm_pool_limits_desc_t
+    ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool
+) {
+
+  try {
+    *Pool = reinterpret_cast<ur_usm_pool_handle_t>(
+        new ur_usm_pool_handle_t_(Context, PoolDesc));
+
+    std::shared_lock ContextLock(Context->Mutex);
+    Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool);
+
+  } catch (const UsmAllocationException &Ex) {
+    return Ex.getError();
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t
+urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
+) {
+  Pool->RefCount.increment();
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t
+urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool
+) {
+  if (Pool->RefCount.decrementAndTest()) {
+    std::shared_lock ContextLock(Pool->Context->Mutex);
+    Pool->Context->UsmPoolHandles.remove(Pool);
+    delete Pool;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urUSMPoolGetInfo(
+    ur_usm_pool_handle_t Pool,   ///< [in] handle of the USM memory pool
+    ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query
+    size_t PropSize, ///< [in] size in bytes of the pool property value provided
+    void *PropValue, ///< [out][typename(propName, propSize)] value of the pool
+                     ///< property
+    size_t *PropSizeRet ///< [out] size in bytes returned in pool property value
+) {
+  UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet);
+
+  switch (PropName) {
+  case UR_USM_POOL_INFO_REFERENCE_COUNT: {
+    return ReturnValue(Pool->RefCount.load());
+  }
+  case UR_USM_POOL_INFO_CONTEXT: {
+    return ReturnValue(Pool->Context);
+  }
+  default: {
+    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
+  }
+  }
+}
+
+ur_result_t urUSMImportExp(ur_context_handle_t Context, void *HostPtr,
+                           size_t Size) {
+  UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT);
+
+  // Promote the host ptr to USM host memory.
+  if (ZeUSMImport.Supported && HostPtr != nullptr) {
+    // Query memory type of the host pointer
+    ze_device_handle_t ZeDeviceHandle;
+    ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties;
+    ZE2UR_CALL(zeMemGetAllocProperties,
+               (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties,
+                &ZeDeviceHandle));
+
+    // If not shared of any type, we can import the ptr
+    if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) {
+      // Promote the host ptr to USM host memory
+      ze_driver_handle_t driverHandle =
+          Context->getPlatform()->ZeDriverHandleExpTranslated;
+      ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size);
+    }
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) {
+  UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT);
+
+  // Release the imported memory.
+  if (ZeUSMImport.Supported && HostPtr != nullptr)
+    ZeUSMImport.doZeUSMRelease(
+        Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr);
+  return UR_RESULT_SUCCESS;
+}
+} // namespace ur::level_zero
+
 static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) {
   auto ZeResult = ZE_CALL_NOCHECK(zeMemFree, (Context->ZeContext, Ptr));
   // Handle When the driver is already released
@@ -972,68 +1072,6 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
   }
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate(
-    ur_context_handle_t Context, ///< [in] handle of the context object
-    ur_usm_pool_desc_t
-        *PoolDesc, ///< [in] pointer to USM pool descriptor.
Can be chained with - ///< ::ur_usm_pool_limits_desc_t - ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool -) { - - try { - *Pool = reinterpret_cast( - new ur_usm_pool_handle_t_(Context, PoolDesc)); - - std::shared_lock ContextLock(Context->Mutex); - Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); - - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - Pool->RefCount.increment(); - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - if (Pool->RefCount.decrementAndTest()) { - std::shared_lock ContextLock(Pool->Context->Mutex); - Pool->Context->UsmPoolHandles.remove(Pool); - delete Pool; - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urUSMPoolGetInfo( - ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool - ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query - size_t PropSize, ///< [in] size in bytes of the pool property value provided - void *PropValue, ///< [out][typename(propName, propSize)] value of the pool - ///< property - size_t *PropSizeRet ///< [out] size in bytes returned in pool property value -) { - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); - - switch (PropName) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: { - return ReturnValue(Pool->RefCount.load()); - } - case UR_USM_POOL_INFO_CONTEXT: { - return ReturnValue(Pool->Context); - } - default: { - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - } -} - // If indirect access tracking is not enabled then this functions just performs // zeMemFree. If indirect access tracking is enabled then reference counting is // performed. @@ -1116,38 +1154,3 @@ ur_result_t USMFreeHelper(ur_context_handle_t Context, void *Ptr, UR_CALL(ContextReleaseHelper(Context)); return umf2urResult(umfRet); } - -UR_APIEXPORT ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t Context, - void *HostPtr, size_t Size) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Promote the host ptr to USM host memory. - if (ZeUSMImport.Supported && HostPtr != nullptr) { - // Query memory type of the host pointer - ze_device_handle_t ZeDeviceHandle; - ZeStruct ZeMemoryAllocationProperties; - ZE2UR_CALL(zeMemGetAllocProperties, - (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties, - &ZeDeviceHandle)); - - // If not shared of any type, we can import the ptr - if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { - // Promote the host ptr to USM host memory - ze_driver_handle_t driverHandle = - Context->getPlatform()->ZeDriverHandleExpTranslated; - ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size); - } - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, - void *HostPtr) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Release the imported memory. 
- if (ZeUSMImport.Supported && HostPtr != nullptr) - ZeUSMImport.doZeUSMRelease( - Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/usm_p2p.cpp b/source/adapters/level_zero/usm_p2p.cpp index 2b81828423..6e701aa803 100644 --- a/source/adapters/level_zero/usm_p2p.cpp +++ b/source/adapters/level_zero/usm_p2p.cpp @@ -11,8 +11,10 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +namespace ur::level_zero { + +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -21,8 +23,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -31,10 +33,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, - ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); @@ -69,3 +72,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return ReturnValue(propertyValue); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index dc52874364..eba7359379 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -17,13 +17,14 @@ std::mutex ZeCall::GlobalLock; -ur_result_t UR_APICALL urContextGetNativeHandle( - ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { +namespace ur::level_zero { +ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urContextCreateWithNativeHandle( +ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter, uint32_t numDevices, const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, @@ -32,62 +33,63 @@ ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urContextSetExtendedDeleter( - ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter, - void *pUserData) { +ur_result_t +urContextSetExtendedDeleter(ur_context_handle_t hContext, + ur_context_extended_deleter_t pfnDeleter, + void *pUserData) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageCreate(ur_context_handle_t hContext, - ur_mem_flags_t flags, - const ur_image_format_t *pImageFormat, - const 
ur_image_desc_t *pImageDesc, - void *pHost, ur_mem_handle_t *phMem) { +ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, void *pHost, + ur_mem_handle_t *phMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemBufferCreate( - ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, - const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { +ur_result_t urMemBufferCreate(ur_context_handle_t hContext, + ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, + ur_mem_handle_t *phBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { +ur_result_t urMemRetain(ur_mem_handle_t hMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { +ur_result_t urMemRelease(ur_mem_handle_t hMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemBufferPartition( - ur_mem_handle_t hBuffer, ur_mem_flags_t flags, - ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, - ur_mem_handle_t *phMem) { +ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, + const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemGetNativeHandle(ur_mem_handle_t hMem, - ur_device_handle_t hDevice, - ur_native_handle_t *phNativeMem) { +ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, + ur_device_handle_t hDevice, + ur_native_handle_t *phNativeMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( +ur_result_t urMemBufferCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( +ur_result_t urMemImageCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { @@ -95,53 +97,51 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, - ur_mem_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, - ur_image_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t 
propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerCreate(ur_context_handle_t hContext, - const ur_sampler_desc_t *pDesc, - ur_sampler_handle_t *phSampler) { +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { +ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { +ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler, - ur_sampler_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, + ur_sampler_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerGetNativeHandle( - ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { +ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler, + ur_native_handle_t *phNativeSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( +ur_result_t urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { @@ -149,7 +149,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( +ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -157,111 +157,109 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t hContext, - const void *pStart, size_t size, - void **ppStart) { +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t hContext, - const void *pStart, size_t size) { +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemMap(ur_context_handle_t hContext, - const void *pStart, size_t size, - ur_physical_mem_handle_t hPhysicalMem, - size_t offset, - ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t 
hPhysicalMem, + size_t offset, + ur_virtual_mem_access_flags_t flags) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t hContext, - const void *pStart, size_t size) { +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, - size_t size, ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t hContext, - const void *pStart, size_t size, - ur_virtual_mem_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urPhysicalMemCreate( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const ur_physical_mem_properties_t *pProperties, - ur_physical_mem_handle_t *phPhysicalMem) { +ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, size_t size, + const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetArgLocal( - ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_local_properties_t *pProperties) { +ur_result_t +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, + const ur_kernel_arg_local_properties_t *pProperties) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, - ur_kernel_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel, - ur_device_handle_t hDevice, - ur_kernel_group_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t 
hKernel, + ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_sub_group_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetExecInfo( - ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize, - const ur_kernel_exec_info_properties_t *pProperties, - const void *pPropValue) { +ur_result_t +urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, + size_t propSize, + const ur_kernel_exec_info_properties_t *pProperties, + const void *pPropValue) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *pProperties, ur_sampler_handle_t hArgValue) { @@ -269,7 +267,7 @@ urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *pProperties, ur_mem_handle_t hArgValue) { @@ -277,104 +275,107 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetSpecializationConstants( +ur_result_t urKernelSetSpecializationConstants( ur_kernel_handle_t hKernel, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetNativeHandle( - ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { +ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, + ur_native_handle_t *phNativeKernel) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, - ur_program_handle_t hProgram, - const ur_kernel_native_properties_t *pProperties, - ur_kernel_handle_t *phKernel) { +ur_result_t +urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, + ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( - ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t numWorkDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - size_t *pSuggestedLocalWorkSize) { +ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel, + ur_queue_handle_t hQueue, + uint32_t numWorkDim, + const size_t *pGlobalWorkOffset, + const 
size_t *pGlobalWorkSize, + size_t *pSuggestedLocalWorkSize) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, - ur_event_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetProfilingInfo(ur_event_handle_t hEvent, - ur_profiling_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent, + ur_profiling_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventWait(uint32_t numEvents, - const ur_event_handle_t *phEventWaitList) { +ur_result_t urEventWait(uint32_t numEvents, + const ur_event_handle_t *phEventWaitList) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetNativeHandle( - ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { +ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, + ur_native_handle_t *phNativeEvent) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventCreateWithNativeHandle( - ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { +ur_result_t +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t hEvent, - ur_execution_info_t execStatus, - ur_event_callback_t pfnNotify, - void *pUserData) { +ur_result_t urEventSetCallback(ur_event_handle_t hEvent, + ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesUnsampledImageHandleDestroyExp( +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL 
urBindlessImagesSampledImageHandleDestroyExp( +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( +ur_result_t urBindlessImagesImageAllocateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -382,14 +383,15 @@ ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem) { +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( +ur_result_t urBindlessImagesUnsampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -398,7 +400,7 @@ ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( +ur_result_t urBindlessImagesSampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -407,14 +409,14 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( +ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( +ur_result_t urBindlessImagesMipmapGetLevelExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -422,14 +424,15 @@ ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hMem) { +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( +ur_result_t urBindlessImagesImportExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, ur_exp_external_mem_type_t memHandleType, ur_exp_external_mem_desc_t *pExternalMemDesc, @@ -438,7 +441,7 @@ ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( +ur_result_t urBindlessImagesMapExternalArrayExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_external_mem_handle_t hExternalMem, @@ -447,21 +450,21 @@ ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( +ur_result_t urBindlessImagesReleaseExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_mem_handle_t hExternalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( +ur_result_t urBindlessImagesImportExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_type_t semHandleType, ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, @@ -470,40 +473,41 @@ ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_handle_t hExternalSemaphore) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - ur_exp_command_buffer_handle_t *phCommandBuffer) { +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( +ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, @@ -515,7 +519,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -524,7 +528,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( +ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, const void *pPattern, size_t patternSize, size_t size, uint32_t numSyncPointsInWaitList, @@ -534,7 +538,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +ur_result_t urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numSyncPointsInWaitList, @@ -544,7 +548,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, const void *pSrc, uint32_t numSyncPointsInWaitList, @@ -554,7 +558,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -563,7 +567,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -575,7 +579,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -587,7 +591,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -599,7 +603,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( 
+ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numSyncPointsInWaitList, @@ -609,7 +613,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( +ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, size_t size, ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, @@ -619,7 +623,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( +ur_result_t urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -628,7 +632,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferEnqueueExp( +ur_result_t urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -636,19 +640,19 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferRetainCommandExp( +ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t hCommand) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( +ur_result_t urCommandBufferReleaseCommandExp( ur_exp_command_buffer_command_handle_t hCommand) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( +ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t hCommand, const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { @@ -656,15 +660,16 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( +ur_result_t urCommandBufferCommandGetInfoExp( ur_exp_command_buffer_command_handle_t hCommand, ur_exp_command_buffer_command_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -672,41 +677,42 @@ ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, 
uint32_t *pGroupCountRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t hContext, void *pMem, - size_t size) { +ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t hContext, - void *pMem) { +ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, - ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/command_list_cache.hpp b/source/adapters/level_zero/v2/command_list_cache.hpp index 1850a4334c..bb32e0e64a 100644 --- a/source/adapters/level_zero/v2/command_list_cache.hpp +++ b/source/adapters/level_zero/v2/command_list_cache.hpp @@ -14,7 +14,7 @@ #include "latency_tracker.hpp" #include -#include +#include #include #include "../common.hpp" diff --git a/source/adapters/level_zero/v2/context.cpp b/source/adapters/level_zero/v2/context.cpp index 08032fe85e..84e3d96b88 100644 --- a/source/adapters/level_zero/v2/context.cpp +++ b/source/adapters/level_zero/v2/context.cpp @@ -72,10 +72,11 @@ bool ur_context_handle_t_::isValidDevice(ur_device_handle_t hDevice) const { return false; } -UR_APIEXPORT ur_result_t UR_APICALL -urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices, - const ur_context_properties_t *pProperties, - ur_context_handle_t *phContext) { +namespace ur::level_zero { +ur_result_t urContextCreate(uint32_t deviceCount, + const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext) { std::ignore = pProperties; ur_platform_handle_t hPlatform = phDevices[0]->Platform; @@ -89,23 +90,20 @@ urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urContextRetain(ur_context_handle_t hContext) { +ur_result_t urContextRetain(ur_context_handle_t hContext) { return hContext->retain(); } -UR_APIEXPORT ur_result_t UR_APICALL -urContextRelease(ur_context_handle_t hContext) { +ur_result_t 
urContextRelease(ur_context_handle_t hContext) { return hContext->release(); } -UR_APIEXPORT ur_result_t UR_APICALL -urContextGetInfo(ur_context_handle_t hContext, - ur_context_info_t contextInfoType, size_t propSize, +ur_result_t urContextGetInfo(ur_context_handle_t hContext, + ur_context_info_t contextInfoType, size_t propSize, - void *pContextInfo, + void *pContextInfo, - size_t *pPropSizeRet) { + size_t *pPropSizeRet) { std::shared_lock Lock(hContext->Mutex); UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet); switch ( @@ -121,3 +119,4 @@ urContextGetInfo(ur_context_handle_t hContext, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index 3129e3dd3e..8654e0b25a 100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -45,10 +45,10 @@ ur_result_t ur_event_handle_t_::release() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - return hEvent->retain(); -} +namespace ur::level_zero { +ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); } -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { +ur_result_t urEventRelease(ur_event_handle_t hEvent) { return hEvent->release(); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/event_provider_normal.hpp b/source/adapters/level_zero/v2/event_provider_normal.hpp index 238ab2f360..1260964a4f 100644 --- a/source/adapters/level_zero/v2/event_provider_normal.hpp +++ b/source/adapters/level_zero/v2/event_provider_normal.hpp @@ -23,6 +23,7 @@ #include "event.hpp" #include "../device.hpp" +#include "../ur_interface_loader.hpp" namespace v2 { @@ -50,10 +51,10 @@ class provider_normal : public event_provider { event_type etype, queue_type qtype) : producedType(etype), queueType(qtype), urContext(context), urDevice(device) { - urDeviceRetain(device); + ur::level_zero::urDeviceRetain(device); } - ~provider_normal() override { urDeviceRelease(urDevice); } + ~provider_normal() override { ur::level_zero::urDeviceRelease(urDevice); } event_allocation allocate() override; ur_device_handle_t device() override; diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index daad306b0c..e6a37af814 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -16,6 +16,7 @@ #include "../device.hpp" #include "../platform.hpp" #include "../program.hpp" +#include "../ur_interface_loader.hpp" ur_single_device_kernel_t::ur_single_device_kernel_t(ze_device_handle_t hDevice, ze_kernel_handle_t hKernel, @@ -36,7 +37,7 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *kernelName) : hProgram(hProgram), deviceKernels(hProgram->Context->getPlatform()->getNumDevices()) { - urProgramRetain(hProgram); + ur::level_zero::urProgramRetain(hProgram); for (auto [zeDevice, zeModule] : hProgram->ZeModuleMap) { ZeStruct zeKernelDesc; @@ -81,7 +82,7 @@ ur_result_t ur_kernel_handle_t_::release() { } } - UR_CALL_THROWS(urProgramRelease(hProgram)); + UR_CALL_THROWS(ur::level_zero::urProgramRelease(hProgram)); return UR_RESULT_SUCCESS; } @@ -196,21 +197,22 @@ ur_program_handle_t ur_kernel_handle_t_::getProgramHandle() const { return hProgram; } -UR_APIEXPORT ur_result_t UR_APICALL -urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, - 
ur_kernel_handle_t *phKernel) { +namespace ur::level_zero { +ur_result_t urKernelCreate(ur_program_handle_t hProgram, + const char *pKernelName, + ur_kernel_handle_t *phKernel) { *phKernel = new ur_kernel_handle_t_(hProgram, pKernelName); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( +ur_result_t urKernelRetain( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to retain ) { hKernel->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( +ur_result_t urKernelRelease( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to release ) { if (!hKernel->RefCount.decrementAndTest()) @@ -222,7 +224,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( +ur_result_t urKernelSetArgValue( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] size_t argSize, ///< [in] size of argument type @@ -235,7 +237,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return hKernel->setArgValue(argIndex, argSize, pProperties, pArgValue); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( +ur_result_t urKernelSetArgPointer( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_pointer_properties_t @@ -246,3 +248,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgPointer"); return hKernel->setArgPointer(argIndex, pProperties, pArgValue); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index 188f7c3102..ea2e931bfe 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -14,31 +14,30 @@ ur_queue_handle_t_::~ur_queue_handle_t_() {} -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +namespace ur::level_zero { +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { +ur_result_t urQueueRetain(ur_queue_handle_t hQueue) { return hQueue->queueRetain(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { +ur_result_t urQueueRelease(ur_queue_handle_t hQueue) { return hQueue->queueRelease(); } -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) { return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { +ur_result_t urQueueFinish(ur_queue_handle_t hQueue) { return hQueue->queueFinish(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { +ur_result_t urQueueFlush(ur_queue_handle_t hQueue) { return hQueue->queueFlush(); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( +ur_result_t 
urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -47,27 +46,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( +ur_result_t urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( +ur_result_t urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -75,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( +ur_result_t urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -87,7 +89,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( +ur_result_t urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -99,16 +101,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const 
ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( +ur_result_t urEnqueueMemBufferCopyRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -120,16 +124,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( +ur_result_t urEnqueueMemImageRead( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, @@ -138,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( +ur_result_t urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, @@ -147,78 +153,85 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, region, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - 
ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap) { return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset, size, numEventsInWaitList, phEventWaitList, phEvent, ppRetMap); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *pMem, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t 
*phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( +ur_result_t urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -227,7 +240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( +ur_result_t urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -236,25 +249,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, 
size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( +ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, @@ -267,7 +284,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -275,7 +292,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -283,7 +300,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( +ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -292,13 +309,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( +ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( +ur_result_t urEnqueueKernelLaunchCustomExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, @@ -310,7 +327,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( numPropsInLaunchPropList, launchPropList, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( +ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, @@ -321,3 +338,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, numEventsInWaitList, phEventWaitList, phEvent); } +} // namespace ur::level_zero \ No newline at end of file diff --git a/source/adapters/level_zero/v2/queue_create.cpp 
b/source/adapters/level_zero/v2/queue_create.cpp index 938dd5cb64..c72320842b 100644 --- a/source/adapters/level_zero/v2/queue_create.cpp +++ b/source/adapters/level_zero/v2/queue_create.cpp @@ -17,16 +17,18 @@ #include #include -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { +namespace ur::level_zero { +ur_result_t urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, + ur_queue_handle_t *phQueue) { // TODO: For now, always use immediate, in-order *phQueue = new v2::ur_queue_immediate_in_order_t(hContext, hDevice, pProperties); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( +ur_result_t urQueueCreateWithNativeHandle( ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { @@ -38,3 +40,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index 6ac5f0f3b6..3706fe21e4 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -22,7 +22,8 @@ ur_context_handle_t ur_usm_pool_handle_t_::getContextHandle() const { return hContext; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( +namespace ur::level_zero { +ur_result_t urUSMPoolCreate( ur_context_handle_t hContext, ///< [in] handle of the context object ur_usm_pool_desc_t * pPoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with @@ -34,15 +35,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain( - ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool +ur_result_t +urUSMPoolRetain(ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool ) { hPool->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( - ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool +ur_result_t +urUSMPoolRelease(ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool ) { if (hPool->RefCount.decrementAndTest()) { delete hPool; @@ -50,7 +51,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( +ur_result_t urUSMPoolGetInfo( ur_usm_pool_handle_t hPool, ///< [in] handle of the USM memory pool ur_usm_pool_info_t propName, ///< [in] name of the pool property to query size_t propSize, ///< [in] size in bytes of the pool property value provided @@ -74,7 +75,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( } } -UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( +ur_result_t urUSMDeviceAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object ur_device_handle_t hDevice, ///< [in] handle of the device object const ur_usm_desc_t @@ -96,7 +97,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object ur_device_handle_t hDevice, ///< [in] handle of the device object const ur_usm_desc_t @@ -121,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( +ur_result_t urUSMHostAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object const ur_usm_desc_t *pUSMDesc, ///< [in][optional] USM memory allocation descriptor @@ -142,9 +143,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( - ur_context_handle_t hContext, ///< [in] handle of the context object - void *pMem ///< [in] pointer to USM memory object +ur_result_t +urUSMFree(ur_context_handle_t hContext, ///< [in] handle of the context object + void *pMem ///< [in] pointer to USM memory object ) { std::ignore = hContext; @@ -152,7 +153,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( +ur_result_t urUSMGetMemAllocInfo( ur_context_handle_t hContext, ///< [in] handle of the context object const void *ptr, ///< [in] pointer to USM memory object ur_usm_alloc_info_t @@ -223,3 +224,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( } return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index e3b90121a1..e89899ded7 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -15,7 +15,9 @@ #include "physical_mem.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( +namespace ur::level_zero { + +ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t 
propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -39,24 +41,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, - size_t size, void **ppStart) { +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart) { ZE2UR_CALL(zeVirtualMemReserve, (hContext->ZeContext, pStart, size, ppStart)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree( - ur_context_handle_t hContext, const void *pStart, size_t size) { +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemFree, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, - size_t size, ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -69,10 +71,10 @@ urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, - ur_physical_mem_handle_t hPhysicalMem, size_t offset, - ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -86,17 +88,18 @@ urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap( - ur_context_handle_t hContext, const void *pStart, size_t size) { +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemUnmap, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( - ur_context_handle_t hContext, const void *pStart, - [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, - size_t propSize, void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, + [[maybe_unused]] size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { @@ -119,3 +122,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index fe197849b6..b2db1f5bcd 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -54,7 +54,7 @@ if(UR_OPENCL_INCLUDE_DIR) else() FetchContent_Declare(OpenCL-Headers GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-Headers.git" - 
GIT_TAG main + GIT_TAG 1e193332d02e27e15812d24ff2a3a7a908eb92a3 ) FetchContent_MakeAvailable(OpenCL-Headers) FetchContent_GetProperties(OpenCL-Headers diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index a31d6580a0..071a3a7c5a 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -825,12 +825,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: case UR_DEVICE_INFO_LOCAL_MEM_TYPE: case UR_DEVICE_INFO_EXECUTION_CAPABILITIES: - case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: - case UR_DEVICE_INFO_USM_HOST_SUPPORT: - case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: - case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: - case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: - case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ @@ -844,6 +839,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * types are uint32_t */ return ReturnValue(static_cast(CLValue)); } + case UR_DEVICE_INFO_USM_HOST_SUPPORT: + case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: + case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT: + case UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT: { + /* CL type: cl_bitfield / enum + * UR type: ur_flags_t (uint32_t) */ + bool Supported = false; + UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_unified_shared_memory"}, Supported)); + if (Supported) { + cl_bitfield CLValue = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + sizeof(cl_bitfield), &CLValue, nullptr)); + return ReturnValue(static_cast(CLValue)); + } else { + return ReturnValue(0); + } + } case UR_DEVICE_INFO_IMAGE_SUPPORTED: case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY: @@ -918,8 +934,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_VERSION: case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: case UR_DEVICE_INFO_BUILT_IN_KERNELS: - case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - case UR_DEVICE_INFO_IP_VERSION: { + case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { /* We can just use the OpenCL outputs because the sizes of OpenCL types * are the same as UR. 
* | CL | UR | Size | @@ -937,7 +952,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_IP_VERSION: { + bool Supported; + UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_device_attribute_query"}, Supported)); + if (!Supported) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + propSize, pPropValue, pPropSizeRet)); + + return UR_RESULT_SUCCESS; + } + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { + bool isExtensionSupported; + if (cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_required_subgroup_size"}, + isExtensionSupported) != UR_RESULT_SUCCESS || + !isExtensionSupported) { + std::vector aThreadIsItsOwnSubGroup({1}); + return ReturnValue(aThreadIsItsOwnSubGroup.data(), + aThreadIsItsOwnSubGroup.size()); + } + // Have to convert size_t to uint32_t size_t SubGroupSizesSize = 0; CL_RETURN_ON_FAILURE( diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 01f6a34325..0cb3777601 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -36,6 +36,13 @@ FetchContent_Declare(unified-memory-framework GIT_TAG ${UMF_TAG} ) +if (UR_STATIC_ADAPTER_L0) + if (UMF_BUILD_SHARED_LIBRARY) + message(STATUS "Static adapter is not compatible with shared UMF, switching to fully statically linked UMF") + set(UMF_BUILD_SHARED_LIBRARY OFF) + endif() +endif() + set(UMF_BUILD_TESTS OFF CACHE INTERNAL "Build UMF tests") set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "Build UMF examples") # TODO: L0 provider not ready yet @@ -85,9 +92,9 @@ install(TARGETS ur_common add_library(ur_umf INTERFACE) target_sources(ur_umf INTERFACE - umf_helpers.hpp - umf_pools/disjoint_pool_config_parser.cpp - ur_pool_manager.hpp + $ + $ + $ ) add_library(${PROJECT_NAME}::umf ALIAS ur_umf) diff --git a/source/common/latency_tracker.hpp b/source/common/latency_tracker.hpp index 03ed6b6443..bf20e3819f 100644 --- a/source/common/latency_tracker.hpp +++ b/source/common/latency_tracker.hpp @@ -42,6 +42,7 @@ static constexpr double percentiles[numPercentiles] = { 50.0, 90.0, 99.0, 99.9, 99.99, 99.999, 99.9999}; struct latencyValues { + int64_t count; int64_t min; int64_t max; int64_t mean; @@ -54,6 +55,7 @@ using histogram_ptr = static inline latencyValues getValues(const struct hdr_histogram *histogram) { latencyValues values; + values.count = histogram->total_count; values.max = hdr_max(histogram); values.min = hdr_min(histogram); values.mean = static_cast(hdr_mean(histogram)); @@ -92,13 +94,16 @@ class latency_printer { for (auto &[name, histogram] : values) { auto value = getValues(histogram.get()); - logger.log(logger::Level::INFO, - "{},{},{},{},{},{},{},{},{},{},{},{},ns", name, - value.min, value.max, value.mean, value.stddev, - value.percentileValues[0], value.percentileValues[1], - value.percentileValues[2], value.percentileValues[3], - value.percentileValues[4], value.percentileValues[5], - value.percentileValues[6]); + auto f = groupDigits; + logger.log( + logger::Level::INFO, + "{},{},{},{},{},{},{},{},{},{},{},{},{},{},ns", name, + f(value.mean), f(value.percentileValues[0]), + f(value.percentileValues[1]), f(value.percentileValues[2]), + f(value.percentileValues[3]), f(value.percentileValues[4]), + f(value.percentileValues[5]), f(value.percentileValues[6]), + f(value.count), f(value.count * value.mean), f(value.min), + f(value.max), 
value.stddev); } } @@ -106,7 +111,8 @@ class latency_printer { inline void printHeader() { logger.log(logger::Level::INFO, "Latency histogram:"); logger.log(logger::Level::INFO, - "name,min,max,mean,stdev,p{},p{},p{},p{},p{},p{},p{},unit", + "name,mean,p{},p{},p{},p{},p{},p{}" + ",p{},count,sum,min,max,stdev,unit", percentiles[0], percentiles[1], percentiles[2], percentiles[3], percentiles[4], percentiles[5], percentiles[6]); diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 0475cf31e4..0ede3c93dc 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -60,12 +60,14 @@ int ur_duplicate_fd(int pid, int fd_in); /////////////////////////////////////////////////////////////////////////////// #if defined(_WIN32) #define MAKE_LIBRARY_NAME(NAME, VERSION) NAME ".dll" +#define STATIC_LIBRARY_EXTENSION ".lib" #else #if defined(__APPLE__) #define MAKE_LIBRARY_NAME(NAME, VERSION) "lib" NAME "." VERSION ".dylib" #else #define MAKE_LIBRARY_NAME(NAME, VERSION) "lib" NAME ".so." VERSION #endif +#define STATIC_LIBRARY_EXTENSION ".a" #endif inline std::string create_library_path(const char *name, const char *path) { @@ -478,6 +480,25 @@ template class AtomicSingleton { } }; +template +static inline std::string groupDigits(Numeric numeric) { + auto number = std::to_string(numeric); + std::string sign = numeric >= 0 ? "" : "-"; + auto digits = number.substr(sign.size(), number.size() - sign.size()); + + std::string separated; + + for (size_t i = 0; i < digits.size(); i++) { + separated.push_back(digits[i]); + + if (i != digits.size() - 1 && (digits.size() - i - 1) % 3 == 0) { + separated.push_back('\''); + } + } + + return sign + separated; +} + template Spinlock> AtomicSingleton::instance; #endif /* UR_UTIL_H */ diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index af05c81767..48329cfb37 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -47,6 +47,7 @@ add_library(${PROJECT_NAME}::loader ALIAS ur_loader) target_include_directories(ur_loader PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/layers ) @@ -60,6 +61,13 @@ target_link_libraries(ur_loader PRIVATE ${PROJECT_NAME}::headers ) +if(UR_STATIC_ADAPTER_L0) + target_link_libraries(ur_loader PRIVATE + ur_adapter_level_zero + ) + target_compile_definitions(ur_loader PRIVATE UR_STATIC_ADAPTER_LEVEL_ZERO) +endif() + if(UR_ENABLE_TRACING) target_link_libraries(ur_loader PRIVATE ${TARGET_XPTI}) target_include_directories(ur_loader PRIVATE ${xpti_SOURCE_DIR}/include) diff --git a/source/loader/ur_adapter_registry.hpp b/source/loader/ur_adapter_registry.hpp index 25cd9a9fff..7df799ab1e 100644 --- a/source/loader/ur_adapter_registry.hpp +++ b/source/loader/ur_adapter_registry.hpp @@ -33,6 +14 @@ class AdapterRegistry { if (forceLoadedAdaptersOpt.has_value()) { for (const auto &s : forceLoadedAdaptersOpt.value()) { auto path = fs::path(s); + if (path.filename().extension() == STATIC_LIBRARY_EXTENSION) { + logger::warning( + "UR_ADAPTERS_FORCE_LOAD contains a path to a static " + "library {}, it will be skipped", + s); + continue; + } + bool exists = false; try { exists = fs::exists(path); @@ -41,11 +49,12 @@ } if (exists) { + forceLoaded = true; adaptersLoadPaths.emplace_back( std::vector{std::move(path)}); } else { logger::warning( - "Detected nonexistent path {} in environmental " + "Detected nonexistent path {} in environment " "variable UR_ADAPTERS_FORCE_LOAD", s); } @@ -92,6 +101,8 @@ class AdapterRegistry { size_t size() const noexcept { return adaptersLoadPaths.size(); } + bool adaptersForceLoaded() { return forceLoaded; } + std::vector>::const_iterator begin() const noexcept { return adaptersLoadPaths.begin(); } @@ -152,10 +163,123 @@ class AdapterRegistry { return paths.empty() ? std::nullopt : std::optional(paths); } + ur_result_t readPreFilterODS(std::string platformBackendName) { + // TODO: Refactor this into common code so that both the prefilter and urDeviceGetSelected use the same functionality. + bool acceptLibrary = true; + std::optional odsEnvMap; + try { + odsEnvMap = getenv_to_map("ONEAPI_DEVICE_SELECTOR", false); + + } catch (...) { + // If the selector is malformed, then we ignore the selector and return success. + logger::error("ERROR: missing backend, format of filter = " + "'[!]backend:filterStrings'"); + return UR_RESULT_SUCCESS; + } + logger::debug( + "getenv_to_map parsed env var and {} a map", + (odsEnvMap.has_value() ? "produced" : "failed to produce")); + + // if the ODS env var is not set at all, then pretend it was set to the default + using EnvVarMap = std::map>; + EnvVarMap mapODS = + odsEnvMap.has_value() ? odsEnvMap.value() : EnvVarMap{{"*", {"*"}}}; + for (auto &termPair : mapODS) { + std::string backend = termPair.first; + // TODO: Figure out how to process all ODS errors rather than returning + // on the first error. + if (backend.empty()) { + // FIXME: never true because getenv_to_map rejects this case + // malformed term: missing backend -- output ERROR, then continue + logger::error("ERROR: missing backend, format of filter = " + "'[!]backend:filterStrings'"); + continue; + } + logger::debug("ONEAPI_DEVICE_SELECTOR Pre-Filter with backend '{}' " + "and platform library name '{}'", + backend, platformBackendName); + enum FilterType { + AcceptFilter, + DiscardFilter, + } termType = + (backend.front() != '!') ? AcceptFilter : DiscardFilter; + logger::debug( + "termType is {}", + (termType != AcceptFilter ? 
"DiscardFilter" : "AcceptFilter")); + if (termType != AcceptFilter) { + logger::debug("DEBUG: backend was '{}'", backend); + backend.erase(backend.cbegin()); + logger::debug("DEBUG: backend now '{}'", backend); + } + + // Verify that the backend string is valid, otherwise ignore the backend. + if ((strcmp(backend.c_str(), "*") != 0) && + (strcmp(backend.c_str(), "level_zero") != 0) && + (strcmp(backend.c_str(), "opencl") != 0) && + (strcmp(backend.c_str(), "cuda") != 0) && + (strcmp(backend.c_str(), "hip") != 0)) { + logger::debug("ONEAPI_DEVICE_SELECTOR Pre-Filter with illegal " + "backend '{}' ", + backend); + continue; + } + + // case-insensitive comparison by converting both tolower + std::transform(platformBackendName.begin(), + platformBackendName.end(), + platformBackendName.begin(), + [](unsigned char c) { return std::tolower(c); }); + std::transform(backend.begin(), backend.end(), backend.begin(), + [](unsigned char c) { return std::tolower(c); }); + std::size_t nameFound = platformBackendName.find(backend); + + bool backendFound = nameFound != std::string::npos; + if (termType == AcceptFilter) { + if (backend.front() != '*' && !backendFound) { + logger::debug( + "The ONEAPI_DEVICE_SELECTOR backend name '{}' was not " + "found in the platform library name '{}'", + backend, platformBackendName); + acceptLibrary = false; + continue; + } else if (backend.front() == '*' || backendFound) { + return UR_RESULT_SUCCESS; + } + } else { + if (backendFound || backend.front() == '*') { + acceptLibrary = false; + logger::debug( + "The ONEAPI_DEVICE_SELECTOR backend name for discard " + "'{}' was found in the platform library name '{}'", + backend, platformBackendName); + continue; + } + } + } + if (acceptLibrary) { + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; + } + void discoverKnownAdapters() { auto searchPathsEnvOpt = getEnvAdapterSearchPaths(); auto loaderLibPathOpt = getLoaderLibPath(); +#if defined(_WIN32) + bool loaderPreFilter = getenv_tobool("UR_LOADER_PRELOAD_FILTER", false); +#else + bool loaderPreFilter = getenv_tobool("UR_LOADER_PRELOAD_FILTER", true); +#endif for (const auto &adapterName : knownAdapterNames) { + + if (loaderPreFilter) { + if (readPreFilterODS(adapterName) != UR_RESULT_SUCCESS) { + logger::debug("The adapter '{}' was removed based on the " + "pre-filter from ONEAPI_DEVICE_SELECTOR.", + adapterName); + continue; + } + } std::vector loadPaths; // Adapter search order: @@ -183,6 +307,8 @@ class AdapterRegistry { } } + bool forceLoaded = false; + public: void enableMock() { adaptersLoadPaths.clear(); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 56e16b769d..26f55c071f 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -8631,6 +8631,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8689,6 +8694,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8781,6 +8791,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetCommandBufferExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8875,6 +8890,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8936,6 +8956,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9019,6 +9044,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9081,6 +9111,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9142,6 +9177,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9213,6 +9253,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9269,6 +9314,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9333,6 +9383,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9390,6 +9445,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9451,6 +9511,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( // 
Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9522,6 +9587,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9579,6 +9649,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9640,6 +9715,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9699,6 +9779,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9760,6 +9845,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9815,6 +9905,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9875,6 +9970,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9937,6 +10037,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } diff --git a/source/loader/ur_loader.cpp b/source/loader/ur_loader.cpp index bfc9da3e50..f2b43f2725 100644 --- a/source/loader/ur_loader.cpp +++ b/source/loader/ur_loader.cpp @@ -8,13 +8,24 @@ * */ #include "ur_loader.hpp" +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +#include "adapters/level_zero/ur_interface_loader.hpp" +#endif namespace ur_loader { /////////////////////////////////////////////////////////////////////////////// context_t *getContext() { return context_t::get_direct(); 
} -/////////////////////////////////////////////////////////////////////////////// ur_result_t context_t::init() { +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO + // If the adapters were force loaded, it means the user wants to use + // a specific adapter library. Don't load any static adapters. + if (!adapter_registry.adaptersForceLoaded()) { + auto &level_zero = platforms.emplace_back(nullptr); + ur::level_zero::urAdapterGetDdiTables(&level_zero.dditable.ur); + } +#endif + for (const auto &adapterPaths : adapter_registry) { for (const auto &path : adapterPaths) { auto handle = LibLoader::loadAdapterLibrary(path.string().c_str()); diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt index f372dc655d..b1c34b8916 100644 --- a/test/adapters/level_zero/CMakeLists.txt +++ b/test/adapters/level_zero/CMakeLists.txt @@ -40,11 +40,12 @@ if(UR_BUILD_ADAPTER_L0) generate_device_binaries kernel_names_header) endif() - if(NOT WIN32) + if(NOT WIN32 AND NOT UR_STATIC_ADAPTER_L0) # Make L0 use CallMap from a seprate shared lib so that we can access the map # from the tests. This only seems to work on linux add_library(zeCallMap SHARED zeCallMap.cpp) target_compile_definitions(ur_adapter_level_zero PRIVATE UR_L0_CALL_COUNT_IN_TESTS) + # TODO: stop exporting internals like this for tests... target_link_libraries(ur_adapter_level_zero PRIVATE zeCallMap) add_adapter_test(level_zero_ze_calls diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index 95f1f40902..f1c88a35ee 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -35,6 +35,10 @@ add_unittest(level_zero_command_list_cache add_unittest(level_zero_event_pool event_pool_test.cpp + ${PROJECT_SOURCE_DIR}/source/ur/ur.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/adapter.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/device.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/platform.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool_cache.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_normal.cpp diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 85457bea97..c144ac5fa2 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -159,37 +159,17 @@ struct urUpdatableCommandBufferExpExecutionTest ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, &updatable_cmd_buf_handle)); ASSERT_NE(updatable_cmd_buf_handle, nullptr); - - // Currently there are synchronization issue with immediate submission when used for command buffers. - // So, create queue with batched submission for this test suite if the backend is Level Zero. 
- if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { - ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; - ur_queue_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, - /*.pNext =*/nullptr, - /*.flags =*/flags, - }; - ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); - ASSERT_NE(queue, nullptr); - } else { - queue = urCommandBufferExpExecutionTest::queue; - } } void TearDown() override { if (updatable_cmd_buf_handle) { EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); } - if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) { - ASSERT_SUCCESS(urQueueRelease(queue)); - } - UUR_RETURN_ON_FATAL_FAILURE( urCommandBufferExpExecutionTest::TearDown()); } ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; - ur_queue_handle_t queue = nullptr; }; struct urCommandBufferCommandExpTest diff --git a/test/loader/adapter_registry/CMakeLists.txt b/test/loader/adapter_registry/CMakeLists.txt index 2778ad5c40..6d80430e6c 100644 --- a/test/loader/adapter_registry/CMakeLists.txt +++ b/test/loader/adapter_registry/CMakeLists.txt @@ -51,3 +51,7 @@ add_adapter_reg_search_test(search-order SEARCH_PATH ${TEST_SEARCH_PATH} ENVS "TEST_ADAPTER_SEARCH_PATH=\"${TEST_SEARCH_PATH}\"" "TEST_CUR_SEARCH_PATH=\"${TEST_BIN_PATH}\"" SOURCES search_order.cpp) + +add_adapter_reg_search_test(prefilter + SEARCH_PATH "" + SOURCES prefilter.cpp) diff --git a/test/loader/adapter_registry/fixtures.hpp b/test/loader/adapter_registry/fixtures.hpp index 79a831d40f..da5c963e8a 100644 --- a/test/loader/adapter_registry/fixtures.hpp +++ b/test/loader/adapter_registry/fixtures.hpp @@ -74,5 +74,49 @@ struct adapterRegSearchTest : ::testing::Test { } } }; +#ifndef _WIN32 +struct adapterPreFilterTest : ::testing::Test { + ur_loader::AdapterRegistry *registry; + const fs::path levelzeroLibName = + MAKE_LIBRARY_NAME("ur_adapter_level_zero", "0"); + std::function<bool(const fs::path &)> islevelzeroLibName = + [this](const fs::path &path) { return path == levelzeroLibName; }; + + std::function<bool(const std::vector<fs::path> &)> haslevelzeroLibName = + [this](const std::vector<fs::path> &paths) { + return std::any_of(paths.cbegin(), paths.cend(), + islevelzeroLibName); + }; + + const fs::path openclLibName = MAKE_LIBRARY_NAME("ur_adapter_opencl", "0"); + std::function<bool(const fs::path &)> isOpenclLibName = + [this](const fs::path &path) { return path == openclLibName; }; + + std::function<bool(const std::vector<fs::path> &)> hasOpenclLibName = + [this](const std::vector<fs::path> &paths) { + return std::any_of(paths.cbegin(), paths.cend(), isOpenclLibName); + }; + + const fs::path cudaLibName = MAKE_LIBRARY_NAME("ur_adapter_cuda", "0"); + std::function<bool(const fs::path &)> isCudaLibName = + [this](const fs::path &path) { return path == cudaLibName; }; + + std::function<bool(const std::vector<fs::path> &)> hasCudaLibName = + [this](const std::vector<fs::path> &paths) { + return std::any_of(paths.cbegin(), paths.cend(), isCudaLibName); + }; + + void SetUp(std::string filter) { + try { + setenv("ONEAPI_DEVICE_SELECTOR", filter.c_str(), 1); + registry = new ur_loader::AdapterRegistry; + } catch (const std::invalid_argument &e) { + FAIL() << e.what(); + } + } + void SetUp() override {} + void TearDown() override { delete registry; } +}; +#endif #endif // UR_ADAPTER_REG_TEST_HELPERS_H diff --git a/test/loader/adapter_registry/prefilter.cpp b/test/loader/adapter_registry/prefilter.cpp new file mode 100644 index 0000000000..1d2b095da3 --- /dev/null +++ b/test/loader/adapter_registry/prefilter.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "fixtures.hpp" + +#ifndef _WIN32 + +TEST_F(adapterPreFilterTest, testPrefilterAcceptFilterSingleBackend) { + SetUp("level_zero:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_FALSE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterAcceptFilterMultipleBackends) { + SetUp("level_zero:*;opencl:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterDiscardFilterSingleBackend) { + SetUp("!level_zero:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_FALSE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_TRUE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterDiscardFilterMultipleBackends) { + SetUp("!level_zero:*;!cuda:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_FALSE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterAcceptAndDiscardFilter) { + SetUp("!cuda:*;level_zero:*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_FALSE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterDiscardFilterAll) { + SetUp("*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_TRUE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterWithInvalidMissingBackend) { + SetUp(":garbage"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_TRUE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterWithInvalidBackend) { + SetUp("garbage:0"); + auto levelZeroExists = + 
std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_TRUE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_TRUE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterWithNotAllAndAcceptFilter) { + SetUp("!*;level_zero"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_TRUE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_FALSE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +TEST_F(adapterPreFilterTest, testPrefilterWithNotAllFilter) { + SetUp("!*"); + auto levelZeroExists = + std::any_of(registry->cbegin(), registry->cend(), haslevelzeroLibName); + EXPECT_FALSE(levelZeroExists); + auto openclExists = + std::any_of(registry->cbegin(), registry->cend(), hasOpenclLibName); + EXPECT_FALSE(openclExists); + auto cudaExists = + std::any_of(registry->cbegin(), registry->cend(), hasCudaLibName); + EXPECT_FALSE(cudaExists); +} + +#endif diff --git a/test/unit/utils/CMakeLists.txt b/test/unit/utils/CMakeLists.txt index a0e0fd3ef7..62681b1032 100644 --- a/test/unit/utils/CMakeLists.txt +++ b/test/unit/utils/CMakeLists.txt @@ -13,3 +13,6 @@ add_unit_test(params add_unit_test(print print.cpp) + +add_unit_test(helpers + helpers.cpp) diff --git a/test/unit/utils/helpers.cpp b/test/unit/utils/helpers.cpp new file mode 100644 index 0000000000..87223b21cc --- /dev/null +++ b/test/unit/utils/helpers.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include <gtest/gtest.h> +#include <string> + +#include "ur_util.hpp" + +TEST(groupDigits, Success) { + EXPECT_EQ(groupDigits(-1), "-1"); + EXPECT_EQ(groupDigits(-12), "-12"); + EXPECT_EQ(groupDigits(-123), "-123"); + EXPECT_EQ(groupDigits(-1234), "-1'234"); + EXPECT_EQ(groupDigits(-12345), "-12'345"); + EXPECT_EQ(groupDigits(-123456), "-123'456"); + EXPECT_EQ(groupDigits(-1234567), "-1'234'567"); + EXPECT_EQ(groupDigits(-12345678), "-12'345'678"); + + EXPECT_EQ(groupDigits(0), "0"); + EXPECT_EQ(groupDigits(1), "1"); + EXPECT_EQ(groupDigits(12), "12"); + EXPECT_EQ(groupDigits(123), "123"); + EXPECT_EQ(groupDigits(1234), "1'234"); + EXPECT_EQ(groupDigits(12345), "12'345"); + EXPECT_EQ(groupDigits(123456), "123'456"); + EXPECT_EQ(groupDigits(1234567), "1'234'567"); + EXPECT_EQ(groupDigits(12345678), "12'345'678"); +} diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 7ee9c0ee45..9975d59353 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -22,7 +22,7 @@ pyparsing==2.4.5 pytest>=7.0 pytz==2019.3 PyYAML==6.0.1 -requests==2.32.0 +requests==2.32.2 rst2pdf==0.98 six==1.13.0 snowballstemmer==2.0.0
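
For illustration only (not part of the diff above): a minimal sketch of how the groupDigits helper added to source/common/ur_util.hpp could be called from client code, assuming ur_util.hpp is reachable on the include path.

    // sketch: print integers with apostrophe thousands separators
    #include <iostream>
    #include "ur_util.hpp"

    int main() {
        // groupDigits inserts '\'' every three digits, counting from the least
        // significant digit, and keeps the sign of negative values.
        std::cout << groupDigits(1234567) << "\n"; // 1'234'567
        std::cout << groupDigits(-98765) << "\n";  // -98'765
        std::cout << groupDigits(42) << "\n";      // 42
        return 0;
    }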