diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index a115d636b3..f67c6174b3 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -215,7 +215,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, int Major = 0; UR_CHECK_ERROR(cuDeviceGetAttribute( &Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get())); - uint64_t Capabilities = + ur_memory_scope_capability_flags_t Capabilities = (Major >= 7) ? UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | @@ -270,7 +270,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, int WarpSize = 0; UR_CHECK_ERROR(cuDeviceGetAttribute( &WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get())); - size_t Sizes[1] = {static_cast(WarpSize)}; + uint32_t Sizes[1] = {static_cast(WarpSize)}; return ReturnValue(Sizes, 1); } case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 839fee841d..eac42d3039 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -190,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, int WarpSize = 0; UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize, hDevice->get())); - size_t Sizes[1] = {static_cast(WarpSize)}; + uint32_t Sizes[1] = {static_cast(WarpSize)}; return ReturnValue(Sizes, 1); } case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { @@ -792,9 +792,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // Because scopes are hierarchical, wider scopes support all narrower // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) - uint64_t Capabilities = UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; + ur_memory_scope_capability_flags_t Capabilities = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; return ReturnValue(Capabilities); } case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 19748ee7c6..087d459506 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -626,11 +626,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(static_cast(false)); } case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { - // ze_device_compute_properties.subGroupSizes is in uint32_t whereas the - // expected return is size_t datatype. size_t can be 8 bytes of data. - return ReturnValue.template operator()( - Device->ZeDeviceComputeProperties->subGroupSizes, - Device->ZeDeviceComputeProperties->numSubGroupSizes); + return ReturnValue(Device->ZeDeviceComputeProperties->subGroupSizes, + Device->ZeDeviceComputeProperties->numSubGroupSizes); } case UR_DEVICE_INFO_IL_VERSION: { // Set to a space separated list of IL version strings of the form diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index ac79b71876..0c03c727f3 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -910,7 +910,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: case UR_DEVICE_INFO_BUILT_IN_KERNELS: case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: - case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: case UR_DEVICE_INFO_IP_VERSION: { /* We can just use the OpenCL outputs because the sizes of OpenCL types * are the same as UR. @@ -929,6 +928,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } + case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { + // Have to convert size_t to uint32_t + size_t SubGroupSizesSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, 0, + nullptr, &SubGroupSizesSize)); + std::vector SubGroupSizes(SubGroupSizesSize / sizeof(size_t)); + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, + SubGroupSizesSize, SubGroupSizes.data(), nullptr)); + return ReturnValue.template operator()(SubGroupSizes.data(), + SubGroupSizes.size()); + } case UR_DEVICE_INFO_EXTENSIONS: { cl_device_id Dev = cl_adapter::cast(hDevice); size_t ExtSize = 0;