Skip to content

[UR][L0] Add support for querying the Vector Width Size Properties #19159

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions unified-runtime/scripts/core/LEVEL_ZERO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ Environment Variables
| | The wait-event path relies on | the immediate append path only for some devices when the | |
| | zeCommandQueueExecuteCommandLists() | pre-requisites are met. | |
+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+
| UR_L0_VECTOR_WIDTH_SIZE | Specifies the size (in bits) of the vector width supported | Any positive integer: Indicates the maximum number of data | Device-specific |
| | by the Level Zero device. This value indicates the maximum | elements that can be processed simultaneously in a single | |
| | number of data elements that can be processed simultaneously | instruction. The value entered by the user is the desired | |
| | in a single instruction, which is useful for optimizing | width size to configure. If this width size is not supported, | |
| | data-parallel workloads and understanding device caps. | then the default "max" will be used. | |
+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+
Contributors
------------

Expand Down
5 changes: 5 additions & 0 deletions unified-runtime/source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,11 @@ ze_structure_type_t getZeStructureType<ze_device_ip_version_ext_t>() {
return ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT;
}
// Structure-type tag for ze_device_vector_width_properties_ext_t.
template <>
ze_structure_type_t
getZeStructureType<ze_device_vector_width_properties_ext_t>() {
  const ze_structure_type_t StructureType =
      ZE_STRUCTURE_TYPE_DEVICE_VECTOR_WIDTH_PROPERTIES_EXT;
  return StructureType;
}
// Structure-type tag for ze_device_memory_access_properties_t.
template <>
ze_structure_type_t getZeStructureType<ze_device_memory_access_properties_t>() {
  const ze_structure_type_t StructureType =
      ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES;
  return StructureType;
}
Expand Down
7 changes: 7 additions & 0 deletions unified-runtime/source/adapters/level_zero/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ const int UrL0LeaksDebug = [] {
return std::atoi(UrRet);
}();

// Value of the UR_L0_VECTOR_WIDTH_SIZE environment variable, parsed with
// std::atoi during initialization of this constant. It is 0 when the variable
// is not set.
const int UrL0VectorWidth = [] {
  if (const char *EnvValue = std::getenv("UR_L0_VECTOR_WIDTH_SIZE"))
    return std::atoi(EnvValue);
  return 0;
}();

// Enable for UR L0 Adapter to Init all L0 Drivers on the system with filtering
// in place for only currently used Drivers.
const int UrL0InitAllDrivers = [] {
Expand Down
96 changes: 88 additions & 8 deletions unified-runtime/source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -690,35 +690,54 @@ ur_result_t urDeviceGetInfo(
case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE:
return ReturnValue(
size_t{Device->ZeDeviceImageProperties->maxImageArraySlices});
// Handle SIMD widths, matching compute-runtime OpenCL implementation:
// https://github.com/intel/compute-runtime/blob/291745cdf76d83f5dc40e7ef41d347366235ccdb/opencl/source/cl_device/cl_device_caps.cpp#L236
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR:
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_char);
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR:
return ReturnValue(uint32_t{16});
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_char);
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT:
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_short);
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT:
return ReturnValue(uint32_t{8});
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_short);
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT:
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_int);
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT:
return ReturnValue(uint32_t{4});
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_int);
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG:
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_long);
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG:
return ReturnValue(uint32_t{1});
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_long);
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT:
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_float);
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT:
return ReturnValue(uint32_t{1});
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_float);
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE:
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE:
// Must return 0 for *vector_width_double* if the device does not have fp64.
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64))
return ReturnValue(uint32_t{0});
return ReturnValue(uint32_t{1});
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF:
// Must return 0 for *vector_width_half* if the device does not have fp16.
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16))
return ReturnValue(uint32_t{0});
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_half);
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF:
// Must return 0 for *vector_width_half* if the device does not have fp16.
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16))
return ReturnValue(uint32_t{0});
return ReturnValue(uint32_t{8});
return ReturnValue(
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_half);
case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: {
// Max_num_sub_Groups = maxTotalGroupSize/min(set of subGroupSizes);
uint32_t MinSubGroupSize =
Expand Down Expand Up @@ -1857,6 +1876,67 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
};
#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME

auto UrPlatform = this->Platform;
// Fills in the vector width properties for this device. Hard-coded defaults
// are used unless the driver advertises ZE_DEVICE_VECTOR_SIZES_EXT_NAME and
// reports at least one usable entry. In that case the entry with the largest
// vector_width_size is chosen, or the entry whose vector_width_size equals
// UR_L0_VECTOR_WIDTH_SIZE when that variable is set and such an entry exists.
ZeDeviceVectorWidthPropertiesExt.Compute =
    [ZeDevice, UrPlatform](
        ZeStruct<ze_device_vector_width_properties_ext_t> &Properties) {
      // Default vector width properties.
      Properties.preferred_vector_width_char = 16u;
      Properties.preferred_vector_width_short = 8u;
      Properties.preferred_vector_width_int = 4u;
      Properties.preferred_vector_width_long = 1u;
      Properties.preferred_vector_width_float = 1u;
      Properties.preferred_vector_width_half = 8u;
      Properties.native_vector_width_char = 16u;
      Properties.native_vector_width_short = 8u;
      Properties.native_vector_width_int = 4u;
      Properties.native_vector_width_long = 1u;
      Properties.native_vector_width_float = 1u;
      Properties.native_vector_width_half = 8u;

      if (!UrPlatform->zeDriverExtensionMap.count(
              ZE_DEVICE_VECTOR_SIZES_EXT_NAME))
        return;

      // The first call passes no buffer and only retrieves the entry count.
      uint32_t Count = 0;
      ZE_CALL_NOCHECK(zeDeviceGetVectorWidthPropertiesExt,
                      (ZeDevice, &Count, nullptr));
      if (Count == 0)
        return;

      // The vector must really hold Count elements before its storage is
      // handed to the driver. reserve() only allocates capacity, which left
      // the vector empty and made the code below unreachable.
      std::vector<ZeStruct<ze_device_vector_width_properties_ext_t>>
          PropertiesVector(Count);
      ZE_CALL_NOCHECK(zeDeviceGetVectorWidthPropertiesExt,
                      (ZeDevice, &Count, PropertiesVector.data()));
      // Drop trailing elements if the second call reported fewer entries.
      if (Count < PropertiesVector.size())
        PropertiesVector.resize(Count);

      // Pick the entry with the largest vector_width_size. Entries with a
      // zero width are ignored so that an entry the driver did not fill in
      // cannot replace the defaults above.
      const ZeStruct<ze_device_vector_width_properties_ext_t> *Selected =
          nullptr;
      for (const auto &Prop : PropertiesVector) {
        if (Prop.vector_width_size == 0)
          continue;
        if (!Selected ||
            Prop.vector_width_size > Selected->vector_width_size)
          Selected = &Prop;
      }
      if (!Selected)
        return;

      // If the environment variable is set, prefer the entry with exactly
      // that width. When no entry matches, the largest one stays selected.
      if (UrL0VectorWidth > 0) {
        const auto RequestedWidth = static_cast<uint32_t>(UrL0VectorWidth);
        for (const auto &Prop : PropertiesVector) {
          if (Prop.vector_width_size == RequestedWidth) {
            Selected = &Prop;
            break;
          }
        }
      }

      Properties = *Selected;
    };

ImmCommandListUsed = this->useImmediateCommandLists();

uint32_t numQueueGroups = 0;
Expand Down
2 changes: 2 additions & 0 deletions unified-runtime/source/adapters/level_zero/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ struct ur_device_handle_t_ : ur_object {
ZeCache<ZeStruct<ze_intel_device_block_array_exp_properties_t>>
ZeDeviceBlockArrayProperties;
#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME
// Cache of the device's vector width properties
// (ze_device_vector_width_properties_ext_t). Its Compute callback is
// installed in ur_device_handle_t_::initialize.
ZeCache<ZeStruct<ze_device_vector_width_properties_ext_t>>
ZeDeviceVectorWidthPropertiesExt;

// Map device bindless image offset to corresponding host image handle.
std::unordered_map<ur_exp_image_native_handle_t, ze_image_handle_t>
Expand Down