From 775d7c8ae595655806eed7ce331c870da42280bc Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Tue, 1 Apr 2025 18:13:30 +0100 Subject: [PATCH 1/3] Add support for raw_kernel_arg extension --- dpctl/__init__.py | 2 + dpctl/_backend.pxd | 14 + dpctl/_sycl_queue.pxd | 11 + dpctl/_sycl_queue.pyx | 111 ++++++ dpctl/sycl.pxd | 11 + dpctl/tests/input_files/raw-arg-kernel.spv | Bin 0 -> 1640 bytes dpctl/tests/test_raw_kernel_arg.py | 112 ++++++ dpctl/tests/test_sycl_kernel_submit.py | 1 + .../syclinterface/dpctl_sycl_enum_types.h | 1 + .../dpctl_sycl_extension_interface.h | 30 ++ .../syclinterface/dpctl_sycl_type_casters.hpp | 2 + .../source/dpctl_sycl_extension_interface.cpp | 32 ++ .../source/dpctl_sycl_queue_interface.cpp | 15 +- libsyclinterface/tests/CMakeLists.txt | 3 + .../tests/raw_kernel_arg_kernel_fp64.spv | Bin 0 -> 1400 bytes .../raw_kernel_arg_kernel_inttys_fp32.spv | Bin 0 -> 7900 bytes .../test_sycl_queue_submit_raw_kernel_arg.cpp | 377 ++++++++++++++++++ 17 files changed, 721 insertions(+), 1 deletion(-) create mode 100644 dpctl/tests/input_files/raw-arg-kernel.spv create mode 100644 dpctl/tests/test_raw_kernel_arg.py create mode 100644 libsyclinterface/tests/raw_kernel_arg_kernel_fp64.spv create mode 100644 libsyclinterface/tests/raw_kernel_arg_kernel_inttys_fp32.spv create mode 100644 libsyclinterface/tests/test_sycl_queue_submit_raw_kernel_arg.cpp diff --git a/dpctl/__init__.py b/dpctl/__init__.py index 3a80c68a71..dd7b8f9ac5 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -50,6 +50,7 @@ from ._sycl_platform import SyclPlatform, get_platforms, lsplatform from ._sycl_queue import ( LocalAccessor, + RawKernelArg, SyclKernelInvalidRangeError, SyclKernelSubmitError, SyclQueue, @@ -106,6 +107,7 @@ "SyclQueueCreationError", "WorkGroupMemory", "LocalAccessor", + "RawKernelArg", ] __all__ += [ "get_device_cached_queue", diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 7464d311c6..ebda950a41 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -71,6 +71,7 @@ cdef extern from "syclinterface/dpctl_sycl_enum_types.h": _VOID_PTR "DPCTL_VOID_PTR", _LOCAL_ACCESSOR "DPCTL_LOCAL_ACCESSOR", _WORK_GROUP_MEMORY "DPCTL_WORK_GROUP_MEMORY" + _RAW_KERNEL_ARG "DPCTL_RAW_KERNEL_ARG" ctypedef enum _queue_property_type "DPCTLQueuePropertyType": _DEFAULT_PROPERTY "DPCTL_DEFAULT_PROPERTY" @@ -571,3 +572,16 @@ cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": DPCTLSyclWorkGroupMemoryRef Ref) cdef bint DPCTLWorkGroupMemory_Available() + + cdef struct RawKernelArgDataTy + ctypedef RawKernelArgDataTy RawKernelArgData + + cdef struct DPCTLOpaqueRawKernelArg + ctypedef DPCTLOpaqueRawKernelArg *DPCTLSyclRawKernelArgRef; + + cdef DPCTLSyclRawKernelArgRef DPCTLRawKernelArg_Create(void* bytes, size_t count); + + cdef void DPCTLRawKernelArg_Delete( + DPCTLSyclRawKernelArgRef Ref); + + cdef bint DPCTLRawKernelArg_Available(); diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 5836bc95de..9469415cb8 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -25,6 +25,7 @@ from libcpp cimport bool as cpp_bool from ._backend cimport ( DPCTLSyclDeviceRef, DPCTLSyclQueueRef, + DPCTLSyclRawKernelArgRef, DPCTLSyclWorkGroupMemoryRef, _arg_data_type, ) @@ -115,3 +116,13 @@ cdef public api class WorkGroupMemory(_WorkGroupMemory) [ object PyWorkGroupMemoryObject, type PyWorkGroupMemoryType ]: pass + +cdef public api class _RawKernelArg [ + object Py_RawKernelArgObject, type Py_RawKernelArgType +]: + cdef DPCTLSyclRawKernelArgRef _arg_ref + +cdef public api class RawKernelArg(_RawKernelArg) [ + object PyRawKernelArgObject, type PyRawKernelArgType +]: + pass diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index e04658dc44..1cf417c79e 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -51,6 +51,9 @@ from ._backend cimport ( # noqa: E211 DPCTLQueue_SubmitNDRange, DPCTLQueue_SubmitRange, DPCTLQueue_Wait, + DPCTLRawKernelArg_Available, + DPCTLRawKernelArg_Create, + DPCTLRawKernelArg_Delete, DPCTLSyclContextRef, DPCTLSyclDeviceSelectorRef, DPCTLSyclEventRef, @@ -364,6 +367,15 @@ cdef class _kernel_arg_type: _arg_data_type._WORK_GROUP_MEMORY ) + @property + def dpctl_raw_kernel_arg(self): + cdef str p_name = "dpctl_raw_kernel_arg" + return kernel_arg_type_attribute( + self._name, + p_name, + _arg_data_type._RAW_KERNEL_ARG + ) + kernel_arg_type = _kernel_arg_type() @@ -973,6 +985,9 @@ cdef class SyclQueue(_SyclQueue): elif isinstance(arg, LocalAccessor): kargs[idx] = ((arg).addressof()) kargty[idx] = _arg_data_type._LOCAL_ACCESSOR + elif isinstance(arg, RawKernelArg): + kargs[idx] = (arg._ref) + kargty[idx] = _arg_data_type._RAW_KERNEL_ARG else: ret = -1 return ret @@ -1738,3 +1753,99 @@ cdef class WorkGroupMemory: """ def __get__(self): return self._mem_ref + + +cdef class _RawKernelArg: + def __dealloc(self): + if(self._arg_ref): + DPCTLRawKernelArg_Delete(self._arg_ref) + + +cdef class RawKernelArg: + """ + RawKernelArg(*args) + Python class representing the ``raw_kernel_arg`` class from the Raw Kernel + Argument oneAPI SYCL extension for passing binary data as data to kernels. + + This class is intended to be used as kernel argument when launching kernels. + + This is based on a DPC++ SYCL extension and only available in newer + versions. Use ``is_available()`` to check availability in your build. + + There are multiple ways to create a ``RawKernelArg``. + + - If the constructor is invoked with just a single argument, this argument + is expected to expose the Python buffer interface. The raw kernel arg will + be constructed from the data in that buffer. + + - If the constructor is invoked with two arguments, the first argument is + interpreted as the number of bytes in the binary argument, while the + second argument is interpreted as a pointer to the data. + + Note that construction of the ``RawKernelArg`` copies the bytes, so + modifications made after construction of the ``RawKernelArg`` will not be + reflected in the kernel launch. + + Args: + args: + Variadic argument, see class documentation. + + Raises: + TypeError: In case of incorrect arguments given to constructurs, + unexpected types of input arguments. + """ + def __cinit__(self, *args): + cdef void* ptr = NULL + cdef size_t count + cdef int ret_code = 0 + cdef Py_buffer _buffer + cdef bint _is_buf + + if not DPCTLRawKernelArg_Available(): + raise RuntimeError("Raw kernel arg extension not available") + + if not (0 < len(args) < 3): + raise TypeError("RawKernelArg constructor takes 1 or 2 " + f"arguments, but {len(args)} were given") + + if len(args) == 1: + if not _is_buffer(args[0]): + raise TypeError("RawKernelArg single argument constructor" + "expects argument to be buffer", + f"but got {type(args[0])}") + + ret_code = PyObject_GetBuffer(args[0], &(_buffer), PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS) + if ret_code != 0: # pragma: no cover + raise RuntimeError("Could not access buffer") + + ptr = _buffer.buf + count = _buffer.len + _is_buf = True + else: + if not isinstance(args[0], numbers.Integral): + raise TypeError("RawKernelArg constructor expects first" + "argument to be `int`, but got {type(args[0])}") + if not isinstance(args[1], numbers.Integral): + raise TypeError("RawKernelArg constructor expects second" + "argument to be `int`, but got {type(args[1])}") + + _is_buf = False + count = args[0] + ptr = (args[1]) + + self._arg_ref = DPCTLRawKernelArg_Create(ptr, count) + if(_is_buf): + PyBuffer_Release(&(_buffer)) + + + """Check whether the raw_kernel_arg extension is available""" + @staticmethod + def is_available(): + return DPCTLRawKernelArg_Available(); + + property _ref: + """Returns the address of the C API ``DPCTLRawKernelArgRef`` pointer + as a ``size_t``. + """ + def __get__(self): + return self._arg_ref diff --git a/dpctl/sycl.pxd b/dpctl/sycl.pxd index f0b6f1eb2a..e49aa6b812 100644 --- a/dpctl/sycl.pxd +++ b/dpctl/sycl.pxd @@ -45,6 +45,8 @@ cdef extern from "sycl/sycl.hpp" namespace "sycl": cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": cdef struct RawWorkGroupMemoryTy ctypedef RawWorkGroupMemoryTy RawWorkGroupMemory + cdef struct RawKernelArgDataTy + ctypedef RawKernelArgDataTy RawKernelArgData cdef extern from "syclinterface/dpctl_sycl_type_casters.hpp" \ namespace "dpctl::syclinterface": @@ -85,3 +87,12 @@ cdef extern from "syclinterface/dpctl_sycl_type_casters.hpp" \ "dpctl::syclinterface::unwrap" ( dpctl_backend.DPCTLSyclWorkGroupMemoryRef ) + + # raw kernel arg extension + cdef dpctl_backend.DPCTLSyclRawKernelArgRef wrap_raw_kernel_arg \ + "dpctl::syclinterface::wrap" \ + (const RawKernelArgData *) + + cdef RawKernelArgData * unwrap_raw_kernel_arg \ + "dpctl::syclinterface::unwrap" ( + dpctl_backend.DPCTLSyclRawKernelArgRef) diff --git a/dpctl/tests/input_files/raw-arg-kernel.spv b/dpctl/tests/input_files/raw-arg-kernel.spv new file mode 100644 index 0000000000000000000000000000000000000000..d819549cdc44e666e8b4d67c8d7a91e971bb23e9 GIT binary patch literal 1640 zcmaKs+fEcg5QYnO76epOKs@3CA{xv_)ToKk7)?N;2}BcbC&Mz}#yvSRi;#FhjPW^q z1fRjD@&@Do^-RMC(ILInf7Rc0>S{{;vx64&*=ZZFo0flPt=|G|k>x@T6mq1H`x9ov zHfV3x-i5DTz6s-GS_QC4raR+O5!1FFX?&5~{{tFA>!)J&beth~vQoa!xY#VyNCj@ybZ*!t|Ki>6 z;id2Eo76UJ7yirh{sOxuUfNx+)~j`w*Nkwj*(q?@X7Q6G(QbWXM-<1IldjoqEP8wc z&c?>w9G7jr=gd7Rm)(t<-ID&;eiwJbOf2gjnXB_|AGkepHQj@6YhQ6VpOb(3J#vrU zKl9@Kv&a7x=Pz(Gap*sA_e*``w5)sMzQLUH7(TNH=I(p>;9fg(fSp-~@3HsqvG)%? zJYO<;-_`KYC;YIVkRSX#)8Yf4kgNdoEhu~J)LhZxTPQdeb9lh1!;$v1QT~*4*gXGBkpu4< zU)X<0N8e_&&{wp6PxL#yBpZF +# +# using namespace sycl; +# +# namespace syclexp = sycl::ext::oneapi::experimental; +# namespace syclext = sycl::ext::oneapi; +# +# using data_t = int32_t; +# +# struct Params { data_t mul; data_t add; }; +# +# extern "C" SYCL_EXTERNAL +# SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +# void raw_arg_kernel(data_t* in, data_t* out, Params p){ +# auto item = syclext::this_work_item::get_nd_item<1>(); +# size_t global_id = item.get_global_linear_id(); +# out[global_id] = (in[global_id] * p.mul) + p.add; +# } + + +class Params(ctypes.Structure): + _fields_ = [("mul", ctypes.c_int32), ("add", ctypes.c_int32)] + + +def launch_raw_arg_kernel(raw): + if not dpctl.RawKernelArg.is_available(): + pytest.skip("Raw kernel arg extension not supported") + + try: + q = dpctl.SyclQueue("level_zero") + except dpctl.SyclQueueCreationError: + pytest.skip("LevelZero queue could not be created") + spirv_file = get_spirv_abspath("raw-arg-kernel.spv") + with open(spirv_file, "br") as spv: + spv_bytes = spv.read() + prog = dpctl.program.create_program_from_spirv(q, spv_bytes) + kernel = prog.get_sycl_kernel("__sycl_kernel_raw_arg_kernel") + local_size = 16 + global_size = local_size * 8 + + x = dpctl.tensor.ones(global_size, dtype="int32") + y = dpctl.tensor.zeros(global_size, dtype="int32") + x.sycl_queue.wait() + y.sycl_queue.wait() + + try: + q.submit( + kernel, + [ + x.usm_data, + y.usm_data, + raw, + ], + [global_size], + [local_size], + ) + q.wait() + except dpctl._sycl_queue.SyclKernelSubmitError: + pytest.skip(f"Kernel submission to {q.sycl_device} failed") + + assert dpctl.tensor.all(y == 9) + + +def test_submit_raw_kernel_arg_pointer(): + paramStruct = Params(4, 5) + raw = dpctl.RawKernelArg( + ctypes.sizeof(paramStruct), ctypes.addressof(paramStruct) + ) + launch_raw_arg_kernel(raw) + + +def test_submit_raw_kernel_arg_buffer(): + paramStruct = Params(4, 5) + byteArr = bytearray(paramStruct) + raw = dpctl.RawKernelArg(byteArr) + del byteArr + launch_raw_arg_kernel(raw) diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index e46c4f1760..fc03e7aaf0 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -280,6 +280,7 @@ def test_kernel_arg_type(): _check_kernel_arg_type_instance(kernel_arg_type.dpctl_void_ptr) _check_kernel_arg_type_instance(kernel_arg_type.dpctl_local_accessor) _check_kernel_arg_type_instance(kernel_arg_type.dpctl_work_group_memory) + _check_kernel_arg_type_instance(kernel_arg_type.dpctl_raw_kernel_arg) def get_spirv_abspath(fn): diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h b/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h index 2c2ff3bc09..799f9d1484 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_enum_types.h @@ -101,6 +101,7 @@ typedef enum DPCTL_VOID_PTR, DPCTL_LOCAL_ACCESSOR, DPCTL_WORK_GROUP_MEMORY, + DPCTL_RAW_KERNEL_ARG, DPCTL_UNSUPPORTED_KERNEL_ARG } DPCTLKernelArgType; diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h b/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h index ee4d7d4fbb..76567a011e 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h @@ -34,6 +34,9 @@ #include "dpctl_sycl_enum_types.h" #include "dpctl_sycl_types.h" +#include +#include + DPCTL_C_EXTERN_C_BEGIN typedef struct RawWorkGroupMemoryTy @@ -53,4 +56,31 @@ void DPCTLWorkGroupMemory_Delete(__dpctl_take DPCTLSyclWorkGroupMemoryRef Ref); DPCTL_API bool DPCTLWorkGroupMemory_Available(); +typedef class RawKernelArgDataTy +{ +public: + RawKernelArgDataTy(void *bytes, size_t count) : data(count) + { + std::memcpy(data.data(), bytes, count); + } + + void *bytes() { return data.data(); } + size_t count() { return data.size(); } + +private: + std::vector data; +} RawKernelArgData; + +typedef struct DPCTLOpaqueSyclRawKernelArg *DPCTLSyclRawKernelArgRef; + +DPCTL_API +__dpctl_give DPCTLSyclRawKernelArgRef DPCTLRawKernelArg_Create(void *bytes, + size_t count); + +DPCTL_API +void DPCTLRawKernelArg_Delete(__dpctl_take DPCTLSyclRawKernelArgRef Ref); + +DPCTL_API +bool DPCTLRawKernelArg_Available(); + DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp b/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp index 638916f083..f79e0bec1a 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp @@ -84,6 +84,8 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::vector, DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RawWorkGroupMemory, DPCTLSyclWorkGroupMemoryRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RawKernelArgData, DPCTLSyclRawKernelArgRef) + #endif } // namespace dpctl::syclinterface diff --git a/libsyclinterface/source/dpctl_sycl_extension_interface.cpp b/libsyclinterface/source/dpctl_sycl_extension_interface.cpp index 862be8dded..9dc65f6104 100644 --- a/libsyclinterface/source/dpctl_sycl_extension_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_extension_interface.cpp @@ -62,3 +62,35 @@ bool DPCTLWorkGroupMemory_Available() return false; #endif } + +DPCTL_API +__dpctl_give DPCTLSyclRawKernelArgRef DPCTLRawKernelArg_Create(void *bytes, + size_t count) +{ + DPCTLSyclRawKernelArgRef rka = nullptr; + try { + auto RawKernelArg = std::unique_ptr( + new RawKernelArgData(bytes, count)); + rka = wrap(RawKernelArg.get()); + RawKernelArg.release(); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } + return rka; +} + +DPCTL_API +void DPCTLRawKernelArg_Delete(__dpctl_take DPCTLSyclRawKernelArgRef Ref) +{ + delete unwrap(Ref); +} + +DPCTL_API +bool DPCTLRawKernelArg_Available() +{ +#ifdef SYCL_EXT_ONEAPI_RAW_KERNEL_ARG + return true; +#else + return false; +#endif +} diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index 7fb971d253..988f2b1c0f 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -42,7 +42,8 @@ #include /* SYCL headers */ #include -#ifdef SYCL_EXT_ONEAPI_WORK_GROUP_MEMORY +#if defined(SYCL_EXT_ONEAPI_WORK_GROUP_MEMORY) || \ + defined(SYCL_EXT_ONEAPI_RAW_KERNEL_ARG) #include "dpctl_sycl_extension_interface.h" #endif @@ -231,6 +232,18 @@ bool set_kernel_arg(handler &cgh, cgh.set_arg(idx, mem); break; } +#endif +#ifdef SYCL_EXT_ONEAPI_RAW_KERNEL_ARG + case DPCTL_RAW_KERNEL_ARG: + { + auto ref = static_cast(Arg); + RawKernelArgData *raw_arg = unwrap(ref); + void *bytes = raw_arg->bytes(); + size_t count = raw_arg->count(); + sycl::ext::oneapi::experimental::raw_kernel_arg arg{bytes, count}; + cgh.set_arg(idx, arg); + break; + } #endif default: arg_set = false; diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 36a511bdc6..424340260e 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -17,6 +17,8 @@ set(spirv-test-files local_accessor_kernel_fp64.spv work_group_memory_kernel_fp64.spv work_group_memory_kernel_inttys_fp32.spv + raw_kernel_arg_kernel_fp64.spv + raw_kernel_arg_kernel_inttys_fp32.spv ) foreach(tf ${spirv-test-files}) @@ -53,6 +55,7 @@ add_sycl_to_target( ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit_local_accessor_arg.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit_work_group_memory_arg.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit_raw_kernel_arg.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp ) diff --git a/libsyclinterface/tests/raw_kernel_arg_kernel_fp64.spv b/libsyclinterface/tests/raw_kernel_arg_kernel_fp64.spv new file mode 100644 index 0000000000000000000000000000000000000000..b878e7bceb68bc9f87c33c4e9511bfb164731676 GIT binary patch literal 1400 zcma)+T~8B16o#jC0ig&YAAVy26$P`22|*I0L5-{J>Nt3vUY@YXz5^-wERFQt0Cf%2B!n`;%s##S9<77tB z)VL^#KD;}4w0SUyJFO(^CY@-%^}Kob%JouJ4848(S`j9z_)(s=v*YMRzuhUC-A1SP z(RP~M<6dlwcCXu1De=b4gmhkH{kRDCY-W#gXAjhea@g!sWFzqcGgIPph%X5z-69(} ze$C8DpYJZrvz`q%!|gEcrGrk-Qc}65&6U4!x5RZd(Vc>_1k z;D_4RJ!Yoh3m!AmdlK%u;b;8EUv)kAXNPs@|5*D4@!;=BSm1GKaNG41tnH1NS!5H963ttA zP17vFg1lsEsioxwq|z>4y8SKvVy`~$ndieiGp`w2zP=9UoXIY4%I2UwGuk0kWx7_g$23Xd>MJ&M&)+cKH@LdLG~7Q} zSigE%Z}F09FU~uyUEa3FyhZKu%8hxKwacqC=Be%0{S7qc-PkVg`o_Hb+T{&4<~`Xi zZ>TZv&31WPYIzxP&TRMI)*(KbiteXS9vvv{C@dcv7_9UT_Y96~Dh~Dz?-=PTRt83f zd%5RMP3IWta-}rZSIMs{mWo5=DxM7=qeii`wNNZ5FRQsmf3)ULm)}2JDUHYc&q((T zjphfcerGsYx>za|#|Jj=1nkr)I7L|F3CNdABgOpu{NjB7&Pu7MCe+GsU$sUyd`^03 zY;eBU&uY7V$&vNtZ39~>)%DZDsT0Ug_nWQ!;^xhZYWrow9Qi2+r`wMl@SLO^d_jJd z0~P4hf9J?==HOgmN5jEf<)<8Uw_V?wgLxCkPdPYG`E?E^hxzgw&PRSh^*k&Vo^rFq ze&ptS`6)L`<=45%hUN0h8hVZE&ags$Ggm#rj)tpM%1^nvwC(!txy=WL^SOE=`H?K1 zi8acvbCnHi<)@sivmZIRTz<;QM)@fxSISR0xk`RBCtnkGG@M+c{FIYx+pcfT$#oOS zPdF(kzs^ZlD9UdKWo2lJd31e&lPb{FJXD`6*u`@>9M>V&&k&~O{r<~j(KjmbP{FIYhaL-$Lkd_7=4^7WwnI$u-6 zL-O0*d*8_D_~1y9Porud7anbsyHMY&>Yk&8e6ieDELW=Ev4TEzS~oR3_7UqE_sE9F zm7C6QpZs)wPsneaUpDL)R-Z>U93Uq-Lr=+1IeFTCzX-vQMoB6FUe0id0BolC$9)gIeC?wBb;=F-%~?p?8xt&d^ex0>KQsHo$__ae&p*d z`E|aggm>!k7W2=BcNI_je@}ke|NHXO{(JOmt5evSl8O9o>rh^ge)F-0-|Rj57Gd){ z{^Z2x_ed`K{GP+Ft*!l@)=zT7S9B}C9Z>AtCI^MClxkuCt>*qvp!*tk(Nf&EJh8hxIuLb)BzrZP^v7jv zhwL1HE@O|#cF)H43d=~ijJ+)zKDbVjMV$KRoP1(DxA-S0GCAVZ28WwdlyywB@mY6l z^ucE-ljokbtRqH_=&r7>agnXQC#cE(C+go%OH+HkG&#;&Ae()_sl8CzwTV&t0%>w; zv&n{wWmB6P)GWrB#MGX=+k~n*0VJ=J-9b$p@$Az0$5p zj2aJ1lY`6@%db`OT0Oo8_%$pWUYAWCIQ8C;c0FR$J1k9(*L)zG9`P?w-17ums0iz) zNw}ENBK5$Gco$y8dGDVjVK1HsgnV*7 zDIr$lDum~i#k~0&ofyx{e`idp`YRQqC%zl>MVva^=VvA0-gmqD13yJVPkXc$pTE<| z1wSN=o@YtO$w|EKRN3IWD*YmuEc)FXHAhFB{zZnv-*d(UZBH zE1O(!bJ;DMoSek#_`4q5T%ISJwcv2cUSQs@naf3raq)cnT=EUa<~d&?fy?uy;gV%-xcriEV&vu|=5ncQV&)S6uqu$ZTtaM_#Qd(1O|Qi0?PBRm zB;cO&%7{?|fj7=&kA%MA8-JCATyXg2zAlxJ(<|}1)e(np)~%7S7Ti62SvEbuKQ=ku zznTBb6yr+yUn}4Iuagb`mrI-f^|Hau{|4D$-oyM~A*>buUlC4>{G7!6Z|0-ehO3e5ERoUR?|7zJ_?#(<_|Nn(>^M8$Oa>332wX(Ull>h7GgPZ??Y}SIi zhp)?~2l&S($NM+)zgaP^K8eqNf3!DAZ;`iYp?^BHHUWw2D z{n5Th`T+_2KPYYfe-Lr=|B!qz_Lw3e?_ud5N?6Z%#eXznk4U@Lk0S0`kI4sfEzk1D z!dlJpC&G!5pObi&kIN?JS>k(^`(kXbw4bFXWOLud=xwI-PbJ{)?Pn2lZ_EyRzl0b( z@LYf$kbrYPuJ?1<)MGB#kWyYmxIxM zUHXuOxx6J!{ln6Kh&cVeCi{;OV~?!FbNQ2O)_X4R$Od~`;#z-}4enZhkqzcrp37f_ zwVKP{gcBn_C-Gd~l}*fZ!S`I=i?Q_G_r5Uhn;5-)ApLg + +#include +#include +#include +#include +#include + +#include +#include + +namespace +{ +constexpr std::size_t SIZE = 320; + +static_assert(SIZE % 10 == 0); + +using namespace dpctl::syclinterface; + +template struct Params +{ + T mul; + T add; +}; + +template +void submit_kernel(DPCTLSyclQueueRef QRef, + DPCTLSyclKernelBundleRef KBRef, + std::vector spirvBuffer, + std::size_t spirvFileSize, + std::string kernelName) +{ + if (!DPCTLRawKernelArg_Available()) { + GTEST_SKIP() << "Skipping raw_kernel_arg test since the compiler does " + "not support this feature"; + return; + } + + constexpr std::size_t NARGS = 2; + constexpr std::size_t RANGE_NDIMS = 1; + + ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str())); + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str()); + + // Create the input args + auto a = DPCTLmalloc_shared(SIZE * sizeof(T), QRef); + ASSERT_TRUE(a != nullptr); + auto a_ptr = static_cast(unwrap(a)); + for (auto i = 0ul; i < SIZE; ++i) { + a_ptr[i] = T{1}; + } + + // Create kernel args for vector_add + std::size_t lws = SIZE / 10; + std::size_t gRange[] = {SIZE}; + std::size_t lRange[] = {lws}; + + Params p{T{4}, T{5}}; + auto rka = DPCTLRawKernelArg_Create(&p, sizeof(Params)); + ASSERT_TRUE(rka != nullptr); + auto *rka_raw = unwrap(rka); + ASSERT_TRUE(rka_raw != nullptr); + void *args_1d[NARGS] = {unwrap(a), rka}; + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, + DPCTL_RAW_KERNEL_ARG}; + + DPCTLSyclEventRef E1Ref = DPCTLQueue_SubmitNDRange( + kernel, QRef, args_1d, addKernelArgTypes, NARGS, gRange, lRange, + RANGE_NDIMS, nullptr, 0); + ASSERT_TRUE(E1Ref != nullptr); + + DPCTLSyclEventRef DepEv1[] = {E1Ref}; + void *args_2d[NARGS] = {unwrap(a), rka}; + + DPCTLSyclEventRef E2Ref = + DPCTLQueue_SubmitNDRange(kernel, QRef, args_2d, addKernelArgTypes, + NARGS, gRange, lRange, RANGE_NDIMS, DepEv1, 1); + ASSERT_TRUE(E2Ref != nullptr); + + DPCTLSyclEventRef DepEv2[] = {E1Ref, E2Ref}; + void *args_3d[NARGS] = {unwrap(a), rka}; + + DPCTLSyclEventRef E3Ref = + DPCTLQueue_SubmitNDRange(kernel, QRef, args_3d, addKernelArgTypes, + NARGS, gRange, lRange, RANGE_NDIMS, DepEv2, 2); + ASSERT_TRUE(E3Ref != nullptr); + + DPCTLEvent_Wait(E3Ref); + + std::cout << a_ptr[0] << std::endl; + ASSERT_TRUE(a_ptr[0] == T(169)); + + // clean ups + DPCTLEvent_Delete(E1Ref); + DPCTLEvent_Delete(E2Ref); + DPCTLEvent_Delete(E3Ref); + DPCTLRawKernelArg_Delete(rka); + DPCTLKernel_Delete(kernel); + DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); +} + +} /* end of anonymous namespace */ + +/* +// The work_group_memory_kernel spv files were generated from the SYCL program +// included in this comment. The program can be compiled using +// `icpx -fsycl raw_kernel_arg_kernel.cpp`. After that if the generated +// executable is run with the environment variable `SYCL_DUMP_IMAGES=1`, icpx +// runtime will dump all offload sections of fat binary to the current working +// directory. When tested with DPC++ 2025.1 the kernels are split across two +// separate SPV files. One contains all kernels for integers and FP32 +// data type, and another contains the kernel for FP64. +// +// Note that, `SYCL_DUMP_IMAGES=1` will also generate extra SPV files that +// contain the code for built in functions such as indexing and barriers. To +// figure which SPV file contains the kernels, use `spirv-dis` from the +// spirv-tools package to translate the SPV binary format to a human-readable +// textual format. +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +template +struct Params{ T mul; T add; }; + +template +class SyclKernel_RKA +{ +private: + T *a_ = nullptr; + Params p_; + +public: + SyclKernel_RKA(T *a, Params p) + : a_(a), p_(p) + { + } + + void operator()(sycl::nd_item<1> it) const + { + int i = it.get_global_id(); + a_[i] = (a_[i] * p_.mul) + p_.add; + } +}; + +template +sycl::event +submit_kernel(sycl::queue q, const unsigned long N, T *a, T mul, T add) +{ + auto gws = N; + auto lws = (N/10); + + sycl::range<1> gRange{gws}; + sycl::range<1> lRange{lws}; + sycl::nd_range<1> ndRange{gRange, lRange}; + + Params p{mul, add}; + + sycl::event e = + q.submit([&](auto &h) + { + h.parallel_for( + ndRange, + SyclKernel_RKA(a, p)); + }); + + return e; +} + +template +void driver(std::size_t N) +{ + sycl::queue q; + auto *a = sycl::malloc_shared(N, q); + submit_kernel(q, N, a, T{4}, T{5}).wait(); + sycl::free(a, q); +} + +int main(int argc, const char **argv) +{ + std::size_t N = 0; + std::cout << "Enter problem size in N:\n"; + std::cin >> N; + std::cout << "Executing with N = " << N << std::endl; + + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + + return 0; +} +*/ + +struct TestQueueSubmitWithRawKernelArg : public ::testing::Test +{ + std::ifstream spirvFile; + std::size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; + + TestQueueSubmitWithRawKernelArg() + { + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + const char *test_spv_fn = "./raw_kernel_arg_kernel_inttys_fp32.spv"; + + spirvFile.open(test_spv_fn, std::ios::binary | std::ios::ate); + spirvFileSize_ = std::filesystem::file_size(test_spv_fn); + spirvBuffer_.reserve(spirvFileSize_); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); + + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDevice_Delete(DRef); + DPCTLDeviceSelector_Delete(DSRef); + } + + ~TestQueueSubmitWithRawKernelArg() + { + spirvFile.close(); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); + } +}; + +struct TestQueueSubmitWithRawKernelArgFP64 : public ::testing::Test +{ + std::ifstream spirvFile; + std::size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclDeviceRef DRef = nullptr; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; + + TestQueueSubmitWithRawKernelArgFP64() + { + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + const char *test_spv_fn = "./raw_kernel_arg_kernel_fp64.spv"; + + spirvFile.open(test_spv_fn, std::ios::binary | std::ios::ate); + spirvFileSize_ = std::filesystem::file_size(test_spv_fn); + spirvBuffer_.reserve(spirvFileSize_); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); + + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDeviceSelector_Delete(DSRef); + } + + ~TestQueueSubmitWithRawKernelArgFP64() + { + spirvFile.close(); + DPCTLDevice_Delete(DRef); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); + } +}; + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIaE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForUInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIhE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIsE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForUInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAItE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIiE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForUInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIjE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIlE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForUInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAImE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArg, CheckForFloat) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIfE"); +} + +TEST_F(TestQueueSubmitWithRawKernelArgFP64, CheckForDouble) +{ + if (DPCTLDevice_HasAspect(DRef, DPCTLSyclAspectType::fp64)) { + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + "_ZTS14SyclKernel_RKAIdE"); + } +} From a0b4c75a7684651cf7ebbfa0a2198707b9590a83 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Tue, 22 Apr 2025 16:51:17 +0100 Subject: [PATCH 2/3] Use opaque pointer to std::vector directly Switch approach to not introduce an intermediate struct, but have the opaque pointer directly point to std::vector instead. --- dpctl/_backend.pxd | 3 --- dpctl/sycl.pxd | 10 ++++------ .../dpctl_sycl_extension_interface.h | 18 ------------------ .../syclinterface/dpctl_sycl_type_casters.hpp | 3 ++- .../source/dpctl_sycl_extension_interface.cpp | 11 +++++++---- .../source/dpctl_sycl_queue_interface.cpp | 7 ++++--- .../test_sycl_queue_submit_raw_kernel_arg.cpp | 2 +- 7 files changed, 18 insertions(+), 36 deletions(-) diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index ebda950a41..e5e2fbdbb6 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -573,9 +573,6 @@ cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": cdef bint DPCTLWorkGroupMemory_Available() - cdef struct RawKernelArgDataTy - ctypedef RawKernelArgDataTy RawKernelArgData - cdef struct DPCTLOpaqueRawKernelArg ctypedef DPCTLOpaqueRawKernelArg *DPCTLSyclRawKernelArgRef; diff --git a/dpctl/sycl.pxd b/dpctl/sycl.pxd index e49aa6b812..67748ad58e 100644 --- a/dpctl/sycl.pxd +++ b/dpctl/sycl.pxd @@ -45,8 +45,6 @@ cdef extern from "sycl/sycl.hpp" namespace "sycl": cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": cdef struct RawWorkGroupMemoryTy ctypedef RawWorkGroupMemoryTy RawWorkGroupMemory - cdef struct RawKernelArgDataTy - ctypedef RawKernelArgDataTy RawKernelArgData cdef extern from "syclinterface/dpctl_sycl_type_casters.hpp" \ namespace "dpctl::syclinterface": @@ -90,9 +88,9 @@ cdef extern from "syclinterface/dpctl_sycl_type_casters.hpp" \ # raw kernel arg extension cdef dpctl_backend.DPCTLSyclRawKernelArgRef wrap_raw_kernel_arg \ - "dpctl::syclinterface::wrap" \ - (const RawKernelArgData *) + "dpctl::syclinterface::wrap>" \ + (const std::vector *) - cdef RawKernelArgData * unwrap_raw_kernel_arg \ - "dpctl::syclinterface::unwrap" ( + cdef std::vector * unwrap_raw_kernel_arg \ + "dpctl::syclinterface::unwrap>" ( dpctl_backend.DPCTLSyclRawKernelArgRef) diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h b/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h index 76567a011e..1225eef6cb 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_extension_interface.h @@ -34,9 +34,6 @@ #include "dpctl_sycl_enum_types.h" #include "dpctl_sycl_types.h" -#include -#include - DPCTL_C_EXTERN_C_BEGIN typedef struct RawWorkGroupMemoryTy @@ -56,21 +53,6 @@ void DPCTLWorkGroupMemory_Delete(__dpctl_take DPCTLSyclWorkGroupMemoryRef Ref); DPCTL_API bool DPCTLWorkGroupMemory_Available(); -typedef class RawKernelArgDataTy -{ -public: - RawKernelArgDataTy(void *bytes, size_t count) : data(count) - { - std::memcpy(data.data(), bytes, count); - } - - void *bytes() { return data.data(); } - size_t count() { return data.size(); } - -private: - std::vector data; -} RawKernelArgData; - typedef struct DPCTLOpaqueSyclRawKernelArg *DPCTLSyclRawKernelArgRef; DPCTL_API diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp b/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp index f79e0bec1a..e39bffecab 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_type_casters.hpp @@ -84,7 +84,8 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::vector, DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RawWorkGroupMemory, DPCTLSyclWorkGroupMemoryRef) -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RawKernelArgData, DPCTLSyclRawKernelArgRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::vector, + DPCTLSyclRawKernelArgRef) #endif diff --git a/libsyclinterface/source/dpctl_sycl_extension_interface.cpp b/libsyclinterface/source/dpctl_sycl_extension_interface.cpp index 9dc65f6104..59bf8b07e4 100644 --- a/libsyclinterface/source/dpctl_sycl_extension_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_extension_interface.cpp @@ -63,15 +63,18 @@ bool DPCTLWorkGroupMemory_Available() #endif } +using raw_kernel_arg_t = std::vector; + DPCTL_API __dpctl_give DPCTLSyclRawKernelArgRef DPCTLRawKernelArg_Create(void *bytes, size_t count) { DPCTLSyclRawKernelArgRef rka = nullptr; try { - auto RawKernelArg = std::unique_ptr( - new RawKernelArgData(bytes, count)); - rka = wrap(RawKernelArg.get()); + auto RawKernelArg = + std::unique_ptr(new raw_kernel_arg_t(count)); + std::memcpy(RawKernelArg->data(), bytes, count); + rka = wrap(RawKernelArg.get()); RawKernelArg.release(); } catch (std::exception const &e) { error_handler(e, __FILE__, __func__, __LINE__); @@ -82,7 +85,7 @@ __dpctl_give DPCTLSyclRawKernelArgRef DPCTLRawKernelArg_Create(void *bytes, DPCTL_API void DPCTLRawKernelArg_Delete(__dpctl_take DPCTLSyclRawKernelArgRef Ref) { - delete unwrap(Ref); + delete unwrap(Ref); } DPCTL_API diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index 988f2b1c0f..77db689c1d 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -237,9 +237,10 @@ bool set_kernel_arg(handler &cgh, case DPCTL_RAW_KERNEL_ARG: { auto ref = static_cast(Arg); - RawKernelArgData *raw_arg = unwrap(ref); - void *bytes = raw_arg->bytes(); - size_t count = raw_arg->count(); + std::vector *raw_arg = + unwrap>(ref); + void *bytes = raw_arg->data(); + size_t count = raw_arg->size(); sycl::ext::oneapi::experimental::raw_kernel_arg arg{bytes, count}; cgh.set_arg(idx, arg); break; diff --git a/libsyclinterface/tests/test_sycl_queue_submit_raw_kernel_arg.cpp b/libsyclinterface/tests/test_sycl_queue_submit_raw_kernel_arg.cpp index 4dde8534ee..2d120f7157 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit_raw_kernel_arg.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit_raw_kernel_arg.cpp @@ -93,7 +93,7 @@ void submit_kernel(DPCTLSyclQueueRef QRef, Params p{T{4}, T{5}}; auto rka = DPCTLRawKernelArg_Create(&p, sizeof(Params)); ASSERT_TRUE(rka != nullptr); - auto *rka_raw = unwrap(rka); + auto *rka_raw = unwrap>(rka); ASSERT_TRUE(rka_raw != nullptr); void *args_1d[NARGS] = {unwrap(a), rka}; DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, From 5fb74e63bd2ea17dccfc7a60ddd24fd2c5a64939 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Wed, 23 Apr 2025 08:39:36 +0100 Subject: [PATCH 3/3] Fix test and linter issues --- dpctl/_backend.pxd | 9 +++++---- dpctl/_sycl_queue.pyx | 9 ++++----- dpctl/sycl.pxd | 9 --------- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index e5e2fbdbb6..ca9e9ccb9f 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -574,11 +574,12 @@ cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": cdef bint DPCTLWorkGroupMemory_Available() cdef struct DPCTLOpaqueRawKernelArg - ctypedef DPCTLOpaqueRawKernelArg *DPCTLSyclRawKernelArgRef; + ctypedef DPCTLOpaqueRawKernelArg *DPCTLSyclRawKernelArgRef - cdef DPCTLSyclRawKernelArgRef DPCTLRawKernelArg_Create(void* bytes, size_t count); + cdef DPCTLSyclRawKernelArgRef DPCTLRawKernelArg_Create(void* bytes, + size_t count) cdef void DPCTLRawKernelArg_Delete( - DPCTLSyclRawKernelArgRef Ref); + DPCTLSyclRawKernelArgRef Ref) - cdef bint DPCTLRawKernelArg_Available(); + cdef bint DPCTLRawKernelArg_Available() diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 1cf417c79e..48c882b860 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -1814,8 +1814,9 @@ cdef class RawKernelArg: "expects argument to be buffer", f"but got {type(args[0])}") - ret_code = PyObject_GetBuffer(args[0], &(_buffer), PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS) - if ret_code != 0: # pragma: no cover + ret_code = PyObject_GetBuffer(args[0], &(_buffer), + PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS) + if ret_code != 0: # pragma: no cover raise RuntimeError("Could not access buffer") ptr = _buffer.buf @@ -1837,11 +1838,9 @@ cdef class RawKernelArg: if(_is_buf): PyBuffer_Release(&(_buffer)) - - """Check whether the raw_kernel_arg extension is available""" @staticmethod def is_available(): - return DPCTLRawKernelArg_Available(); + return DPCTLRawKernelArg_Available() property _ref: """Returns the address of the C API ``DPCTLRawKernelArgRef`` pointer diff --git a/dpctl/sycl.pxd b/dpctl/sycl.pxd index 67748ad58e..f0b6f1eb2a 100644 --- a/dpctl/sycl.pxd +++ b/dpctl/sycl.pxd @@ -85,12 +85,3 @@ cdef extern from "syclinterface/dpctl_sycl_type_casters.hpp" \ "dpctl::syclinterface::unwrap" ( dpctl_backend.DPCTLSyclWorkGroupMemoryRef ) - - # raw kernel arg extension - cdef dpctl_backend.DPCTLSyclRawKernelArgRef wrap_raw_kernel_arg \ - "dpctl::syclinterface::wrap>" \ - (const std::vector *) - - cdef std::vector * unwrap_raw_kernel_arg \ - "dpctl::syclinterface::unwrap>" ( - dpctl_backend.DPCTLSyclRawKernelArgRef)