From c36166f79b1bae71541981faed360302c638cebf Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 20 Apr 2026 18:19:55 -0700 Subject: [PATCH 01/41] add C API back to _usmarray.pyx --- dpnp/backend/include/dpnp4pybind11.hpp | 164 ++++++++++------- dpnp/tensor/_usmarray.pyx | 232 +++++++++++++++++++++++++ 2 files changed, 334 insertions(+), 62 deletions(-) diff --git a/dpnp/backend/include/dpnp4pybind11.hpp b/dpnp/backend/include/dpnp4pybind11.hpp index 3150d63146f..6394987a5f8 100644 --- a/dpnp/backend/include/dpnp4pybind11.hpp +++ b/dpnp/backend/include/dpnp4pybind11.hpp @@ -77,6 +77,37 @@ class dpnp_capi public: PyTypeObject *PyUSMArrayType_; + char *(*UsmNDArray_GetData_)(PyUSMArrayObject *); + int (*UsmNDArray_GetNDim_)(PyUSMArrayObject *); + py::ssize_t *(*UsmNDArray_GetShape_)(PyUSMArrayObject *); + py::ssize_t *(*UsmNDArray_GetStrides_)(PyUSMArrayObject *); + int (*UsmNDArray_GetTypenum_)(PyUSMArrayObject *); + int (*UsmNDArray_GetElementSize_)(PyUSMArrayObject *); + int (*UsmNDArray_GetFlags_)(PyUSMArrayObject *); + DPCTLSyclQueueRef (*UsmNDArray_GetQueueRef_)(PyUSMArrayObject *); + py::ssize_t (*UsmNDArray_GetOffset_)(PyUSMArrayObject *); + PyObject *(*UsmNDArray_GetUSMData_)(PyUSMArrayObject *); + void (*UsmNDArray_SetWritableFlag_)(PyUSMArrayObject *, int); + PyObject *(*UsmNDArray_MakeSimpleFromMemory_)(int, + const py::ssize_t *, + int, + Py_MemoryObject *, + py::ssize_t, + char); + PyObject *(*UsmNDArray_MakeSimpleFromPtr_)(size_t, + int, + DPCTLSyclUSMRef, + DPCTLSyclQueueRef, + PyObject *); + PyObject *(*UsmNDArray_MakeFromPtr_)(int, + const py::ssize_t *, + int, + const py::ssize_t *, + DPCTLSyclUSMRef, + DPCTLSyclQueueRef, + py::ssize_t, + PyObject *); + int USM_ARRAY_C_CONTIGUOUS_; int USM_ARRAY_F_CONTIGUOUS_; int USM_ARRAY_WRITABLE_; @@ -119,7 +150,15 @@ class dpnp_capi std::shared_ptr default_usm_ndarray_; dpnp_capi() - : PyUSMArrayType_(nullptr), USM_ARRAY_C_CONTIGUOUS_(0), + : PyUSMArrayType_(nullptr), UsmNDArray_GetData_(nullptr), + UsmNDArray_GetNDim_(nullptr), UsmNDArray_GetShape_(nullptr), + UsmNDArray_GetStrides_(nullptr), UsmNDArray_GetTypenum_(nullptr), + UsmNDArray_GetElementSize_(nullptr), UsmNDArray_GetFlags_(nullptr), + UsmNDArray_GetQueueRef_(nullptr), UsmNDArray_GetOffset_(nullptr), + UsmNDArray_GetUSMData_(nullptr), UsmNDArray_SetWritableFlag_(nullptr), + UsmNDArray_MakeSimpleFromMemory_(nullptr), + UsmNDArray_MakeSimpleFromPtr_(nullptr), + UsmNDArray_MakeFromPtr_(nullptr), USM_ARRAY_C_CONTIGUOUS_(0), USM_ARRAY_F_CONTIGUOUS_(0), USM_ARRAY_WRITABLE_(0), UAR_BOOL_(-1), UAR_BYTE_(-1), UAR_UBYTE_(-1), UAR_SHORT_(-1), UAR_USHORT_(-1), UAR_INT_(-1), UAR_UINT_(-1), UAR_LONG_(-1), UAR_ULONG_(-1), @@ -135,6 +174,23 @@ class dpnp_capi this->PyUSMArrayType_ = &PyUSMArrayType; + // dpnp.tensor.usm_ndarray API + this->UsmNDArray_GetData_ = UsmNDArray_GetData; + this->UsmNDArray_GetNDim_ = UsmNDArray_GetNDim; + this->UsmNDArray_GetShape_ = UsmNDArray_GetShape; + this->UsmNDArray_GetStrides_ = UsmNDArray_GetStrides; + this->UsmNDArray_GetTypenum_ = UsmNDArray_GetTypenum; + this->UsmNDArray_GetElementSize_ = UsmNDArray_GetElementSize; + this->UsmNDArray_GetFlags_ = UsmNDArray_GetFlags; + this->UsmNDArray_GetQueueRef_ = UsmNDArray_GetQueueRef; + this->UsmNDArray_GetOffset_ = UsmNDArray_GetOffset; + this->UsmNDArray_GetUSMData_ = UsmNDArray_GetUSMData; + this->UsmNDArray_SetWritableFlag_ = UsmNDArray_SetWritableFlag; + this->UsmNDArray_MakeSimpleFromMemory_ = + UsmNDArray_MakeSimpleFromMemory; + this->UsmNDArray_MakeSimpleFromPtr_ = UsmNDArray_MakeSimpleFromPtr; + this->UsmNDArray_MakeFromPtr_ = UsmNDArray_MakeFromPtr; + // constants this->USM_ARRAY_C_CONTIGUOUS_ = USM_ARRAY_C_CONTIGUOUS; this->USM_ARRAY_F_CONTIGUOUS_ = USM_ARRAY_F_CONTIGUOUS; @@ -269,7 +325,9 @@ class usm_ndarray : public py::object char *get_data() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - return raw_ar->data_; + + auto const &api = detail::dpnp_capi::get(); + return api.UsmNDArray_GetData_(raw_ar); } template @@ -281,13 +339,17 @@ class usm_ndarray : public py::object int get_ndim() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - return raw_ar->nd_; + + auto const &api = detail::dpnp_capi::get(); + return api.UsmNDArray_GetNDim_(raw_ar); } const py::ssize_t *get_shape_raw() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - return raw_ar->shape_; + + auto const &api = detail::dpnp_capi::get(); + return api.UsmNDArray_GetShape_(raw_ar); } std::vector get_shape_vector() const @@ -308,7 +370,9 @@ class usm_ndarray : public py::object const py::ssize_t *get_strides_raw() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - return raw_ar->strides_; + + auto const &api = detail::dpnp_capi::get(); + return api.UsmNDArray_GetStrides_(raw_ar); } std::vector get_strides_vector() const @@ -343,8 +407,9 @@ class usm_ndarray : public py::object { PyUSMArrayObject *raw_ar = usm_array_ptr(); - int ndim = raw_ar->nd_; - const py::ssize_t *shape = raw_ar->shape_; + auto const &api = detail::dpnp_capi::get(); + int ndim = api.UsmNDArray_GetNDim_(raw_ar); + const py::ssize_t *shape = api.UsmNDArray_GetShape_(raw_ar); py::ssize_t nelems = 1; for (int i = 0; i < ndim; ++i) { @@ -359,9 +424,10 @@ class usm_ndarray : public py::object { PyUSMArrayObject *raw_ar = usm_array_ptr(); - int nd = raw_ar->nd_; - const py::ssize_t *shape = raw_ar->shape_; - const py::ssize_t *strides = raw_ar->strides_; + auto const &api = detail::dpnp_capi::get(); + int nd = api.UsmNDArray_GetNDim_(raw_ar); + const py::ssize_t *shape = api.UsmNDArray_GetShape_(raw_ar); + const py::ssize_t *strides = api.UsmNDArray_GetStrides_(raw_ar); py::ssize_t offset_min = 0; py::ssize_t offset_max = 0; @@ -389,77 +455,43 @@ class usm_ndarray : public py::object sycl::queue get_queue() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - Py_MemoryObject *mem_obj = - reinterpret_cast(raw_ar->base_); - auto const &dpctl_api = ::dpctl::detail::dpctl_capi::get(); - DPCTLSyclQueueRef QRef = dpctl_api.Memory_GetQueueRef_(mem_obj); + auto const &api = detail::dpnp_capi::get(); + DPCTLSyclQueueRef QRef = api.UsmNDArray_GetQueueRef_(raw_ar); return *(reinterpret_cast(QRef)); } sycl::device get_device() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - Py_MemoryObject *mem_obj = - reinterpret_cast(raw_ar->base_); - auto const &dpctl_api = ::dpctl::detail::dpctl_capi::get(); - DPCTLSyclQueueRef QRef = dpctl_api.Memory_GetQueueRef_(mem_obj); + auto const &api = detail::dpnp_capi::get(); + DPCTLSyclQueueRef QRef = api.UsmNDArray_GetQueueRef_(raw_ar); return reinterpret_cast(QRef)->get_device(); } int get_typenum() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - return raw_ar->typenum_; + + auto const &api = detail::dpnp_capi::get(); + return api.UsmNDArray_GetTypenum_(raw_ar); } int get_flags() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - return raw_ar->flags_; + + auto const &api = detail::dpnp_capi::get(); + return api.UsmNDArray_GetFlags_(raw_ar); } int get_elemsize() const { - int typenum = get_typenum(); - auto const &api = detail::dpnp_capi::get(); + PyUSMArrayObject *raw_ar = usm_array_ptr(); - // Lookup table for element sizes based on typenum - if (typenum == api.UAR_BOOL_) - return 1; - if (typenum == api.UAR_BYTE_) - return 1; - if (typenum == api.UAR_UBYTE_) - return 1; - if (typenum == api.UAR_SHORT_) - return 2; - if (typenum == api.UAR_USHORT_) - return 2; - if (typenum == api.UAR_INT_) - return 4; - if (typenum == api.UAR_UINT_) - return 4; - if (typenum == api.UAR_LONG_) - return sizeof(long); - if (typenum == api.UAR_ULONG_) - return sizeof(unsigned long); - if (typenum == api.UAR_LONGLONG_) - return 8; - if (typenum == api.UAR_ULONGLONG_) - return 8; - if (typenum == api.UAR_FLOAT_) - return 4; - if (typenum == api.UAR_DOUBLE_) - return 8; - if (typenum == api.UAR_CFLOAT_) - return 8; - if (typenum == api.UAR_CDOUBLE_) - return 16; - if (typenum == api.UAR_HALF_) - return 2; - - return 0; // Unknown type + auto const &api = detail::dpnp_capi::get(); + return api.UsmNDArray_GetElementSize_(raw_ar); } bool is_c_contiguous() const @@ -487,9 +519,10 @@ class usm_ndarray : public py::object py::object get_usm_data() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); + + auto const &api = detail::dpnp_capi::get(); // base_ is the Memory object - return new reference - PyObject *usm_data = raw_ar->base_; - Py_XINCREF(usm_data); + PyObject *usm_data = api.UsmNDArray_GetUSMData_(raw_ar); // pass reference ownership to py::object return py::reinterpret_steal(usm_data); @@ -498,10 +531,13 @@ class usm_ndarray : public py::object bool is_managed_by_smart_ptr() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - PyObject *usm_data = raw_ar->base_; + + auto const &api = detail::dpnp_capi::get(); + PyObject *usm_data = api.UsmNDArray_GetUSMData_(raw_ar); auto const &dpctl_api = ::dpctl::detail::dpctl_capi::get(); if (!PyObject_TypeCheck(usm_data, dpctl_api.Py_MemoryType_)) { + Py_DECREF(usm_data); return false; } @@ -509,17 +545,20 @@ class usm_ndarray : public py::object reinterpret_cast(usm_data); const void *opaque_ptr = dpctl_api.Memory_GetOpaquePointer_(mem_obj); + Py_DECREF(usm_data); return bool(opaque_ptr); } const std::shared_ptr &get_smart_ptr_owner() const { PyUSMArrayObject *raw_ar = usm_array_ptr(); - PyObject *usm_data = raw_ar->base_; - auto const &dpctl_api = ::dpctl::detail::dpctl_capi::get(); + auto const &api = detail::dpnp_capi::get(); + PyObject *usm_data = api.UsmNDArray_GetUSMData_(raw_ar); + auto const &dpctl_api = ::dpctl::detail::dpctl_capi::get(); if (!PyObject_TypeCheck(usm_data, dpctl_api.Py_MemoryType_)) { + Py_DECREF(usm_data); throw std::runtime_error( "usm_ndarray object does not have Memory object " "managing lifetime of USM allocation"); @@ -528,6 +567,7 @@ class usm_ndarray : public py::object Py_MemoryObject *mem_obj = reinterpret_cast(usm_data); void *opaque_ptr = dpctl_api.Memory_GetOpaquePointer_(mem_obj); + Py_DECREF(usm_data); if (opaque_ptr) { auto shptr_ptr = diff --git a/dpnp/tensor/_usmarray.pyx b/dpnp/tensor/_usmarray.pyx index c696056d53c..7d90ffeb05a 100644 --- a/dpnp/tensor/_usmarray.pyx +++ b/dpnp/tensor/_usmarray.pyx @@ -1740,6 +1740,238 @@ cdef usm_ndarray _zero_like(usm_ndarray ary): return r +cdef api char* UsmNDArray_GetData(usm_ndarray arr): + """Get allocation pointer of zero index element of array """ + return arr.get_data() + + +cdef api int UsmNDArray_GetNDim(usm_ndarray arr): + """Get array rank: length of its shape""" + return arr.get_ndim() + + +cdef api Py_ssize_t* UsmNDArray_GetShape(usm_ndarray arr): + """Get host pointer to shape vector""" + return arr.get_shape() + + +cdef api Py_ssize_t* UsmNDArray_GetStrides(usm_ndarray arr): + """Get host pointer to strides vector""" + return arr.get_strides() + + +cdef api int UsmNDArray_GetTypenum(usm_ndarray arr): + """Get type number for data type of array elements""" + return arr.get_typenum() + + +cdef api int UsmNDArray_GetElementSize(usm_ndarray arr): + """Get array element size in bytes""" + return arr.get_itemsize() + + +cdef api int UsmNDArray_GetFlags(usm_ndarray arr): + """Get flags of array""" + return arr.get_flags() + + +cdef api c_dpctl.DPCTLSyclQueueRef UsmNDArray_GetQueueRef(usm_ndarray arr): + """Get DPCTLSyclQueueRef for queue associated with the array""" + return arr.get_queue_ref() + + +cdef api Py_ssize_t UsmNDArray_GetOffset(usm_ndarray arr): + """Get offset of zero-index array element from the beginning of the USM + allocation""" + return arr.get_offset() + + +cdef api object UsmNDArray_GetUSMData(usm_ndarray arr): + """Get USM data object underlying the array""" + return arr.get_base() + + +cdef api void UsmNDArray_SetWritableFlag(usm_ndarray arr, int flag): + """Set/unset USM_ARRAY_WRITABLE in the given array `arr`.""" + arr._set_writable_flag(flag) + + +cdef api object UsmNDArray_MakeSimpleFromMemory( + int nd, const Py_ssize_t *shape, int typenum, + c_dpmem._Memory mobj, Py_ssize_t offset, char order +): + """Create contiguous usm_ndarray. + + Args: + nd: number of dimensions (non-negative) + shape: array of nd non-negative array's sizes along each dimension + typenum: array elemental type number + ptr: pointer to the start of allocation + QRef: DPCTLSyclQueueRef associated with the allocation + offset: distance between element with zero multi-index and the + start of allocation + order: Memory layout of the array. Use 'C' for C-contiguous or + row-major layout; 'F' for F-contiguous or column-major layout + Returns: + Created usm_ndarray instance + """ + cdef object shape_tuple = _make_int_tuple(nd, shape) + cdef usm_ndarray arr = usm_ndarray( + shape_tuple, + dtype=_make_typestr(typenum), + buffer=mobj, + offset=offset, + order=(order) + ) + return arr + + +cdef api object UsmNDArray_MakeSimpleFromPtr( + size_t nelems, + int typenum, + c_dpctl.DPCTLSyclUSMRef ptr, + c_dpctl.DPCTLSyclQueueRef QRef, + object owner +): + """Create 1D contiguous usm_ndarray from pointer. + + Args: + nelems: number of elements in array + typenum: array elemental type number + ptr: pointer to the start of allocation + QRef: DPCTLSyclQueueRef associated with the allocation + owner: Python object managing lifetime of USM allocation. + Value None implies transfer of USM allocation ownership + to the created array object. + Returns: + Created usm_ndarray instance + """ + cdef int itemsize = type_bytesize(typenum) + if (itemsize < 1): + raise ValueError( + "dtype with typenum=" + str(typenum) + " is not supported." + ) + cdef size_t nbytes = ( itemsize) * nelems + cdef c_dpmem._Memory mobj + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, nbytes, QRef, memory_owner=owner + ) + cdef usm_ndarray arr = usm_ndarray( + (nelems,), + dtype=_make_typestr(typenum), + buffer=mobj + ) + return arr + +cdef api object UsmNDArray_MakeFromPtr( + int nd, + const Py_ssize_t *shape, + int typenum, + const Py_ssize_t *strides, + c_dpctl.DPCTLSyclUSMRef ptr, + c_dpctl.DPCTLSyclQueueRef QRef, + Py_ssize_t offset, + object owner +): + """ + General usm_ndarray constructor from externally made USM-allocation. + + Args: + nd: number of dimensions (non-negative) + shape: array of nd non-negative array's sizes along each dimension + typenum: array elemental type number + strides: array of nd strides along each dimension in elements + ptr: pointer to the start of allocation + QRef: DPCTLSyclQueueRef associated with the allocation + offset: distance between element with zero multi-index and the + start of allocation + owner: Python object managing lifetime of USM allocation. + Value None implies transfer of USM allocation ownership + to the created array object. + Returns: + Created usm_ndarray instance + """ + cdef int itemsize = type_bytesize(typenum) + cdef size_t nelems = 1 + cdef Py_ssize_t min_disp = 0 + cdef Py_ssize_t max_disp = 0 + cdef Py_ssize_t step_ = 0 + cdef Py_ssize_t dim_ = 0 + cdef it = 0 + cdef c_dpmem._Memory mobj + cdef usm_ndarray arr + cdef object obj_shape + cdef object obj_strides + + if (itemsize < 1): + raise ValueError( + "dtype with typenum=" + str(typenum) + " is not supported." + ) + if (nd < 0): + raise ValueError("Dimensionality must be non-negative") + if (ptr is NULL or QRef is NULL): + raise ValueError( + "Non-null USM allocation pointer and QRef are expected" + ) + if (nd == 0): + # case of 0d scalars + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, itemsize, QRef, memory_owner=owner + ) + arr = usm_ndarray( + tuple(), + dtype=_make_typestr(typenum), + buffer=mobj + ) + return arr + if (shape is NULL or strides is NULL): + raise ValueError("Both shape and stride vectors are required") + for it in range(nd): + dim_ = shape[it] + if dim_ < 0: + raise ValueError( + f"Dimension along axis {it} must be non-negative" + ) + nelems *= dim_ + if dim_ > 0: + step_ = strides[it] + if step_ > 0: + max_disp += step_ * (dim_ - 1) + else: + min_disp += step_ * (dim_ - 1) + + obj_shape = _make_int_tuple(nd, shape) + obj_strides = _make_int_tuple(nd, strides) + if nelems == 0: + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, itemsize, QRef, memory_owner=owner + ) + arr = usm_ndarray( + obj_shape, + dtype=_make_typestr(typenum), + strides=obj_strides, + buffer=mobj, + offset=0 + ) + return arr + if offset + min_disp < 0: + raise ValueError( + "Given shape, strides and offset reference out-of-bound memory" + ) + nbytes = ( itemsize) * (offset + max_disp + 1) + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, nbytes, QRef, memory_owner=owner + ) + arr = usm_ndarray( + obj_shape, + dtype=_make_typestr(typenum), + strides=obj_strides, + buffer=mobj, + offset=offset + ) + return arr + + def _is_object_with_buffer_protocol(o): "Returns True if object supports Python buffer protocol" return _is_buffer(o) From f2b5e9ebdad51322d9faeafb22dad827c97e1190 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 21 Apr 2026 08:35:03 -0700 Subject: [PATCH 02/41] add back tests for usm_ndarray C-API --- dpnp/tests/tensor/test_usm_ndarray_ctor.py | 548 +++++++++++++++++++++ 1 file changed, 548 insertions(+) diff --git a/dpnp/tests/tensor/test_usm_ndarray_ctor.py b/dpnp/tests/tensor/test_usm_ndarray_ctor.py index 70066860b19..1d908a09c89 100644 --- a/dpnp/tests/tensor/test_usm_ndarray_ctor.py +++ b/dpnp/tests/tensor/test_usm_ndarray_ctor.py @@ -183,6 +183,554 @@ def test_usm_ndarray_writable_flag_views(): assert not a.imag.flags.writable +def _pyx_capi_fnptr_to_callable( + X, + pyx_capi_name, + caps_name, + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), +): + import sys + + mod = sys.modules[X.__class__.__module__] + cap = mod.__pyx_capi__.get(pyx_capi_name, None) + if cap is None: + raise ValueError( + "__pyx_capi__ does not export {} capsule".format(pyx_capi_name) + ) + # construct Python callable to invoke these functions + cap_ptr_fn = ctypes.pythonapi.PyCapsule_GetPointer + cap_ptr_fn.restype = ctypes.c_void_p + cap_ptr_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] + fn_ptr = cap_ptr_fn(cap, caps_name) + callable_maker_ptr = ctypes.PYFUNCTYPE(fn_restype, *fn_argtypes) + return callable_maker_ptr(fn_ptr) + + +def test_pyx_capi_get_data(): + try: + X = dpt.usm_ndarray(17, dtype="i8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_data_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetData", + b"char *(struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + r1 = get_data_fn(X) + sua_iface = X.__sycl_usm_array_interface__ + assert r1 == sua_iface["data"][0] + sua_iface.get("offset") * X.itemsize + + +def test_pyx_capi_get_shape(): + try: + X = dpt.usm_ndarray(17, dtype="u4")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_shape_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetShape", + b"Py_ssize_t *(struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + c_longlong_p = ctypes.POINTER(ctypes.c_longlong) + shape0 = ctypes.cast(get_shape_fn(X), c_longlong_p).contents.value + assert shape0 == X.shape[0] + + +def test_pyx_capi_get_strides(): + try: + X = dpt.usm_ndarray(17, dtype="f4")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_strides_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetStrides", + b"Py_ssize_t *(struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + c_longlong_p = ctypes.POINTER(ctypes.c_longlong) + strides0_p = get_strides_fn(X) + if strides0_p: + strides0_p = ctypes.cast(strides0_p, c_longlong_p).contents + strides0_p = strides0_p.value + assert strides0_p == 0 or strides0_p == X.strides[0] + + +def test_pyx_capi_get_ndim(): + try: + X = dpt.usm_ndarray(17, dtype="?")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_ndim_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetNDim", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + assert get_ndim_fn(X) == X.ndim + + +def test_pyx_capi_get_typenum(): + try: + X = dpt.usm_ndarray(17, dtype="c8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_typenum_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetTypenum", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + typenum = get_typenum_fn(X) + assert type(typenum) is int + assert typenum == X.dtype.num + + +def test_pyx_capi_get_elemsize(): + try: + X = dpt.usm_ndarray(17, dtype="u8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_elemsize_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetElementSize", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + itemsize = get_elemsize_fn(X) + assert type(itemsize) is int + assert itemsize == X.itemsize + + +def test_pyx_capi_get_flags(): + try: + X = dpt.usm_ndarray(17, dtype="i8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_flags_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetFlags", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + flags = get_flags_fn(X) + assert type(flags) is int and X.flags == flags + + +def test_pyx_capi_get_offset(): + try: + X = dpt.usm_ndarray(17, dtype="u2")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_offset_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetOffset", + b"Py_ssize_t (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_longlong, + fn_argtypes=(ctypes.py_object,), + ) + offset = get_offset_fn(X) + assert type(offset) is int + assert offset == X.__sycl_usm_array_interface__["offset"] + + +def test_pyx_capi_get_usmdata(): + try: + X = dpt.usm_ndarray(17, dtype="u2")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_usmdata_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetUSMData", + b"PyObject *(struct PyUSMArrayObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=(ctypes.py_object,), + ) + capi_usm_data = get_usmdata_fn(X) + assert isinstance(capi_usm_data, dpm._memory._Memory) + assert capi_usm_data.nbytes == X.usm_data.nbytes + assert capi_usm_data._pointer == X.usm_data._pointer + assert capi_usm_data.sycl_queue == X.usm_data.sycl_queue + + +def test_pyx_capi_get_queue_ref(): + try: + X = dpt.usm_ndarray(17, dtype="i2")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_queue_ref_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetQueueRef", + b"DPCTLSyclQueueRef (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + queue_ref = get_queue_ref_fn(X) # address of a copy, should be unequal + assert queue_ref != X.sycl_queue.addressof_ref() + + +def test_pyx_capi_make_from_memory(): + q = get_queue_or_skip() + n0, n1 = 4, 6 + c_tuple = (ctypes.c_ssize_t * 2)(n0, n1) + mem = dpm.MemoryUSMShared(n0 * n1 * 4, queue=q) + typenum = dpt.dtype("single").num + any_usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + make_from_memory_fn = _pyx_capi_fnptr_to_callable( + any_usm_ndarray, + "UsmNDArray_MakeSimpleFromMemory", + b"PyObject *(int, Py_ssize_t const *, int, " + b"struct Py_MemoryObject *, Py_ssize_t, char)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_int, + ctypes.py_object, + ctypes.c_ssize_t, + ctypes.c_char, + ), + ) + r = make_from_memory_fn( + ctypes.c_int(2), + c_tuple, + ctypes.c_int(typenum), + mem, + ctypes.c_ssize_t(0), + ctypes.c_char(b"C"), + ) + assert isinstance(r, dpt.usm_ndarray) + assert r.ndim == 2 + assert r.shape == (n0, n1) + assert r._pointer == mem._pointer + assert r.usm_type == "shared" + assert r.sycl_queue == q + assert r.flags["C"] + r2 = make_from_memory_fn( + ctypes.c_int(2), + c_tuple, + ctypes.c_int(typenum), + mem, + ctypes.c_ssize_t(0), + ctypes.c_char(b"F"), + ) + ptr = mem._pointer + del mem + del r + assert isinstance(r2, dpt.usm_ndarray) + assert r2._pointer == ptr + assert r2.usm_type == "shared" + assert r2.sycl_queue == q + assert r2.flags["F"] + + +def test_pyx_capi_set_writable_flag(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty((4, 5), dtype="i4", sycl_queue=q) + assert isinstance(usm_ndarray, dpt.usm_ndarray) + assert usm_ndarray.flags["WRITABLE"] is True + set_writable = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_SetWritableFlag", + b"void (struct PyUSMArrayObject *, int)", + fn_restype=None, + fn_argtypes=(ctypes.py_object, ctypes.c_int), + ) + set_writable(usm_ndarray, ctypes.c_int(0)) + assert isinstance(usm_ndarray, dpt.usm_ndarray) + assert usm_ndarray.flags["WRITABLE"] is False + set_writable(usm_ndarray, ctypes.c_int(1)) + assert isinstance(usm_ndarray, dpt.usm_ndarray) + assert usm_ndarray.flags["WRITABLE"] is True + + +def test_pyx_capi_make_from_ptr(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + make_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeSimpleFromPtr", + b"PyObject *(size_t, int, DPCTLSyclUSMRef, " + b"DPCTLSyclQueueRef, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_size_t, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.py_object, + ), + ) + nelems = 10 + dt = dpt.int64 + mem = dpm.MemoryUSMDevice(nelems * dt.itemsize, queue=q) + arr = make_from_ptr( + ctypes.c_size_t(nelems), + dt.num, + mem._pointer, + mem.sycl_queue.addressof_ref(), + mem, + ) + assert isinstance(arr, dpt.usm_ndarray) + assert arr.shape == (nelems,) + assert arr.dtype == dt + assert arr.sycl_queue == q + assert arr._pointer == mem._pointer + del mem + assert isinstance(arr.__repr__(), str) + + +def test_pyx_capi_make_general(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + make_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeFromPtr", + b"PyObject *(int, Py_ssize_t const *, int, Py_ssize_t const *, " + b"DPCTLSyclUSMRef, DPCTLSyclQueueRef, Py_ssize_t, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_ssize_t, + ctypes.py_object, + ), + ) + # Create array to view into diagonal of a matrix + n = 5 + mat = dpt.reshape( + dpt.arange(n * n, dtype="i4", sycl_queue=q), + ( + n, + n, + ), + ) + c_shape = (ctypes.c_ssize_t * 1)( + n, + ) + c_strides = (ctypes.c_ssize_t * 1)( + n + 1, + ) + diag = make_from_ptr( + ctypes.c_int(1), + c_shape, + ctypes.c_int(mat.dtype.num), + c_strides, + mat._pointer, + mat.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + mat, + ) + assert isinstance(diag, dpt.usm_ndarray) + assert diag.shape == (n,) + assert diag.strides == (n + 1,) + assert diag.dtype == mat.dtype + assert diag.sycl_queue == q + assert diag._pointer == mat._pointer + del mat + assert isinstance(diag.__repr__(), str) + # create 0d scalar + mat = dpt.reshape( + dpt.arange(n * n, dtype="i4", sycl_queue=q), + ( + n, + n, + ), + ) + sc = make_from_ptr( + ctypes.c_int(0), + None, # NULL pointer + ctypes.c_int(mat.dtype.num), + None, # NULL pointer + mat._pointer, + mat.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + mat, + ) + assert isinstance(sc, dpt.usm_ndarray) + assert sc.shape == tuple() + assert sc.dtype == mat.dtype + assert sc.sycl_queue == q + assert sc._pointer == mat._pointer + c_shape = (ctypes.c_ssize_t * 2)(0, n) + c_strides = (ctypes.c_ssize_t * 2)(0, 1) + zd_arr = make_from_ptr( + ctypes.c_int(2), + c_shape, + ctypes.c_int(mat.dtype.num), + c_strides, + mat._pointer, + mat.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + mat, + ) + assert isinstance(zd_arr, dpt.usm_ndarray) + assert zd_arr.shape == ( + 0, + n, + ) + assert zd_arr.strides == ( + 0, + 1, + ) + assert zd_arr.dtype == mat.dtype + assert zd_arr.sycl_queue == q + assert zd_arr._pointer == mat._pointer + + +def test_pyx_capi_make_fns_invalid_typenum(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + + make_simple_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeSimpleFromPtr", + b"PyObject *(size_t, int, DPCTLSyclUSMRef, " + b"DPCTLSyclQueueRef, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_size_t, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.py_object, + ), + ) + + nelems = 10 + dtype = dpt.int64 + arr = dpt.arange(nelems, dtype=dtype, sycl_queue=q) + + with pytest.raises(ValueError): + make_simple_from_ptr( + ctypes.c_size_t(nelems), + -1, + arr._pointer, + arr.sycl_queue.addressof_ref(), + arr, + ) + + make_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeFromPtr", + b"PyObject *(int, Py_ssize_t const *, int, Py_ssize_t const *, " + b"DPCTLSyclUSMRef, DPCTLSyclQueueRef, Py_ssize_t, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_ssize_t, + ctypes.py_object, + ), + ) + c_shape = (ctypes.c_ssize_t * 1)( + nelems, + ) + c_strides = (ctypes.c_ssize_t * 1)( + 1, + ) + with pytest.raises(ValueError): + make_from_ptr( + ctypes.c_int(1), + c_shape, + -1, + c_strides, + arr._pointer, + arr.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + arr, + ) + del arr + + +def _pyx_capi_int(X, pyx_capi_name, caps_name=b"int", val_restype=ctypes.c_int): + import sys + + mod = sys.modules[X.__class__.__module__] + cap = mod.__pyx_capi__.get(pyx_capi_name, None) + if cap is None: + raise ValueError( + "__pyx_capi__ does not export {} capsule".format(pyx_capi_name) + ) + # construct Python callable to invoke these functions + cap_ptr_fn = ctypes.pythonapi.PyCapsule_GetPointer + cap_ptr_fn.restype = ctypes.c_void_p + cap_ptr_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] + cap_ptr = cap_ptr_fn(cap, caps_name) + val_ptr = ctypes.cast(cap_ptr, ctypes.POINTER(val_restype)) + return val_ptr.contents.value + + +def test_pyx_capi_check_constants(): + try: + X = dpt.usm_ndarray(17, dtype="i1")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + cc_flag = _pyx_capi_int(X, "USM_ARRAY_C_CONTIGUOUS") + assert cc_flag > 0 and 0 == (cc_flag & (cc_flag - 1)) + fc_flag = _pyx_capi_int(X, "USM_ARRAY_F_CONTIGUOUS") + assert fc_flag > 0 and 0 == (fc_flag & (fc_flag - 1)) + w_flag = _pyx_capi_int(X, "USM_ARRAY_WRITABLE") + assert w_flag > 0 and 0 == (w_flag & (w_flag - 1)) + + bool_typenum = _pyx_capi_int(X, "UAR_BOOL") + assert bool_typenum == dpt.dtype("bool_").num + + byte_typenum = _pyx_capi_int(X, "UAR_BYTE") + assert byte_typenum == dpt.dtype(np.byte).num + ubyte_typenum = _pyx_capi_int(X, "UAR_UBYTE") + assert ubyte_typenum == dpt.dtype(np.ubyte).num + + short_typenum = _pyx_capi_int(X, "UAR_SHORT") + assert short_typenum == dpt.dtype(np.short).num + ushort_typenum = _pyx_capi_int(X, "UAR_USHORT") + assert ushort_typenum == dpt.dtype(np.ushort).num + + int_typenum = _pyx_capi_int(X, "UAR_INT") + assert int_typenum == dpt.dtype(np.intc).num + uint_typenum = _pyx_capi_int(X, "UAR_UINT") + assert uint_typenum == dpt.dtype(np.uintc).num + + long_typenum = _pyx_capi_int(X, "UAR_LONG") + assert long_typenum == dpt.dtype("l").num + ulong_typenum = _pyx_capi_int(X, "UAR_ULONG") + assert ulong_typenum == dpt.dtype("L").num + + longlong_typenum = _pyx_capi_int(X, "UAR_LONGLONG") + assert longlong_typenum == dpt.dtype(np.longlong).num + ulonglong_typenum = _pyx_capi_int(X, "UAR_ULONGLONG") + assert ulonglong_typenum == dpt.dtype(np.ulonglong).num + + half_typenum = _pyx_capi_int(X, "UAR_HALF") + assert half_typenum == dpt.dtype(np.half).num + float_typenum = _pyx_capi_int(X, "UAR_FLOAT") + assert float_typenum == dpt.dtype(np.single).num + double_typenum = _pyx_capi_int(X, "UAR_DOUBLE") + assert double_typenum == dpt.dtype(np.double).num + + cfloat_typenum = _pyx_capi_int(X, "UAR_CFLOAT") + assert cfloat_typenum == dpt.dtype(np.csingle).num + cdouble_typenum = _pyx_capi_int(X, "UAR_CDOUBLE") + assert cdouble_typenum == dpt.dtype(np.cdouble).num + + @pytest.mark.parametrize("dt1", _all_dtypes) @pytest.mark.parametrize("dt2", _all_dtypes) def test_usm_ndarray_from_zero_sized_usm_ndarray(dt1, dt2): From 15664be0e572938b80071aa418c2ff6b75d611aa Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 21 Apr 2026 08:36:05 -0700 Subject: [PATCH 03/41] use () instead of tuple() in tests --- dpnp/tests/tensor/test_usm_ndarray_ctor.py | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/dpnp/tests/tensor/test_usm_ndarray_ctor.py b/dpnp/tests/tensor/test_usm_ndarray_ctor.py index 1d908a09c89..39ee17ed46d 100644 --- a/dpnp/tests/tensor/test_usm_ndarray_ctor.py +++ b/dpnp/tests/tensor/test_usm_ndarray_ctor.py @@ -384,7 +384,7 @@ def test_pyx_capi_make_from_memory(): c_tuple = (ctypes.c_ssize_t * 2)(n0, n1) mem = dpm.MemoryUSMShared(n0 * n1 * 4, queue=q) typenum = dpt.dtype("single").num - any_usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + any_usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) make_from_memory_fn = _pyx_capi_fnptr_to_callable( any_usm_ndarray, "UsmNDArray_MakeSimpleFromMemory", @@ -455,7 +455,7 @@ def test_pyx_capi_set_writable_flag(): def test_pyx_capi_make_from_ptr(): q = get_queue_or_skip() - usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) make_from_ptr = _pyx_capi_fnptr_to_callable( usm_ndarray, "UsmNDArray_MakeSimpleFromPtr", @@ -491,7 +491,7 @@ def test_pyx_capi_make_from_ptr(): def test_pyx_capi_make_general(): q = get_queue_or_skip() - usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) make_from_ptr = _pyx_capi_fnptr_to_callable( usm_ndarray, "UsmNDArray_MakeFromPtr", @@ -561,7 +561,7 @@ def test_pyx_capi_make_general(): mat, ) assert isinstance(sc, dpt.usm_ndarray) - assert sc.shape == tuple() + assert sc.shape == () assert sc.dtype == mat.dtype assert sc.sycl_queue == q assert sc._pointer == mat._pointer @@ -593,7 +593,7 @@ def test_pyx_capi_make_general(): def test_pyx_capi_make_fns_invalid_typenum(): q = get_queue_or_skip() - usm_ndarray = dpt.empty(tuple(), dtype="i4", sycl_queue=q) + usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) make_simple_from_ptr = _pyx_capi_fnptr_to_callable( usm_ndarray, @@ -846,7 +846,7 @@ def test_properties(dt): V.mT -@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)]) +@pytest.mark.parametrize("shape", [(), (1,), (1, 1), (1, 1, 1)]) @pytest.mark.parametrize("dtype", ["|b1", "|u2", "|f4", "|i8"]) class TestCopyScalar: @pytest.mark.parametrize("func", [bool, float, int, complex]) @@ -909,7 +909,7 @@ def test_index_noninteger(): @pytest.mark.parametrize( "ind", [ - tuple(), + (), (None,), ( None, @@ -1219,7 +1219,7 @@ def test_pyx_capi_check_constants(): @pytest.mark.parametrize( - "shape", [tuple(), (1,), (5,), (2, 3), (2, 3, 4), (2, 2, 2, 2, 2)] + "shape", [(), (1,), (5,), (2, 3), (2, 3, 4), (2, 2, 2, 2, 2)] ) @pytest.mark.parametrize( "dtype", @@ -1339,7 +1339,7 @@ def test_setitem_broadcasting_empty_dst_edge_case(): broadasting rule, hence no exception""" get_queue_or_skip() dst = dpt.ones(1, dtype="i8")[0:0] - src = dpt.ones(tuple(), dtype="i8") + src = dpt.ones((), dtype="i8") dst[...] = src @@ -1511,7 +1511,7 @@ def test_len(): assert len(X) == 1 X = dpt.usm_ndarray((2, 1), "i4") assert len(X) == 2 - X = dpt.usm_ndarray(tuple(), "i4") + X = dpt.usm_ndarray((), "i4") with pytest.raises(TypeError): len(X) @@ -1901,19 +1901,19 @@ def test_full_cmplx128(): dtype = "c16" skip_if_dtype_not_supported(dtype, q) fill_v = 1 + 1j - X = dpt.full(tuple(), fill_value=fill_v, dtype=dtype, sycl_queue=q) + X = dpt.full((), fill_value=fill_v, dtype=dtype, sycl_queue=q) assert np.array_equal( - dpt.asnumpy(X), np.full(tuple(), fill_value=fill_v, dtype=dtype) + dpt.asnumpy(X), np.full((), fill_value=fill_v, dtype=dtype) ) fill_v = 0 + 1j - X = dpt.full(tuple(), fill_value=fill_v, dtype=dtype, sycl_queue=q) + X = dpt.full((), fill_value=fill_v, dtype=dtype, sycl_queue=q) assert np.array_equal( - dpt.asnumpy(X), np.full(tuple(), fill_value=fill_v, dtype=dtype) + dpt.asnumpy(X), np.full((), fill_value=fill_v, dtype=dtype) ) fill_v = 0 + 0j - X = dpt.full(tuple(), fill_value=fill_v, dtype=dtype, sycl_queue=q) + X = dpt.full((), fill_value=fill_v, dtype=dtype, sycl_queue=q) assert np.array_equal( - dpt.asnumpy(X), np.full(tuple(), fill_value=fill_v, dtype=dtype) + dpt.asnumpy(X), np.full((), fill_value=fill_v, dtype=dtype) ) @@ -2188,7 +2188,7 @@ def test_empty_like(dt, usm_kind): assert X.usm_type == Y.usm_type assert X.sycl_queue == Y.sycl_queue - X = dpt.empty(tuple(), dtype=dt, usm_type=usm_kind, sycl_queue=q) + X = dpt.empty((), dtype=dt, usm_type=usm_kind, sycl_queue=q) Y = dpt.empty_like(X) assert X.shape == Y.shape assert X.dtype == Y.dtype @@ -2228,7 +2228,7 @@ def test_zeros_like(dt, usm_kind): assert X.sycl_queue == Y.sycl_queue assert np.allclose(dpt.asnumpy(Y), np.zeros(X.shape, dtype=X.dtype)) - X = dpt.empty(tuple(), dtype=dt, usm_type=usm_kind, sycl_queue=q) + X = dpt.empty((), dtype=dt, usm_type=usm_kind, sycl_queue=q) Y = dpt.zeros_like(X) assert X.shape == Y.shape assert X.dtype == Y.dtype @@ -2261,7 +2261,7 @@ def test_ones_like(dt, usm_kind): assert X.sycl_queue == Y.sycl_queue assert np.allclose(dpt.asnumpy(Y), np.ones(X.shape, dtype=X.dtype)) - X = dpt.empty(tuple(), dtype=dt, usm_type=usm_kind, sycl_queue=q) + X = dpt.empty((), dtype=dt, usm_type=usm_kind, sycl_queue=q) Y = dpt.ones_like(X) assert X.shape == Y.shape assert X.dtype == Y.dtype @@ -2295,7 +2295,7 @@ def test_full_like(dt, usm_kind): assert X.sycl_queue == Y.sycl_queue assert np.allclose(dpt.asnumpy(Y), np.ones(X.shape, dtype=X.dtype)) - X = dpt.empty(tuple(), dtype=dt, usm_type=usm_kind, sycl_queue=q) + X = dpt.empty((), dtype=dt, usm_type=usm_kind, sycl_queue=q) Y = dpt.full_like(X, fill_v) assert X.shape == Y.shape assert X.dtype == Y.dtype @@ -2551,7 +2551,7 @@ def test_common_arg_validation(): def test_flags(): try: - x = dpt.empty(tuple(), dtype="i4") + x = dpt.empty((), dtype="i4") except dpctl.SyclDeviceCreationError: pytest.skip("No SYCL devices available") f = x.flags From 5618bb114d68adea98e771ecf02b44987481eb70 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Mon, 27 Apr 2026 13:02:59 +0200 Subject: [PATCH 04/41] Fix RNG crash on Windows (#2871) This PR adds a Windows workaround for a crash on Battlemage during test runs. In oneMKL 2026.0 `mkl_rng::mt19937` destructor may crash with `STATUS_STACK_BUFFER_OVERRUN` during `DLL_PROCESS_DETACH` when using the Level Zero backend and static storage inside a DLL. The workaround heap-allocates `backend_sycl` singleton on Windows to avoid running its destructor at shutdown. Although `~backend_sycl()` is empty, it triggers destruction of its members (including `mkl_rng::mt19937`) which causes the crash. Since `backend_sycl` is a process-lifetime singleton, skipping its destruction is safe because OS reclaims all resources at process exit. --- dpnp/backend/src/queue_sycl.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp index 6100a03c872..3c41401ad62 100644 --- a/dpnp/backend/src/queue_sycl.hpp +++ b/dpnp/backend/src/queue_sycl.hpp @@ -67,8 +67,17 @@ class backend_sycl static backend_sycl &get() { +#if defined(_WIN32) && INTEL_MKL_VERSION == 20260000 + // TODO: remove once MKLD-19835 is resolved + // mt19937 (oneMKL 2026.0) destructor crashes during DLL_PROCESS_DETACH + // on Windows (Battlemage/Level Zero). Use a heap-allocated + // process-lifetime singleton to skip destructor; OS reclaims memory. + static backend_sycl *backend = new backend_sycl{}; + return *backend; +#else static backend_sycl backend{}; return backend; +#endif } static sycl::queue &get_queue() From 2bdf8d9476b1a531b8e2e7d915c726908cfd0df8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Apr 2026 15:05:03 +0200 Subject: [PATCH 05/41] Bump nick-fields/retry from 3.0.2 to 4.0.0 (#2874) Bumps [nick-fields/retry](https://github.com/nick-fields/retry) from 3.0.2 to 4.0.0. --- .github/workflows/check-onemath.yaml | 4 ++-- .github/workflows/conda-package.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml index 3ad8ba1ee84..3dc6221e469 100644 --- a/.github/workflows/check-onemath.yaml +++ b/.github/workflows/check-onemath.yaml @@ -165,7 +165,7 @@ jobs: - name: ReRun tensor tests on Linux if: env.rerun-tests-on-failure == 'true' id: run_tensor_tests - uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2 + uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0 with: timeout_minutes: ${{ env.rerun-tests-timeout }} max_attempts: ${{ env.rerun-tests-max-attempts }} @@ -297,7 +297,7 @@ jobs: - name: ReRun tensor tests on Linux if: env.rerun-tests-on-failure == 'true' id: run_tensor_tests_branch - uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2 + uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0 with: timeout_minutes: ${{ env.rerun-tests-timeout }} max_attempts: ${{ env.rerun-tests-max-attempts }} diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 77e8c61ff35..bc5fe5f01a8 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -257,7 +257,7 @@ jobs: - name: Run tensor tests if: env.rerun-tests-on-failure == 'true' id: run_tests_tensor_linux - uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2 + uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0 with: timeout_minutes: ${{ env.rerun-tests-timeout }} max_attempts: ${{ env.rerun-tests-max-attempts }} @@ -439,7 +439,7 @@ jobs: - name: Run tensor tests if: env.rerun-tests-on-failure == 'true' id: run_tests_tensor_win - uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2 + uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0 with: timeout_minutes: ${{ env.rerun-tests-timeout }} max_attempts: ${{ env.rerun-tests-max-attempts }} From 98c35223c9658a11453ee2b3ad9f650b9fc55692 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 Apr 2026 14:55:49 +0200 Subject: [PATCH 06/41] Bump mshick/add-pr-comment from 3.10.0 to 3.11.0 (#2876) Bumps [mshick/add-pr-comment](https://github.com/mshick/add-pr-comment) from 3.10.0 to 3.11.0. --- .github/workflows/build-sphinx.yml | 4 ++-- .github/workflows/conda-package.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 05f5b2711e2..769fea3ed7b 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -224,7 +224,7 @@ jobs: if: env.GH_EVENT_OPEN_PR_UPSTREAM == 'true' env: PR_NUM: ${{ github.event.number }} - uses: mshick/add-pr-comment@64b8e914979889d746c99dea15a76e77ef64580a # v3.10.0.8.3.10.0 + uses: mshick/add-pr-comment@8e4927817251f1ff60c001f04568532b38e0b4a0 # v3.11.0.8.3.11.0 with: message-id: url_to_docs message: | @@ -268,7 +268,7 @@ jobs: git push tokened_docs gh-pages - name: Modify the comment with URL to official documentation - uses: mshick/add-pr-comment@64b8e914979889d746c99dea15a76e77ef64580a # v3.10.0.8.3.10.0 + uses: mshick/add-pr-comment@8e4927817251f1ff60c001f04568532b38e0b4a0 # v3.11.0.8.3.11.0 with: message-id: url_to_docs find: | diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index bc5fe5f01a8..19194bc7f51 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -696,7 +696,7 @@ jobs: - name: Post result to PR if: ${{ github.event.pull_request && !github.event.pull_request.head.repo.fork }} - uses: mshick/add-pr-comment@64b8e914979889d746c99dea15a76e77ef64580a # v3.10.0.8.3.10.0 + uses: mshick/add-pr-comment@8e4927817251f1ff60c001f04568532b38e0b4a0 # v3.11.0.8.3.11.0 with: message-id: array_api_results message: | From cd198bc8d8711fbb0e0a802ab541969b8b9ac207 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 Apr 2026 18:11:11 +0200 Subject: [PATCH 07/41] Bump conda-incubator/setup-miniconda from 3.3.0 to 4.0.1 (#2875) Bumps [conda-incubator/setup-miniconda](https://github.com/conda-incubator/setup-miniconda) from 3.3.0 to 4.0.1. --- .github/workflows/build-sphinx.yml | 4 ++-- .github/workflows/check-onemath.yaml | 8 ++++---- .github/workflows/conda-package.yml | 24 ++++++++++++------------ .github/workflows/cron-run-tests.yaml | 4 ++-- .github/workflows/generate_coverage.yaml | 4 ++-- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 769fea3ed7b..9d67c9c66c9 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -119,7 +119,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -129,7 +129,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml index 3dc6221e469..5936834dca1 100644 --- a/.github/workflows/check-onemath.yaml +++ b/.github/workflows/check-onemath.yaml @@ -95,7 +95,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -106,7 +106,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -227,7 +227,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -238,7 +238,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 19194bc7f51..f0c85d8c3f0 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -61,7 +61,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -71,7 +71,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -159,7 +159,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -169,7 +169,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -321,7 +321,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -331,7 +331,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -493,7 +493,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -503,7 +503,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -578,7 +578,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -588,7 +588,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -727,7 +727,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -737,7 +737,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml index 5b3a6452401..e2b2768967f 100644 --- a/.github/workflows/cron-run-tests.yaml +++ b/.github/workflows/cron-run-tests.yaml @@ -61,7 +61,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -72,7 +72,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 3d5d34531ad..8aa5787dab6 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -78,7 +78,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' @@ -88,7 +88,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@fc2d68f6413eb2d87b895e92f8584b5b94a10167 # v3.3.0 + uses: conda-incubator/setup-miniconda@8ee1f361103df19b6f8c8655fd3967a8ecb162d5 # v4.0.1 with: miniforge-version: latest use-mamba: 'true' From 12551b6ac7eac8d144611f316f6d99de926ff1be Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 30 Apr 2026 15:52:25 +0200 Subject: [PATCH 08/41] Weekly pre-commit autoupdate (#2873) This PR updates the `.pre-commit-config.yaml` using `pre-commit autoupdate`. --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 76163657df3..eb0df88102d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,7 +69,7 @@ repos: - id: black exclude: "dpnp/_version.py" - repo: https://github.com/pycqa/isort - rev: 8.0.1 + rev: 9.0.0a3 hooks: - id: isort name: isort (python) @@ -89,7 +89,7 @@ repos: - flake8-docstrings==1.7.0 - flake8-bugbear==24.12.12 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v22.1.3 + rev: v22.1.4 hooks: - id: clang-format args: ["-i"] From 39656ca64b2f7b77db0a34c06c9a01a96de87de2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 May 2026 08:10:55 +0200 Subject: [PATCH 09/41] Bump github/codeql-action from 4.35.2 to 4.35.3 (#2886) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.35.2 to 4.35.3. --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 311677f16c4..272c989f7b7 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2 + uses: github/codeql-action/upload-sarif@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3 with: sarif_file: results.sarif From cea897e6f91c723def666f1e6bd01e96349091cc Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Mon, 4 May 2026 23:06:33 +0200 Subject: [PATCH 10/41] Fix conda package build in public CI (#2888) This PR proposes to fix CI failures by using `conda-build` directly instead of `conda build` in `conda-packages.yml`. In the current environment `conda-build` is installed and works but `conda` does not register `build` as a subcommand and fails with ``` conda: error: argument COMMAND: invalid choice: 'build' ``` Calling `conda-build` directly uses the same tool and avoids this issue --- .github/workflows/conda-package.yml | 4 ++-- CHANGELOG.md | 2 ++ doc/quick_start_guide.rst | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index f0c85d8c3f0..6d1f2e5d00e 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -91,13 +91,13 @@ jobs: - name: Build conda package id: build_conda_pkg continue-on-error: true - run: conda build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe + run: conda-build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe env: MAX_BUILD_CMPL_MKL_VERSION: '2026.0a0' - name: ReBuild conda package if: steps.build_conda_pkg.outcome == 'failure' - run: conda build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe + run: conda-build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe env: MAX_BUILD_CMPL_MKL_VERSION: '2026.0a0' diff --git a/CHANGELOG.md b/CHANGELOG.md index d6239fd9c1f..aed06813d39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +* Fixed `conda build` command syntax in GitHub workflows and documentation to use `conda-build` [#2888](https://github.com/IntelPython/dpnp/pull/2888) + ### Security diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 6226a655c33..b383be91049 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -74,7 +74,7 @@ And to build dpnp package from the sources: .. code-block:: bash - conda build conda-recipe -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels + conda-build conda-recipe -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels Finally, to install the result package: From 5e823b11e4cb8e727a1c9ac888d46fcab1c7852f Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 4 May 2026 16:12:52 -0700 Subject: [PATCH 11/41] Update public CI with 2026.0 release (#2890) The PR updates the conda package workflow in GitHub actions to use the latest 2026.0 oneAPI release. --- .github/workflows/conda-package.yml | 4 ++-- conda-recipe/meta.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 6d1f2e5d00e..3d9a994ead4 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -93,13 +93,13 @@ jobs: continue-on-error: true run: conda-build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe env: - MAX_BUILD_CMPL_MKL_VERSION: '2026.0a0' + MAX_BUILD_CMPL_MKL_VERSION: '2027.0a0' - name: ReBuild conda package if: steps.build_conda_pkg.outcome == 'failure' run: conda-build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe env: - MAX_BUILD_CMPL_MKL_VERSION: '2026.0a0' + MAX_BUILD_CMPL_MKL_VERSION: '2027.0a0' - name: Upload artifact uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 956ff6db013..f15818fdf39 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,6 +1,6 @@ {% set max_compiler_and_mkl_version = environ.get("MAX_BUILD_CMPL_MKL_VERSION", "2027.0a0") %} -{% set required_compiler_and_mkl_version = "2025.0" %} -{% set required_dpctl_version = "0.22.0*" %} +{% set required_compiler_and_mkl_version = "2026.0" %} +{% set required_dpctl_version = "0.22.0" %} {% set pyproject = load_file_data('pyproject.toml') %} {% set py_build_deps = pyproject.get('build-system', {}).get('requires', []) %} From da526caa0ca1644236b0b23c8f3efdedec626be7 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Wed, 6 May 2026 11:41:50 +0200 Subject: [PATCH 12/41] Fix in-place indexing with range/list for 4D arrays (#2872) This PR proposes to fix #2783 by addressing incorrect in-place updates for 4D arrays. `_is_boolean()` in `_slicing.pxi` had a `__bool__` fallback that misclassified `range` objects as boolean scalar indices since `range` exposes `__bool__`. This caused them to insert `newaxis(1)` instead of being treated as integer index arrays. The fallback was removed because all legitimate boolean types are already caught by earlier checks in the same function. `range` and `list` are not part of the Python Array API indexing spec but NumPy accepts them as integer index arrays. Conversion of these types to numpy arrays is added in `_get_unwrapped_index_key()` in `dpnp_array.py` which is the NumPy compatibility layer but keeping the tensor layer Array API compliant. Also added tests for `range` and `list` advanced indexing covering to `test_indexing.py` --- CHANGELOG.md | 1 + dpnp/dpnp_array.py | 41 +++++++++++++++++++++------- dpnp/tensor/_slicing.pxi | 6 ----- dpnp/tests/test_indexing.py | 53 +++++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aed06813d39..752bf2ad4b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +* Fixed incorrect in-place advanced indexing for 4D arrays when using `range` or `list` as index keys [#2872](https://github.com/IntelPython/dpnp/pull/2872) * Fixed `conda build` command syntax in GitHub workflows and documentation to use `conda-build` [#2888](https://github.com/IntelPython/dpnp/pull/2888) ### Security diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 00a1b2d00e5..02cd655fcef 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -38,6 +38,8 @@ import warnings +import numpy + import dpnp import dpnp.tensor as dpt import dpnp.tensor._type_utils as dtu @@ -46,24 +48,45 @@ from .exceptions import AxisError +def _unwrap_index_element(x): + """ + Unwrap a single index element for the tensor indexing layer. + + Converts dpnp arrays to usm_ndarray and array-like objects (range, list) + to numpy arrays with intp dtype for NumPy-compatible advanced indexing. + + """ + + if isinstance(x, dpt.usm_ndarray): + return x + if isinstance(x, dpnp_array): + return x.get_array() + if isinstance(x, range): + return numpy.asarray(x, dtype=numpy.intp) + if isinstance(x, list): + # keep boolean lists as boolean + arr = numpy.asarray(x) + # cast empty lists (float64 in NumPy) to intp + # for correct tensor indexing + if arr.size == 0: + arr = arr.astype(numpy.intp) + return arr + return x + + def _get_unwrapped_index_key(key): """ Get an unwrapped index key. Return a key where each nested instance of DPNP array is unwrapped into - USM ndarray for further processing in DPCTL advanced indexing functions. + USM ndarray, and array-like objects (range, list) are converted to numpy + arrays for further processing in advanced indexing functions. """ if isinstance(key, tuple): - if any(isinstance(x, dpnp_array) for x in key): - # create a new tuple from the input key with unwrapped DPNP arrays - return tuple( - x.get_array() if isinstance(x, dpnp_array) else x for x in key - ) - elif isinstance(key, dpnp_array): - return key.get_array() - return key + return tuple(_unwrap_index_element(x) for x in key) + return _unwrap_index_element(key) # pylint: disable=too-many-public-methods diff --git a/dpnp/tensor/_slicing.pxi b/dpnp/tensor/_slicing.pxi index f387aef8afd..2f22894c4b1 100644 --- a/dpnp/tensor/_slicing.pxi +++ b/dpnp/tensor/_slicing.pxi @@ -104,12 +104,6 @@ cdef bint _is_boolean(object x) except *: return f in "?" else: return False - if callable(getattr(x, "__bool__", None)): - try: - x.__bool__() - except (TypeError, ValueError): - return False - return True return False diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index bfdcf0ed30a..2edc8214f3e 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -353,6 +353,59 @@ def test_indexing_array_negative_strides(self): arr[slices] = 10 assert_equal(arr, 10.0, strict=False) + @pytest.mark.parametrize( + "idx", + [ + (range(2), range(2)), + ([0, 1], [0, 1]), + ], + ids=["range", "list"], + ) + def test_array_like_index_getitem(self, idx): + np_a = numpy.arange(36).reshape(2, 2, 3, 3) + dp_a = dpnp.arange(36).reshape(2, 2, 3, 3) + assert_array_equal(dp_a[idx], np_a[idx]) + + @pytest.mark.parametrize( + "idx", + [ + (range(2), range(2)), + ([0, 1], [0, 1]), + ], + ids=["range", "list"], + ) + def test_array_like_index_setitem(self, idx): + np_a = numpy.arange(36).reshape(2, 2, 3, 3) + dp_a = dpnp.arange(36).reshape(2, 2, 3, 3) + np_a[idx] = 0 + dp_a[idx] = 0 + assert_array_equal(dp_a, np_a) + + def test_array_like_index_inplace_add(self): + np_a = numpy.arange(36).reshape(2, 2, 3, 3) + dp_a = dpnp.arange(36).reshape(2, 2, 3, 3) + np_tmp = -numpy.ones((2, 3, 3), dtype=numpy.intp) + dp_tmp = -dpnp.ones((2, 3, 3), dtype=numpy.intp) + + np_a[range(2), range(2)] += 2 * np_tmp + dp_a[range(2), range(2)] += 2 * dp_tmp + assert_array_equal(dp_a, np_a) + + @pytest.mark.parametrize( + "idx", + [ + range(2), + [0, 1], + range(0), + [], + ], + ids=["range", "list", "empty_range", "empty_list"], + ) + def test_array_like_single_index(self, idx): + np_a = numpy.arange(24).reshape(2, 3, 4) + dp_a = dpnp.arange(24).reshape(2, 3, 4) + assert_array_equal(dp_a[idx], np_a[idx]) + class TestIx: @pytest.mark.parametrize( From 35010ee5cbf1c6ac2f62fa71a6289edde63240de Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 6 May 2026 10:06:55 -0700 Subject: [PATCH 13/41] Temporary disable `test_minimum_nan` for float16 (#2900) This PR adds skip for known IGC driver issue to unblock the internal CI until the driver releases the fix. --- dpnp/tests/helper.py | 3 +-- .../third_party/cupy/math_tests/test_misc.py | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/dpnp/tests/helper.py b/dpnp/tests/helper.py index c0778777ec5..2cb230fe25d 100644 --- a/dpnp/tests/helper.py +++ b/dpnp/tests/helper.py @@ -13,7 +13,6 @@ class LTS_VERSION(Enum): - V1_3 = "1.3" V1_6 = "1.6" @@ -489,7 +488,7 @@ def is_lnl(device=None): return _get_dev_mask(device) == 0x6400 -def is_lts_driver(version=LTS_VERSION.V1_3, device=None): +def is_lts_driver(version=LTS_VERSION.V1_6, device=None): """ Return True if a test is running on a GPU device with LTS driver version, False otherwise. diff --git a/dpnp/tests/third_party/cupy/math_tests/test_misc.py b/dpnp/tests/third_party/cupy/math_tests/test_misc.py index dcc3c4017c6..0098994a3bf 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_misc.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_misc.py @@ -4,7 +4,12 @@ import pytest import dpnp as cupy -from dpnp.tests.helper import has_support_aspect64, numpy_version +from dpnp.tests.helper import ( + has_support_aspect64, + is_lts_driver, + is_win_platform, + numpy_version, +) from dpnp.tests.third_party.cupy import testing @@ -113,6 +118,14 @@ def check_unary_inf_nan(self, name, xp, dtype): @testing.for_dtypes(["e", "f", "d", "F", "D"]) @testing.numpy_cupy_array_equal() def check_binary_nan(self, name, xp, dtype): + if ( + not is_win_platform() + and not is_lts_driver() + and name == "minimum" + and dtype == numpy.float16 + ): + pytest.skip("GSD-12679") + a = xp.array( [-3, numpy.nan, -1, numpy.nan, 0, numpy.nan, 2], dtype=dtype ) From e266f26e2474ddf345b023ceb1a352dad145dff0 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Fri, 8 May 2026 02:02:51 +0200 Subject: [PATCH 14/41] Pin Intel OneAPI to 2025.3 in CI workflows for coverage (#2887) This PR proposes using OneAPI 2025.3 for `build-sphinx` and `generate_coverage` workflows to match the compiler version used to build dpctl packages since dpctl is waiting for the latest compiler to become available on conda-forge [dpctl#2300](https://github.com/IntelPython/dpctl/pull/2300/changes#diff-a88e46d7ba5cf89249cb4894a9dbbc93a2f6b8a631a87dc2241c3e8e3acf8eeeL575) --- .github/workflows/generate_coverage.yaml | 26 +++++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 8aa5787dab6..c7b38ec306c 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -46,16 +46,18 @@ jobs: echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list sudo apt update - - name: Install latest Intel OneAPI + # Pinned to OneAPI 2025.3 due to known 2026.0 DPC++ compiler issue with Segfault during the sycl-post-link + # TODO: renmove once CMPLRLLVM-75178 is resolved and released + - name: Install Intel OneAPI if: env.oneapi-pkgs-env == '' run: | - sudo apt install hwloc \ - intel-oneapi-mkl \ - intel-oneapi-umf \ - intel-oneapi-mkl-devel \ - intel-oneapi-tbb-devel \ - intel-oneapi-libdpstd-devel \ - intel-oneapi-compiler-dpcpp-cpp + sudo apt install hwloc \ + intel-oneapi-mkl-2025.3 \ + intel-oneapi-umf-1.0 \ + intel-oneapi-mkl-devel-2025.3 \ + intel-oneapi-tbb-devel-2022.3 \ + intel-oneapi-libdpstd-devel-2022.10 \ + intel-oneapi-compiler-dpcpp-cpp-2025.3 - name: Checkout repo uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -107,10 +109,14 @@ jobs: # with a newer version of the DPC++ compiler). # Installing dpctl via the pip manager has no such limitation, as the package has no # run dependency on the DPC++ RT pip package, so this is why the step is necessary here. + # - name: Install dpctl + # if: env.oneapi-pkgs-env == '' + # run: | + # pip install -r ${{ env.dpctl-pkg-txt }} + # TODO: renmove pinning once CMPLRLLVM-75178 is resolved and released - name: Install dpctl - if: env.oneapi-pkgs-env == '' run: | - pip install -r ${{ env.dpctl-pkg-txt }} + pip install dpctl>=0.23.0dev0 --index-url https://pypi.anaconda.org/dppy/label/coverage/simple - name: Conda info run: | From a3cced712ea23de55a87f62d199a70b000d25e77 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 7 May 2026 21:15:33 -0700 Subject: [PATCH 15/41] Change order for step with installing OCL CPU RT (#2903) The PR fixes the issue in GitHub workflow to scheduled run of the tests. We have to install proper `intel-opencl-rt` package first, since otherwise installing `intel-opencl-rt` next step after installing dpnp might cause reinstall of older dpnp version, since we don't use strict conda channel priority. --- .github/workflows/cron-run-tests.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml index e2b2768967f..7bc8fa57c38 100644 --- a/.github/workflows/cron-run-tests.yaml +++ b/.github/workflows/cron-run-tests.yaml @@ -81,6 +81,9 @@ jobs: python-version: ${{ matrix.python }} activate-environment: ${{ env.test-env-name }} + - name: Install OCL CPU RT from Intel channel + run: mamba install intel-opencl-rt=*=intel_* ${{ env.channels-list }} + - name: Install dpnp id: install_dpnp continue-on-error: true @@ -92,9 +95,6 @@ jobs: run: | mamba install ${{ env.package-name }}=${{ steps.find_latest_tag.outputs.tag }} ${{ env.test-packages }} ${{ env.channels-list }} - - name: Install OCL CPU RT from Intel channel - run: mamba install intel-opencl-rt=*=intel_* ${{ env.channels-list }} - - name: List installed packages run: mamba list From 8c1f79472a8dac09b732909daa655cd56a33cc01 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 9 May 2026 09:50:17 -0700 Subject: [PATCH 16/41] Remove workarounds to issues with build env for oneMath interface (#2904) This PR partially removes workarounds added in #2646 --- environments/oneapi_pkgs.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/environments/oneapi_pkgs.yml b/environments/oneapi_pkgs.yml index c0f5c024afd..4b086110d69 100644 --- a/environments/oneapi_pkgs.yml +++ b/environments/oneapi_pkgs.yml @@ -2,8 +2,7 @@ name: OneAPI packages to build DPNP without OneAPI env activated channels: - https://software.repos.intel.com/python/conda/ dependencies: - - dpcpp_linux-64>=2025.3 # force to install the latest release - - mkl-devel-dpcpp>=2025.3 # force to install the latest release - - mkl-devel>=2025.3 # w/a to intel_repack-feedstock#121 + - dpcpp_linux-64 + - mkl-devel-dpcpp - onedpl-devel - tbb-devel * intel_* # MKL hardly dependces on TBB from Intel channel From c6b26c8bfcf1699ff9817150bc26edf9d483152e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 19:17:11 -0700 Subject: [PATCH 17/41] Weekly pre-commit autoupdate (#2906) This PR updates the `.pre-commit-config.yaml` using `pre-commit autoupdate`. --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eb0df88102d..a189961a59b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -89,7 +89,7 @@ repos: - flake8-docstrings==1.7.0 - flake8-bugbear==24.12.12 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v22.1.4 + rev: v22.1.5 hooks: - id: clang-format args: ["-i"] @@ -128,7 +128,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/BlankSpruce/gersemi-pre-commit - rev: 0.27.2 + rev: 0.27.4 hooks: - id: gersemi exclude: "dpnp/backend/cmake/Modules/" From 9c2a53ee86862423221ad145642361b385d88de5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 10 May 2026 21:29:19 -0700 Subject: [PATCH 18/41] Bump github/codeql-action from 4.35.3 to 4.35.4 (#2907) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.35.3 to 4.35.4. --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 272c989f7b7..1e12ad9a358 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3 + uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4 with: sarif_file: results.sarif From 09993c6c17b649ed0c7329d68966c7d47bb7c375 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 11 May 2026 11:03:02 -0700 Subject: [PATCH 19/41] add gh-2866 to changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 752bf2ad4b3..ebd1b51f303 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +* Added C API functions for `tensor.usm_ndarray` setters and getters to avoid ABI breakage if `tensor.usm_ndarray` is modified [gh-2866](https://github.com/IntelPython/dpnp/pull/2866) + ### Changed ### Deprecated From 4a733bfebfc6d70a8357e0e0ec0ab5dc71d02bf5 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 11 May 2026 11:11:33 -0700 Subject: [PATCH 20/41] separate usm_ndarray C-API tests --- dpnp/tests/tensor/test_usm_ndarray_capi.py | 586 +++++++++++++++++++++ dpnp/tests/tensor/test_usm_ndarray_ctor.py | 548 ------------------- 2 files changed, 586 insertions(+), 548 deletions(-) create mode 100644 dpnp/tests/tensor/test_usm_ndarray_capi.py diff --git a/dpnp/tests/tensor/test_usm_ndarray_capi.py b/dpnp/tests/tensor/test_usm_ndarray_capi.py new file mode 100644 index 00000000000..5ceb25fe7ec --- /dev/null +++ b/dpnp/tests/tensor/test_usm_ndarray_capi.py @@ -0,0 +1,586 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import ctypes + +import dpctl +import dpctl.memory as dpm +import numpy as np +import pytest + +import dpnp.tensor as dpt + +from .helper import get_queue_or_skip + + +def _pyx_capi_fnptr_to_callable( + X, + pyx_capi_name, + caps_name, + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), +): + import sys + + mod = sys.modules[X.__class__.__module__] + cap = mod.__pyx_capi__.get(pyx_capi_name, None) + if cap is None: + raise ValueError( + "__pyx_capi__ does not export {} capsule".format(pyx_capi_name) + ) + # construct Python callable to invoke these functions + cap_ptr_fn = ctypes.pythonapi.PyCapsule_GetPointer + cap_ptr_fn.restype = ctypes.c_void_p + cap_ptr_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] + fn_ptr = cap_ptr_fn(cap, caps_name) + callable_maker_ptr = ctypes.PYFUNCTYPE(fn_restype, *fn_argtypes) + return callable_maker_ptr(fn_ptr) + + +def test_pyx_capi_get_data(): + try: + X = dpt.usm_ndarray(17, dtype="i8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_data_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetData", + b"char *(struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + r1 = get_data_fn(X) + sua_iface = X.__sycl_usm_array_interface__ + assert r1 == sua_iface["data"][0] + sua_iface.get("offset") * X.itemsize + + +def test_pyx_capi_get_shape(): + try: + X = dpt.usm_ndarray(17, dtype="u4")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_shape_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetShape", + b"Py_ssize_t *(struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + c_longlong_p = ctypes.POINTER(ctypes.c_longlong) + shape0 = ctypes.cast(get_shape_fn(X), c_longlong_p).contents.value + assert shape0 == X.shape[0] + + +def test_pyx_capi_get_strides(): + try: + X = dpt.usm_ndarray(17, dtype="f4")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_strides_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetStrides", + b"Py_ssize_t *(struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + c_longlong_p = ctypes.POINTER(ctypes.c_longlong) + strides0_p = get_strides_fn(X) + if strides0_p: + strides0_p = ctypes.cast(strides0_p, c_longlong_p).contents + strides0_p = strides0_p.value + assert strides0_p == 0 or strides0_p == X.strides[0] + + +def test_pyx_capi_get_ndim(): + try: + X = dpt.usm_ndarray(17, dtype="?")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_ndim_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetNDim", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + assert get_ndim_fn(X) == X.ndim + + +def test_pyx_capi_get_typenum(): + try: + X = dpt.usm_ndarray(17, dtype="c8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_typenum_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetTypenum", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + typenum = get_typenum_fn(X) + assert type(typenum) is int + assert typenum == X.dtype.num + + +def test_pyx_capi_get_elemsize(): + try: + X = dpt.usm_ndarray(17, dtype="u8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_elemsize_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetElementSize", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + itemsize = get_elemsize_fn(X) + assert type(itemsize) is int + assert itemsize == X.itemsize + + +def test_pyx_capi_get_flags(): + try: + X = dpt.usm_ndarray(17, dtype="i8")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_flags_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetFlags", + b"int (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_int, + fn_argtypes=(ctypes.py_object,), + ) + flags = get_flags_fn(X) + assert type(flags) is int and X.flags == flags + + +def test_pyx_capi_get_offset(): + try: + X = dpt.usm_ndarray(17, dtype="u2")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_offset_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetOffset", + b"Py_ssize_t (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_longlong, + fn_argtypes=(ctypes.py_object,), + ) + offset = get_offset_fn(X) + assert type(offset) is int + assert offset == X.__sycl_usm_array_interface__["offset"] + + +def test_pyx_capi_get_usmdata(): + try: + X = dpt.usm_ndarray(17, dtype="u2")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_usmdata_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetUSMData", + b"PyObject *(struct PyUSMArrayObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=(ctypes.py_object,), + ) + capi_usm_data = get_usmdata_fn(X) + assert isinstance(capi_usm_data, dpm._memory._Memory) + assert capi_usm_data.nbytes == X.usm_data.nbytes + assert capi_usm_data._pointer == X.usm_data._pointer + assert capi_usm_data.sycl_queue == X.usm_data.sycl_queue + + +def test_pyx_capi_get_queue_ref(): + try: + X = dpt.usm_ndarray(17, dtype="i2")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + get_queue_ref_fn = _pyx_capi_fnptr_to_callable( + X, + "UsmNDArray_GetQueueRef", + b"DPCTLSyclQueueRef (struct PyUSMArrayObject *)", + fn_restype=ctypes.c_void_p, + fn_argtypes=(ctypes.py_object,), + ) + queue_ref = get_queue_ref_fn(X) # address of a copy, should be unequal + assert queue_ref != X.sycl_queue.addressof_ref() + + +def test_pyx_capi_make_from_memory(): + q = get_queue_or_skip() + n0, n1 = 4, 6 + c_tuple = (ctypes.c_ssize_t * 2)(n0, n1) + mem = dpm.MemoryUSMShared(n0 * n1 * 4, queue=q) + typenum = dpt.dtype("single").num + any_usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) + make_from_memory_fn = _pyx_capi_fnptr_to_callable( + any_usm_ndarray, + "UsmNDArray_MakeSimpleFromMemory", + b"PyObject *(int, Py_ssize_t const *, int, " + b"struct Py_MemoryObject *, Py_ssize_t, char)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_int, + ctypes.py_object, + ctypes.c_ssize_t, + ctypes.c_char, + ), + ) + r = make_from_memory_fn( + ctypes.c_int(2), + c_tuple, + ctypes.c_int(typenum), + mem, + ctypes.c_ssize_t(0), + ctypes.c_char(b"C"), + ) + assert isinstance(r, dpt.usm_ndarray) + assert r.ndim == 2 + assert r.shape == (n0, n1) + assert r._pointer == mem._pointer + assert r.usm_type == "shared" + assert r.sycl_queue == q + assert r.flags["C"] + r2 = make_from_memory_fn( + ctypes.c_int(2), + c_tuple, + ctypes.c_int(typenum), + mem, + ctypes.c_ssize_t(0), + ctypes.c_char(b"F"), + ) + ptr = mem._pointer + del mem + del r + assert isinstance(r2, dpt.usm_ndarray) + assert r2._pointer == ptr + assert r2.usm_type == "shared" + assert r2.sycl_queue == q + assert r2.flags["F"] + + +def test_pyx_capi_set_writable_flag(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty((4, 5), dtype="i4", sycl_queue=q) + assert isinstance(usm_ndarray, dpt.usm_ndarray) + assert usm_ndarray.flags["WRITABLE"] is True + set_writable = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_SetWritableFlag", + b"void (struct PyUSMArrayObject *, int)", + fn_restype=None, + fn_argtypes=(ctypes.py_object, ctypes.c_int), + ) + set_writable(usm_ndarray, ctypes.c_int(0)) + assert isinstance(usm_ndarray, dpt.usm_ndarray) + assert usm_ndarray.flags["WRITABLE"] is False + set_writable(usm_ndarray, ctypes.c_int(1)) + assert isinstance(usm_ndarray, dpt.usm_ndarray) + assert usm_ndarray.flags["WRITABLE"] is True + + +def test_pyx_capi_make_from_ptr(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) + make_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeSimpleFromPtr", + b"PyObject *(size_t, int, DPCTLSyclUSMRef, " + b"DPCTLSyclQueueRef, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_size_t, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.py_object, + ), + ) + nelems = 10 + dt = dpt.int64 + mem = dpm.MemoryUSMDevice(nelems * dt.itemsize, queue=q) + arr = make_from_ptr( + ctypes.c_size_t(nelems), + dt.num, + mem._pointer, + mem.sycl_queue.addressof_ref(), + mem, + ) + assert isinstance(arr, dpt.usm_ndarray) + assert arr.shape == (nelems,) + assert arr.dtype == dt + assert arr.sycl_queue == q + assert arr._pointer == mem._pointer + del mem + assert isinstance(arr.__repr__(), str) + + +def test_pyx_capi_make_general(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) + make_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeFromPtr", + b"PyObject *(int, Py_ssize_t const *, int, Py_ssize_t const *, " + b"DPCTLSyclUSMRef, DPCTLSyclQueueRef, Py_ssize_t, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_ssize_t, + ctypes.py_object, + ), + ) + # Create array to view into diagonal of a matrix + n = 5 + mat = dpt.reshape( + dpt.arange(n * n, dtype="i4", sycl_queue=q), + ( + n, + n, + ), + ) + c_shape = (ctypes.c_ssize_t * 1)( + n, + ) + c_strides = (ctypes.c_ssize_t * 1)( + n + 1, + ) + diag = make_from_ptr( + ctypes.c_int(1), + c_shape, + ctypes.c_int(mat.dtype.num), + c_strides, + mat._pointer, + mat.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + mat, + ) + assert isinstance(diag, dpt.usm_ndarray) + assert diag.shape == (n,) + assert diag.strides == (n + 1,) + assert diag.dtype == mat.dtype + assert diag.sycl_queue == q + assert diag._pointer == mat._pointer + del mat + assert isinstance(diag.__repr__(), str) + # create 0d scalar + mat = dpt.reshape( + dpt.arange(n * n, dtype="i4", sycl_queue=q), + ( + n, + n, + ), + ) + sc = make_from_ptr( + ctypes.c_int(0), + None, # NULL pointer + ctypes.c_int(mat.dtype.num), + None, # NULL pointer + mat._pointer, + mat.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + mat, + ) + assert isinstance(sc, dpt.usm_ndarray) + assert sc.shape == () + assert sc.dtype == mat.dtype + assert sc.sycl_queue == q + assert sc._pointer == mat._pointer + c_shape = (ctypes.c_ssize_t * 2)(0, n) + c_strides = (ctypes.c_ssize_t * 2)(0, 1) + zd_arr = make_from_ptr( + ctypes.c_int(2), + c_shape, + ctypes.c_int(mat.dtype.num), + c_strides, + mat._pointer, + mat.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + mat, + ) + assert isinstance(zd_arr, dpt.usm_ndarray) + assert zd_arr.shape == ( + 0, + n, + ) + assert zd_arr.strides == ( + 0, + 1, + ) + assert zd_arr.dtype == mat.dtype + assert zd_arr.sycl_queue == q + assert zd_arr._pointer == mat._pointer + + +def test_pyx_capi_make_fns_invalid_typenum(): + q = get_queue_or_skip() + usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) + + make_simple_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeSimpleFromPtr", + b"PyObject *(size_t, int, DPCTLSyclUSMRef, " + b"DPCTLSyclQueueRef, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_size_t, + ctypes.c_int, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.py_object, + ), + ) + + nelems = 10 + dtype = dpt.int64 + arr = dpt.arange(nelems, dtype=dtype, sycl_queue=q) + + with pytest.raises(ValueError): + make_simple_from_ptr( + ctypes.c_size_t(nelems), + -1, + arr._pointer, + arr.sycl_queue.addressof_ref(), + arr, + ) + + make_from_ptr = _pyx_capi_fnptr_to_callable( + usm_ndarray, + "UsmNDArray_MakeFromPtr", + b"PyObject *(int, Py_ssize_t const *, int, Py_ssize_t const *, " + b"DPCTLSyclUSMRef, DPCTLSyclQueueRef, Py_ssize_t, PyObject *)", + fn_restype=ctypes.py_object, + fn_argtypes=( + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_int, + ctypes.POINTER(ctypes.c_ssize_t), + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_ssize_t, + ctypes.py_object, + ), + ) + c_shape = (ctypes.c_ssize_t * 1)( + nelems, + ) + c_strides = (ctypes.c_ssize_t * 1)( + 1, + ) + with pytest.raises(ValueError): + make_from_ptr( + ctypes.c_int(1), + c_shape, + -1, + c_strides, + arr._pointer, + arr.sycl_queue.addressof_ref(), + ctypes.c_ssize_t(0), + arr, + ) + del arr + + +def _pyx_capi_int(X, pyx_capi_name, caps_name=b"int", val_restype=ctypes.c_int): + import sys + + mod = sys.modules[X.__class__.__module__] + cap = mod.__pyx_capi__.get(pyx_capi_name, None) + if cap is None: + raise ValueError( + "__pyx_capi__ does not export {} capsule".format(pyx_capi_name) + ) + # construct Python callable to invoke these functions + cap_ptr_fn = ctypes.pythonapi.PyCapsule_GetPointer + cap_ptr_fn.restype = ctypes.c_void_p + cap_ptr_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] + cap_ptr = cap_ptr_fn(cap, caps_name) + val_ptr = ctypes.cast(cap_ptr, ctypes.POINTER(val_restype)) + return val_ptr.contents.value + + +def test_pyx_capi_check_constants(): + try: + X = dpt.usm_ndarray(17, dtype="i1")[1::2] + except dpctl.SyclDeviceCreationError: + pytest.skip("No SYCL devices available") + cc_flag = _pyx_capi_int(X, "USM_ARRAY_C_CONTIGUOUS") + assert cc_flag > 0 and 0 == (cc_flag & (cc_flag - 1)) + fc_flag = _pyx_capi_int(X, "USM_ARRAY_F_CONTIGUOUS") + assert fc_flag > 0 and 0 == (fc_flag & (fc_flag - 1)) + w_flag = _pyx_capi_int(X, "USM_ARRAY_WRITABLE") + assert w_flag > 0 and 0 == (w_flag & (w_flag - 1)) + + bool_typenum = _pyx_capi_int(X, "UAR_BOOL") + assert bool_typenum == dpt.dtype("bool_").num + + byte_typenum = _pyx_capi_int(X, "UAR_BYTE") + assert byte_typenum == dpt.dtype(np.byte).num + ubyte_typenum = _pyx_capi_int(X, "UAR_UBYTE") + assert ubyte_typenum == dpt.dtype(np.ubyte).num + + short_typenum = _pyx_capi_int(X, "UAR_SHORT") + assert short_typenum == dpt.dtype(np.short).num + ushort_typenum = _pyx_capi_int(X, "UAR_USHORT") + assert ushort_typenum == dpt.dtype(np.ushort).num + + int_typenum = _pyx_capi_int(X, "UAR_INT") + assert int_typenum == dpt.dtype(np.intc).num + uint_typenum = _pyx_capi_int(X, "UAR_UINT") + assert uint_typenum == dpt.dtype(np.uintc).num + + long_typenum = _pyx_capi_int(X, "UAR_LONG") + assert long_typenum == dpt.dtype("l").num + ulong_typenum = _pyx_capi_int(X, "UAR_ULONG") + assert ulong_typenum == dpt.dtype("L").num + + longlong_typenum = _pyx_capi_int(X, "UAR_LONGLONG") + assert longlong_typenum == dpt.dtype(np.longlong).num + ulonglong_typenum = _pyx_capi_int(X, "UAR_ULONGLONG") + assert ulonglong_typenum == dpt.dtype(np.ulonglong).num + + half_typenum = _pyx_capi_int(X, "UAR_HALF") + assert half_typenum == dpt.dtype(np.half).num + float_typenum = _pyx_capi_int(X, "UAR_FLOAT") + assert float_typenum == dpt.dtype(np.single).num + double_typenum = _pyx_capi_int(X, "UAR_DOUBLE") + assert double_typenum == dpt.dtype(np.double).num + + cfloat_typenum = _pyx_capi_int(X, "UAR_CFLOAT") + assert cfloat_typenum == dpt.dtype(np.csingle).num + cdouble_typenum = _pyx_capi_int(X, "UAR_CDOUBLE") + assert cdouble_typenum == dpt.dtype(np.cdouble).num diff --git a/dpnp/tests/tensor/test_usm_ndarray_ctor.py b/dpnp/tests/tensor/test_usm_ndarray_ctor.py index 39ee17ed46d..e5d3bd2308f 100644 --- a/dpnp/tests/tensor/test_usm_ndarray_ctor.py +++ b/dpnp/tests/tensor/test_usm_ndarray_ctor.py @@ -183,554 +183,6 @@ def test_usm_ndarray_writable_flag_views(): assert not a.imag.flags.writable -def _pyx_capi_fnptr_to_callable( - X, - pyx_capi_name, - caps_name, - fn_restype=ctypes.c_void_p, - fn_argtypes=(ctypes.py_object,), -): - import sys - - mod = sys.modules[X.__class__.__module__] - cap = mod.__pyx_capi__.get(pyx_capi_name, None) - if cap is None: - raise ValueError( - "__pyx_capi__ does not export {} capsule".format(pyx_capi_name) - ) - # construct Python callable to invoke these functions - cap_ptr_fn = ctypes.pythonapi.PyCapsule_GetPointer - cap_ptr_fn.restype = ctypes.c_void_p - cap_ptr_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] - fn_ptr = cap_ptr_fn(cap, caps_name) - callable_maker_ptr = ctypes.PYFUNCTYPE(fn_restype, *fn_argtypes) - return callable_maker_ptr(fn_ptr) - - -def test_pyx_capi_get_data(): - try: - X = dpt.usm_ndarray(17, dtype="i8")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_data_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetData", - b"char *(struct PyUSMArrayObject *)", - fn_restype=ctypes.c_void_p, - fn_argtypes=(ctypes.py_object,), - ) - r1 = get_data_fn(X) - sua_iface = X.__sycl_usm_array_interface__ - assert r1 == sua_iface["data"][0] + sua_iface.get("offset") * X.itemsize - - -def test_pyx_capi_get_shape(): - try: - X = dpt.usm_ndarray(17, dtype="u4")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_shape_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetShape", - b"Py_ssize_t *(struct PyUSMArrayObject *)", - fn_restype=ctypes.c_void_p, - fn_argtypes=(ctypes.py_object,), - ) - c_longlong_p = ctypes.POINTER(ctypes.c_longlong) - shape0 = ctypes.cast(get_shape_fn(X), c_longlong_p).contents.value - assert shape0 == X.shape[0] - - -def test_pyx_capi_get_strides(): - try: - X = dpt.usm_ndarray(17, dtype="f4")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_strides_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetStrides", - b"Py_ssize_t *(struct PyUSMArrayObject *)", - fn_restype=ctypes.c_void_p, - fn_argtypes=(ctypes.py_object,), - ) - c_longlong_p = ctypes.POINTER(ctypes.c_longlong) - strides0_p = get_strides_fn(X) - if strides0_p: - strides0_p = ctypes.cast(strides0_p, c_longlong_p).contents - strides0_p = strides0_p.value - assert strides0_p == 0 or strides0_p == X.strides[0] - - -def test_pyx_capi_get_ndim(): - try: - X = dpt.usm_ndarray(17, dtype="?")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_ndim_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetNDim", - b"int (struct PyUSMArrayObject *)", - fn_restype=ctypes.c_int, - fn_argtypes=(ctypes.py_object,), - ) - assert get_ndim_fn(X) == X.ndim - - -def test_pyx_capi_get_typenum(): - try: - X = dpt.usm_ndarray(17, dtype="c8")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_typenum_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetTypenum", - b"int (struct PyUSMArrayObject *)", - fn_restype=ctypes.c_int, - fn_argtypes=(ctypes.py_object,), - ) - typenum = get_typenum_fn(X) - assert type(typenum) is int - assert typenum == X.dtype.num - - -def test_pyx_capi_get_elemsize(): - try: - X = dpt.usm_ndarray(17, dtype="u8")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_elemsize_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetElementSize", - b"int (struct PyUSMArrayObject *)", - fn_restype=ctypes.c_int, - fn_argtypes=(ctypes.py_object,), - ) - itemsize = get_elemsize_fn(X) - assert type(itemsize) is int - assert itemsize == X.itemsize - - -def test_pyx_capi_get_flags(): - try: - X = dpt.usm_ndarray(17, dtype="i8")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_flags_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetFlags", - b"int (struct PyUSMArrayObject *)", - fn_restype=ctypes.c_int, - fn_argtypes=(ctypes.py_object,), - ) - flags = get_flags_fn(X) - assert type(flags) is int and X.flags == flags - - -def test_pyx_capi_get_offset(): - try: - X = dpt.usm_ndarray(17, dtype="u2")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_offset_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetOffset", - b"Py_ssize_t (struct PyUSMArrayObject *)", - fn_restype=ctypes.c_longlong, - fn_argtypes=(ctypes.py_object,), - ) - offset = get_offset_fn(X) - assert type(offset) is int - assert offset == X.__sycl_usm_array_interface__["offset"] - - -def test_pyx_capi_get_usmdata(): - try: - X = dpt.usm_ndarray(17, dtype="u2")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_usmdata_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetUSMData", - b"PyObject *(struct PyUSMArrayObject *)", - fn_restype=ctypes.py_object, - fn_argtypes=(ctypes.py_object,), - ) - capi_usm_data = get_usmdata_fn(X) - assert isinstance(capi_usm_data, dpm._memory._Memory) - assert capi_usm_data.nbytes == X.usm_data.nbytes - assert capi_usm_data._pointer == X.usm_data._pointer - assert capi_usm_data.sycl_queue == X.usm_data.sycl_queue - - -def test_pyx_capi_get_queue_ref(): - try: - X = dpt.usm_ndarray(17, dtype="i2")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - get_queue_ref_fn = _pyx_capi_fnptr_to_callable( - X, - "UsmNDArray_GetQueueRef", - b"DPCTLSyclQueueRef (struct PyUSMArrayObject *)", - fn_restype=ctypes.c_void_p, - fn_argtypes=(ctypes.py_object,), - ) - queue_ref = get_queue_ref_fn(X) # address of a copy, should be unequal - assert queue_ref != X.sycl_queue.addressof_ref() - - -def test_pyx_capi_make_from_memory(): - q = get_queue_or_skip() - n0, n1 = 4, 6 - c_tuple = (ctypes.c_ssize_t * 2)(n0, n1) - mem = dpm.MemoryUSMShared(n0 * n1 * 4, queue=q) - typenum = dpt.dtype("single").num - any_usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) - make_from_memory_fn = _pyx_capi_fnptr_to_callable( - any_usm_ndarray, - "UsmNDArray_MakeSimpleFromMemory", - b"PyObject *(int, Py_ssize_t const *, int, " - b"struct Py_MemoryObject *, Py_ssize_t, char)", - fn_restype=ctypes.py_object, - fn_argtypes=( - ctypes.c_int, - ctypes.POINTER(ctypes.c_ssize_t), - ctypes.c_int, - ctypes.py_object, - ctypes.c_ssize_t, - ctypes.c_char, - ), - ) - r = make_from_memory_fn( - ctypes.c_int(2), - c_tuple, - ctypes.c_int(typenum), - mem, - ctypes.c_ssize_t(0), - ctypes.c_char(b"C"), - ) - assert isinstance(r, dpt.usm_ndarray) - assert r.ndim == 2 - assert r.shape == (n0, n1) - assert r._pointer == mem._pointer - assert r.usm_type == "shared" - assert r.sycl_queue == q - assert r.flags["C"] - r2 = make_from_memory_fn( - ctypes.c_int(2), - c_tuple, - ctypes.c_int(typenum), - mem, - ctypes.c_ssize_t(0), - ctypes.c_char(b"F"), - ) - ptr = mem._pointer - del mem - del r - assert isinstance(r2, dpt.usm_ndarray) - assert r2._pointer == ptr - assert r2.usm_type == "shared" - assert r2.sycl_queue == q - assert r2.flags["F"] - - -def test_pyx_capi_set_writable_flag(): - q = get_queue_or_skip() - usm_ndarray = dpt.empty((4, 5), dtype="i4", sycl_queue=q) - assert isinstance(usm_ndarray, dpt.usm_ndarray) - assert usm_ndarray.flags["WRITABLE"] is True - set_writable = _pyx_capi_fnptr_to_callable( - usm_ndarray, - "UsmNDArray_SetWritableFlag", - b"void (struct PyUSMArrayObject *, int)", - fn_restype=None, - fn_argtypes=(ctypes.py_object, ctypes.c_int), - ) - set_writable(usm_ndarray, ctypes.c_int(0)) - assert isinstance(usm_ndarray, dpt.usm_ndarray) - assert usm_ndarray.flags["WRITABLE"] is False - set_writable(usm_ndarray, ctypes.c_int(1)) - assert isinstance(usm_ndarray, dpt.usm_ndarray) - assert usm_ndarray.flags["WRITABLE"] is True - - -def test_pyx_capi_make_from_ptr(): - q = get_queue_or_skip() - usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) - make_from_ptr = _pyx_capi_fnptr_to_callable( - usm_ndarray, - "UsmNDArray_MakeSimpleFromPtr", - b"PyObject *(size_t, int, DPCTLSyclUSMRef, " - b"DPCTLSyclQueueRef, PyObject *)", - fn_restype=ctypes.py_object, - fn_argtypes=( - ctypes.c_size_t, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.py_object, - ), - ) - nelems = 10 - dt = dpt.int64 - mem = dpm.MemoryUSMDevice(nelems * dt.itemsize, queue=q) - arr = make_from_ptr( - ctypes.c_size_t(nelems), - dt.num, - mem._pointer, - mem.sycl_queue.addressof_ref(), - mem, - ) - assert isinstance(arr, dpt.usm_ndarray) - assert arr.shape == (nelems,) - assert arr.dtype == dt - assert arr.sycl_queue == q - assert arr._pointer == mem._pointer - del mem - assert isinstance(arr.__repr__(), str) - - -def test_pyx_capi_make_general(): - q = get_queue_or_skip() - usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) - make_from_ptr = _pyx_capi_fnptr_to_callable( - usm_ndarray, - "UsmNDArray_MakeFromPtr", - b"PyObject *(int, Py_ssize_t const *, int, Py_ssize_t const *, " - b"DPCTLSyclUSMRef, DPCTLSyclQueueRef, Py_ssize_t, PyObject *)", - fn_restype=ctypes.py_object, - fn_argtypes=( - ctypes.c_int, - ctypes.POINTER(ctypes.c_ssize_t), - ctypes.c_int, - ctypes.POINTER(ctypes.c_ssize_t), - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_ssize_t, - ctypes.py_object, - ), - ) - # Create array to view into diagonal of a matrix - n = 5 - mat = dpt.reshape( - dpt.arange(n * n, dtype="i4", sycl_queue=q), - ( - n, - n, - ), - ) - c_shape = (ctypes.c_ssize_t * 1)( - n, - ) - c_strides = (ctypes.c_ssize_t * 1)( - n + 1, - ) - diag = make_from_ptr( - ctypes.c_int(1), - c_shape, - ctypes.c_int(mat.dtype.num), - c_strides, - mat._pointer, - mat.sycl_queue.addressof_ref(), - ctypes.c_ssize_t(0), - mat, - ) - assert isinstance(diag, dpt.usm_ndarray) - assert diag.shape == (n,) - assert diag.strides == (n + 1,) - assert diag.dtype == mat.dtype - assert diag.sycl_queue == q - assert diag._pointer == mat._pointer - del mat - assert isinstance(diag.__repr__(), str) - # create 0d scalar - mat = dpt.reshape( - dpt.arange(n * n, dtype="i4", sycl_queue=q), - ( - n, - n, - ), - ) - sc = make_from_ptr( - ctypes.c_int(0), - None, # NULL pointer - ctypes.c_int(mat.dtype.num), - None, # NULL pointer - mat._pointer, - mat.sycl_queue.addressof_ref(), - ctypes.c_ssize_t(0), - mat, - ) - assert isinstance(sc, dpt.usm_ndarray) - assert sc.shape == () - assert sc.dtype == mat.dtype - assert sc.sycl_queue == q - assert sc._pointer == mat._pointer - c_shape = (ctypes.c_ssize_t * 2)(0, n) - c_strides = (ctypes.c_ssize_t * 2)(0, 1) - zd_arr = make_from_ptr( - ctypes.c_int(2), - c_shape, - ctypes.c_int(mat.dtype.num), - c_strides, - mat._pointer, - mat.sycl_queue.addressof_ref(), - ctypes.c_ssize_t(0), - mat, - ) - assert isinstance(zd_arr, dpt.usm_ndarray) - assert zd_arr.shape == ( - 0, - n, - ) - assert zd_arr.strides == ( - 0, - 1, - ) - assert zd_arr.dtype == mat.dtype - assert zd_arr.sycl_queue == q - assert zd_arr._pointer == mat._pointer - - -def test_pyx_capi_make_fns_invalid_typenum(): - q = get_queue_or_skip() - usm_ndarray = dpt.empty((), dtype="i4", sycl_queue=q) - - make_simple_from_ptr = _pyx_capi_fnptr_to_callable( - usm_ndarray, - "UsmNDArray_MakeSimpleFromPtr", - b"PyObject *(size_t, int, DPCTLSyclUSMRef, " - b"DPCTLSyclQueueRef, PyObject *)", - fn_restype=ctypes.py_object, - fn_argtypes=( - ctypes.c_size_t, - ctypes.c_int, - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.py_object, - ), - ) - - nelems = 10 - dtype = dpt.int64 - arr = dpt.arange(nelems, dtype=dtype, sycl_queue=q) - - with pytest.raises(ValueError): - make_simple_from_ptr( - ctypes.c_size_t(nelems), - -1, - arr._pointer, - arr.sycl_queue.addressof_ref(), - arr, - ) - - make_from_ptr = _pyx_capi_fnptr_to_callable( - usm_ndarray, - "UsmNDArray_MakeFromPtr", - b"PyObject *(int, Py_ssize_t const *, int, Py_ssize_t const *, " - b"DPCTLSyclUSMRef, DPCTLSyclQueueRef, Py_ssize_t, PyObject *)", - fn_restype=ctypes.py_object, - fn_argtypes=( - ctypes.c_int, - ctypes.POINTER(ctypes.c_ssize_t), - ctypes.c_int, - ctypes.POINTER(ctypes.c_ssize_t), - ctypes.c_void_p, - ctypes.c_void_p, - ctypes.c_ssize_t, - ctypes.py_object, - ), - ) - c_shape = (ctypes.c_ssize_t * 1)( - nelems, - ) - c_strides = (ctypes.c_ssize_t * 1)( - 1, - ) - with pytest.raises(ValueError): - make_from_ptr( - ctypes.c_int(1), - c_shape, - -1, - c_strides, - arr._pointer, - arr.sycl_queue.addressof_ref(), - ctypes.c_ssize_t(0), - arr, - ) - del arr - - -def _pyx_capi_int(X, pyx_capi_name, caps_name=b"int", val_restype=ctypes.c_int): - import sys - - mod = sys.modules[X.__class__.__module__] - cap = mod.__pyx_capi__.get(pyx_capi_name, None) - if cap is None: - raise ValueError( - "__pyx_capi__ does not export {} capsule".format(pyx_capi_name) - ) - # construct Python callable to invoke these functions - cap_ptr_fn = ctypes.pythonapi.PyCapsule_GetPointer - cap_ptr_fn.restype = ctypes.c_void_p - cap_ptr_fn.argtypes = [ctypes.py_object, ctypes.c_char_p] - cap_ptr = cap_ptr_fn(cap, caps_name) - val_ptr = ctypes.cast(cap_ptr, ctypes.POINTER(val_restype)) - return val_ptr.contents.value - - -def test_pyx_capi_check_constants(): - try: - X = dpt.usm_ndarray(17, dtype="i1")[1::2] - except dpctl.SyclDeviceCreationError: - pytest.skip("No SYCL devices available") - cc_flag = _pyx_capi_int(X, "USM_ARRAY_C_CONTIGUOUS") - assert cc_flag > 0 and 0 == (cc_flag & (cc_flag - 1)) - fc_flag = _pyx_capi_int(X, "USM_ARRAY_F_CONTIGUOUS") - assert fc_flag > 0 and 0 == (fc_flag & (fc_flag - 1)) - w_flag = _pyx_capi_int(X, "USM_ARRAY_WRITABLE") - assert w_flag > 0 and 0 == (w_flag & (w_flag - 1)) - - bool_typenum = _pyx_capi_int(X, "UAR_BOOL") - assert bool_typenum == dpt.dtype("bool_").num - - byte_typenum = _pyx_capi_int(X, "UAR_BYTE") - assert byte_typenum == dpt.dtype(np.byte).num - ubyte_typenum = _pyx_capi_int(X, "UAR_UBYTE") - assert ubyte_typenum == dpt.dtype(np.ubyte).num - - short_typenum = _pyx_capi_int(X, "UAR_SHORT") - assert short_typenum == dpt.dtype(np.short).num - ushort_typenum = _pyx_capi_int(X, "UAR_USHORT") - assert ushort_typenum == dpt.dtype(np.ushort).num - - int_typenum = _pyx_capi_int(X, "UAR_INT") - assert int_typenum == dpt.dtype(np.intc).num - uint_typenum = _pyx_capi_int(X, "UAR_UINT") - assert uint_typenum == dpt.dtype(np.uintc).num - - long_typenum = _pyx_capi_int(X, "UAR_LONG") - assert long_typenum == dpt.dtype("l").num - ulong_typenum = _pyx_capi_int(X, "UAR_ULONG") - assert ulong_typenum == dpt.dtype("L").num - - longlong_typenum = _pyx_capi_int(X, "UAR_LONGLONG") - assert longlong_typenum == dpt.dtype(np.longlong).num - ulonglong_typenum = _pyx_capi_int(X, "UAR_ULONGLONG") - assert ulonglong_typenum == dpt.dtype(np.ulonglong).num - - half_typenum = _pyx_capi_int(X, "UAR_HALF") - assert half_typenum == dpt.dtype(np.half).num - float_typenum = _pyx_capi_int(X, "UAR_FLOAT") - assert float_typenum == dpt.dtype(np.single).num - double_typenum = _pyx_capi_int(X, "UAR_DOUBLE") - assert double_typenum == dpt.dtype(np.double).num - - cfloat_typenum = _pyx_capi_int(X, "UAR_CFLOAT") - assert cfloat_typenum == dpt.dtype(np.csingle).num - cdouble_typenum = _pyx_capi_int(X, "UAR_CDOUBLE") - assert cdouble_typenum == dpt.dtype(np.cdouble).num - - @pytest.mark.parametrize("dt1", _all_dtypes) @pytest.mark.parametrize("dt2", _all_dtypes) def test_usm_ndarray_from_zero_sized_usm_ndarray(dt1, dt2): From a5fa80dcb8e7a3e9eca6347b4cfe340e6456b2f8 Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Tue, 12 May 2026 07:22:16 -0700 Subject: [PATCH 21/41] Update CHANGELOG.md Co-authored-by: vlad-perevezentsev --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebd1b51f303..14aaf83ebeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -* Added C API functions for `tensor.usm_ndarray` setters and getters to avoid ABI breakage if `tensor.usm_ndarray` is modified [gh-2866](https://github.com/IntelPython/dpnp/pull/2866) +* Added C API functions for `dpnp.tensor.usm_ndarray` setters and getters to avoid ABI breakage if `dpnp.tensor.usm_ndarray` is modified [gh-2866](https://github.com/IntelPython/dpnp/pull/2866) ### Changed From 6ec4ddcc1ab4c8478667b6c4d23ead8309db914b Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 13 May 2026 09:33:44 -0700 Subject: [PATCH 22/41] Fix `meshgrid` return type (#2854) This PR fixes `dpnp.meshgrid` and `dpnp.tensor.meshgrid` to always return tuple instead of list. Changes `meshgrid()` to consistently return a tuple to match NumPy's behavior (version 2.5+) and align with the newer version 2025.12 of Python array API spec. --- CHANGELOG.md | 1 + dpnp/dpnp_iface_arraycreation.py | 4 ++-- dpnp/tensor/_ctors.py | 10 +++++----- dpnp/tests/tensor/test_usm_ndarray_ctor.py | 3 ++- dpnp/tests/test_arraycreation.py | 10 ++++++++-- .../third_party/cupy/creation_tests/test_ranges.py | 2 +- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 752bf2ad4b3..935f00f09f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changed +* Changed `dpnp.meshgrid` and `dpnp.tensor.meshgrid` to return a tuple instead of a list, aligning with NumPy 2.5+ behavior and 2025.12 version of the Python array API standard [#2854](https://github.com/IntelPython/dpnp/pull/2854) ### Deprecated diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 31be7a8c276..5a245c69037 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -3111,7 +3111,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): ) if ndim < 1: - return [] + return () s0 = (1,) * ndim output = [ @@ -3132,7 +3132,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): if copy: output = [dpt.copy(x) for x in output] - return [dpnp_array._create_from_usm_ndarray(x) for x in output] + return tuple(dpnp_array._create_from_usm_ndarray(x) for x in output) class MGridClass: diff --git a/dpnp/tensor/_ctors.py b/dpnp/tensor/_ctors.py index b6e28afdc9e..bff2a9537e2 100644 --- a/dpnp/tensor/_ctors.py +++ b/dpnp/tensor/_ctors.py @@ -1441,7 +1441,7 @@ def linspace( def meshgrid(*arrays, indexing="xy"): """ - Creates list of :class:`dpctl.tensor.usm_ndarray` coordinate matrices + Creates tuple of :class:`dpctl.tensor.usm_ndarray` coordinate matrices from vectors. Args: @@ -1456,8 +1456,8 @@ def meshgrid(*arrays, indexing="xy"): keyword has no effect and should be ignored. Default: ``"xy"`` Returns: - List[array]: - list of ``N`` arrays, where ``N`` is the number of + Tuple[array]: + tuple of ``N`` arrays, where ``N`` is the number of provided one-dimensional input arrays. Each returned array must have rank ``N``. For a set of ``n`` vectors with lengths ``N0``, ``N1``, ``N2``, ... @@ -1495,7 +1495,7 @@ def meshgrid(*arrays, indexing="xy"): ) n = len(arrays) if n == 0: - return [] + return () sh = (-1,) + (1,) * (n - 1) @@ -1511,7 +1511,7 @@ def meshgrid(*arrays, indexing="xy"): output = dpt.broadcast_arrays(*res) - return output + return tuple(output) def ones( diff --git a/dpnp/tests/tensor/test_usm_ndarray_ctor.py b/dpnp/tests/tensor/test_usm_ndarray_ctor.py index 70066860b19..59f1d6c28b2 100644 --- a/dpnp/tests/tensor/test_usm_ndarray_ctor.py +++ b/dpnp/tests/tensor/test_usm_ndarray_ctor.py @@ -1924,7 +1924,8 @@ def test_meshgrid(): assert n == len(Znp) for i in range(n): assert np.array_equal(dpt.asnumpy(Z[i]), Znp[i]) - assert dpt.meshgrid() == [] + assert isinstance(Z, tuple) + assert dpt.meshgrid() == () # dimension > 1 must raise ValueError with pytest.raises(ValueError): dpt.meshgrid(dpt.usm_ndarray((4, 4))) diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py index b195c048410..5a9a1c73c6e 100644 --- a/dpnp/tests/test_arraycreation.py +++ b/dpnp/tests/test_arraycreation.py @@ -984,13 +984,19 @@ def test_dpctl_tensor_input(func, args): [[], [[1]], [[1, 2, 3], [4, 5, 6]], [[1, 2], [3, 4], [5, 6]]], ids=["[]", "[[1]]", "[[1, 2, 3], [4, 5, 6]]", "[[1, 2], [3, 4], [5, 6]]"], ) -@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +@pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_float16=False) +) @pytest.mark.parametrize("indexing", ["ij", "xy"]) def test_meshgrid(arrays, dtype, indexing): func = lambda xp, xi: xp.meshgrid(*xi, indexing=indexing) a = tuple(numpy.array(array, dtype=dtype) for array in arrays) ia = tuple(dpnp.array(array, dtype=dtype) for array in arrays) - assert_array_equal(func(numpy, a), func(dpnp, ia)) + + result = func(dpnp, ia) + expected = func(numpy, a) + assert_array_equal(result, expected, strict=True) + assert isinstance(result, tuple) @pytest.mark.parametrize("shape", [(24,), (4, 6), (2, 3, 4), (2, 3, 2, 2)]) diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py index 69873473e0d..ce716b10dd3 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_ranges.py @@ -344,7 +344,7 @@ def test_meshgrid0(self, dtype): out = cupy.meshgrid( indexing=self.indexing, sparse=self.sparse, copy=self.copy ) - assert out == [] + assert out == () @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() From 53dd6d15f5f25fa56014575f30f53e297b9ae072 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 14 May 2026 14:55:17 -0700 Subject: [PATCH 23/41] Tune tolerance in matvec test and disable large dim in matmul test (#2912) Few tests are failing with the latest blas libs and numpy: ```bash FAILED test_product.py::TestMatmul::test_strided1[-2-float32] - AssertionError: Not equal to tolerance rtol=1.6e-05, atol=1.6e-05 Mismatched elements: 1 / 10000 (0.01%) Mismatch at index: [6, 5, 4, 8]: 0.13449859619140625 (ACTUAL), 0.13451732695102692 (DESIRED) Max absolute difference among violations: 1.873076e-05 Max relative difference among violations: 0.00013924 ACTUAL: array([[[[ 9.825830e+01, 8.411874e+01, 2.118525e+02, ..., -1.437696e+02, -1.788609e+00, 3.740148e+01], [ 8.804939e+01, -2.840893e+01, -1.463431e+02, ...,... DESIRED: array([[[[ 9.825830e+01, 8.411874e+01, 2.118525e+02, ..., -1.437696e+02, -1.788605e+00, 3.740148e+01], [ 8.804940e+01, -2.840893e+01, -1.463431e+02, ...,... FAILED test_product.py::TestMatvec::test_axes[axes0] - AssertionError: Not equal to tolerance rtol=8e-15, atol=8e-15 Mismatched elements: 1 / 96 (1.04%) Mismatch at index: [0, 0, 0, 1]: 0.36551382063033344 (ACTUAL), 0.36551382063031923 (DESIRED) Max absolute difference among violations: 1.42108547e-14 Max relative difference among violations: 3.88791173e-14 ACTUAL: array([[[[ 17.130334, 0.365514, -16.568001], [ 52.87157 , -75.25945 , -105.961006]], ... DESIRED: array([[[[ 17.130334, 0.365514, -16.568001], [ 52.87157 , -75.25945 , -105.961006]], ... FAILED test_product.py::TestMatvec::test_axes[axes1] - AssertionError: Not equal to tolerance rtol=8e-15, atol=8e-15 Mismatched elements: 1 / 96 (1.04%) Mismatch at index: [0, 0, 0, 1]: 0.36551382063033344 (ACTUAL), 0.36551382063031923 (DESIRED) Max absolute difference among violations: 1.42108547e-14 Max relative difference among violations: 3.88791173e-14 ACTUAL: array([[[[ 17.130334, 0.365514, -16.568001], [ 52.87157 , -75.25945 , -105.961006]], ... DESIRED: array([[[[ 17.130334, 0.365514, -16.568001], [ 52.87157 , -75.25945 , -105.961006]], ``` Disable the tests temporary, once the issue is identified and reported. The issue is only visible when running tests through GH workflow, but passing locally within the same env and with OpenCL:CPU device. --- dpnp/tests/test_product.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py index 3ac324b055e..ebf9b640d8c 100644 --- a/dpnp/tests/test_product.py +++ b/dpnp/tests/test_product.py @@ -18,7 +18,7 @@ from .third_party.cupy import testing # A list of selected dtypes including both integer and float dtypes -# to test differennt backends: OneMath (for float) and dpctl (for integer) +# to test different backends: OneMath (for float) and dpctl (for integer) _selected_dtypes = [numpy.int64, numpy.float32] @@ -878,7 +878,9 @@ def test_order(self, dtype, order1, order2, order, shape1, shape2): ids=["-2", "2", "(-2, 2)", "(2, -2)"], ) def test_strided1(self, dtype, stride): - for dim in [1, 2, 3, 4]: + # TODO: enable back when the root cause is identified + # for dim in [1, 2, 3, 4]: + for dim in [1, 2, 3]: shape = tuple(20 for _ in range(dim)) A = generate_random_numpy_array(shape, dtype) iA = dpnp.array(A) @@ -1533,7 +1535,9 @@ def test_axes(self, axes): result = dpnp.matvec(ia, ib, axes=axes) expected = numpy.matvec(a, b, axes=axes) - assert_dtype_allclose(result, expected) + + # TODO: check if failing with newer NumPy + assert_dtype_allclose(result, expected, factor=40) @pytest.mark.parametrize("xp", [numpy, dpnp]) def test_error(self, xp): From e088f6c038d607cc27fcaf3c4a23a987690f1ca4 Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Thu, 14 May 2026 19:19:51 -0700 Subject: [PATCH 24/41] Update `searchsorted` for array api 2025 spec (#2902) This PR updates searchsorted for the 2025 array API spec, allowing scalars in the second argument --- CHANGELOG.md | 2 + dpnp/dpnp_iface_searching.py | 12 +- dpnp/tensor/_searchsorted.py | 100 ++++++++++------- .../tensor/test_usm_ndarray_searchsorted.py | 104 ++++++++++-------- 4 files changed, 127 insertions(+), 91 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 935f00f09f7..15f22b8bb6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed * Changed `dpnp.meshgrid` and `dpnp.tensor.meshgrid` to return a tuple instead of a list, aligning with NumPy 2.5+ behavior and 2025.12 version of the Python array API standard [#2854](https://github.com/IntelPython/dpnp/pull/2854) +* Updated `searchsorted` implementations to align with the 2025.12 array API spec [gh-2902](https://github.com/IntelPython/dpnp/pull/2902) + ### Deprecated ### Removed diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index 856fdbc9893..d72130a6f1f 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -373,15 +373,13 @@ def searchsorted(a, v, side="left", sorter=None): """ - usm_a = dpnp.get_usm_ndarray(a) - if dpnp.isscalar(v): - usm_v = dpt.asarray(v, sycl_queue=a.sycl_queue, usm_type=a.usm_type) - else: - usm_v = dpnp.get_usm_ndarray(v) + a = dpnp.get_usm_ndarray(a) + if not dpnp.isscalar(v): + v = dpnp.get_usm_ndarray(v) - usm_sorter = None if sorter is None else dpnp.get_usm_ndarray(sorter) + sorter = None if sorter is None else dpnp.get_usm_ndarray(sorter) return dpnp_array._create_from_usm_ndarray( - dpt.searchsorted(usm_a, usm_v, side=side, sorter=usm_sorter) + dpt.searchsorted(a, v, side=side, sorter=sorter) ) diff --git a/dpnp/tensor/_searchsorted.py b/dpnp/tensor/_searchsorted.py index 4c9b54cb63f..6d3f8846012 100644 --- a/dpnp/tensor/_searchsorted.py +++ b/dpnp/tensor/_searchsorted.py @@ -26,36 +26,41 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +from typing import Literal -from typing import Literal, Union - -import dpctl import dpctl.utils as du +import dpnp.tensor as dpt + from ._compute_follows_data import ( ExecutionPlacementError, get_coerced_usm_type, get_execution_queue, ) from ._copy_utils import _empty_like_orderK -from ._ctors import empty +from ._ctors import empty_like +from ._scalar_utils import _get_dtype, _get_queue_usm_type, _validate_dtype from ._tensor_impl import _copy_usm_ndarray_into_usm_ndarray as ti_copy from ._tensor_impl import _take as ti_take from ._tensor_impl import ( default_device_index_type as ti_default_device_index_type, ) from ._tensor_sorting_impl import _searchsorted_left, _searchsorted_right -from ._type_utils import isdtype, result_type +from ._type_utils import ( + _resolve_weak_types_all_py_ints, + _to_device_supported_dtype, + isdtype, +) from ._usmarray import usm_ndarray def searchsorted( x1: usm_ndarray, - x2: usm_ndarray, + x2: usm_ndarray | int | float | complex | bool, /, *, side: Literal["left", "right"] = "left", - sorter: Union[usm_ndarray, None] = None, + sorter: usm_ndarray | None = None, ) -> usm_ndarray: """searchsorted(x1, x2, side='left', sorter=None) @@ -68,8 +73,8 @@ def searchsorted( input array. Must be a one-dimensional array. If `sorter` is `None`, must be sorted in ascending order; otherwise, `sorter` must be an array of indices that sort `x1` in ascending order. - x2 (usm_ndarray): - array containing search values. + x2 (usm_ndarray | int | float | complex | bool): + search value or values. side (Literal["left", "right]): argument controlling which index is returned if a value lands exactly on an edge. If `x2` is an array of rank `N` where @@ -85,13 +90,11 @@ def searchsorted( array of indices that sort `x1` in ascending order. The array must have the same shape as `x1` and have an integral data type. Out of bound index values of `sorter` array are treated using - `"wrap"` mode documented in :py:func:`dpctl.tensor.take`. + `"wrap"` mode documented in :py:func:`dpnp.tensor.take`. Default: `None`. """ if not isinstance(x1, usm_ndarray): raise TypeError(f"Expected dpnp.tensor.usm_ndarray, got {type(x1)}") - if not isinstance(x2, usm_ndarray): - raise TypeError(f"Expected dpnp.tensor.usm_ndarray, got {type(x2)}") if sorter is not None and not isinstance(sorter, usm_ndarray): raise TypeError(f"Expected dpnp.tensor.usm_ndarray, got {type(sorter)}") @@ -101,27 +104,43 @@ def searchsorted( "Expected either 'left' or 'right'" ) - if sorter is None: - q = get_execution_queue([x1.sycl_queue, x2.sycl_queue]) - else: - q = get_execution_queue( - [x1.sycl_queue, x2.sycl_queue, sorter.sycl_queue] - ) + q1, x1_usm_type = x1.sycl_queue, x1.usm_type + q2, x2_usm_type = _get_queue_usm_type(x2) + q3 = sorter.sycl_queue if sorter is not None else None + q = get_execution_queue(tuple(q for q in (q1, q2, q3) if q is not None)) if q is None: raise ExecutionPlacementError( "Execution placement can not be unambiguously " "inferred from input arguments." ) + res_usm_type = get_coerced_usm_type( + tuple( + ut + for ut in ( + x1_usm_type, + x2_usm_type, + ) + if ut is not None + ) + ) + dpt.validate_usm_type(res_usm_type, allow_none=False) + sycl_dev = q.sycl_device + if x1.ndim != 1: raise ValueError("First argument array must be one-dimensional") x1_dt = x1.dtype - x2_dt = x2.dtype + x2_dt = _get_dtype(x2, sycl_dev) + if not _validate_dtype(x2_dt): + raise ValueError( + "dpt.searchsorted search value argument has " + f"unsupported data type {x2_dt}" + ) _manager = du.SequentialOrderManager[q] dep_evs = _manager.submitted_events - ev = dpctl.SyclEvent() + x1_deps = dep_evs if sorter is not None: if not isdtype(sorter.dtype, "integral"): raise ValueError( @@ -132,7 +151,7 @@ def searchsorted( "Sorter array must be one-dimension with the same " "shape as the first argument array" ) - res = empty(x1.shape, dtype=x1_dt, usm_type=x1.usm_type, sycl_queue=q) + res = empty_like(x1) ind = (sorter,) axis = 0 wrap_out_of_bound_indices_mode = 0 @@ -146,31 +165,30 @@ def searchsorted( depends=dep_evs, ) x1 = res + x1_deps = [ev] _manager.add_event_pair(ht_ev, ev) - if x1_dt != x2_dt: - dt = result_type(x1, x2) - if x1_dt != dt: - x1_buf = _empty_like_orderK(x1, dt) - dep_evs = _manager.submitted_events - ht_ev, ev = ti_copy( - src=x1, dst=x1_buf, sycl_queue=q, depends=dep_evs - ) - _manager.add_event_pair(ht_ev, ev) - x1 = x1_buf - if x2_dt != dt: - x2_buf = _empty_like_orderK(x2, dt) - dep_evs = _manager.submitted_events - ht_ev, ev = ti_copy( - src=x2, dst=x2_buf, sycl_queue=q, depends=dep_evs - ) - _manager.add_event_pair(ht_ev, ev) - x2 = x2_buf + dt1, dt2 = _resolve_weak_types_all_py_ints(x1_dt, x2_dt, sycl_dev) + dt = _to_device_supported_dtype(dpt.result_type(dt1, dt2), sycl_dev) + + if x1_dt != dt: + x1_buf = _empty_like_orderK(x1, dt) + # get the submitted events again to ensure the copy waits take call + ht_ev, ev = ti_copy(src=x1, dst=x1_buf, sycl_queue=q, depends=x1_deps) + _manager.add_event_pair(ht_ev, ev) + x1 = x1_buf + + if not isinstance(x2, usm_ndarray): + x2 = dpt.asarray(x2, dtype=dt, usm_type=res_usm_type, sycl_queue=q) + elif x2_dt != dt: + x2_buf = _empty_like_orderK(x2, dt) + ht_ev, ev = ti_copy(src=x2, dst=x2_buf, sycl_queue=q, depends=dep_evs) + _manager.add_event_pair(ht_ev, ev) + x2 = x2_buf - dst_usm_type = get_coerced_usm_type([x1.usm_type, x2.usm_type]) index_dt = ti_default_device_index_type(q) - dst = _empty_like_orderK(x2, index_dt, usm_type=dst_usm_type) + dst = _empty_like_orderK(x2, index_dt, usm_type=res_usm_type) dep_evs = _manager.submitted_events if side == "left": diff --git a/dpnp/tests/tensor/test_usm_ndarray_searchsorted.py b/dpnp/tests/tensor/test_usm_ndarray_searchsorted.py index aef782f06f0..632a66146ad 100644 --- a/dpnp/tests/tensor/test_usm_ndarray_searchsorted.py +++ b/dpnp/tests/tensor/test_usm_ndarray_searchsorted.py @@ -26,6 +26,8 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +import ctypes + import dpctl import numpy as np import pytest @@ -37,6 +39,30 @@ skip_if_dtype_not_supported, ) +_integer_dtypes = [ + "i1", + "u1", + "i2", + "u2", + "i4", + "u4", + "i8", + "u8", +] + +_floating_dtypes = [ + "f2", + "f4", + "f8", +] + +_complex_dtypes = [ + "c8", + "c16", +] + +_all_dtypes = ["?"] + _integer_dtypes + _floating_dtypes + _complex_dtypes + def _check(hay_stack, needles, needles_np): assert hay_stack.dtype == needles.dtype @@ -103,19 +129,7 @@ def test_searchsorted_strided_bool(): ) -@pytest.mark.parametrize( - "idt", - [ - dpt.int8, - dpt.uint8, - dpt.int16, - dpt.uint16, - dpt.int32, - dpt.uint32, - dpt.int64, - dpt.uint64, - ], -) +@pytest.mark.parametrize("idt", _integer_dtypes) def test_searchsorted_contig_int(idt): q = get_queue_or_skip() skip_if_dtype_not_supported(idt, q) @@ -135,19 +149,7 @@ def test_searchsorted_contig_int(idt): ) -@pytest.mark.parametrize( - "idt", - [ - dpt.int8, - dpt.uint8, - dpt.int16, - dpt.uint16, - dpt.int32, - dpt.uint32, - dpt.int64, - dpt.uint64, - ], -) +@pytest.mark.parametrize("idt", _integer_dtypes) def test_searchsorted_strided_int(idt): q = get_queue_or_skip() skip_if_dtype_not_supported(idt, q) @@ -174,12 +176,12 @@ def _add_extended_fp(array): array[-1] = dpt.nan -@pytest.mark.parametrize("idt", [dpt.float16, dpt.float32, dpt.float64]) -def test_searchsorted_contig_fp(idt): +@pytest.mark.parametrize("fdt", _floating_dtypes) +def test_searchsorted_contig_fp(fdt): q = get_queue_or_skip() - skip_if_dtype_not_supported(idt, q) + skip_if_dtype_not_supported(fdt, q) - dt = dpt.dtype(idt) + dt = dpt.dtype(fdt) hay_stack = dpt.linspace(0, 1, num=255, dtype=dt, endpoint=True) _add_extended_fp(hay_stack) @@ -195,12 +197,12 @@ def test_searchsorted_contig_fp(idt): ) -@pytest.mark.parametrize("idt", [dpt.float16, dpt.float32, dpt.float64]) -def test_searchsorted_strided_fp(idt): +@pytest.mark.parametrize("fdt", _floating_dtypes) +def test_searchsorted_strided_fp(fdt): q = get_queue_or_skip() - skip_if_dtype_not_supported(idt, q) + skip_if_dtype_not_supported(fdt, q) - dt = dpt.dtype(idt) + dt = dpt.dtype(fdt) hay_stack = dpt.repeat( dpt.linspace(0, 1, num=255, dtype=dt, endpoint=True), 4 @@ -243,12 +245,12 @@ def _add_extended_cfp(array): return dpt.sort(dpt.concat((ev, array))) -@pytest.mark.parametrize("idt", [dpt.complex64, dpt.complex128]) -def test_searchsorted_contig_cfp(idt): +@pytest.mark.parametrize("cdt", _complex_dtypes) +def test_searchsorted_contig_cfp(cdt): q = get_queue_or_skip() - skip_if_dtype_not_supported(idt, q) + skip_if_dtype_not_supported(cdt, q) - dt = dpt.dtype(idt) + dt = dpt.dtype(cdt) hay_stack = dpt.linspace(0, 1, num=255, dtype=dt, endpoint=True) hay_stack = _add_extended_cfp(hay_stack) @@ -263,12 +265,12 @@ def test_searchsorted_contig_cfp(idt): ) -@pytest.mark.parametrize("idt", [dpt.complex64, dpt.complex128]) -def test_searchsorted_strided_cfp(idt): +@pytest.mark.parametrize("cdt", _complex_dtypes) +def test_searchsorted_strided_cfp(cdt): q = get_queue_or_skip() - skip_if_dtype_not_supported(idt, q) + skip_if_dtype_not_supported(cdt, q) - dt = dpt.dtype(idt) + dt = dpt.dtype(cdt) hay_stack = dpt.repeat( dpt.linspace(0, 1, num=255, dtype=dt, endpoint=True), 4 @@ -315,7 +317,7 @@ def test_searchsorted_validation(): x1 = dpt.arange(10, dtype="i4") except dpctl.SyclDeviceCreationError: pytest.skip("Default device could not be created") - with pytest.raises(TypeError): + with pytest.raises(ValueError): dpt.searchsorted(x1, None) with pytest.raises(TypeError): dpt.searchsorted(x1, x1, sorter=dict()) @@ -405,3 +407,19 @@ def test_searchsorted_strided_scalar_needle(): needles = dpt.asarray(needles_np) _check(hay_stack, needles, needles_np) + + +@pytest.mark.parametrize( + "py_zero", + [bool(0), int(0), float(0), complex(0), np.float32(0), ctypes.c_int(0)], +) +@pytest.mark.parametrize("dt", _all_dtypes) +def test_searchsorted_py_scalars(py_zero, dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dt, q) + + x = dpt.zeros(10, dtype=dt, sycl_queue=q) + + r1 = dpt.searchsorted(x, py_zero) + assert isinstance(r1, dpt.usm_ndarray) + assert r1.shape == () From 5865e8204fba00bd86c8369134fb40aa279c2484 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Mon, 18 May 2026 11:44:05 +0200 Subject: [PATCH 25/41] Fix `dpnp.tensor.acosh` handling of complex(+-0, NaN) (#2914) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR proposes to fix issue #2880 where `dpnp.tensor.acosh(complex(0, NaN))` returned `NaN + NaN j` instead of `NaN ± π/2 j` as required by the Array API specification The issue was caused by an incorrect early-return branch in acosh.hpp: ``` if (std::isnan(ry)) { return resT{ry, ry}; } ``` This special case bypassed the general logic which already produces the correct result. The fix removes this unnecessary branch making the behavior consistent with both the Array API specification and NumPy ``` In [1]: import dpnp.tensor as dpt In [2]: import numpy as np In [3]: a = dpt.asarray([complex(+0, np.nan)]) In [4]: dpt.acosh(a) Out[4]: usm_ndarray([nan+1.57079633j]) In [5]: a = np.asarray([complex(+0, np.nan)]) In [6]: np.acosh(a) Out[6]: array([nan+1.57079633j]) ``` Also adds a new `test_acosh_zero_nan` test to cover this case --- CHANGELOG.md | 1 + .../kernels/elementwise_functions/acosh.hpp | 4 --- .../tensor/elementwise/test_hyperbolic.py | 29 +++++++++++++++---- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab377b2b69c..8c58796ea57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fixed incorrect in-place advanced indexing for 4D arrays when using `range` or `list` as index keys [#2872](https://github.com/IntelPython/dpnp/pull/2872) * Fixed `conda build` command syntax in GitHub workflows and documentation to use `conda-build` [#2888](https://github.com/IntelPython/dpnp/pull/2888) +* Fixed incorrect `dpnp.tensor.acosh` result for `complex(±0, NaN)` special case to match the Python Array API specification [#2914](https://github.com/IntelPython/dpnp/pull/2914) ### Security diff --git a/dpnp/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpnp/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp index 44e33ce1411..559796984b9 100644 --- a/dpnp/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp +++ b/dpnp/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp @@ -150,10 +150,6 @@ struct AcoshFunctor if (std::isnan(rx)) { return resT{sycl::fabs(ry), rx}; } - /* acosh(0 + I*NaN) = NaN + I*NaN */ - if (std::isnan(ry)) { - return resT{ry, ry}; - } /* ordinary cases */ const realT res_im = sycl::copysign(rx, std::imag(in)); return resT{sycl::fabs(ry), res_im}; diff --git a/dpnp/tests/tensor/elementwise/test_hyperbolic.py b/dpnp/tests/tensor/elementwise/test_hyperbolic.py index b94c5ede3f2..d25a9845358 100644 --- a/dpnp/tests/tensor/elementwise/test_hyperbolic.py +++ b/dpnp/tests/tensor/elementwise/test_hyperbolic.py @@ -38,7 +38,9 @@ ) from .utils import ( _all_dtypes, + _complex_fp_dtypes, _map_to_device_dtype, + _real_fp_dtypes, ) _hyper_funcs = [(np.sinh, dpt.sinh), (np.cosh, dpt.cosh), (np.tanh, dpt.tanh)] @@ -65,7 +67,7 @@ def test_hyper_out_type(np_call, dpt_call, dtype): @pytest.mark.parametrize("np_call, dpt_call", _all_funcs) -@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"]) +@pytest.mark.parametrize("dtype", _real_fp_dtypes) def test_hyper_real_contig(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) @@ -96,7 +98,7 @@ def test_hyper_real_contig(np_call, dpt_call, dtype): @pytest.mark.parametrize("np_call, dpt_call", _all_funcs) -@pytest.mark.parametrize("dtype", ["c8", "c16"]) +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) def test_hyper_complex_contig(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) @@ -123,7 +125,7 @@ def test_hyper_complex_contig(np_call, dpt_call, dtype): @pytest.mark.parametrize("np_call, dpt_call", _all_funcs) -@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"]) +@pytest.mark.parametrize("dtype", _real_fp_dtypes) def test_hyper_real_strided(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) @@ -157,7 +159,7 @@ def test_hyper_real_strided(np_call, dpt_call, dtype): @pytest.mark.parametrize("np_call, dpt_call", _all_funcs) -@pytest.mark.parametrize("dtype", ["c8", "c16"]) +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) def test_hyper_complex_strided(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) @@ -185,7 +187,7 @@ def test_hyper_complex_strided(np_call, dpt_call, dtype): @pytest.mark.parametrize("np_call, dpt_call", _all_funcs) -@pytest.mark.parametrize("dtype", ["f2", "f4", "f8"]) +@pytest.mark.parametrize("dtype", _real_fp_dtypes) def test_hyper_real_special_cases(np_call, dpt_call, dtype): q = get_queue_or_skip() skip_if_dtype_not_supported(dtype, q) @@ -200,3 +202,20 @@ def test_hyper_real_special_cases(np_call, dpt_call, dtype): tol = 8 * dpt.finfo(dtype).resolution assert_allclose(dpt.asnumpy(dpt_call(yf)), Y_np, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _complex_fp_dtypes) +def test_acosh_zero_nan(dtype): + # check acosh(±0 + NaN j) = NaN ± π/2 j (Array API spec) + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = [complex(+0.0, np.nan), complex(-0.0, np.nan)] + + xf = np.array(x, dtype=dtype) + yf = dpt.asarray(xf, dtype=dtype, sycl_queue=q) + + Y_dpt = dpt.asnumpy(dpt.acosh(yf)) + + assert np.isnan(Y_dpt.real).all() + assert_allclose(np.abs(Y_dpt.imag), np.pi / 2, atol=1e-6, strict=False) From 2c740b33d935685f38ded93484a59a1e037324c8 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 18 May 2026 13:55:39 +0200 Subject: [PATCH 26/41] Align tests with numpy 2.4.5 (#2920) This PR updates the tests to align with recent NumPy 2.4.5 release. Also it removes TODOs and workarounds for fixes now available in the latest NumPy. That PR also includes: - work around for NumPy 2.4.5 regression in `conj()` for boolean arrays - extending `gcd()` overflow testing with additional test cases - cleaning p unused `allow_fall_back_on_numpy` fixture in correlation tests The PR adds compatibility note to `CHANGELOG.md` indicating this release is going to be compatible with NumPy 2.4.5. --- CHANGELOG.md | 7 +++-- dpnp/tests/test_binary_ufuncs.py | 19 +++++++++----- dpnp/tests/test_indexing.py | 7 ++--- dpnp/tests/test_linalg.py | 18 +++---------- dpnp/tests/test_manipulation.py | 26 +++++++------------ dpnp/tests/test_mathematical.py | 7 +++-- .../core_tests/test_ndarray_complex_ops.py | 8 ++++++ .../cupy/core_tests/test_nep50_examples.py | 7 ----- .../cupy/indexing_tests/test_indexing.py | 4 --- .../cupy/statistics_tests/test_correlation.py | 3 ++- 10 files changed, 46 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c58796ea57..f995d1c9a41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,16 +4,19 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.21.0] - MM/DD/2026 +## [0.21.0] - 2026-MM-DD + +This release is compatible with NumPy 2.4.5. ### Added * Added C API functions for `dpnp.tensor.usm_ndarray` setters and getters to avoid ABI breakage if `dpnp.tensor.usm_ndarray` is modified [gh-2866](https://github.com/IntelPython/dpnp/pull/2866) ### Changed -* Changed `dpnp.meshgrid` and `dpnp.tensor.meshgrid` to return a tuple instead of a list, aligning with NumPy 2.5+ behavior and 2025.12 version of the Python array API standard [#2854](https://github.com/IntelPython/dpnp/pull/2854) +* Changed `dpnp.meshgrid` and `dpnp.tensor.meshgrid` to return a tuple instead of a list, aligning with NumPy 2.5+ behavior and 2025.12 version of the Python array API standard [#2854](https://github.com/IntelPython/dpnp/pull/2854) * Updated `searchsorted` implementations to align with the 2025.12 array API spec [gh-2902](https://github.com/IntelPython/dpnp/pull/2902) +* Updated tests to align with NumPy 2.4.5 compatibility [gh-2920](https://github.com/IntelPython/dpnp/pull/2920) ### Deprecated diff --git a/dpnp/tests/test_binary_ufuncs.py b/dpnp/tests/test_binary_ufuncs.py index 0de83c5b99c..f638fbf2ac1 100644 --- a/dpnp/tests/test_binary_ufuncs.py +++ b/dpnp/tests/test_binary_ufuncs.py @@ -1014,16 +1014,21 @@ def test_broadcasting(self, func, dt): result = getattr(dpnp, func)(ia, b) assert_array_equal(result, expected) - @pytest.mark.parametrize("dt", [numpy.int32, numpy.int64]) - def test_gcd_overflow(self, dt): - a = dt(numpy.iinfo(dt).min) # negative power of two - ia = dpnp.array(a) - q = -(a // 4) + @pytest.mark.parametrize("sign", [1, -1]) + @pytest.mark.parametrize("dt", get_integer_dtypes(no_unsigned=True)) + def test_gcd_overflow(self, sign, dt): + a = dt(numpy.iinfo(dt).min) # INT_MIN + q = (a // 4) * sign + ia, iq = dpnp.array(a), dpnp.array(q) # verify that we don't overflow when taking abs(x) # not relevant for lcm, where the result is unrepresentable anyway - expected = numpy.gcd(a, q) - result = dpnp.gcd(ia, q) + expected = numpy.gcd(a, q * 3) + result = dpnp.gcd(ia, iq * 3) + assert_array_equal(result, expected) + + expected = numpy.gcd(q * 3, a) + result = dpnp.gcd(iq * 3, ia) assert_array_equal(result, expected) def test_lcm_overflow(self): diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index 2edc8214f3e..29939740a40 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -661,10 +661,11 @@ def test_empty(self, dtype, mode): dpnp.put(ia, [1, 2, 3], [], mode=mode) assert_array_equal(ia, a) - # TODO: enable test for numpy also since 2.0 + @testing.with_requires("numpy>=2.0") + @pytest.mark.parametrize("xp", [dpnp, numpy]) @pytest.mark.parametrize("mode", ["clip", "wrap"]) - def test_empty_input(self, mode): - empty = dpnp.asarray(list()) + def test_empty_input(self, xp, mode): + empty = xp.asarray(list()) with pytest.raises(IndexError): empty.put(1, 1, mode=mode) diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py index e0225ec5060..be8987ab066 100644 --- a/dpnp/tests/test_linalg.py +++ b/dpnp/tests/test_linalg.py @@ -290,8 +290,7 @@ def test_empty(self, shape, p): expected = numpy.linalg.cond(a, p=p) assert_dtype_allclose(result, expected) - # TODO: uncomment once numpy 2.3.3 release is published - # @testing.with_requires("numpy>=2.3.3") + @testing.with_requires("numpy>=2.3.3") @pytest.mark.parametrize( "dtype", get_all_dtypes(no_none=True, no_bool=True) ) @@ -305,9 +304,6 @@ def test_basic(self, dtype, shape, p): result = dpnp.linalg.cond(ia, p=p) expected = numpy.linalg.cond(a, p=p) - # TODO: remove when numpy#29333 is released - if numpy_version() < "2.3.3": - expected = expected.real assert_dtype_allclose(result, expected, factor=16) @pytest.mark.parametrize("p", _norms) @@ -3238,8 +3234,7 @@ def test_errors(self): ValueError, dpnp.linalg.matrix_rank, a_dp, tol=1e-06, rtol=1e-04 ) - # TODO: use below fixture when NumPy 2.5 is released - # @testing.with_requires("numpy>=2.5") + @testing.with_requires("numpy>=2.4.5") @pytest.mark.parametrize( "shape", [ @@ -3258,14 +3253,7 @@ def test_empty(self, shape): ia = dpnp.array(a) result = dpnp.linalg.matrix_rank(ia) - if numpy_version() < "2.5.0": # TODO: remove - # Expected behavior: rank of empty matrix is 0 - # For stacked matrices, return array of zeros - expected = numpy.zeros(shape[:-2], dtype=numpy.intp) - if expected.ndim == 0: - expected = numpy.array(0) - else: - expected = numpy.linalg.matrix_rank(a) + expected = numpy.linalg.matrix_rank(a) assert_array_equal(result, expected, strict=True) # Also test with hermitian=True diff --git a/dpnp/tests/test_manipulation.py b/dpnp/tests/test_manipulation.py index 4fc4b8cb161..d09f19f1d77 100644 --- a/dpnp/tests/test_manipulation.py +++ b/dpnp/tests/test_manipulation.py @@ -23,7 +23,6 @@ get_integer_float_dtypes, get_unsigned_dtypes, has_support_aspect64, - numpy_version, ) from .third_party.cupy import testing @@ -90,16 +89,15 @@ def test_size(self): assert dpnp.size(ia, 1) == numpy.size(a, 1) - # TODO: include commented code in the test when numpy-2.4 is released - # @testing.with_requires("numpy>=2.4") - def test_size_tuple(self): + @testing.with_requires("numpy>=2.4.0") + @pytest.mark.parametrize("axis", [(), (0,), (1,), (0, 1)]) + def test_size_tuple(self, axis): a = [[1, 2, 3], [4, 5, 6]] ia = dpnp.array(a) - assert dpnp.size(ia, ()) == 1 # numpy.size(a, ()) - assert dpnp.size(ia, (0,)) == 2 # numpy.size(a, (0,)) - assert dpnp.size(ia, (1,)) == 3 # numpy.size(a, (1,)) - assert dpnp.size(ia, (0, 1)) == 6 # numpy.size(a, (0, 1)) + result = dpnp.size(ia, axis=axis) + expected = numpy.size(a, axis=axis) + assert result == expected class TestAppend: @@ -1891,8 +1889,7 @@ def test_equal_nan(self, eq_nan_kwd): expected = numpy.unique(a, **eq_nan_kwd) assert_array_equal(result, expected) - # TODO: uncomment once numpy 2.4.0 release is published - # @testing.with_requires("numpy>=2.4.0") + @testing.with_requires("numpy>=2.4.0") @pytest.mark.parametrize("axis", [0, -1]) def test_1d_equal_nan_axis(self, axis): a = numpy.array([numpy.nan, 0, 0, numpy.nan]) @@ -1900,16 +1897,11 @@ def test_1d_equal_nan_axis(self, axis): result = dpnp.unique(ia, axis=axis, equal_nan=True) expected = numpy.unique(a, axis=axis, equal_nan=True) - # TODO: remove when numpy#29372 is released - if numpy_version() < "2.4.0": - expected = numpy.array([0.0, numpy.nan]) assert_array_equal(result, expected) - # TODO: uncomment once numpy 2.4.0 release is published - # @testing.with_requires("numpy>=2.4.0") + @testing.with_requires("numpy>=2.4.0") @pytest.mark.parametrize("equal_nan", [True, False]) - # @pytest.mark.parametrize("xp", [numpy, dpnp]) - @pytest.mark.parametrize("xp", [dpnp]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) def test_1d_axis_float_raises_typeerror(self, xp, equal_nan): a = xp.array([xp.nan, 0, 0, xp.nan]) with pytest.raises(TypeError, match="integer argument expected"): diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index 8de7ec2ed80..3dcae2ac269 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -72,6 +72,7 @@ def test_angle_complex(self, dtype, deg): class TestConj: + @testing.with_requires("numpy!=2.4.5") @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) def test_conj(self, dtype): a = generate_random_numpy_array(20, dtype) @@ -1674,15 +1675,13 @@ def test_zero(self, dt): flag = dt in [numpy.int8, numpy.int16, numpy.uint8, numpy.uint16] assert_dtype_allclose(result, expected, check_only_type_kind=flag) - # TODO: add a proper NumPy version once resolved - @testing.with_requires("numpy>=2.0.0") + @testing.with_requires("numpy>=2.3.0") def test_zero_fp16(self): a = numpy.array([0.0], dtype=numpy.float16) ia = dpnp.array(a) result = dpnp.sinc(ia) - # expected = numpy.sinc(a) # numpy returns NaN, but expected 1.0 - expected = numpy.ones_like(a) + expected = numpy.sinc(a) assert_dtype_allclose(result, expected) @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py index a2fe0e2f256..b13e326c745 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py @@ -13,6 +13,8 @@ class TestConj: @testing.numpy_cupy_array_almost_equal() def test_conj(self, xp, dtype): x = testing.shaped_arange((2, 3), xp, dtype) + if xp is numpy and numpy.__version__ == "2.4.5" and x.dtype == bool: + return x # NumPy 2.4.5 had a regression for bool_arr.conj() return x.conj() @testing.for_all_dtypes(no_complex=True) @@ -20,6 +22,8 @@ def test_conj(self, xp, dtype): def test_conj_pass(self, xp, dtype): x = testing.shaped_arange((2, 3), xp, dtype) y = x.conj() + if xp is numpy and numpy.__version__ == "2.4.5": + return x # NumPy 2.4.5 had a regression for bool_arr.conj() assert x is y return y @@ -27,6 +31,8 @@ def test_conj_pass(self, xp, dtype): @testing.numpy_cupy_array_almost_equal() def test_conjugate(self, xp, dtype): x = testing.shaped_arange((2, 3), xp, dtype) + if xp is numpy and numpy.__version__ == "2.4.5" and x.dtype == bool: + return x # NumPy 2.4.5 had a regression for bool_arr.conj() return x.conjugate() @testing.for_all_dtypes(no_complex=True) @@ -34,6 +40,8 @@ def test_conjugate(self, xp, dtype): def test_conjugate_pass(self, xp, dtype): x = testing.shaped_arange((2, 3), xp, dtype) y = x.conjugate() + if xp is numpy and numpy.__version__ == "2.4.5": + return x # NumPy 2.4.5 had a regression for bool_arr.conj() assert x is y return y diff --git a/dpnp/tests/third_party/cupy/core_tests/test_nep50_examples.py b/dpnp/tests/third_party/cupy/core_tests/test_nep50_examples.py index 44f5433281e..4e7e7b99dc5 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_nep50_examples.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_nep50_examples.py @@ -1,15 +1,8 @@ -import numpy import pytest -import dpnp as cp from dpnp.tests.helper import has_support_aspect64 from dpnp.tests.third_party.cupy import testing -# TODO: remove once all dtype aliases added -cp.int8 = numpy.int8 -cp.uint8 = numpy.uint8 -cp.int16 = numpy.int16 - # "example string" or # ("example string", "xfail message") examples = [ diff --git a/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py b/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py index f28d7647f14..bec8f4204b3 100644 --- a/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py +++ b/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py @@ -204,10 +204,6 @@ class TestChoose(unittest.TestCase): @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_choose(self, xp, dtype): - # TODO: include additional dtype when dpnp#2201 is merged - dtype_list = [xp.int8, xp.int16] - if dtype in dtype_list or xp.issubdtype(dtype, xp.unsignedinteger): - pytest.skip("dpnp.choose() does not support new integer dtypes.") a = xp.array([0, 2, 1, 2]) c = testing.shaped_arange((3, 4), xp, dtype) return a.choose(c) diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py b/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py index 604e545e078..4eaaf0ddf03 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_correlation.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import unittest import numpy @@ -226,7 +228,6 @@ def test_correlate_diff_types(self, xp, dtype1, dtype2, mode): @testing.parameterize(*testing.product({"mode": ["valid", "same", "full"]})) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestCorrelateInvalid(unittest.TestCase): @testing.with_requires("numpy>=1.18") From e9972698e07b9e368f4a400c07ca2c11e53e799a Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 18 May 2026 16:25:59 +0200 Subject: [PATCH 27/41] Fix fork PR docs workflow (#2910) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Fork PRs to this repository fail the `Build Sphinx` workflow with a 403 permission error: ``` remote: Permission to IntelPython/dpnp.git denied to github-actions[bot]. fatal: unable to access 'https://github.com/IntelPython/dpnp.git/': The requested URL returned error: 403 Error: Action failed with "The process '/usr/bin/git' failed with exit code 128" ``` ## GitHub Security Context Fork PRs run with read-only `GITHUB_TOKEN` and cannot: - ❌ Push to branches (including `gh-pages`) - ❌ Post PR comments - ❌ Access repository secrets **Reference:** [GitHub Actions documentation on fork PR security](https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflows-in-forked-repositories) > "The `GITHUB_TOKEN` has read-only permissions in pull requests from forked repositories." This is a security feature that cannot be overridden by workflow permissions. ## Solution This PR implements a **conditional publishing strategy** that respects GitHub's fork PR security model: ### For Upstream PRs (same-repo branches): ✅ Builds documentation ✅ Publishes to GitHub Pages (`gh-pages` branch) ✅ Comments with direct URL: https://intelpython.github.io/dpnp/pull/{number}/index.html ✅ Workflow passes ### For Fork PRs: ✅ Builds documentation ✅ Uploads as workflow artifact (30-day retention) ✅ Workflow passes (no 403 error) ℹ️ No comment (cannot post due to read-only token) ℹ️ Artifact visible in workflow run's "Artifacts" section --- .github/workflows/build-sphinx.yml | 31 +++++++++++++++++++++++++----- CHANGELOG.md | 1 + 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 9d67c9c66c9..9048ce7af05 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -11,10 +11,9 @@ permissions: read-all env: GH_BOT_NAME: 'github-actions[bot]' GH_BOT_EMAIL: 'github-actions[bot]@users.noreply.github.com' - GH_EVENT_OPEN_PR_UPSTREAM: ${{ github.event_name == 'pull_request' && github.event.action != 'closed' && - github.event.pull_request && !github.event.pull_request.base.repo.fork }} GH_EVENT_PUSH_UPSTREAM: ${{ github.ref == 'refs/heads/master' && github.event_name == 'push' && github.event.ref == 'refs/heads/master' && github.event.repository && !github.event.repository.fork }} + GH_EVENT_PR_OPEN: ${{ github.event_name == 'pull_request' && github.event.action != 'closed' }} PUBLISH_DIR: doc/_build/html/ defaults: @@ -191,6 +190,26 @@ jobs: - name: Copy backend docs run: cp -r dpnp/backend/doc/html ${{ env.PUBLISH_DIR }}/backend_doc + # Detect if this is a fork PR + - name: Check if fork PR + id: check_fork + run: | + IS_FORK="false" + if [ "${{ github.event_name }}" == "pull_request" ] && [ "${{ github.event.pull_request.head.repo.fork }}" == "true" ]; then + IS_FORK="true" + fi + echo "is_fork=$IS_FORK" >> "$GITHUB_OUTPUT" + echo "Is fork PR: $IS_FORK" + + # Upload artifact for fork PRs + - name: Upload docs artifact (Fork PRs) + if: env.GH_EVENT_PR_OPEN == 'true' && steps.check_fork.outputs.is_fork == 'true' + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + with: + name: pr-${{ github.event.number }}-docs + path: ${{ env.PUBLISH_DIR }} + retention-days: 30 + # The step is only used to build docs while pushing a PR to "master" - name: Deploy docs if: env.GH_EVENT_PUSH_UPSTREAM == 'true' @@ -206,7 +225,7 @@ jobs: # The step is only used to build docs while pushing to PR branch - name: Publish pull-request docs - if: env.GH_EVENT_OPEN_PR_UPSTREAM == 'true' + if: env.GH_EVENT_PR_OPEN == 'true' && steps.check_fork.outputs.is_fork == 'false' uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} @@ -220,8 +239,10 @@ jobs: user_email: ${{ env.GH_BOT_EMAIL }} # The step is only used to build docs while pushing to PR branch + # Note: Fork PRs have read-only GITHUB_TOKEN and cannot post comments + # See: https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflows-in-forked-repositories - name: Comment with URL to published pull-request docs - if: env.GH_EVENT_OPEN_PR_UPSTREAM == 'true' + if: env.GH_EVENT_PR_OPEN == 'true' && steps.check_fork.outputs.is_fork == 'false' env: PR_NUM: ${{ github.event.number }} uses: mshick/add-pr-comment@8e4927817251f1ff60c001f04568532b38e0b4a0 # v3.11.0.8.3.11.0 @@ -235,7 +256,7 @@ jobs: clean: if: | github.event_name == 'pull_request' && github.event.action == 'closed' && - github.event.pull_request && !github.event.pull_request.base.repo.fork + github.event.pull_request.head.repo && !github.event.pull_request.head.repo.fork needs: build-and-deploy diff --git a/CHANGELOG.md b/CHANGELOG.md index f995d1c9a41..db0f8413c5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ This release is compatible with NumPy 2.4.5. * Fixed incorrect in-place advanced indexing for 4D arrays when using `range` or `list` as index keys [#2872](https://github.com/IntelPython/dpnp/pull/2872) * Fixed `conda build` command syntax in GitHub workflows and documentation to use `conda-build` [#2888](https://github.com/IntelPython/dpnp/pull/2888) * Fixed incorrect `dpnp.tensor.acosh` result for `complex(±0, NaN)` special case to match the Python Array API specification [#2914](https://github.com/IntelPython/dpnp/pull/2914) +* Fixed fork PR documentation workflow failures by implementing conditional publishing strategy: upstream PRs publish to GitHub Pages with comment, fork PRs upload artifacts [#2910](https://github.com/IntelPython/dpnp/pull/2910) ### Security From 467bab2f5e281f71b404ba5ede9daa6acdb4b101 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 18 May 2026 19:35:41 +0200 Subject: [PATCH 28/41] Weekly pre-commit autoupdate (#2917) This PR updates the `.pre-commit-config.yaml` using `pre-commit autoupdate`. --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a189961a59b..4ff8b1e6ef0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -128,7 +128,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/BlankSpruce/gersemi-pre-commit - rev: 0.27.4 + rev: 0.27.5 hooks: - id: gersemi exclude: "dpnp/backend/cmake/Modules/" From 6b9774b489a640585418c2777bc42318f3525b38 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 00:09:24 +0200 Subject: [PATCH 29/41] Bump github/codeql-action from 4.35.4 to 4.35.5 (#2919) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.35.4 to 4.35.5. --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 1e12ad9a358..43d019a3063 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4.35.4 + uses: github/codeql-action/upload-sarif@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5 with: sarif_file: results.sarif From 6ceefc5d674309fa2634f2a964faaaeb857b3af0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 08:06:04 +0200 Subject: [PATCH 30/41] Bump peaceiris/actions-gh-pages from 4.0.0 to 4.1.0 (#2918) Bumps [peaceiris/actions-gh-pages](https://github.com/peaceiris/actions-gh-pages) from 4.0.0 to 4.1.0. --- .github/workflows/build-sphinx.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 9048ce7af05..ee97ab6c208 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -213,7 +213,7 @@ jobs: # The step is only used to build docs while pushing a PR to "master" - name: Deploy docs if: env.GH_EVENT_PUSH_UPSTREAM == 'true' - uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + uses: peaceiris/actions-gh-pages@84c30a85c19949d7eee79c4ff27748b70285e453 # v4.1.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ${{ env.PUBLISH_DIR }} @@ -226,7 +226,7 @@ jobs: # The step is only used to build docs while pushing to PR branch - name: Publish pull-request docs if: env.GH_EVENT_PR_OPEN == 'true' && steps.check_fork.outputs.is_fork == 'false' - uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 + uses: peaceiris/actions-gh-pages@84c30a85c19949d7eee79c4ff27748b70285e453 # v4.1.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ${{ env.PUBLISH_DIR }} From e569387c725783b2c6202b238922aa30ebe452bc Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 19 May 2026 11:27:24 +0200 Subject: [PATCH 31/41] Use `ubuntu-latest` runner where possible (#2921) This PR updates existing GitHub workflows to switch to use `ubuntu-latest` runner instead of `ubuntu-22.04` one where it's possible. Note, `cron-run-tests.yaml` still assumes to run tests on both `ubuntu-22.04` and `ubuntu-24.04` to validate both deployment paths. And `conda-package.yml` still builds on `ubuntu-22.04` to support running tests on any Ubuntu since 22.04. --- .github/workflows/build-sphinx.yml | 11 ++--------- .github/workflows/check-onemath.yaml | 4 ++-- .github/workflows/conda-package.yml | 10 +++------- .github/workflows/pre-commit.yml | 2 +- 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index ee97ab6c208..864f25b6708 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -24,7 +24,7 @@ jobs: build-and-deploy: name: Build and Deploy Docs - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest timeout-minutes: 90 permissions: @@ -70,13 +70,6 @@ jobs: echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list sudo apt update - - name: Update libstdc++-dev - run: | - sudo apt remove -y gcc-7 g++-7 gcc-8 g++-8 gcc-10 g++-10 - sudo apt remove -y libstdc++-10-dev - sudo apt autoremove - sudo apt install --reinstall -y gcc-9 g++-9 libstdc++-9-dev - - name: Install Intel OneAPI if: env.oneapi-pkgs-env == '' run: | @@ -97,7 +90,7 @@ jobs: run: | sudo add-apt-repository ppa:graphics-drivers/ppa sudo apt-get update - sudo apt-get install -y libnvidia-gl-450 + sudo apt-get install -y libnvidia-gl-550 sudo apt-get install -y nvidia-cuda-toolkit clinfo - name: Checkout repo diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml index 5936834dca1..bc79745a4d0 100644 --- a/.github/workflows/check-onemath.yaml +++ b/.github/workflows/check-onemath.yaml @@ -71,7 +71,7 @@ jobs: fail-fast: false matrix: python: ['3.13'] # no dpctl package on PyPI with enabled python 3.14 support - os: [ubuntu-22.04] # windows-2022 - no DFT support for Windows in oneMKL + os: [ubuntu-latest] # windows-2022 - no DFT support for Windows in oneMKL runs-on: ${{ matrix.os }} timeout-minutes: 120 @@ -188,7 +188,7 @@ jobs: fail-fast: false matrix: python: ['3.13'] # no dpctl package on PyPI with enabled python 3.14 support - os: [ubuntu-22.04] # windows-2022 - no DFT support for Windows in oneMKL + os: [ubuntu-latest] # windows-2022 - no DFT support for Windows in oneMKL runs-on: ${{ matrix.os }} timeout-minutes: 60 diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 3d9a994ead4..5398458e67d 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -457,7 +457,7 @@ jobs: fail-fast: false matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] - os: [ubuntu-22.04, windows-2022] + os: [ubuntu-latest, windows-2022] runs-on: ${{ matrix.os }} timeout-minutes: 10 @@ -537,11 +537,7 @@ jobs: # Needed to add a comment to a pull request's issue pull-requests: write - strategy: - matrix: - os: [ubuntu-22.04] - - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest timeout-minutes: 15 defaults: @@ -707,7 +703,7 @@ jobs: needs: [upload] - runs-on: 'ubuntu-latest' + runs-on: ubuntu-latest timeout-minutes: 10 defaults: diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index d8f59405ce8..7f705810dac 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -11,7 +11,7 @@ jobs: pre-commit: name: Check - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest timeout-minutes: 10 steps: From db2ee8087584b3be1c953a1e192a5b722fa61dff Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Tue, 19 May 2026 13:23:50 +0200 Subject: [PATCH 32/41] Support buffer protocol objects as advanced index keys (#2889) This PR proposes by adding support for buffer protocol objects (`array.array`, `memoryview` etc.) as advanced index keys These changes were proposed in [#2872](https://github.com/IntelPython/dpnp/pull/2872#discussion_r3150370962) as an extension to the advanced indexing support. --- CHANGELOG.md | 1 + dpnp/dpnp_array.py | 38 +++++++++++++---------- dpnp/tests/test_indexing.py | 60 +++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db0f8413c5c..dd3031c6547 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ This release is compatible with NumPy 2.4.5. ### Added * Added C API functions for `dpnp.tensor.usm_ndarray` setters and getters to avoid ABI breakage if `dpnp.tensor.usm_ndarray` is modified [gh-2866](https://github.com/IntelPython/dpnp/pull/2866) +* Added support for buffer protocol objects as advanced index keys in `dpnp.ndarray` [#2889](https://github.com/IntelPython/dpnp/pull/2889) ### Changed diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 02cd655fcef..899379e837e 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -52,26 +52,31 @@ def _unwrap_index_element(x): """ Unwrap a single index element for the tensor indexing layer. - Converts dpnp arrays to usm_ndarray and array-like objects (range, list) - to numpy arrays with intp dtype for NumPy-compatible advanced indexing. + Converts dpnp arrays to usm_ndarray and array-like objects (range, list, + buffer protocol objects) to numpy arrays for NumPy-compatible advanced + indexing. Scalars and slices pass through to the tensor layer. """ - if isinstance(x, dpt.usm_ndarray): + if ( + x is None + or x is Ellipsis + or isinstance(x, (dpt.usm_ndarray, slice, numpy.ndarray)) + ): return x if isinstance(x, dpnp_array): return x.get_array() - if isinstance(x, range): - return numpy.asarray(x, dtype=numpy.intp) - if isinstance(x, list): - # keep boolean lists as boolean - arr = numpy.asarray(x) - # cast empty lists (float64 in NumPy) to intp - # for correct tensor indexing - if arr.size == 0: - arr = arr.astype(numpy.intp) - return arr - return x + # scalars (int, bool, numpy scalars) pass through to the tensor layer + if isinstance(x, (int, numpy.generic)): + return x + + # convert array-like objects (range, list, buffer protocol) to numpy + arr = numpy.asarray(x) + # cast empty arrays (float64 in NumPy) to intp + # for correct tensor indexing + if arr.size == 0: + arr = arr.astype(numpy.intp) + return arr def _get_unwrapped_index_key(key): @@ -79,8 +84,9 @@ def _get_unwrapped_index_key(key): Get an unwrapped index key. Return a key where each nested instance of DPNP array is unwrapped into - USM ndarray, and array-like objects (range, list) are converted to numpy - arrays for further processing in advanced indexing functions. + USM ndarray, and array-like objects (range, list, buffer protocol objects) + are converted to numpy arrays for further processing in advanced + indexing functions. """ diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index 29939740a40..0331e7151f0 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -1,3 +1,4 @@ +import array import functools import dpctl @@ -406,6 +407,65 @@ def test_array_like_single_index(self, idx): dp_a = dpnp.arange(24).reshape(2, 3, 4) assert_array_equal(dp_a[idx], np_a[idx]) + def test_buffer_protocol_getitem(self): + inds = array.array("l") + inds.frombytes(numpy.arange(3).tobytes()) + np_a = numpy.arange(12).reshape(3, 4) + dp_a = dpnp.arange(12).reshape(3, 4) + assert_array_equal(dp_a[inds], np_a[inds]) + + def test_buffer_protocol_paired_index(self): + inds = array.array("l") + inds.frombytes(numpy.arange(3).tobytes()) + np_a = numpy.arange(12).reshape(3, 4) + dp_a = dpnp.arange(12).reshape(3, 4) + assert_array_equal(dp_a[inds, inds], np_a[inds, inds]) + + def test_buffer_protocol_setitem(self): + inds = array.array("l") + inds.frombytes(numpy.arange(3).tobytes()) + np_a = numpy.arange(12).reshape(3, 4) + dp_a = dpnp.arange(12).reshape(3, 4) + np_a[inds, inds] = 0 + dp_a[inds, inds] = 0 + assert_array_equal(dp_a, np_a) + + def test_memoryview_getitem(self): + inds = memoryview(array.array("l", [0, 1, 2])) + np_a = numpy.arange(12).reshape(3, 4) + dp_a = dpnp.arange(12).reshape(3, 4) + assert_array_equal(dp_a[inds], np_a[inds]) + + def test_bytearray_getitem(self): + inds = bytearray(b"\x00\x01\x02") + np_a = numpy.arange(10) + dp_a = dpnp.arange(10) + assert_array_equal(dp_a[inds], np_a[inds]) + + @pytest.mark.parametrize( + "idx", + [ + 1.0, + 1 + 0j, + numpy.float64(1.0), + numpy.complex128(1.0), + "a", + [0.5, 1.5], + ], + ids=[ + "float", + "complex", + "np.float64", + "np.complex128", + "str", + "float_list", + ], + ) + def test_invalid_index(self, idx): + dp_a = dpnp.arange(12).reshape(3, 4) + with pytest.raises((IndexError, TypeError)): + dp_a[idx] + class TestIx: @pytest.mark.parametrize( From c1ce4916bbbe73ef0a8a7e8b2f3f738cfe97c055 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Tue, 19 May 2026 18:00:28 +0200 Subject: [PATCH 33/41] Include libtensor headers in dpnp package distribution (#2915) This PR fixes missing libtensor headers in the installed dpnp package #2909 During the migration from `dpctl.tensor` to `dpnp.tensor` libtensor header files were not added to `package_data` so they were not included in the final package. This PR adds the missing libtensor include paths to `setup.py` --- CHANGELOG.md | 1 + dpnp/__main__.py | 27 +++++++++++++++++++++++++-- dpnp/tests/test_cli_options.py | 18 ++++++++++++++++++ setup.py | 3 +++ 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd3031c6547..2a738267346 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ This release is compatible with NumPy 2.4.5. * Fixed `conda build` command syntax in GitHub workflows and documentation to use `conda-build` [#2888](https://github.com/IntelPython/dpnp/pull/2888) * Fixed incorrect `dpnp.tensor.acosh` result for `complex(±0, NaN)` special case to match the Python Array API specification [#2914](https://github.com/IntelPython/dpnp/pull/2914) * Fixed fork PR documentation workflow failures by implementing conditional publishing strategy: upstream PRs publish to GitHub Pages with comment, fork PRs upload artifacts [#2910](https://github.com/IntelPython/dpnp/pull/2910) +* Fixed missing `libtensor` headers in the installed `dpnp` package [#2915](https://github.com/IntelPython/dpnp/pull/2915) ### Security diff --git a/dpnp/__main__.py b/dpnp/__main__.py index 1c9c652109e..349ffcd472f 100644 --- a/dpnp/__main__.py +++ b/dpnp/__main__.py @@ -39,10 +39,19 @@ def _dpnp_dir() -> str: return abs_dpnp_dir +def get_include_dir() -> str: + """Returns path to dpnp include directory containing dpnp4pybind11.hpp""" + return os.path.join(_dpnp_dir(), "backend", "include") + + +def print_include_flags() -> None: + """Prints include flags for dpnp headers""" + print("-I " + get_include_dir()) + + def get_tensor_include_dir() -> str: """Prints path to dpnp libtensor include directory""" - dpnp_dir = _dpnp_dir() - libtensor_dir = os.path.join(dpnp_dir, "tensor", "libtensor", "include") + libtensor_dir = os.path.join(_dpnp_dir(), "tensor", "libtensor", "include") return libtensor_dir @@ -55,6 +64,16 @@ def print_tensor_include_flags() -> None: def main() -> None: """Main entry-point.""" parser = argparse.ArgumentParser() + parser.add_argument( + "--includes", + action="store_true", + help="Include flags for dpnp headers.", + ) + parser.add_argument( + "--include-dir", + action="store_true", + help="Path to dpnp include directory.", + ) parser.add_argument( "--tensor-includes", action="store_true", @@ -68,6 +87,10 @@ def main() -> None: args = parser.parse_args() if not sys.argv[1:]: parser.print_help() + if args.includes: + print_include_flags() + if args.include_dir: + print(get_include_dir()) if args.tensor_includes: print_tensor_include_flags() if args.tensor_include_dir: diff --git a/dpnp/tests/test_cli_options.py b/dpnp/tests/test_cli_options.py index 0caca95f397..1d353e9fddf 100644 --- a/dpnp/tests/test_cli_options.py +++ b/dpnp/tests/test_cli_options.py @@ -2,6 +2,24 @@ import sys +def test_includes(): + res = subprocess.run( + [sys.executable, "-m", "dpnp", "--includes"], + capture_output=True, + ) + assert res.returncode == 0 + assert res.stdout + flags = res.stdout.decode("utf-8") + res = subprocess.run( + [sys.executable, "-m", "dpnp", "--include-dir"], + capture_output=True, + ) + assert res.returncode == 0 + assert res.stdout + include_dir = res.stdout.decode("utf-8") + assert flags == "-I " + include_dir + + def test_tensor_includes(): res = subprocess.run( [sys.executable, "-m", "dpnp", "--tensor-includes"], diff --git a/setup.py b/setup.py index 3f544966350..1193b61ac2a 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,9 @@ "libdpnp_backend_c.so", "dpnp_backend_c.lib", "dpnp_backend_c.dll", + "tensor/libtensor/include/kernels/*.h*", + "tensor/libtensor/include/kernels/*/*.h*", + "tensor/libtensor/include/utils/*.h*", "tests/*.*", "tests/tensor/*.py", "tests/tensor/*/*.py", From d8a042e695ca52ae30183e650d32dca2ebb6b17c Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Wed, 20 May 2026 17:42:13 +0200 Subject: [PATCH 34/41] Add `cython-lint` to pre-commit configuration (#2925) The PR extends pre-commit hooks with a `cython-lint` (used to lint Cython files) All detected issues were resolved --- .git-blame-ignore-revs | 3 + .pre-commit-config.yaml | 5 + dpnp/dpnp_algo/dpnp_algo.pxd | 12 +- dpnp/dpnp_algo/dpnp_algo.pyx | 15 +- dpnp/dpnp_algo/dpnp_algo_indexing.pxi | 6 +- dpnp/dpnp_algo/dpnp_algo_sorting.pxi | 109 +- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 310 +++- dpnp/random/dpnp_algo_random.pyx | 2274 +++++++++++++++++-------- dpnp/tensor/_compute_follows_data.pyx | 4 +- pyproject.toml | 3 + 10 files changed, 1934 insertions(+), 807 deletions(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index e4fe0bcc4b2..9d5ff5a4ef0 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -20,3 +20,6 @@ e62718415aa3660da5f607e352c991a063a54219 # Bump clang-format from 12.0.1 to 22.1.0 version c2d65bd451a7d8e5b6319147da95e9dabf7a382b + +# cython-lint cleanup +6afda951b26d59aee7488f40c40968feb4725ad8 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ff8b1e6ef0..c4a900b477b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -133,3 +133,8 @@ repos: - id: gersemi exclude: "dpnp/backend/cmake/Modules/" args: ["-i", "-l", "88", "--no-warn-about-unknown-commands"] +- repo: https://github.com/MarcoGorelli/cython-lint + rev: v0.19.0 + hooks: + - id: cython-lint + - id: double-quote-cython-strings diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 1e755770893..e83b6b58859 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -29,7 +29,8 @@ # ***************************************************************************** -cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this namespace for Enum import +# need this namespace for Enum import +cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": cdef enum DPNPFuncName "DPNPFuncName": DPNP_FN_PARTITION_EXT DPNP_FN_RNG_BETA_EXT @@ -70,7 +71,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_RNG_WEIBULL_EXT DPNP_FN_RNG_ZIPF_EXT -cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncType": # need this namespace for Enum import +# need this namespace for Enum import +cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncType": cdef enum DPNPFuncType "DPNPFuncType": DPNP_FT_NONE DPNP_FT_INT @@ -88,7 +90,11 @@ cdef extern from "dpnp_iface_fptr.hpp": DPNPFuncType return_type_no_fp64 void *ptr_no_fp64 - DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except + + DPNPFuncData get_dpnp_function_ptr( + DPNPFuncName name, + DPNPFuncType first_type, + DPNPFuncType second_type, + ) except + """ diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 53abcad1198..c848430cd7e 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -62,22 +62,22 @@ cdef DPNPFuncType dpnp_dtype_to_DPNPFuncType(dtype): kind = chr(kind) itemsize = dpnp.dtype(dtype).itemsize - if dt_c == 'd': + if dt_c == "d": return DPNP_FT_DOUBLE - elif dt_c == 'f': + elif dt_c == "f": return DPNP_FT_FLOAT - elif kind == 'i': + elif kind == "i": if itemsize == 8: return DPNP_FT_LONG elif itemsize == 4: return DPNP_FT_INT else: utils.checker_throw_type_error("dpnp_dtype_to_DPNPFuncType", dtype) - elif dt_c == 'F': + elif dt_c == "F": return DPNP_FT_CMPLX64 - elif dt_c == 'D': + elif dt_c == "D": return DPNP_FT_CMPLX128 - elif dt_c == '?': + elif dt_c == "?": return DPNP_FT_BOOL else: utils.checker_throw_type_error("dpnp_dtype_to_DPNPFuncType", dtype) @@ -85,7 +85,8 @@ cdef DPNPFuncType dpnp_dtype_to_DPNPFuncType(dtype): cdef dpnp_DPNPFuncType_to_dtype(size_t type): """ - Type 'size_t' used instead 'DPNPFuncType' because Cython has lack of Enum support (0.29) + Type 'size_t' used instead 'DPNPFuncType' because + Cython has lack of Enum support (0.29) TODO needs to use DPNPFuncType here """ if type == DPNP_FT_DOUBLE: diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi index 54ed3e99fba..b44dccb5ee5 100644 --- a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi @@ -42,7 +42,11 @@ __all__ += [ "dpnp_putmask", ] -cpdef dpnp_putmask(utils.dpnp_descriptor arr, utils.dpnp_descriptor mask, utils.dpnp_descriptor values): +cpdef dpnp_putmask( + utils.dpnp_descriptor arr, + utils.dpnp_descriptor mask, + utils.dpnp_descriptor values, +): cdef int values_size = values.size mask_flatiter = mask.get_pyobj().flat diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pxi b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi index c9463b71331..516c080a924 100644 --- a/dpnp/dpnp_algo/dpnp_algo_sorting.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi @@ -43,52 +43,85 @@ __all__ += [ ] -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_partition_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - void * , - void * , - const size_t, - const shape_elem_type * , - const size_t, - const c_dpctl.DPCTLEventVectorRef) - - -cpdef utils.dpnp_descriptor dpnp_partition(utils.dpnp_descriptor arr, int kth, axis=-1, kind='introselect', order=None): +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_partition_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + void *, + void *, + const size_t, + const shape_elem_type *, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) + + +cpdef utils.dpnp_descriptor dpnp_partition( + utils.dpnp_descriptor arr, int kth, + axis=-1, kind="introselect", order=None, +): cdef shape_type_c shape1 = arr.shape - cdef size_t kth_ = kth if kth >= 0 else (arr.ndim + kth) - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(arr.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_PARTITION_EXT, param1_type, param1_type) - - cdef utils.dpnp_descriptor arr2 = dpnp.get_dpnp_descriptor(arr.get_pyobj().copy(), copy_when_nondefault_queue=False) + cdef size_t kth_ = ( + kth if kth >= 0 else (arr.ndim + kth) + ) + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(arr.dtype) + ) + + cdef DPNPFuncData kernel_data = ( + get_dpnp_function_ptr( + DPNP_FN_PARTITION_EXT, + param1_type, param1_type, + ) + ) + + cdef utils.dpnp_descriptor arr2 = ( + dpnp.get_dpnp_descriptor( + arr.get_pyobj().copy(), + copy_when_nondefault_queue=False, + ) + ) arr_obj = arr.get_array() - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(arr.shape, - kernel_data.return_type, - None, - device=arr_obj.sycl_device, - usm_type=arr_obj.usm_type, - sycl_queue=arr_obj.sycl_queue) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + arr.shape, + kernel_data.return_type, + None, + device=arr_obj.sycl_device, + usm_type=arr_obj.usm_type, + sycl_queue=arr_obj.sycl_queue, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_partition_t func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - arr.get_data(), - arr2.get_data(), - result.get_data(), - kth_, - shape1.data(), - arr.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_partition_t func = ( + kernel_data.ptr + ) + + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, + arr.get_data(), + arr2.get_data(), + result.get_data(), + kth_, + shape1.data(), + arr.ndim, + NULL, + ) + + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index 00f40a0358e..d037bba5179 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -53,9 +53,7 @@ from dpnp.dpnp_algo.dpnp_algo cimport ( dpnp_DPNPFuncType_to_dtype, ) -""" -Python import functions -""" +# Python import functions __all__ = [ "call_origin", "checker_throw_type_error", @@ -75,9 +73,10 @@ cdef ERROR_PREFIX = "DPNP error:" def convert_item(item): if hasattr(item, "__sycl_usm_array_interface__"): item_converted = dpnp.asnumpy(item) - elif hasattr(item, "__array_interface__"): # detect if it is a container (TODO any better way?) - mod_name = getattr(item, "__module__", 'none') - if (mod_name != 'numpy'): + elif hasattr(item, "__array_interface__"): + # detect if it is a container (TODO any better way?) + mod_name = getattr(item, "__module__", "none") + if (mod_name != "numpy"): item_converted = dpnp.asnumpy(item) else: item_converted = item @@ -118,15 +117,28 @@ def call_origin(function, *args, **kwargs): allow_fallback = kwargs.pop("allow_fallback", False) - if not allow_fallback and config.__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__ == 1: - raise NotImplementedError(f"Requested function={function.__name__} with args={args} and kwargs={kwargs} " - "isn't currently supported and would fall back on NumPy implementation. " - "Define environment variable `DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK` to `0` " - "if the fall back is required to be supported without raising an exception.") + if ( + not allow_fallback + and config.__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__ == 1 + ): + raise NotImplementedError( + f"Requested function={function.__name__} " + f"with args={args} and kwargs={kwargs} " + "isn't currently supported and would fall " + "back on NumPy implementation. " + "Define environment variable " + "`DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK` " + "to `0` if the fall back is required to be " + "supported without raising an exception." + ) dpnp_inplace = kwargs.pop("dpnp_inplace", False) sycl_queue = kwargs.pop("sycl_queue", None) - # print(f"DPNP call_origin(): Fallback called. \n\t function={function}, \n\t args={args}, \n\t kwargs={kwargs}, \n\t dpnp_inplace={dpnp_inplace}") + # print(f"DPNP call_origin(): Fallback called. " + # f"\n\t function={function}, " + # f"\n\t args={args}, " + # f"\n\t kwargs={kwargs}, " + # f"\n\t dpnp_inplace={dpnp_inplace}") kwargs_out = kwargs.get("out", None) alloc_queues = [sycl_queue] if sycl_queue else [] @@ -155,11 +167,21 @@ def call_origin(function, *args, **kwargs): exec_q = get_execution_queue(alloc_queues) if exec_q is None: - exec_q = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue) - # print(f"DPNP call_origin(): backend called. \n\t function={function}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}") + exec_q = dpnp.get_normalized_queue_device( + sycl_queue=sycl_queue + ) + # print(f"DPNP call_origin(): backend called. " + # f"\n\t function={function}, " + # f"\n\t args_new={args_new}, " + # f"\n\t kwargs_new={kwargs_new}, " + # f"\n\t dpnp_inplace={dpnp_inplace}") # TODO need to put array memory into NumPy call result_origin = function(*args_new, **kwargs_new) - # print(f"DPNP call_origin(): result from backend. \n\t result_origin={result_origin}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}") + # print(f"DPNP call_origin(): result from backend. " + # f"\n\t result_origin={result_origin}, " + # f"\n\t args_new={args_new}, " + # f"\n\t kwargs_new={kwargs_new}, " + # f"\n\t dpnp_inplace={dpnp_inplace}") result = result_origin if dpnp_inplace: # enough to modify only first argument in place @@ -173,13 +195,22 @@ def call_origin(function, *args, **kwargs): elif isinstance(result, numpy.ndarray): if kwargs_out is None: - # use dtype from input arguments if present or from the result otherwise - result_dtype = kwargs.get("dtype", None) or result_origin.dtype + # use dtype from input arguments if present + # or from the result otherwise + result_dtype = ( + kwargs.get("dtype", None) or result_origin.dtype + ) if exec_q is not None: - result_dtype = map_dtype_to_device(result_origin.dtype, exec_q.sycl_device) - - result = dpnp_container.empty(result_origin.shape, dtype=result_dtype, sycl_queue=exec_q) + result_dtype = map_dtype_to_device( + result_origin.dtype, exec_q.sycl_device + ) + + result = dpnp_container.empty( + result_origin.shape, + dtype=result_dtype, + sycl_queue=exec_q, + ) else: result = kwargs_out @@ -192,10 +223,17 @@ def call_origin(function, *args, **kwargs): res = res_origin if isinstance(res_origin, numpy.ndarray): if exec_q is not None: - result_dtype = map_dtype_to_device(res_origin.dtype, exec_q.sycl_device) + result_dtype = map_dtype_to_device( + res_origin.dtype, + exec_q.sycl_device, + ) else: result_dtype = res_origin.d_type - res = dpnp_container.empty(res_origin.shape, dtype=result_dtype, sycl_queue=exec_q) + res = dpnp_container.empty( + res_origin.shape, + dtype=result_dtype, + sycl_queue=exec_q, + ) copy_from_origin(res, res_origin) result_list.append(res) @@ -215,7 +253,10 @@ def unwrap_array(x1): def _get_coerced_usm_type(objects): - types_in_use = [obj.usm_type for obj in objects if hasattr(obj, "usm_type")] + types_in_use = [ + obj.usm_type for obj in objects + if hasattr(obj, "usm_type") + ] if len(types_in_use) == 0: return None elif len(types_in_use) == 1: @@ -223,12 +264,17 @@ def _get_coerced_usm_type(objects): common_usm_type = get_coerced_usm_type(types_in_use) if common_usm_type is None: - raise ValueError("Input arrays must have coerced USM types") + raise ValueError( + "Input arrays must have coerced USM types" + ) return common_usm_type def _get_common_allocation_queue(objects): - queues_in_use = [obj.sycl_queue for obj in objects if hasattr(obj, "sycl_queue")] + queues_in_use = [ + obj.sycl_queue for obj in objects + if hasattr(obj, "sycl_queue") + ] if len(queues_in_use) == 0: return None elif len(queues_in_use) == 1: @@ -236,25 +282,38 @@ def _get_common_allocation_queue(objects): common_queue = get_execution_queue(queues_in_use) if common_queue is None: - raise ValueError("Input arrays must be allocated on the same SYCL queue") + raise ValueError( + "Input arrays must be allocated " + "on the same SYCL queue" + ) return common_queue def get_usm_allocations(objects): """ - Given a list of objects returns a tuple of USM type and SYCL queue - which can be used for a memory allocation and to follow compute follows data paradigm, - or returns `(None, None)` if the default USM type and SYCL queue can be used. - An exception will be raised, if the paradigm is broken for the given list of objects. + Given a list of objects returns a tuple of USM type + and SYCL queue which can be used for a memory + allocation and to follow compute follows data + paradigm, or returns `(None, None)` if the default + USM type and SYCL queue can be used. + An exception will be raised, if the paradigm is + broken for the given list of objects. """ if not isinstance(objects, (list, tuple)): - raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) + raise TypeError( + "Expected a list or a tuple, got {}".format( + type(objects) + ) + ) if len(objects) == 0: return (None, None) - return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects)) + return ( + _get_coerced_usm_type(objects), + _get_common_allocation_queue(objects), + ) def map_dtype_to_device(dtype, device): @@ -263,10 +322,14 @@ def map_dtype_to_device(dtype, device): """ dtype = dpnp.dtype(dtype) - if not hasattr(dtype, 'char'): - raise TypeError(f"Invalid type of input dtype={dtype}") + if not hasattr(dtype, "char"): + raise TypeError( + f"Invalid type of input dtype={dtype}" + ) elif not isinstance(device, dpctl.SyclDevice): - raise TypeError(f"Invalid type of input device={device}") + raise TypeError( + f"Invalid type of input device={device}" + ) dtc = dtype.char if dtc == "?" or dpnp.issubdtype(dtype, dpnp.integer): @@ -298,44 +361,65 @@ def map_dtype_to_device(dtype, device): return dtype # complex64 is default complex type return dpnp.dtype("c8") - raise RuntimeError(f"Unrecognized type of input dtype={dtype}") + raise RuntimeError( + f"Unrecognized type of input dtype={dtype}" + ) cpdef checker_throw_type_error(function_name, given_type): - raise TypeError(f"{ERROR_PREFIX} in function {function_name}() type '{given_type}' is not supported") + raise TypeError( + f"{ERROR_PREFIX} in function {function_name}() " + f"type '{given_type}' is not supported" + ) -cpdef checker_throw_value_error(function_name, param_name, param, expected): +cpdef checker_throw_value_error( + function_name, param_name, param, expected +): # import sys # sys.tracebacklimit = 0 - err_msg = f"{ERROR_PREFIX} in function {function_name}() parameter '{param_name}'" + err_msg = ( + f"{ERROR_PREFIX} in function {function_name}() " + f"parameter '{param_name}'" + ) err_msg += f" expected `{expected}`, but '{param}' provided" raise ValueError(err_msg) -cdef dpnp_descriptor create_output_descriptor(shape_type_c output_shape, - DPNPFuncType c_type, - dpnp_descriptor requested_out, - device=None, - usm_type="device", - sycl_queue=None): +cdef dpnp_descriptor create_output_descriptor( + shape_type_c output_shape, + DPNPFuncType c_type, + dpnp_descriptor requested_out, + device=None, + usm_type="device", + sycl_queue=None, +): cdef dpnp_descriptor result_desc if requested_out is None: - result = None + _result = None if sycl_queue is not None: device = None - result_dtype = dpnp_DPNPFuncType_to_dtype(< size_t > c_type) - result_obj = dpnp_container.empty(output_shape, - dtype=result_dtype, - device=device, - usm_type=usm_type, - sycl_queue=sycl_queue) + result_dtype = dpnp_DPNPFuncType_to_dtype( + < size_t > c_type + ) + result_obj = dpnp_container.empty( + output_shape, + dtype=result_dtype, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) result_desc = dpnp_descriptor(result_obj) else: """ Based on 'out' parameter """ if (output_shape != requested_out.shape): - checker_throw_value_error("create_output_descriptor", "out.shape", requested_out.shape, output_shape) + checker_throw_value_error( + "create_output_descriptor", + "out.shape", + requested_out.shape, + output_shape, + ) if isinstance(requested_out, dpnp_descriptor): result_desc = requested_out @@ -354,8 +438,12 @@ cpdef inline tuple _object_to_tuple(object obj): if obj is None: return () - # dpnp.ndarray unconditionally succeeds in PySequence_Check as it implements __getitem__ - if cpython.PySequence_Check(obj) and not dpnp.is_supported_array_type(obj): + # dpnp.ndarray unconditionally succeeds in + # PySequence_Check as it implements __getitem__ + if ( + cpython.PySequence_Check(obj) + and not dpnp.is_supported_array_type(obj) + ): if isinstance(obj, numpy.ndarray): obj = numpy.atleast_1d(obj) @@ -364,7 +452,10 @@ cpdef inline tuple _object_to_tuple(object obj): for i in range(0, nd): if cpython.PyBool_Check(obj[i]): - raise TypeError("DPNP object_to_tuple(): no item in size can be bool") + raise TypeError( + "DPNP object_to_tuple(): " + "no item in size can be bool" + ) # Assumes each item is castable to Py_ssize_t, # otherwise TypeError will be raised @@ -373,20 +464,33 @@ cpdef inline tuple _object_to_tuple(object obj): if dpnp.isscalar(obj): if cpython.PyBool_Check(obj): - raise TypeError("DPNP object_to_tuple(): 'obj' can't be bool") + raise TypeError( + "DPNP object_to_tuple(): " + "'obj' can't be bool" + ) return (obj, ) - raise ValueError("DPNP object_to_tuple(): 'obj' should be 'None', collections.abc.Sequence, or 'int'") + raise ValueError( + "DPNP object_to_tuple(): 'obj' should be " + "'None', collections.abc.Sequence, or 'int'" + ) -cpdef cpp_bool use_origin_backend(input1=None, size_t compute_size=0): +cpdef cpp_bool use_origin_backend( + input1=None, size_t compute_size=0 +): """ - This function needs to redirect particular computation cases to original backend + This function needs to redirect particular + computation cases to original backend. + Parameters: - input1: One of the input parameter of the API function - compute_size: Some amount of total compute size of the task + input1: One of the input parameter of the API + function + compute_size: Some amount of total compute size + of the task Return: - True - computations are better to be executed on original backend + True - computations are better to be executed on + original backend False - it is better to use this SW to compute """ @@ -396,24 +500,47 @@ cpdef cpp_bool use_origin_backend(input1=None, size_t compute_size=0): return False -cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): - """Get common USM allocation in the form of (sycl_device, usm_type, sycl_queue).""" +cdef tuple get_common_usm_allocation( + dpnp_descriptor x1, dpnp_descriptor x2 +): + """ + Get common USM allocation in the form of + (sycl_device, usm_type, sycl_queue). + """ array1_obj = x1.get_array() array2_obj = x2.get_array() - common_usm_type = get_coerced_usm_type((array1_obj.usm_type, array2_obj.usm_type)) + common_usm_type = get_coerced_usm_type( + (array1_obj.usm_type, array2_obj.usm_type) + ) if common_usm_type is None: raise ValueError( - "could not recognize common USM type for inputs of USM types {} and {}" - "".format(array1_obj.usm_type, array2_obj.usm_type)) - - common_sycl_queue = get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue)) + "could not recognize common USM type " + "for inputs of USM types {} and {}" + "".format( + array1_obj.usm_type, + array2_obj.usm_type, + ) + ) + + common_sycl_queue = get_execution_queue( + (array1_obj.sycl_queue, array2_obj.sycl_queue) + ) if common_sycl_queue is None: raise ValueError( - "could not recognize common SYCL queue for inputs in SYCL queues {} and {}" - "".format(array1_obj.sycl_queue, array2_obj.sycl_queue)) + "could not recognize common SYCL queue " + "for inputs in SYCL queues {} and {}" + "".format( + array1_obj.sycl_queue, + array2_obj.sycl_queue, + ) + ) - return (common_sycl_queue.sycl_device, common_usm_type, common_sycl_queue) + return ( + common_sycl_queue.sycl_device, + common_usm_type, + common_sycl_queue, + ) cdef class dpnp_descriptor: @@ -425,11 +552,15 @@ cdef class dpnp_descriptor: self.dpnp_descriptor_is_scalar = True """ Acquire DPCTL data container storage """ - self.descriptor = getattr(obj, "__sycl_usm_array_interface__", None) + self.descriptor = getattr( + obj, "__sycl_usm_array_interface__", None + ) if self.descriptor is None: """ Acquire main data storage """ - self.descriptor = getattr(obj, "__array_interface__", None) + self.descriptor = getattr( + obj, "__array_interface__", None + ) if self.descriptor is None: return @@ -442,12 +573,18 @@ cdef class dpnp_descriptor: cdef Py_ssize_t shape_it = 0 self.dpnp_descriptor_data_size = 1 for shape_it in self.shape: - # TODO need to use common procedure from utils to calculate array size by shape + # TODO need to use common procedure from + # utils to calculate array size by shape if shape_it < 0: - raise ValueError(f"{ERROR_PREFIX} dpnp_descriptor::__init__() invalid value {shape_it} in 'shape'") + raise ValueError( + f"{ERROR_PREFIX} " + "dpnp_descriptor::__init__() " + f"invalid value {shape_it} " + "in 'shape'" + ) self.dpnp_descriptor_data_size *= shape_it - """ set scalar property """ + # set scalar property self.dpnp_descriptor_is_scalar = False @property @@ -490,7 +627,7 @@ cdef class dpnp_descriptor: @property def offset(self): if self.is_valid: - return self.descriptor.get('offset', 0) + return self.descriptor.get("offset", 0) return 0 @property @@ -512,7 +649,10 @@ cdef class dpnp_descriptor: @property def __array_interface__(self): - # print(f"====dpnp_descriptor::__array_interface__====self.descriptor={ < size_t > self.descriptor}") + # print("====dpnp_descriptor::" + # "__array_interface__====" + # f"self.descriptor=" + # f"{self.descriptor}") if self.descriptor is None: return None @@ -532,14 +672,18 @@ cdef class dpnp_descriptor: return self.origin_pyobj def get_array(self): - if isinstance(self.origin_pyobj, dpnp.tensor.usm_ndarray): + if isinstance( + self.origin_pyobj, dpnp.tensor.usm_ndarray + ): return self.origin_pyobj if isinstance(self.origin_pyobj, dpnp_array): return self.origin_pyobj.get_array() raise TypeError( - "expected either dpnp.tensor.usm_ndarray or dpnp.dpnp_array.dpnp_array, got {}" - "".format(type(self.origin_pyobj))) + "expected either dpnp.tensor.usm_ndarray " + "or dpnp.dpnp_array.dpnp_array, got {}" + "".format(type(self.origin_pyobj)) + ) cdef void * get_data(self): cdef Py_ssize_t item_size = 0 diff --git a/dpnp/random/dpnp_algo_random.pyx b/dpnp/random/dpnp_algo_random.pyx index 018e9b72eea..70aa49ea63c 100644 --- a/dpnp/random/dpnp_algo_random.pyx +++ b/dpnp/random/dpnp_algo_random.pyx @@ -39,11 +39,9 @@ and the rest of the library import numbers -import dpctl import numpy import dpnp -import dpnp.config as config from dpnp.dpnp_array import dpnp_array cimport dpctl as c_dpctl @@ -91,187 +89,290 @@ __all__ = [ ] -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_beta_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_binomial_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const int, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_chisquare_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const int, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_exponential_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_f_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_gamma_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_geometric_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const float, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_gumbel_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_hypergeometric_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const int, - const int, - const int, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_laplace_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_logistic_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_lognormal_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_multinomial_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * result, - const int, - void * , - const size_t, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef void(*fptr_dpnp_rng_multivariate_normal_c_1out_t)(void * , - const int, - void * , - const size_t, - void * , - const size_t, - const size_t) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_negative_binomial_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_noncentral_chisquare_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_normal_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const int64_t, - void * , - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_pareto_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_poisson_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_power_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_rayleigh_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_shuffle_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , - const size_t, - const size_t, - const size_t, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef void(*fptr_dpnp_rng_srand_c_1out_t)(const size_t) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_standard_cauchy_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_standard_exponential_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_standard_gamma_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_standard_t_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_triangular_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, void * , - const double, - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_uniform_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const int64_t, - void * , - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_vonmises_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_wald_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void *, - const double, - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_weibull_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_rng_zipf_c_1out_t)(c_dpctl.DPCTLSyclQueueRef, - void * , - const double, - const size_t, - const c_dpctl.DPCTLEventVectorRef) except + - +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_beta_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_binomial_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const int, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_chisquare_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const int, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_exponential_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_f_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_gamma_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_geometric_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const float, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_gumbel_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_hypergeometric_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const int, + const int, + const int, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_laplace_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_logistic_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_lognormal_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_multinomial_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void * result, + const int, + void *, + const size_t, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef void( + *fptr_dpnp_rng_multivariate_normal_c_1out_t)( + void *, + const int, + void *, + const size_t, + void *, + const size_t, + const size_t, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_negative_binomial_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_noncentral_chisquare_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_normal_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const int64_t, + void *, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_pareto_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_poisson_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_power_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_rayleigh_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_shuffle_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const size_t, + const size_t, + const size_t, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef void( + *fptr_dpnp_rng_srand_c_1out_t)( + const size_t, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_standard_cauchy_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_standard_exponential_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_standard_gamma_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_standard_t_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_triangular_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_uniform_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const int64_t, + void *, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_vonmises_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_wald_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_weibull_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + +ctypedef c_dpctl.DPCTLSyclEventRef( + *fptr_dpnp_rng_zipf_c_1out_t)( + c_dpctl.DPCTLSyclQueueRef, + void *, + const double, + const size_t, + const c_dpctl.DPCTLEventVectorRef, +) except + cdef extern from "dpnp_random_state.hpp": @@ -280,13 +381,19 @@ cdef extern from "dpnp_random_state.hpp": cdef struct mt19937_struct: pass - void MT19937_InitScalarSeed(mt19937_struct *, c_dpctl.DPCTLSyclQueueRef, uint32_t) - void MT19937_InitVectorSeed(mt19937_struct *, c_dpctl.DPCTLSyclQueueRef, uint32_t *, unsigned int) + void MT19937_InitScalarSeed( + mt19937_struct *, c_dpctl.DPCTLSyclQueueRef, + uint32_t) + void MT19937_InitVectorSeed( + mt19937_struct *, c_dpctl.DPCTLSyclQueueRef, + uint32_t *, unsigned int) void MT19937_Delete(mt19937_struct *) cdef struct mcg59_struct: pass - void MCG59_InitScalarSeed(mcg59_struct *, c_dpctl.DPCTLSyclQueueRef, uint64_t) + void MCG59_InitScalarSeed( + mcg59_struct *, c_dpctl.DPCTLSyclQueueRef, + uint64_t) void MCG59_Delete(mcg59_struct *) @@ -303,7 +410,9 @@ cdef class _Engine: # keep a reference on SYCL queue self.q = sycl_queue - self.q_ref = c_dpctl.DPCTLQueue_Copy((self.q).get_queue_ref()) + self.q_ref = c_dpctl.DPCTLQueue_Copy( + (self.q).get_queue_ref() + ) if self.q_ref is NULL: raise ValueError("SyclQueue copy failed") @@ -313,9 +422,16 @@ cdef class _Engine: cdef bint is_integer(self, value): if isinstance(value, numbers.Number): - return isinstance(value, int) or isinstance(value, dpnp.integer) + return ( + isinstance(value, int) + or isinstance(value, dpnp.integer) + ) # cover an element of dpnp array: - return numpy.ndim(value) == 0 and hasattr(value, "dtype") and dpnp.issubdtype(value, dpnp.integer) + return ( + numpy.ndim(value) == 0 + and hasattr(value, "dtype") + and dpnp.issubdtype(value, dpnp.integer) + ) cdef void set_engine(self, engine_struct* engine): self.engine_base = engine @@ -329,7 +445,9 @@ cdef class _Engine: cdef c_dpctl.DPCTLSyclQueueRef get_queue_ref(self): return self.q_ref - cpdef utils.dpnp_descriptor normal(self, loc, scale, size, dtype, usm_type): + cpdef utils.dpnp_descriptor normal( + self, loc, scale, size, dtype, usm_type + ): cdef shape_type_c result_shape cdef utils.dpnp_descriptor result cdef DPNPFuncType param1_type @@ -339,32 +457,50 @@ cdef class _Engine: result_shape = utils._object_to_tuple(size) if scale == 0.0: - return utils.dpnp_descriptor(dpnp.full(result_shape, loc, dtype=dtype)) + return utils.dpnp_descriptor( + dpnp.full(result_shape, loc, dtype=dtype) + ) - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_NORMAL_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_NORMAL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=None, - usm_type=usm_type, - sycl_queue=self.get_queue()) - - func = kernel_data.ptr + result = utils.create_output_descriptor( + result_shape, + kernel_data.return_type, + None, + device=None, + usm_type=usm_type, + sycl_queue=self.get_queue(), + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - event_ref = func(self.get_queue_ref(), result.get_data(), loc, scale, result.size, self.get_engine(), NULL) + event_ref = func( + self.get_queue_ref(), result.get_data(), + loc, scale, result.size, + self.get_engine(), NULL, + ) if event_ref != NULL: - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result - cpdef utils.dpnp_descriptor uniform(self, low, high, size, dtype, usm_type): + cpdef utils.dpnp_descriptor uniform( + self, low, high, size, dtype, usm_type + ): cdef shape_type_c result_shape cdef utils.dpnp_descriptor result cdef DPNPFuncType param1_type @@ -374,35 +510,52 @@ cdef class _Engine: result_shape = utils._object_to_tuple(size) if low == high: - return utils.dpnp_descriptor(dpnp.full(result_shape, low, dtype=dtype)) + return utils.dpnp_descriptor( + dpnp.full(result_shape, low, dtype=dtype) + ) - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_UNIFORM_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_UNIFORM_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=None, - usm_type=usm_type, - sycl_queue=self.get_queue()) - - func = kernel_data.ptr + result = utils.create_output_descriptor( + result_shape, + kernel_data.return_type, + None, + device=None, + usm_type=usm_type, + sycl_queue=self.get_queue(), + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - event_ref = func(self.get_queue_ref(), result.get_data(), low, high, result.size, self.get_engine(), NULL) + event_ref = func( + self.get_queue_ref(), result.get_data(), + low, high, result.size, + self.get_engine(), NULL, + ) if event_ref != NULL: - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result cdef class MT19937(_Engine): """ - Class storing MKL engine for MT199374x32x10 (The Mersenne Twister pseudorandom number generator). + Class storing MKL engine for MT199374x32x10 + (The Mersenne Twister pseudorandom number generator). """ @@ -419,44 +572,82 @@ cdef class MT19937(_Engine): if self.is_uint_range(seed): scalar_seed = seed else: - raise ValueError("Seed must be between 0 and 2**32 - 1") - elif isinstance(seed, (list, tuple, range, numpy.ndarray, dpnp_array)): + raise ValueError( + "Seed must be between 0 and 2**32 - 1" + ) + elif isinstance( + seed, + (list, tuple, range, numpy.ndarray, dpnp_array), + ): if len(seed) == 0: raise ValueError("Seed must be non-empty") elif numpy.ndim(seed) > 1: - raise ValueError("Seed array must be 1-d") - elif not all([self.is_integer(item) for item in seed]): - raise TypeError("Seed must be a sequence of unsigned int elements") - elif not all([self.is_uint_range(item) for item in seed]): - raise ValueError("Seed must be between 0 and 2**32 - 1") + raise ValueError( + "Seed array must be 1-d" + ) + elif not all( + [self.is_integer(item) for item in seed] + ): + raise TypeError( + "Seed must be a sequence of " + "unsigned int elements" + ) + elif not all( + [self.is_uint_range(item) + for item in seed] + ): + raise ValueError( + "Seed must be between 0 and 2**32 - 1" + ) else: is_vector_seed = True vector_seed_len = len(seed) if vector_seed_len > 3: raise ValueError( - f"{vector_seed_len} length of seed vector isn't supported, " - "the length is limited by 3") - - vector_seed = malloc(vector_seed_len * sizeof(uint32_t)) + f"{vector_seed_len} length of " + "seed vector isn't supported, " + "the length is limited by 3" + ) + + vector_seed = malloc( + vector_seed_len * sizeof(uint32_t) + ) if (not vector_seed): - raise MemoryError(f"Could not allocate memory for seed vector of length {vector_seed_len}") - - # convert input seed's type to uint32_t one (expected in MKL function) + raise MemoryError( + "Could not allocate memory for " + "seed vector of length " + f"{vector_seed_len}" + ) + + # convert input seed's type to uint32_t + # one (expected in MKL function) try: for i in range(vector_seed_len): - vector_seed[i] = seed[i] + vector_seed[i] = ( + seed[i] + ) except Exception as e: free(vector_seed) raise e else: - raise TypeError("Seed must be an unsigned int, or a sequence of unsigned int elements") + raise TypeError( + "Seed must be an unsigned int, or a " + "sequence of unsigned int elements" + ) if is_vector_seed: - MT19937_InitVectorSeed(&self.mt19937, self.q_ref, vector_seed, vector_seed_len) + MT19937_InitVectorSeed( + &self.mt19937, self.q_ref, + vector_seed, vector_seed_len, + ) free(vector_seed) else: - MT19937_InitScalarSeed(&self.mt19937, self.q_ref, scalar_seed) - self.set_engine( &self.mt19937) + MT19937_InitScalarSeed( + &self.mt19937, self.q_ref, scalar_seed, + ) + self.set_engine( + &self.mt19937 + ) def __dealloc__(self): MT19937_Delete(&self.mt19937) @@ -467,14 +658,17 @@ cdef class MT19937(_Engine): max_val = dpnp.iinfo(numpy.uint32).max if isinstance(value, dpnp_array): - max_val = dpnp.array(max_val, dtype=numpy.uint32) + max_val = dpnp.array( + max_val, dtype=numpy.uint32 + ) return value <= max_val cdef class MCG59(_Engine): """ Class storing MKL engine for MCG59 - (the 59-bit multiplicative congruential pseudorandom number generator). + (the 59-bit multiplicative congruential + pseudorandom number generator). """ @@ -488,12 +682,18 @@ cdef class MCG59(_Engine): if self.is_uint64_range(seed): scalar_seed = seed else: - raise ValueError("Seed must be between 0 and 2**64 - 1") + raise ValueError( + "Seed must be between 0 and 2**64 - 1" + ) else: raise TypeError("Seed must be an integer") - MCG59_InitScalarSeed(&self.mcg59, self.q_ref, scalar_seed) - self.set_engine( &self.mcg59) + MCG59_InitScalarSeed( + &self.mcg59, self.q_ref, scalar_seed, + ) + self.set_engine( + &self.mcg59 + ) def __dealloc__(self): MCG59_Delete(&self.mcg59) @@ -504,49 +704,79 @@ cdef class MCG59(_Engine): max_val = dpnp.iinfo(numpy.uint64).max if isinstance(value, dpnp_array): - max_val = dpnp.array(max_val, dtype=numpy.uint64) + max_val = dpnp.array( + max_val, dtype=numpy.uint64 + ) return value <= max_val -cpdef utils.dpnp_descriptor dpnp_rng_beta(double a, double b, size): +cpdef utils.dpnp_descriptor dpnp_rng_beta( + double a, double b, size +): """ - Returns an array populated with samples from beta distribution. - `dpnp_rng_beta` generates a matrix filled with random floats sampled from a - univariate beta distribution. + Returns an array populated with samples from beta + distribution. + `dpnp_rng_beta` generates a matrix filled with random + floats sampled from a univariate beta distribution. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.float64) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_BETA_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_BETA_EXT, param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - cdef fptr_dpnp_rng_beta_c_1out_t func = kernel_data.ptr + cdef fptr_dpnp_rng_beta_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), a, b, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), a, b, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_binomial(int ntrial, double p, size): +cpdef utils.dpnp_descriptor dpnp_rng_binomial( + int ntrial, double p, size +): """ - Returns an array populated with samples from binomial distribution. - `dpnp_rng_binomial` generates a matrix filled with random floats sampled from a - univariate binomial distribution for a given number of independent trials and - success probability p of a single trial. + Returns an array populated with samples from binomial + distribution. + `dpnp_rng_binomial` generates a matrix filled with + random floats sampled from a univariate binomial + distribution for a given number of independent trials + and success probability p of a single trial. """ @@ -555,137 +785,237 @@ cpdef utils.dpnp_descriptor dpnp_rng_binomial(int ntrial, double p, size): cdef DPNPFuncData kernel_data cdef fptr_dpnp_rng_binomial_c_1out_t func - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_BINOMIAL_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_BINOMIAL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), ntrial, p, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), ntrial, p, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_chisquare(int df, size): +cpdef utils.dpnp_descriptor dpnp_rng_chisquare( + int df, size +): """ - Returns an array populated with samples from chi-square distribution. - `dpnp_rng_chisquare` generates a matrix filled with random floats sampled from a - univariate chi-square distribution for a given number of degrees of freedom. + Returns an array populated with samples from + chi-square distribution. + `dpnp_rng_chisquare` generates a matrix filled with + random floats sampled from a univariate chi-square + distribution for a given number of degrees of freedom. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.default_float_type()) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.default_float_type()) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_CHISQUARE_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_CHISQUARE_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - - cdef fptr_dpnp_rng_chisquare_c_1out_t func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_chisquare_c_1out_t func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), df, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), df, result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_exponential(double beta, size): +cpdef utils.dpnp_descriptor dpnp_rng_exponential( + double beta, size +): """ - Returns an array populated with samples from exponential distribution. - `dpnp_rng_exponential` generates a matrix filled with random floats sampled from a - univariate exponential distribution of `beta`. + Returns an array populated with samples from + exponential distribution. + `dpnp_rng_exponential` generates a matrix filled with + random floats sampled from a univariate exponential + distribution of `beta`. """ dtype = dpnp.float64 - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dtype) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_EXPONENTIAL_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_EXPONENTIAL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_rng_exponential_c_1out_t func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_exponential_c_1out_t func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), beta, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), beta, result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_f(double df_num, double df_den, size): +cpdef utils.dpnp_descriptor dpnp_rng_f( + double df_num, double df_den, size +): """ - Returns an array populated with samples from F distribution. - `dpnp_rng_f` generates a matrix filled with random floats sampled from a - univariate F distribution. + Returns an array populated with samples from + F distribution. + `dpnp_rng_f` generates a matrix filled with random + floats sampled from a univariate F distribution. """ dtype = dpnp.float64 - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dtype) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_F_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_F_EXT, param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - cdef fptr_dpnp_rng_f_c_1out_t func = kernel_data.ptr + cdef fptr_dpnp_rng_f_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), df_num, df_den, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), df_num, df_den, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_gamma(double shape, double scale, size): +cpdef utils.dpnp_descriptor dpnp_rng_gamma( + double shape, double scale, size +): """ - Returns an array populated with samples from gamma distribution. - `dpnp_rng_gamma` generates a matrix filled with random floats sampled from a - univariate gamma distribution of `shape` and `scale`. + Returns an array populated with samples from gamma + distribution. + `dpnp_rng_gamma` generates a matrix filled with random + floats sampled from a univariate gamma distribution of + `shape` and `scale`. """ @@ -696,32 +1026,54 @@ cpdef utils.dpnp_descriptor dpnp_rng_gamma(double shape, double scale, size): param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_GAMMA_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_GAMMA_EXT, param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - func = kernel_data.ptr + func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), shape, scale, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), shape, scale, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_geometric(float p, size): +cpdef utils.dpnp_descriptor dpnp_rng_geometric( + float p, size +): """ - Returns an array populated with samples from geometric distribution. - `dpnp_rng_geometric` generates a matrix filled with random floats sampled from a - univariate geometric distribution for a success probability p of a single + Returns an array populated with samples from geometric + distribution. + `dpnp_rng_geometric` generates a matrix filled with + random floats sampled from a univariate geometric + distribution for a success probability p of a single trial. """ @@ -731,36 +1083,60 @@ cpdef utils.dpnp_descriptor dpnp_rng_geometric(float p, size): cdef DPNPFuncData kernel_data cdef fptr_dpnp_rng_geometric_c_1out_t func - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_GEOMETRIC_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_GEOMETRIC_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), p, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), p, result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_gumbel(double loc, double scale, size): +cpdef utils.dpnp_descriptor dpnp_rng_gumbel( + double loc, double scale, size +): """ - Returns an array populated with samples from gumbel distribution. - `dpnp_rng_gumbel` generates a matrix filled with random floats sampled from a - univariate Gumbel distribution. + Returns an array populated with samples from gumbel + distribution. + `dpnp_rng_gumbel` generates a matrix filled with + random floats sampled from a univariate Gumbel + distribution. """ @@ -770,32 +1146,54 @@ cpdef utils.dpnp_descriptor dpnp_rng_gumbel(double loc, double scale, size): cdef fptr_dpnp_rng_gumbel_c_1out_t func param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_GUMBEL_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_GUMBEL_EXT, param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - func = kernel_data.ptr + func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), loc, scale, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), loc, scale, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_hypergeometric(int l, int s, int m, size): +cpdef utils.dpnp_descriptor dpnp_rng_hypergeometric( + int l, int s, int m, size +): """ - Returns an array populated with samples from hypergeometric distribution. - `dpnp_rng_hypergeometric` generates a matrix filled with random floats sampled from a - univariate hypergeometric distribution. + Returns an array populated with samples from + hypergeometric distribution. + `dpnp_rng_hypergeometric` generates a matrix filled + with random floats sampled from a univariate + hypergeometric distribution. """ @@ -804,36 +1202,61 @@ cpdef utils.dpnp_descriptor dpnp_rng_hypergeometric(int l, int s, int m, size): cdef DPNPFuncData kernel_data cdef fptr_dpnp_rng_hypergeometric_c_1out_t func - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_HYPERGEOMETRIC_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_HYPERGEOMETRIC_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), l, s, m, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), l, s, m, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_laplace(double loc, double scale, size): +cpdef utils.dpnp_descriptor dpnp_rng_laplace( + double loc, double scale, size +): """ - Returns an array populated with samples from laplace distribution. - `dpnp_rng_laplace` generates a matrix filled with random floats sampled from a - univariate laplace distribution. + Returns an array populated with samples from laplace + distribution. + `dpnp_rng_laplace` generates a matrix filled with + random floats sampled from a univariate laplace + distribution. """ @@ -842,69 +1265,121 @@ cpdef utils.dpnp_descriptor dpnp_rng_laplace(double loc, double scale, size): cdef DPNPFuncData kernel_data cdef fptr_dpnp_rng_laplace_c_1out_t func - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_LAPLACE_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_LAPLACE_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), loc, scale, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), loc, scale, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_logistic(double loc, double scale, size): +cpdef utils.dpnp_descriptor dpnp_rng_logistic( + double loc, double scale, size +): """ - Returns an array populated with samples from logistic distribution. - `dpnp_rng_logistic` generates a matrix filled with random floats sampled from a - univariate logistic distribution. + Returns an array populated with samples from logistic + distribution. + `dpnp_rng_logistic` generates a matrix filled with + random floats sampled from a univariate logistic + distribution. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.float64) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_LOGISTIC_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_LOGISTIC_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_rng_logistic_c_1out_t func = < fptr_dpnp_rng_logistic_c_1out_t > kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_logistic_c_1out_t func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), loc, scale, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), loc, scale, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_lognormal(double mean, double stddev, size): +cpdef utils.dpnp_descriptor dpnp_rng_lognormal( + double mean, double stddev, size +): """ - Returns an array populated with samples from lognormal distribution. - `dpnp_rng_lognormal` generates a matrix filled with random floats sampled from a - univariate lognormal distribution. + Returns an array populated with samples from lognormal + distribution. + `dpnp_rng_lognormal` generates a matrix filled with + random floats sampled from a univariate lognormal + distribution. """ @@ -913,38 +1388,64 @@ cpdef utils.dpnp_descriptor dpnp_rng_lognormal(double mean, double stddev, size) cdef DPNPFuncData kernel_data cdef fptr_dpnp_rng_lognormal_c_1out_t func - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_LOGNORMAL_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_LOGNORMAL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), mean, stddev, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), mean, stddev, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_multinomial(int ntrial, utils.dpnp_descriptor p, size): +cpdef utils.dpnp_descriptor dpnp_rng_multinomial( + int ntrial, utils.dpnp_descriptor p, size +): """ - Returns an array populated with samples from multinomial distribution. + Returns an array populated with samples from + multinomial distribution. - `dpnp_rng_multinomial` generates a matrix filled with random floats sampled from a - univariate multinomial distribution for a given number of independent trials and - probabilities of each of the ``p`` different outcome. + `dpnp_rng_multinomial` generates a matrix filled with + random floats sampled from a univariate multinomial + distribution for a given number of independent trials + and probabilities of each of the ``p`` different + outcome. """ @@ -955,49 +1456,74 @@ cpdef utils.dpnp_descriptor dpnp_rng_multinomial(int ntrial, utils.dpnp_descript cdef size_t p_size = p.size - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_MULTINOMIAL_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_MULTINOMIAL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) p_obj = p.get_array() - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=p_obj.device, - usm_type=p_obj.usm_type, - sycl_queue=p_obj.sycl_queue) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, + kernel_data.return_type, + None, + device=p_obj.device, + usm_type=p_obj.usm_type, + sycl_queue=p_obj.sycl_queue, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), ntrial, p.get_data(), p_size, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), ntrial, p.get_data(), + p_size, result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_multivariate_normal(utils.dpnp_descriptor mean, utils.dpnp_descriptor cov, size): +cpdef utils.dpnp_descriptor dpnp_rng_multivariate_normal( + utils.dpnp_descriptor mean, + utils.dpnp_descriptor cov, size +): """ - Returns an array populated with samples from multivariate normal distribution. - `dpnp_rng_multivariate_normal` generates a matrix filled with random floats sampled from a + Returns an array populated with samples from multivariate normal distribution. + `dpnp_rng_multivariate_normal` generates a matrix + filled with random floats sampled from a multivariate + normal distribution. """ dtype = dpnp.float64 - cdef int dimen cdef size_t mean_size cdef size_t cov_size @@ -1008,38 +1534,59 @@ cpdef utils.dpnp_descriptor dpnp_rng_multivariate_normal(utils.dpnp_descriptor m mean_size = mean.size cov_size = cov.size - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_MULTIVARIATE_NORMAL, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_MULTIVARIATE_NORMAL, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(mean, cov) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + result_sycl_device, result_usm_type, result_sycl_queue = ( + utils.get_common_usm_allocation(mean, cov) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, + kernel_data.return_type, + None, + device=result_sycl_device, + usm_type=result_usm_type, + sycl_queue=result_sycl_queue, + ) + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - func(result.get_data(), mean_size, mean.get_data(), mean_size, cov.get_data(), cov_size, result.size) + func( + result.get_data(), mean_size, mean.get_data(), + mean_size, cov.get_data(), cov_size, result.size, + ) return result -cpdef utils.dpnp_descriptor dpnp_rng_negative_binomial(double a, double p, size): + +cpdef utils.dpnp_descriptor dpnp_rng_negative_binomial( + double a, double p, size +): """ - Returns an array populated with samples from negative binomial distribution. + Returns an array populated with samples from negative + binomial distribution. - `negative_binomial` generates a matrix filled with random floats sampled from a - univariate negative binomial distribution for a given parameter of the distribution - `a` and success probability `p` of a single trial. + `negative_binomial` generates a matrix filled with + random floats sampled from a univariate negative + binomial distribution for a given parameter of the + distribution `a` and success probability `p` of a + single trial. """ @@ -1056,107 +1603,183 @@ cpdef utils.dpnp_descriptor dpnp_rng_negative_binomial(double a, double p, size) result_shape = utils._object_to_tuple(size) if p == 0.0: filled_val = dpnp.iinfo(dtype).min - return utils.dpnp_descriptor(dpnp.full(result_shape, filled_val, dtype=dtype)) + return utils.dpnp_descriptor( + dpnp.full(result_shape, filled_val, + dtype=dtype) + ) elif p == 1.0: - return utils.dpnp_descriptor(dpnp.full(result_shape, 0, dtype=dtype)) + return utils.dpnp_descriptor( + dpnp.full(result_shape, 0, dtype=dtype) + ) else: - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_NEGATIVE_BINOMIAL_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_NEGATIVE_BINOMIAL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + result = utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) result_sycl_queue = result.get_array().sycl_queue q = result_sycl_queue q_ref = q.get_queue_ref() - func = kernel_data.ptr + func = ( + + kernel_data.ptr + ) # call FPTR function - event_ref = func(q_ref, result.get_data(), a, p, result.size, NULL) + event_ref = func( + q_ref, result.get_data(), a, p, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_noncentral_chisquare(double df, double nonc, size): +cpdef utils.dpnp_descriptor dpnp_rng_noncentral_chisquare( + double df, double nonc, size +): """ - Returns an array populated with samples from noncentral chisquare distribution. - `dpnp_rng_noncentral_chisquare` generates a matrix filled with random floats sampled from a - univariate noncentral chisquare distribution. + Returns an array populated with samples from + noncentral chisquare distribution. + `dpnp_rng_noncentral_chisquare` generates a matrix + filled with random floats sampled from a univariate + noncentral chisquare distribution. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.float64) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_NONCENTRAL_CHISQUARE_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_NONCENTRAL_CHISQUARE_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_rng_noncentral_chisquare_c_1out_t func = < fptr_dpnp_rng_noncentral_chisquare_c_1out_t > kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_noncentral_chisquare_c_1out_t func + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), df, nonc, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), df, nonc, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_pareto(double alpha, size): +cpdef utils.dpnp_descriptor dpnp_rng_pareto( + double alpha, size +): """ - Returns an array populated with samples from Pareto distribution. - `dpnp_rng_pareto` generates a matrix filled with random floats sampled from a - univariate Pareto distribution of `alpha`. + Returns an array populated with samples from Pareto + distribution. + `dpnp_rng_pareto` generates a matrix filled with + random floats sampled from a univariate Pareto + distribution of `alpha`. """ dtype = dpnp.float64 - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dtype) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_PARETO_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_PARETO_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - cdef fptr_dpnp_rng_pareto_c_1out_t func = kernel_data.ptr + cdef fptr_dpnp_rng_pareto_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), alpha, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), alpha, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_poisson(double lam, size): +cpdef utils.dpnp_descriptor dpnp_rng_poisson( + double lam, size +): """ - Returns an array populated with samples from Poisson distribution. - `dpnp_rng_poisson` generates a matrix filled with random floats sampled from a - univariate Poisson distribution for a given number of independent trials and - success probability p of a single trial. + Returns an array populated with samples from Poisson + distribution. + `dpnp_rng_poisson` generates a matrix filled with + random floats sampled from a univariate Poisson + distribution for a given number of independent trials + and success probability p of a single trial. """ @@ -1172,70 +1795,115 @@ cpdef utils.dpnp_descriptor dpnp_rng_poisson(double lam, size): result_shape = utils._object_to_tuple(size) if lam == 0: - return utils.dpnp_descriptor(dpnp.full(result_shape, 0, dtype=dtype)) + return utils.dpnp_descriptor( + dpnp.full(result_shape, 0, dtype=dtype) + ) else: - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_POISSON_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_POISSON_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + result = utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) result_sycl_queue = result.get_array().sycl_queue q = result_sycl_queue q_ref = q.get_queue_ref() - func = kernel_data.ptr + func = ( + + kernel_data.ptr + ) # call FPTR function - event_ref = func(q_ref, result.get_data(), lam, result.size, NULL) + event_ref = func( + q_ref, result.get_data(), lam, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_power(double alpha, size): +cpdef utils.dpnp_descriptor dpnp_rng_power( + double alpha, size +): """ - Returns an array populated with samples from power distribution. - `dpnp_rng_power` generates a matrix filled with random floats sampled from a - univariate power distribution of `alpha`. + Returns an array populated with samples from power + distribution. + `dpnp_rng_power` generates a matrix filled with random + floats sampled from a univariate power distribution of + `alpha`. """ dtype = dpnp.float64 - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dtype) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_POWER_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_POWER_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - cdef fptr_dpnp_rng_power_c_1out_t func = kernel_data.ptr + cdef fptr_dpnp_rng_power_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), alpha, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), alpha, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_rayleigh(double scale, size): +cpdef utils.dpnp_descriptor dpnp_rng_rayleigh( + double scale, size +): """ - Returns an array populated with samples from Rayleigh distribution. - `dpnp_rayleigh` generates a matrix filled with random floats sampled from a - univariate Rayleigh distribution of `scale`. + Returns an array populated with samples from Rayleigh + distribution. + `dpnp_rayleigh` generates a matrix filled with random + floats sampled from a univariate Rayleigh distribution + of `scale`. """ @@ -1251,57 +1919,90 @@ cpdef utils.dpnp_descriptor dpnp_rng_rayleigh(double scale, size): result_shape = utils._object_to_tuple(size) if scale == 0.0: - return utils.dpnp_descriptor(dpnp.full(result_shape, 0.0, dtype=dtype)) + return utils.dpnp_descriptor( + dpnp.full(result_shape, 0.0, dtype=dtype) + ) else: - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_RAYLEIGH_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_RAYLEIGH_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + result = utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) result_sycl_queue = result.get_array().sycl_queue q = result_sycl_queue q_ref = q.get_queue_ref() - func = kernel_data.ptr + func = ( + + kernel_data.ptr + ) # call FPTR function - event_ref = func(q_ref, result.get_data(), scale, result.size, NULL) + event_ref = func( + q_ref, result.get_data(), scale, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_shuffle(utils.dpnp_descriptor x1): +cpdef utils.dpnp_descriptor dpnp_rng_shuffle( + utils.dpnp_descriptor x1 +): """ Modify a sequence in-place by shuffling its contents. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype.type) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(x1.dtype.type) + ) cdef size_t itemsize = x1.dtype.itemsize cdef size_t ndim = x1.ndim cdef size_t high_dim_size = x1.get_pyobj().size # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_SHUFFLE_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_SHUFFLE_EXT, + param1_type, param1_type, + ) x1_sycl_queue = x1.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = x1_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + x1_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - cdef fptr_dpnp_rng_shuffle_c_1out_t func = < fptr_dpnp_rng_shuffle_c_1out_t > kernel_data.ptr + cdef fptr_dpnp_rng_shuffle_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, x1.get_data(), itemsize, ndim, high_dim_size, x1.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, x1.get_data(), itemsize, ndim, + high_dim_size, x1.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return x1 @@ -1313,90 +2014,153 @@ cpdef dpnp_rng_srand(seed): """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.float64) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_SRAND, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_SRAND, param1_type, param1_type, + ) - cdef fptr_dpnp_rng_srand_c_1out_t func = < fptr_dpnp_rng_srand_c_1out_t > kernel_data.ptr + cdef fptr_dpnp_rng_srand_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function func(seed) -cpdef utils.dpnp_descriptor dpnp_rng_standard_cauchy(size): +cpdef utils.dpnp_descriptor dpnp_rng_standard_cauchy( + size, +): """ - Returns an array populated with samples from standard cauchy distribution. - `dpnp_standard_cauchy` generates a matrix filled with random floats sampled from a - univariate standard cauchy distribution. + Returns an array populated with samples from standard + cauchy distribution. + `dpnp_standard_cauchy` generates a matrix filled with + random floats sampled from a univariate standard cauchy + distribution. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.default_float_type()) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.default_float_type()) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_STANDARD_CAUCHY_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_STANDARD_CAUCHY_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_rng_standard_cauchy_c_1out_t func = < fptr_dpnp_rng_standard_cauchy_c_1out_t > kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_standard_cauchy_c_1out_t func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_standard_exponential(size): +cpdef utils.dpnp_descriptor dpnp_rng_standard_exponential( + size, +): """ - Returns an array populated with samples from standard exponential distribution. - `dpnp_standard_exponential` generates a matrix filled with random floats sampled from a - standard exponential distribution. + Returns an array populated with samples from standard + exponential distribution. + `dpnp_standard_exponential` generates a matrix filled + with random floats sampled from a standard exponential + distribution. """ cdef fptr_dpnp_rng_standard_exponential_c_1out_t func - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.default_float_type()) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.default_float_type()) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_STANDARD_EXPONENTIAL_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_STANDARD_EXPONENTIAL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = < fptr_dpnp_rng_standard_exponential_c_1out_t > kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_standard_gamma(double shape, size): +cpdef utils.dpnp_descriptor dpnp_rng_standard_gamma( + double shape, size +): """ - Returns an array populated with samples from standard gamma distribution. - `dpnp_standard_gamma` generates a matrix filled with random floats sampled from a - univariate standard gamma distribution. + Returns an array populated with samples from standard + gamma distribution. + `dpnp_standard_gamma` generates a matrix filled with + random floats sampled from a univariate standard gamma + distribution. """ @@ -1412,232 +2176,396 @@ cpdef utils.dpnp_descriptor dpnp_rng_standard_gamma(double shape, size): result_shape = utils._object_to_tuple(size) if shape == 0.0: - return utils.dpnp_descriptor(dpnp.full(result_shape, 0.0, dtype=dtype)) + return utils.dpnp_descriptor( + dpnp.full(result_shape, 0.0, dtype=dtype) + ) else: - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_STANDARD_GAMMA_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_STANDARD_GAMMA_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + result = utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) result_sycl_queue = result.get_array().sycl_queue q = result_sycl_queue q_ref = q.get_queue_ref() - func = kernel_data.ptr + func = ( + + kernel_data.ptr + ) # call FPTR function - event_ref = func(q_ref, result.get_data(), shape, result.size, NULL) + event_ref = func( + q_ref, result.get_data(), shape, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_standard_t(double df, size): +cpdef utils.dpnp_descriptor dpnp_rng_standard_t( + double df, size +): """ - Returns an array populated with samples from standard t distribution. - `dpnp_standard_t` generates a matrix filled with random floats sampled from a - univariate standard t distribution for a given number of degrees of freedom. + Returns an array populated with samples from standard + t distribution. + `dpnp_standard_t` generates a matrix filled with + random floats sampled from a univariate standard t + distribution for a given number of degrees of freedom. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.float64) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_STANDARD_T_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_STANDARD_T_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_rng_standard_t_c_1out_t func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_standard_t_c_1out_t func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), df, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), df, result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_triangular(double left, double mode, double right, size): +cpdef utils.dpnp_descriptor dpnp_rng_triangular( + double left, double mode, double right, size +): """ - Returns an array populated with samples from triangular distribution. - `dpnp_rng_triangular` generates a matrix filled with random floats sampled from a - univariate triangular distribution. + Returns an array populated with samples from + triangular distribution. + `dpnp_rng_triangular` generates a matrix filled with + random floats sampled from a univariate triangular + distribution. """ dtype = dpnp.float64 - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dtype) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_TRIANGULAR_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_TRIANGULAR_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_rng_triangular_c_1out_t func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_triangular_c_1out_t func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), left, mode, right, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), left, mode, right, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_vonmises(double mu, double kappa, size): +cpdef utils.dpnp_descriptor dpnp_rng_vonmises( + double mu, double kappa, size +): """ - Returns an array populated with samples from Vonmises distribution. - `dpnp_rng_vonmises` generates a matrix filled with random floats sampled from a - univariate Vonmises distribution. + Returns an array populated with samples from Vonmises + distribution. + `dpnp_rng_vonmises` generates a matrix filled with + random floats sampled from a univariate Vonmises + distribution. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.float64) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_VONMISES_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_VONMISES_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_rng_vonmises_c_1out_t func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + cdef fptr_dpnp_rng_vonmises_c_1out_t func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), mu, kappa, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), mu, kappa, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_wald(double mean, double scale, size): +cpdef utils.dpnp_descriptor dpnp_rng_wald( + double mean, double scale, size +): """ - Returns an array populated with samples from Wald's distribution. - `dpnp_rng_wald` generates a matrix filled with random floats sampled from a - univariate Wald's distribution. + Returns an array populated with samples from Wald's + distribution. + `dpnp_rng_wald` generates a matrix filled with random + floats sampled from a univariate Wald's distribution. """ dtype = dpnp.float64 - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dtype) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_WALD_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_WALD_EXT, param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - cdef fptr_dpnp_rng_wald_c_1out_t func = kernel_data.ptr + cdef fptr_dpnp_rng_wald_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), mean, scale, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), mean, scale, + result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_weibull(double a, size): +cpdef utils.dpnp_descriptor dpnp_rng_weibull( + double a, size +): """ - Returns an array populated with samples from weibull distribution. - `dpnp_weibull` generates a matrix filled with random floats sampled from a - univariate weibull distribution. + Returns an array populated with samples from weibull + distribution. + `dpnp_weibull` generates a matrix filled with random + floats sampled from a univariate weibull distribution. """ - dtype = dpnp.float64 cdef DPNPFuncType param1_type cdef DPNPFuncData kernel_data cdef fptr_dpnp_rng_weibull_c_1out_t func - # convert string type names (array.dtype) to C enum DPNPFuncType + # convert string type names (array.dtype) to + # C enum DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) # get the FPTR data structure - kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_WEIBULL_EXT, param1_type, param1_type) + kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_WEIBULL_EXT, + param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - func = kernel_data.ptr + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) + + func = ( + + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), a, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), a, result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result -cpdef utils.dpnp_descriptor dpnp_rng_zipf(double a, size): +cpdef utils.dpnp_descriptor dpnp_rng_zipf( + double a, size +): """ - Returns an array populated with samples from Zipf distribution. - `dpnp_rng_zipf` generates a matrix filled with random floats sampled from a - univariate Zipf distribution. + Returns an array populated with samples from Zipf + distribution. + `dpnp_rng_zipf` generates a matrix filled with random + floats sampled from a univariate Zipf distribution. """ - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64) + # convert string type names (array.dtype) to + # C enum DPNPFuncType + cdef DPNPFuncType param1_type = ( + dpnp_dtype_to_DPNPFuncType(dpnp.float64) + ) # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_ZIPF_EXT, param1_type, param1_type) + cdef DPNPFuncData kernel_data = get_dpnp_function_ptr( + DPNP_FN_RNG_ZIPF_EXT, param1_type, param1_type, + ) # create result array with type given by FPTR data - cdef shape_type_c result_shape = utils._object_to_tuple(size) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, kernel_data.return_type, None) + cdef shape_type_c result_shape = ( + utils._object_to_tuple(size) + ) + cdef utils.dpnp_descriptor result = ( + utils.create_output_descriptor( + result_shape, kernel_data.return_type, None, + ) + ) result_sycl_queue = result.get_array().sycl_queue - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() + cdef c_dpctl.SyclQueue q = ( + result_sycl_queue + ) + cdef c_dpctl.DPCTLSyclQueueRef q_ref = ( + q.get_queue_ref() + ) - cdef fptr_dpnp_rng_zipf_c_1out_t func = kernel_data.ptr + cdef fptr_dpnp_rng_zipf_c_1out_t func = ( + kernel_data.ptr + ) # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), a, result.size, NULL) + cdef c_dpctl.DPCTLSyclEventRef event_ref = func( + q_ref, result.get_data(), a, result.size, NULL, + ) - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) + with nogil: + c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) c_dpctl.DPCTLEvent_Delete(event_ref) return result diff --git a/dpnp/tensor/_compute_follows_data.pyx b/dpnp/tensor/_compute_follows_data.pyx index 70e6bdfaeb7..c5028c55090 100644 --- a/dpnp/tensor/_compute_follows_data.pyx +++ b/dpnp/tensor/_compute_follows_data.pyx @@ -32,8 +32,8 @@ """Compute-follows-data utilities for execution queue and USM type management. -This module provides utilities to determine execution placement and USM allocation -types when combining arrays under the compute-follows-data paradigm. +This module provides utilities to determine execution placement and USM +allocation types when combining arrays under the compute-follows-data paradigm. """ diff --git a/pyproject.toml b/pyproject.toml index 02567d2f25a..773d3cb4590 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,6 +130,9 @@ source = [ "dpnp" ] +[tool.cython-lint] +max-line-length = 80 + [tool.isort] ensure_newline_before_comments = true force_grid_wrap = 0 From 4d566109c28f8bcc238beccf46ca189d79a8520b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 23 May 2026 13:40:07 +0200 Subject: [PATCH 35/41] Weekly pre-commit autoupdate (#2931) This PR updates the `.pre-commit-config.yaml` using `pre-commit autoupdate`. --- .pre-commit-config.yaml | 4 ++-- CMakeLists.txt | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c4a900b477b..1e357c90d07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -64,7 +64,7 @@ repos: additional_dependencies: - tomli - repo: https://github.com/psf/black - rev: 26.3.1 + rev: 26.5.1 hooks: - id: black exclude: "dpnp/_version.py" @@ -128,7 +128,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/BlankSpruce/gersemi-pre-commit - rev: 0.27.5 + rev: 0.27.6 hooks: - id: gersemi exclude: "dpnp/backend/cmake/Modules/" diff --git a/CMakeLists.txt b/CMakeLists.txt index 12de8bca3c6..0d4c677dd4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,8 +247,7 @@ if(_use_onemath) FetchContent_Declare( onemath_library GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git - GIT_TAG - 6ff3a43e555dbb20357017d48f0f6c6263259895 # v0.9 + GIT_TAG 6ff3a43e555dbb20357017d48f0f6c6263259895 # v0.9 ) endif() From 92d2c81563dc29e4ff25d00c6a162bc8c74147a0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 23 May 2026 23:16:01 +0200 Subject: [PATCH 36/41] Bump github/codeql-action from 4.35.5 to 4.36.0 (#2932) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.35.5 to 4.36.0. --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 43d019a3063..8a062e50eac 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5 + uses: github/codeql-action/upload-sarif@7211b7c8077ea37d8641b6271f6a365a22a5fbfa # v4.36.0 with: sarif_file: results.sarif From 0669890679b1040b3f707e766a2f0dae874db91f Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Sun, 24 May 2026 02:47:11 -0700 Subject: [PATCH 37/41] Add boolean mask validation when slicing (#2929) This PR proposes the addition of validation for the shape of masks when slicing `tensor.usm_ndarray`s and `dpnp_array`s. This aligns with NumPy for specific edge cases, such as: ``` import dpnp.tensor as dpt x = dpt.reshape(dpt.arange(3**5, dtype="i4"), (3,) * 5) ii = dpt.asarray(1) i0 = dpt.asarray(0, dtype='?') i1 = dpt.asarray(0, dtype='?') x[ii, i0[dpt.newaxis], ii, i1[dpt.newaxis], :] ``` which previously would give `usm_ndarray([], dtype=int32)`, but now raises an error, which aligns with NumPy. --- CHANGELOG.md | 1 + dpnp/tensor/_slicing.pxi | 13 +++++++++++++ dpnp/tests/tensor/test_usm_ndarray_indexing.py | 10 ++++++++++ 3 files changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a738267346..77ca4c26bf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ This release is compatible with NumPy 2.4.5. * Fixed incorrect `dpnp.tensor.acosh` result for `complex(±0, NaN)` special case to match the Python Array API specification [#2914](https://github.com/IntelPython/dpnp/pull/2914) * Fixed fork PR documentation workflow failures by implementing conditional publishing strategy: upstream PRs publish to GitHub Pages with comment, fork PRs upload artifacts [#2910](https://github.com/IntelPython/dpnp/pull/2910) * Fixed missing `libtensor` headers in the installed `dpnp` package [#2915](https://github.com/IntelPython/dpnp/pull/2915) +* Fixed boolean mask indexing to raise `IndexError` when mask dimensions don't match the indexed array dimensions, aligning with NumPy behavior. Previously, incompatible boolean masks silently returned incorrect results instead of raising an error [#2929](https://github.com/IntelPython/dpnp/pull/2929) ### Security diff --git a/dpnp/tensor/_slicing.pxi b/dpnp/tensor/_slicing.pxi index 2f22894c4b1..9bfcee64f2b 100644 --- a/dpnp/tensor/_slicing.pxi +++ b/dpnp/tensor/_slicing.pxi @@ -107,6 +107,18 @@ cdef bint _is_boolean(object x) except *: return False +cdef _check_mask_shape(sh : tuple, ma_sh : tuple, Py_ssize_t axis): + cdef Py_ssize_t i, sh_i, ma_i + for i, ma_i in enumerate(ma_sh): + sh_i = sh[axis + i] + if ma_i not in (0, sh_i): + raise IndexError( + "boolean index did not match indexed array along dimension " + f"{axis + i}; dimension is {sh_i} but corresponding boolean " + f"dimension is {ma_i}" + ) + + def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): """ Give basic slicing index `ind` and array layout information produce @@ -353,6 +365,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_advanced_ind.append(ind_i) dt_k = ind_i.dtype.kind if dt_k == "b": + _check_mask_shape(shape, ind_i.shape, k) k_new = k + ind_i.ndim else: k_new = k + 1 diff --git a/dpnp/tests/tensor/test_usm_ndarray_indexing.py b/dpnp/tests/tensor/test_usm_ndarray_indexing.py index b81e5456872..5f8d208764d 100644 --- a/dpnp/tests/tensor/test_usm_ndarray_indexing.py +++ b/dpnp/tests/tensor/test_usm_ndarray_indexing.py @@ -2052,3 +2052,13 @@ def test_getitem_impl_fn_invalid_inp(): no_array_inds = (2, 3) with pytest.raises(TypeError): _take_multi_index(x, no_array_inds, 0, 0) + + +def test_boolean_mask_validation(): + x = dpt.reshape(dpt.arange(3**5, dtype="i4"), (3,) * 5) + ii = dpt.asarray(1) + i0 = dpt.asarray(0, dtype="?") + i1 = dpt.asarray(0, dtype="?") + + with pytest.raises(IndexError): + x[ii, i0[dpt.newaxis], ii, i1[dpt.newaxis], :] From 96397b2e1b3127206f5142b8ce54c77a72db30cc Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Sun, 24 May 2026 06:49:15 -0700 Subject: [PATCH 38/41] Resolve gh-2882 (#2930) This PR adds a fix for gh-2882 When floats were previously converted by `convert_impl` to unsigned integral types, they would pass through an equally sized, signed integral type If the float was in bounds for the unsigned integral type but out of bounds for the signed type, however, undefined behavior could be encountered. This PR proposes a solution of casting to a sufficiently large intermediate type first, before casting into the unsigned integral type. --- CHANGELOG.md | 1 + dpnp/tensor/libtensor/include/utils/type_utils.hpp | 11 ++++++++--- dpnp/tests/tensor/test_usm_ndarray_ctor.py | 9 +++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77ca4c26bf9..d5577e47ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ This release is compatible with NumPy 2.4.5. * Fixed fork PR documentation workflow failures by implementing conditional publishing strategy: upstream PRs publish to GitHub Pages with comment, fork PRs upload artifacts [#2910](https://github.com/IntelPython/dpnp/pull/2910) * Fixed missing `libtensor` headers in the installed `dpnp` package [#2915](https://github.com/IntelPython/dpnp/pull/2915) * Fixed boolean mask indexing to raise `IndexError` when mask dimensions don't match the indexed array dimensions, aligning with NumPy behavior. Previously, incompatible boolean masks silently returned incorrect results instead of raising an error [#2929](https://github.com/IntelPython/dpnp/pull/2929) +* Fixed a bug in `astype` where casting floating point types to unsigned integral types could cause an intermediate signed integral type to overflow, leading to incorrect results [#2930](https://github.com/IntelPython/dpnp/pull/2930) ### Security diff --git a/dpnp/tensor/libtensor/include/utils/type_utils.hpp b/dpnp/tensor/libtensor/include/utils/type_utils.hpp index 49353310686..bb83c210b9f 100644 --- a/dpnp/tensor/libtensor/include/utils/type_utils.hpp +++ b/dpnp/tensor/libtensor/include/utils/type_utils.hpp @@ -99,9 +99,14 @@ dstTy convert_impl(const srcTy &v) else if constexpr (!std::is_integral_v && !std::is_same_v && std::is_integral_v && std::is_unsigned_v) { - // first cast to signed variant, the cast to unsigned one - using signedT = typename std::make_signed_t; - return static_cast(convert_impl(v)); + // for negative values, cast through signed integer to get two's + // complement wrapping + using intermediateT = + std::conditional_t; + return (v < srcTy{0}) + ? static_cast(static_cast(v)) + : static_cast(v); } else { return static_cast(v); diff --git a/dpnp/tests/tensor/test_usm_ndarray_ctor.py b/dpnp/tests/tensor/test_usm_ndarray_ctor.py index b03a01ad370..59cb370118d 100644 --- a/dpnp/tests/tensor/test_usm_ndarray_ctor.py +++ b/dpnp/tests/tensor/test_usm_ndarray_ctor.py @@ -1100,6 +1100,15 @@ def test_astype_gh_2121(): assert dpt.all(res == expected) +def test_astype_gh_2882(): + get_queue_or_skip() + + x = dpt.asarray([160.0, 120.0], dtype="f4") + r = dpt.astype(x, dpt.uint8) + expected = dpt.asarray([160, 120], dtype="u1") + assert dpt.all(r == expected) + + def test_copy(): try: X = dpt.usm_ndarray((5, 5), "i4")[2:4, 1:4] From def2ea146fd485b488ca631f1332b14230cc2fb7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 May 2026 15:15:36 +0200 Subject: [PATCH 39/41] Bump actions/upload-artifact from 4.3.3 to 7.0.1 (#2933) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.3 to 7.0.1. --- .github/workflows/build-sphinx.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 864f25b6708..07a228c8243 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -197,7 +197,7 @@ jobs: # Upload artifact for fork PRs - name: Upload docs artifact (Fork PRs) if: env.GH_EVENT_PR_OPEN == 'true' && steps.check_fork.outputs.is_fork == 'true' - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: pr-${{ github.event.number }}-docs path: ${{ env.PUBLISH_DIR }} From 87510bbdbc51cfe27c29cc484e06a8f025cdae6a Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Wed, 27 May 2026 13:56:25 +0200 Subject: [PATCH 40/41] Fix `dpnp.tensor.expm1` handling of complex(+-0, 0) (#2926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR proposes to fix issue #2878 where `dpnp.tensor.expm1(complex(-0.0, 0.0))` returned `-0.0 + 0.0j` on CPU instead of `0.0 + 0.0j` as required by the Array API specification The fix adds an explicit special-case branch for `complex(±0, 0)` in `expm1.hpp` so CPU and GPU behavior are now consistent and compliant with the specification. Also `test_expm1_special_cases` test was extended to cover `complex(-0.0, 0.0)` --- CHANGELOG.md | 1 + .../include/kernels/elementwise_functions/expm1.hpp | 4 ++++ dpnp/tests/tensor/elementwise/test_expm1.py | 10 +++++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5577e47ddd..12263e428cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ This release is compatible with NumPy 2.4.5. * Fixed missing `libtensor` headers in the installed `dpnp` package [#2915](https://github.com/IntelPython/dpnp/pull/2915) * Fixed boolean mask indexing to raise `IndexError` when mask dimensions don't match the indexed array dimensions, aligning with NumPy behavior. Previously, incompatible boolean masks silently returned incorrect results instead of raising an error [#2929](https://github.com/IntelPython/dpnp/pull/2929) * Fixed a bug in `astype` where casting floating point types to unsigned integral types could cause an intermediate signed integral type to overflow, leading to incorrect results [#2930](https://github.com/IntelPython/dpnp/pull/2930) +* Fixed incorrect `dpnp.tensor.expm1` result for `complex(±0, 0)` special case on CPU to match the Python Array API specification [#2926](https://github.com/IntelPython/dpnp/pull/2926) ### Security diff --git a/dpnp/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpnp/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp index e9e2a704cf0..735cb1fada5 100644 --- a/dpnp/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp +++ b/dpnp/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp @@ -121,6 +121,10 @@ struct Expm1Functor } } + if (x == realT(0) && y == realT(0)) { + return resT{realT(0), y}; + } + // x, y finite numbers const realT cosY_val = sycl::cos(y); const realT sinY_val = (y == 0) ? y : sycl::sin(y); diff --git a/dpnp/tests/tensor/elementwise/test_expm1.py b/dpnp/tests/tensor/elementwise/test_expm1.py index bb665c42456..26e14840177 100644 --- a/dpnp/tests/tensor/elementwise/test_expm1.py +++ b/dpnp/tests/tensor/elementwise/test_expm1.py @@ -147,6 +147,7 @@ def test_expm1_special_cases(): num_finite = 1.0 vals = [ complex(0.0, 0.0), + complex(-0.0, 0.0), complex(num_finite, dpt.inf), complex(num_finite, dpt.nan), complex(dpt.inf, 0.0), @@ -165,6 +166,7 @@ def test_expm1_special_cases(): c_nan = complex(np.nan, np.nan) res = np.asarray( [ + complex(0.0, 0.0), complex(0.0, 0.0), c_nan, c_nan, @@ -184,4 +186,10 @@ def test_expm1_special_cases(): tol = dpt.finfo(X.dtype).resolution with np.errstate(invalid="ignore"): - assert_allclose(dpt.asnumpy(dpt.expm1(X)), res, atol=tol, rtol=tol) + Y = dpt.asnumpy(dpt.expm1(X)) + assert_allclose(Y, res, atol=tol, rtol=tol) + + # assert_allclose treats +0 == -0 + # verify sign bits for zero real parts + for i in (0, 1): + assert not np.signbit(Y[i].real) From a8eca1d437d41e9ea793a8b9a00ebb3f0097cafe Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Thu, 28 May 2026 21:03:30 +0200 Subject: [PATCH 41/41] Add `--includes` and` --include-dir` to dpnp CLI (#2916) This PR adds `--includes` and `--include-dir` options to `dpnp` CLI (`python -m dpnp`) These options return the path to `dpnp/backend` include directory containing `dpnp4pybind11.hpp` --- CHANGELOG.md | 1 + doc/quick_start_guide.rst | 17 +++++++++++++++++ dpnp/__main__.py | 2 +- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12263e428cf..46e8efd99b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ This release is compatible with NumPy 2.4.5. * Added C API functions for `dpnp.tensor.usm_ndarray` setters and getters to avoid ABI breakage if `dpnp.tensor.usm_ndarray` is modified [gh-2866](https://github.com/IntelPython/dpnp/pull/2866) * Added support for buffer protocol objects as advanced index keys in `dpnp.ndarray` [#2889](https://github.com/IntelPython/dpnp/pull/2889) +* Added `--includes` and `--include-dir` options to the `dpnp` CLI [#2916](https://github.com/IntelPython/dpnp/pull/2916) ### Changed diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index b383be91049..04d7af29f28 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -215,6 +215,23 @@ devices at the same time: python scripts/build_locally.py --target-cuda --target-hip=gfx90a +Command-Line Interface +====================== + +The ``python -m dpnp`` command provides options to query the include paths +needed when building C++ extensions with dpnp: + +.. code-block:: bash + + python -m dpnp --includes # print -I flag for dpnp include directory + python -m dpnp --include-dir # print path to dpnp include directory + python -m dpnp --tensor-includes # print -I flag for libtensor include directory + python -m dpnp --tensor-include-dir # print path to libtensor include directory + +These options are useful when building pybind11 extensions that use +``dpnp4pybind11.hpp`` or libtensor kernel headers. + + Testing ======= diff --git a/dpnp/__main__.py b/dpnp/__main__.py index 349ffcd472f..4368144a02e 100644 --- a/dpnp/__main__.py +++ b/dpnp/__main__.py @@ -50,7 +50,7 @@ def print_include_flags() -> None: def get_tensor_include_dir() -> str: - """Prints path to dpnp libtensor include directory""" + """Returns path to dpnp libtensor include directory""" libtensor_dir = os.path.join(_dpnp_dir(), "tensor", "libtensor", "include") return libtensor_dir