diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index 31423357e3f3..05f5b2711e24 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -26,7 +26,7 @@ jobs:
name: Build and Deploy Docs
runs-on: ubuntu-22.04
- timeout-minutes: 60
+ timeout-minutes: 90
permissions:
# Needed to cancel any previous runs that are not completed for a given workflow
diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml
index bead55d5f495..3ad8ba1ee84e 100644
--- a/.github/workflows/check-onemath.yaml
+++ b/.github/workflows/check-onemath.yaml
@@ -74,7 +74,7 @@ jobs:
os: [ubuntu-22.04] # windows-2022 - no DFT support for Windows in oneMKL
runs-on: ${{ matrix.os }}
- timeout-minutes: 60
+ timeout-minutes: 120
defaults:
run:
@@ -133,6 +133,14 @@ jobs:
if: env.rerun-tests-on-failure != 'true'
run: |
python -m pytest -ra --pyargs dpnp.tests
+ env:
+ SKIP_TENSOR_TESTS: 1
+ SYCL_CACHE_PERSISTENT: 1
+
+ - name: Run tensor tests
+ if: env.rerun-tests-on-failure != 'true'
+ run: |
+ python -m pytest -ra --pyargs dpnp.tests.tensor
env:
SYCL_CACHE_PERSISTENT: 1
@@ -150,6 +158,24 @@ jobs:
mamba activate ${{ env.test-env-name }}
python -m pytest -ra --pyargs dpnp.tests
+ env:
+ SKIP_TENSOR_TESTS: 1
+ SYCL_CACHE_PERSISTENT: 1
+
+ - name: ReRun tensor tests on Linux
+ if: env.rerun-tests-on-failure == 'true'
+ id: run_tensor_tests
+ uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2
+ with:
+ timeout_minutes: ${{ env.rerun-tests-timeout }}
+ max_attempts: ${{ env.rerun-tests-max-attempts }}
+ retry_on: any
+ command: |
+ . $CONDA/etc/profile.d/conda.sh
+ . $CONDA/etc/profile.d/mamba.sh
+ mamba activate ${{ env.test-env-name }}
+
+ python -m pytest -ra --pyargs dpnp.tests.tensor
env:
SYCL_CACHE_PERSISTENT: 1
@@ -239,6 +265,14 @@ jobs:
if: env.rerun-tests-on-failure != 'true'
run: |
python -m pytest -ra --pyargs dpnp.tests
+ env:
+ SKIP_TENSOR_TESTS: 1
+ SYCL_CACHE_PERSISTENT: 1
+
+ - name: Run tensor tests
+ if: env.rerun-tests-on-failure != 'true'
+ run: |
+ python -m pytest -ra --pyargs dpnp.tests.tensor
env:
SYCL_CACHE_PERSISTENT: 1
@@ -256,5 +290,23 @@ jobs:
mamba activate ${{ env.test-env-name }}
python -m pytest -ra --pyargs dpnp.tests
+ env:
+ SKIP_TENSOR_TESTS: 1
+ SYCL_CACHE_PERSISTENT: 1
+
+ - name: ReRun tensor tests on Linux
+ if: env.rerun-tests-on-failure == 'true'
+ id: run_tensor_tests_branch
+ uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2
+ with:
+ timeout_minutes: ${{ env.rerun-tests-timeout }}
+ max_attempts: ${{ env.rerun-tests-max-attempts }}
+ retry_on: any
+ command: |
+ . $CONDA/etc/profile.d/conda.sh
+ . $CONDA/etc/profile.d/mamba.sh
+ mamba activate ${{ env.test-env-name }}
+
+ python -m pytest -ra --pyargs dpnp.tests.tensor
env:
SYCL_CACHE_PERSISTENT: 1
diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 886204654a98..afd34ee00543 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -37,7 +37,7 @@ jobs:
actions: write
runs-on: ${{ matrix.os }}
- timeout-minutes: 60
+ timeout-minutes: 90
defaults:
run:
@@ -220,6 +220,7 @@ jobs:
- name: Run tests
if: env.rerun-tests-on-failure != 'true'
run: |
+ export SKIP_TENSOR_TESTS=1
if [[ "${{ matrix.python }}" == "${{ env.python-ver-test-all-dtypes }}" ]]; then
export DPNP_TEST_ALL_INT_TYPES=1
python -m pytest -ra --pyargs ${{ env.package-name }}.tests
@@ -239,6 +240,7 @@ jobs:
. $CONDA/etc/profile.d/conda.sh
. $CONDA/etc/profile.d/mamba.sh
mamba activate ${{ env.test-env-name }}
+ export SKIP_TENSOR_TESTS=1
if [[ "${{ matrix.python }}" == "${{ env.python-ver-test-all-dtypes }}" ]]; then
export DPNP_TEST_ALL_INT_TYPES=1
@@ -247,6 +249,26 @@ jobs:
python -m pytest -n auto -ra --pyargs ${{ env.package-name }}.tests
fi
+ - name: Run tensor tests
+ if: env.rerun-tests-on-failure != 'true'
+ run: |
+ python -m pytest -n auto -ra --pyargs dpnp.tests.tensor
+
+ - name: Run tensor tests
+ if: env.rerun-tests-on-failure == 'true'
+ id: run_tests_tensor_linux
+ uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2
+ with:
+ timeout_minutes: ${{ env.rerun-tests-timeout }}
+ max_attempts: ${{ env.rerun-tests-max-attempts }}
+ retry_on: any
+ command: |
+ . $CONDA/etc/profile.d/conda.sh
+ . $CONDA/etc/profile.d/mamba.sh
+ mamba activate ${{ env.test-env-name }}
+
+ python -m pytest -n auto -ra --pyargs dpnp.tests.tensor
+
test_windows:
name: Test
@@ -382,6 +404,7 @@ jobs:
if: env.rerun-tests-on-failure != 'true'
shell: pwsh
run: |
+ $env:SKIP_TENSOR_TESTS=1
if (${{ matrix.python }} -eq ${{ env.python-ver-test-all-dtypes }}) {
$env:DPNP_TEST_ALL_INT_TYPES=1
python -m pytest -ra --pyargs ${{ env.package-name }}.tests
@@ -399,6 +422,7 @@ jobs:
retry_on: any
shell: pwsh
command: |
+ $env:SKIP_TENSOR_TESTS=1
if ( ${{ matrix.python }} -eq ${{ env.python-ver-test-all-dtypes }} ) {
$env:DPNP_TEST_ALL_INT_TYPES=1
python -m pytest -ra --pyargs ${{ env.package-name }}.tests
@@ -406,6 +430,24 @@ jobs:
python -m pytest -n auto -ra --pyargs ${{ env.package-name }}.tests
}
+ - name: Run tensor tests
+ if: env.rerun-tests-on-failure != 'true'
+ shell: pwsh
+ run: |
+ python -m pytest -n auto -ra --pyargs dpnp.tests.tensor
+
+ - name: Run tensor tests
+ if: env.rerun-tests-on-failure == 'true'
+ id: run_tests_tensor_win
+ uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2
+ with:
+ timeout_minutes: ${{ env.rerun-tests-timeout }}
+ max_attempts: ${{ env.rerun-tests-max-attempts }}
+ retry_on: any
+ shell: pwsh
+ command: |
+ python -m pytest -n auto -ra --pyargs dpnp.tests.tensor
+
upload:
name: Upload
diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 5fd211e55a81..3d5d34531adf 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -11,7 +11,7 @@ jobs:
name: Generate coverage and push to Coveralls.io
runs-on: ubuntu-latest
- timeout-minutes: 120
+ timeout-minutes: 150
permissions:
# Needed to cancel any previous runs that are not completed for a given workflow
@@ -122,7 +122,7 @@ jobs:
uses: nick-fields/retry@ad984534de44a9489a53aefd81eb77f87c70dc60 # v4.0.0
with:
shell: bash
- timeout_minutes: 60
+ timeout_minutes: 120
max_attempts: 5
retry_on: error
command: |
@@ -130,6 +130,7 @@ jobs:
conda activate coverage
[ -f /opt/intel/oneapi/setvars.sh ] && source /opt/intel/oneapi/setvars.sh
git clean -fxd
+ export SKIP_TENSOR_TESTS=1
python scripts/gen_coverage.py
- name: Total number of coverage attempts
diff --git a/.gitignore b/.gitignore
index 5d2725d3186f..f66bfbb3fdd8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ dpnp_pytest.*
example3
*dpnp_backend*
+dpnp/include/dpnp/tensor/*.h
dpnp/**/*.cpython*.so
dpnp/**/*.pyd
*~
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f8aaae542ec5..b2590b227e5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [0.20.0] - MM/DD/2026
+This release introduces a major architectural change: the Array API-compliant tensor implementation has been migrated from `dpctl.tensor` into `dpnp.tensor`, simplifying maintenance, reducing cross-project dependencies, and allowing the tensor implementation to evolve within `dpnp`.
This release changes the license from `BSD-2-Clause` to `BSD-3-Clause`.
This release achieves `dpnp` compatibility with Python 3.14 and enables distributing `dpnp` packages with the latest Python version.
Also, that release drops support for Python 3.9, making Python 3.10 the minimum required version.
@@ -28,6 +29,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
* Added implementation of `dpnp.isin` function [#2595](https://github.com/IntelPython/dpnp/pull/2595)
* Added implementation of `dpnp.scipy.linalg.lu` (SciPy-compatible) [#2787](https://github.com/IntelPython/dpnp/pull/2787)
* Added support for ndarray subclassing via `dpnp.ndarray.view` method with `type` parameter [#2815](https://github.com/IntelPython/dpnp/issues/2815)
+* Migrated tensor implementation from `dpctl.tensor` into `dpnp.tensor`, making `dpnp` the primary owner of the Array API-compliant tensor layer [#2856](https://github.com/IntelPython/dpnp/pull/2856)
### Changed
@@ -57,6 +59,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
* Updated QR tests to avoid element-wise comparisons for `raw` and `r` modes [#2785](https://github.com/IntelPython/dpnp/pull/2785)
* Moved all SYCL kernel functors from `backend/extensions/` to a unified `backend/kernels/` directory hierarchy [#2816](https://github.com/IntelPython/dpnp/pull/2816)
* `dpnp` uses pybind11 3.0.3 [#2834](https://github.com/IntelPython/dpnp/pull/2834)
+* Disabled `dpnp.tensor` tests by default in `conda build --test` to prevent OOM failures during package testing. Set `SKIP_TENSOR_TESTS=0` to re-enable them on systems with enough memory [#2860](https://github.com/IntelPython/dpnp/pull/2860)
### Deprecated
@@ -84,6 +87,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
* Resolved an issue with strides calculation in `dpnp.diagonal` to return correct values for empty diagonals [#2814](https://github.com/IntelPython/dpnp/pull/2814)
* Fixed test tolerance issues for float16 intermediate precision that became visible when testing against conda-forge's NumPy [#2828](https://github.com/IntelPython/dpnp/pull/2828)
* Ensured device aware dtype handling in `dpnp.identity` and `dpnp.gradient` [#2835](https://github.com/IntelPython/dpnp/pull/2835)
+* Fixed `dpnp.tensor.round` to use device-aware output dtype for boolean input [#2851](https://github.com/IntelPython/dpnp/pull/2851)
### Security
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 129bf1d87c25..b5c1068c1677 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,12 +37,23 @@ project(
)
option(DPNP_GENERATE_COVERAGE "Enable build DPNP with coverage instrumentation" OFF)
+option(
+ DPNP_TENSOR_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS
+ "Build dpnp tensor pybind11 offloading extensions with coverage instrumentation"
+ OFF
+)
option(DPNP_BACKEND_TESTS "Enable building of DPNP backend test suite" OFF)
option(
DPNP_WITH_REDIST
"Build DPNP assuming DPC++ redistributable is installed into Python prefix"
OFF
)
+option(
+ DPNP_TENSOR_OFFLOAD_COMPRESS
+ "Build dpnp tensor using offload section compression feature of DPC++ to reduce \
+size of shared object with offloading sections"
+ OFF
+)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)
@@ -106,7 +117,6 @@ find_package(Cython REQUIRED)
find_package(Dpctl REQUIRED)
message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
-message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
option(DPNP_USE_ONEMATH "Build DPNP with oneMath" OFF)
set(DPNP_TARGET_CUDA
diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat
index f6f6a061c5fa..41464cd43341 100644
--- a/conda-recipe/run_test.bat
+++ b/conda-recipe/run_test.bat
@@ -31,6 +31,12 @@ if not defined PYTHON (
)
+REM Skip tensor tests by default to avoid OOM in conda builds.
+REM Set SKIP_TENSOR_TESTS=0 to run them on machines with enough memory.
+if not defined SKIP_TENSOR_TESTS (
+ set "SKIP_TENSOR_TESTS=1"
+)
+
"%PYTHON%" -c "import dpnp; print(dpnp.__version__)"
if %errorlevel% neq 0 exit 1
diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh
index b2c96df36242..5ffeaf2ec1b0 100755
--- a/conda-recipe/run_test.sh
+++ b/conda-recipe/run_test.sh
@@ -33,6 +33,12 @@ if [ -z "${PYTHON}" ]; then
PYTHON=$PREFIX/bin/python
fi
+# Skip tensor tests by default to avoid OOM in conda builds.
+# Set SKIP_TENSOR_TESTS=0 to run them on machines with enough memory.
+if [ -z "${SKIP_TENSOR_TESTS}" ]; then
+ export SKIP_TENSOR_TESTS=1
+fi
+
set -e
$PYTHON -c "import dpnp; print(dpnp.__version__)"
diff --git a/doc/conf.py b/doc/conf.py
index 469e6d5f5353..57119eab5396 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -6,6 +6,7 @@
# http://www.sphinx-doc.org/en/master/config
from datetime import datetime
+from urllib.parse import urljoin
from sphinx.ext.autodoc import FunctionDocumenter
from sphinx.ext.napoleon import NumpyDocstring, docstring
@@ -231,6 +232,9 @@ def _can_document_member(member, *args, **kwargs):
autosummary_generate = True
+_DPCTL_021_BASE = "https://intelpython.github.io/dpctl/0.21.1/"
+_DPCTL_021_INV = urljoin(_DPCTL_021_BASE, "objects.inv")
+
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
@@ -302,3 +306,65 @@ def _parse_returns_section_patched(self, section: str) -> list[str]:
NumpyDocstring._parse_returns_section = _parse_returns_section_patched
+
+
+# TODO: Remove once dpnp.tensor docs are generated in dpnp
+def _load_dpctl_tensor_inventory(app):
+ """Load dpctl 0.21.1 inventory for dpnp.tensor fallback only."""
+ from sphinx.ext.intersphinx import fetch_inventory
+ from sphinx.util import logging
+
+ logger = logging.getLogger(__name__)
+
+ try:
+ inv = fetch_inventory(app, _DPCTL_021_BASE, _DPCTL_021_INV)
+ except Exception as exc:
+ logger.warning(
+ "Failed to load dpctl 0.21.1 inventory from %s: %s",
+ _DPCTL_021_INV,
+ exc,
+ )
+ inv = {}
+
+ app.builder.env._dpctl_tensor_021_inventory = inv
+
+
+# TODO: Remove once dpnp.tensor docs are generated in dpnp
+def _resolve_dpnp_tensor_refs(app, env, node, contnode):
+ """Resolve dpnp.tensor.* references to dpctl 0.21.1 documentation.
+
+ This temporary workaround is needed because dpnp.tensor documentation
+ is not generated yet, while the corresponding API is still documented
+ in dpctl 0.21.1.
+ """
+ from docutils import nodes as docutils_nodes
+
+ target = node.get("reftarget", "")
+ if not target.startswith("dpnp.tensor"):
+ return None
+
+ dpctl_target = target.replace("dpnp.tensor", "dpctl.tensor", 1)
+ dpctl_tensor_inv = getattr(env, "_dpctl_tensor_021_inventory", {})
+
+ for _objtype, objects in dpctl_tensor_inv.items():
+ if dpctl_target not in objects:
+ continue
+
+ item = objects[dpctl_target]
+ location = item.uri
+ if location.endswith("$"):
+ location = location[:-1] + dpctl_target
+
+ refuri = urljoin(_DPCTL_021_BASE, location)
+ newnode = docutils_nodes.reference(
+ "", "", internal=False, refuri=refuri
+ )
+ newnode += contnode.deepcopy()
+ return newnode
+
+ return None
+
+
+def setup(app):
+ app.connect("builder-inited", _load_dpctl_tensor_inventory, priority=400)
+ app.connect("missing-reference", _resolve_dpnp_tensor_refs, priority=400)
diff --git a/doc/index.rst b/doc/index.rst
index 38c12489636b..847680fc11d9 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -13,6 +13,7 @@ Data Parallel Extension for NumPy*
overview
quick_start_guide
reference/index
+ tensor
.. toctree::
:maxdepth: 1
diff --git a/doc/reference/exceptions.rst b/doc/reference/exceptions.rst
index 8f459b9f3aaa..69980ac8d8c2 100644
--- a/doc/reference/exceptions.rst
+++ b/doc/reference/exceptions.rst
@@ -20,7 +20,7 @@ Exceptions
.. exception:: DLPackCreationError
Given when constructing DLPack capsule from either :class:`dpnp.ndarray` or
- :class:`dpctl.tensor.usm_ndarray` based on a USM allocation
+ :class:`dpnp.tensor.usm_ndarray` based on a USM allocation
on a partitioned SYCL device.
.. rubric:: Examples
diff --git a/doc/tensor.rst b/doc/tensor.rst
new file mode 100644
index 000000000000..22a1812f38a3
--- /dev/null
+++ b/doc/tensor.rst
@@ -0,0 +1,70 @@
+.. _tensor:
+
+Tensor (``dpnp.tensor``)
+========================
+
+``dpnp.tensor`` provides a reference implementation of the
+`Python Array API <https://data-apis.org/array-api/latest/>`_ specification.
+The implementation uses data-parallel algorithms suitable for execution on
+accelerators, such as GPUs.
+
+It also provides the underlying Array API-compliant implementation
+used by ``dpnp``.
+
+``dpnp.tensor`` is written using C++ and
+`SYCL <https://www.khronos.org/sycl/>`_
+and oneAPI extensions implemented in
+`Intel(R) oneAPI DPC++ compiler <https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html>`_.
+
+Design and Motivation
+---------------------
+
+The tensor implementation was originally developed as a standalone project and
+later integrated into the `dpctl <https://github.com/IntelPython/dpctl>`_
+library as ``dpctl.tensor``. It has since been migrated into ``dpnp``,
+making ``dpnp`` the primary owner and development location of the tensor implementation.
+
+This change simplifies maintenance, reduces cross-project
+dependencies, and enables independent development and release cycles.
+
+Relationship to ``dpnp.ndarray``
+--------------------------------
+
+:class:`dpnp.ndarray` is a high-level array object built on top of
+``dpnp.tensor.usm_ndarray``, storing array data in Unified Shared Memory
+(USM) allocated on a SYCL device. Most users interact with
+:class:`dpnp.ndarray` directly; ``dpnp.tensor.usm_ndarray`` may appear in error
+messages or type signatures when working with device placement or
+interoperability.
+
+Relationship to ``dpctl``
+-------------------------
+
+The migration of ``dpctl.tensor`` into ``dpnp.tensor`` does not replace
+`dpctl <https://github.com/IntelPython/dpctl>`_ itself.
+``dpctl`` remains responsible for device and queue management
+(:class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`) as well as USM memory
+allocation. ``dpnp`` builds on top of these capabilities.
+
+Example
+-------
+
+.. code-block:: python
+
+ import dpnp
+ import dpnp.tensor as dpt
+
+ # Create a tensor array on the default device
+ x = dpt.asarray([1.0, 2.0, 3.0])
+
+ # dpnp.ndarray wraps the underlying usm_ndarray
+ a = dpnp.asarray([1.0, 2.0, 3.0])
+ assert isinstance(a.get_array(), dpt.usm_ndarray)
+
+.. note::
+
+ The ``dpnp.tensor`` API documentation will be added in a future release.
+
+ The current implementation remains compatible with the original
+ ``dpctl.tensor`` API. For the complete API reference, see the
+   `dpctl 0.21.1 tensor documentation <https://intelpython.github.io/dpctl/0.21.1/>`_.
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index 6850b799735c..d7acf368bcd0 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -86,11 +86,96 @@ function(build_dpnp_cython_ext _trgt _src _dest)
install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
endfunction()
+function(build_dpnp_tensor_ext _trgt _src _dest)
+ set(options SYCL)
+ cmake_parse_arguments(BUILD_DPNP_TENSOR "${options}" "RELATIVE_PATH" "" ${ARGN})
+ add_cython_target(${_trgt} ${_src} CXX OUTPUT_VAR _generated_src)
+ set(_cythonize_trgt "${_trgt}_cythonize_pyx")
+ python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
+ if(BUILD_DPNP_TENSOR_SYCL)
+ add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
+ target_compile_options(${_trgt} PRIVATE -fno-sycl-id-queries-fit-in-int)
+ target_link_options(${_trgt} PRIVATE -fsycl-device-code-split=per_kernel)
+ if(DPNP_TENSOR_OFFLOAD_COMPRESS)
+ target_link_options(${_trgt} PRIVATE --offload-compress)
+ endif()
+ if(_dpnp_sycl_targets)
+ # make fat binary
+ target_compile_options(
+ ${_trgt}
+ PRIVATE ${_dpnp_sycl_target_compile_options}
+ )
+ target_link_options(${_trgt} PRIVATE ${_dpnp_sycl_target_link_options})
+ endif()
+ endif()
+ target_link_libraries(${_trgt} PRIVATE Python::NumPy)
+ if(DPNP_GENERATE_COVERAGE)
+ target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1)
+ if(BUILD_DPNP_TENSOR_SYCL)
+ target_compile_options(${_trgt} PRIVATE -fno-sycl-use-footer)
+ endif()
+ endif()
+ # Dpctl
+ target_include_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR})
+ target_link_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}/..)
+ target_link_libraries(${_trgt} PRIVATE DPCTLSyclInterface)
+ set(_linker_options "LINKER:${DPNP_LDFLAGS}")
+ target_link_options(${_trgt} PRIVATE ${_linker_options})
+ get_filename_component(_name_wle ${_generated_src} NAME_WLE)
+ get_filename_component(_generated_src_dir ${_generated_src} DIRECTORY)
+ set(_generated_public_h "${_generated_src_dir}/${_name_wle}.h")
+ set(_generated_api_h "${_generated_src_dir}/${_name_wle}_api.h")
+
+ # TODO: create separate folder inside build folder that contains only
+ # headers related to this target and appropriate folder structure to
+ # eliminate shadow dependencies
+ # Go up two levels to build root for "dpnp/tensor/_usmarray.h" resolution
+ get_filename_component(_parent_dir ${_generated_src_dir} DIRECTORY)
+ get_filename_component(_build_root ${_parent_dir} DIRECTORY)
+ # TODO: do not set directory if we did not generate header
+ target_include_directories(${_trgt} INTERFACE ${_build_root})
+ set(_rpath_value "$ORIGIN")
+ if(BUILD_DPNP_TENSOR_RELATIVE_PATH)
+ set(_rpath_value "${_rpath_value}/${BUILD_DPNP_TENSOR_RELATIVE_PATH}")
+ endif()
+ if(DPNP_WITH_REDIST)
+ set(_rpath_value "${_rpath_value}:${_rpath_value}/../../..")
+ endif()
+ set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH ${_rpath_value})
+
+ install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
+ install(
+ FILES ${_generated_api_h}
+ DESTINATION ${CMAKE_INSTALL_PREFIX}/dpnp/include/${_dest}
+ OPTIONAL
+ )
+ install(
+ FILES ${_generated_public_h}
+ DESTINATION ${CMAKE_INSTALL_PREFIX}/dpnp/include/${_dest}
+ OPTIONAL
+ )
+ if(DPNP_GENERATE_COVERAGE)
+ get_filename_component(_original_src_dir ${_src} DIRECTORY)
+ file(RELATIVE_PATH _rel_dir ${CMAKE_SOURCE_DIR} ${_original_src_dir})
+ install(FILES ${_generated_src} DESTINATION ${CMAKE_INSTALL_PREFIX}/${_rel_dir})
+ endif()
+
+ # Create target with headers only, because python is managing all the
+ # library imports at runtime
+ set(_trgt_headers ${_trgt}_headers)
+ add_library(${_trgt_headers} INTERFACE)
+ add_dependencies(${_trgt_headers} ${_trgt})
+ get_target_property(_trgt_headers_dir ${_trgt} INTERFACE_INCLUDE_DIRECTORIES)
+ target_include_directories(${_trgt_headers} INTERFACE ${_trgt_headers_dir})
+endfunction()
+
function(build_dpnp_cython_ext_with_backend _trgt _src _dest)
build_dpnp_cython_ext(${_trgt} ${_src} ${_dest})
target_link_libraries(${_trgt} PRIVATE dpnp_backend_library)
endfunction()
+add_subdirectory(tensor)
+
add_subdirectory(backend)
add_subdirectory(backend/extensions/blas)
add_subdirectory(backend/extensions/fft)
diff --git a/dpnp/__init__.py b/dpnp/__init__.py
index 02420107972f..d2ea158d4d44 100644
--- a/dpnp/__init__.py
+++ b/dpnp/__init__.py
@@ -28,7 +28,6 @@
import os
import sys
-import warnings
mypath = os.path.dirname(os.path.realpath(__file__))
@@ -61,10 +60,7 @@
[os.getenv("PATH", ""), dll_path]
)
-# Borrowed from DPCTL
-with warnings.catch_warnings():
- warnings.simplefilter("ignore", DeprecationWarning)
- from dpctl.tensor import __array_api_version__, DLDeviceType
+from .tensor import __array_api_version__, DLDeviceType
from .dpnp_array import dpnp_array as ndarray
from .dpnp_array_api_info import __array_namespace_info__
diff --git a/dpnp/__main__.py b/dpnp/__main__.py
new file mode 100644
index 000000000000..1c9c652109ee
--- /dev/null
+++ b/dpnp/__main__.py
@@ -0,0 +1,78 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import argparse
+import importlib
+import os
+import os.path
+import sys
+
+
+def _dpnp_dir() -> str:
+ dpnp_dir = importlib.util.find_spec("dpnp").submodule_search_locations[0]
+ abs_dpnp_dir = os.path.abspath(dpnp_dir)
+ return abs_dpnp_dir
+
+
+def get_tensor_include_dir() -> str:
+    """Return the path to the dpnp libtensor include directory."""
+ dpnp_dir = _dpnp_dir()
+ libtensor_dir = os.path.join(dpnp_dir, "tensor", "libtensor", "include")
+ return libtensor_dir
+
+
+def print_tensor_include_flags() -> None:
+ """Prints include flags for dpnp tensor library"""
+ libtensor_dir = get_tensor_include_dir()
+ print("-I " + libtensor_dir)
+
+
+def main() -> None:
+ """Main entry-point."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--tensor-includes",
+ action="store_true",
+ help="Include flags for dpnp libtensor headers.",
+ )
+ parser.add_argument(
+ "--tensor-include-dir",
+ action="store_true",
+ help="Path to dpnp libtensor include directory.",
+ )
+ args = parser.parse_args()
+ if not sys.argv[1:]:
+ parser.print_help()
+ if args.tensor_includes:
+ print_tensor_include_flags()
+ if args.tensor_include_dir:
+ print(get_tensor_include_dir())
+
+
+if __name__ == "__main__":
+ main()
diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt
index ddca557a08f4..433ab298d476 100644
--- a/dpnp/backend/CMakeLists.txt
+++ b/dpnp/backend/CMakeLists.txt
@@ -89,7 +89,6 @@ target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0)
target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0)
target_include_directories(${_trgt} PUBLIC ${Dpctl_INCLUDE_DIR})
-target_include_directories(${_trgt} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR})
target_link_directories(${_trgt} PUBLIC "${Dpctl_INCLUDE_DIR}/..")
target_link_libraries(${_trgt} PUBLIC DPCTLSyclInterface)
diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt
index 5960dfcd8028..b4013d82eb40 100644
--- a/dpnp/backend/extensions/blas/CMakeLists.txt
+++ b/dpnp/backend/extensions/blas/CMakeLists.txt
@@ -39,6 +39,9 @@ set(_module_src
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+# Ensure Cython modules build first so _usmarray.h exists
+add_dependencies(${python_module_name} _usmarray)
+
if(_dpnp_sycl_targets)
# make fat binary
target_compile_options(
@@ -65,14 +68,20 @@ set_target_properties(
target_include_directories(
${python_module_name}
- PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common
+ PRIVATE
+ ${CMAKE_CURRENT_SOURCE_DIR}/../common
+ ${CMAKE_SOURCE_DIR}/dpnp/backend/include
+ ${CMAKE_SOURCE_DIR}/dpnp/tensor/libtensor/include
)
# treat below headers as system to suppress the warnings there during the build
target_include_directories(
${python_module_name}
SYSTEM
- PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR}
+ PRIVATE
+ ${SYCL_INCLUDE_DIR}
+ ${Dpctl_INCLUDE_DIRS}
+ ${CMAKE_BINARY_DIR} # For generated Cython headers
)
if(WIN32)
diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp
index d5738727c322..3e8ca01ebd8d 100644
--- a/dpnp/backend/extensions/blas/blas_py.cpp
+++ b/dpnp/backend/extensions/blas/blas_py.cpp
@@ -60,15 +60,15 @@ void init_dispatch_vectors_tables(void)
blas_ns::init_syrk_dispatch_vector();
}
-static dot_impl_fn_ptr_t dot_dispatch_vector[dpctl_td_ns::num_types];
-static dot_impl_fn_ptr_t dotc_dispatch_vector[dpctl_td_ns::num_types];
-static dot_impl_fn_ptr_t dotu_dispatch_vector[dpctl_td_ns::num_types];
+static dot_impl_fn_ptr_t dot_dispatch_vector[dpnp_td_ns::num_types];
+static dot_impl_fn_ptr_t dotc_dispatch_vector[dpnp_td_ns::num_types];
+static dot_impl_fn_ptr_t dotu_dispatch_vector[dpnp_td_ns::num_types];
PYBIND11_MODULE(_blas_impl, m)
{
init_dispatch_vectors_tables();
- using arrayT = dpctl::tensor::usm_ndarray;
+ using arrayT = dpnp::tensor::usm_ndarray;
    using event_vecT = std::vector<sycl::event>;
{
diff --git a/dpnp/backend/extensions/blas/dot.hpp b/dpnp/backend/extensions/blas/dot.hpp
index 833de60a5fed..300a5348c53e 100644
--- a/dpnp/backend/extensions/blas/dot.hpp
+++ b/dpnp/backend/extensions/blas/dot.hpp
@@ -35,7 +35,7 @@
namespace dpnp::extensions::blas
{
namespace mkl_blas = oneapi::mkl::blas;
-namespace type_utils = dpctl::tensor::type_utils;
+namespace type_utils = dpnp::tensor::type_utils;
template <typename T>
static sycl::event dot_impl(sycl::queue &exec_q,
diff --git a/dpnp/backend/extensions/blas/dot_common.hpp b/dpnp/backend/extensions/blas/dot_common.hpp
index 383804ff1718..d4819be74036 100644
--- a/dpnp/backend/extensions/blas/dot_common.hpp
+++ b/dpnp/backend/extensions/blas/dot_common.hpp
@@ -29,9 +29,10 @@
#pragma once
#include
+
#include
-// dpctl tensor headers
+// dpnp tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/output_validation.hpp"
#include "utils/type_dispatch.hpp"
@@ -50,14 +51,14 @@ typedef sycl::event (*dot_impl_fn_ptr_t)(sycl::queue &,
char *,
                         const std::vector<sycl::event> &);
-namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
+namespace dpnp_td_ns = dpnp::tensor::type_dispatch;
namespace py = pybind11;
std::pair
dot_func(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &vectorX,
- const dpctl::tensor::usm_ndarray &vectorY,
- const dpctl::tensor::usm_ndarray &result,
+ const dpnp::tensor::usm_ndarray &vectorX,
+ const dpnp::tensor::usm_ndarray &vectorY,
+ const dpnp::tensor::usm_ndarray &result,
+         const std::vector<sycl::event> &depends,
const dot_impl_fn_ptr_t *dot_dispatch_vector)
{
@@ -83,7 +84,7 @@ std::pair
", but a 0-dimensional array is expected.");
}
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
if (overlap(vectorX, result)) {
throw py::value_error(
"The first input array and output array are overlapping "
@@ -95,7 +96,7 @@ std::pair
"segments of memory");
}
- if (!dpctl::utils::queues_are_compatible(
+ if (!dpnp::utils::queues_are_compatible(
exec_q,
{vectorX.get_queue(), vectorY.get_queue(), result.get_queue()})) {
throw py::value_error(
@@ -103,9 +104,9 @@ std::pair
}
const int src_nelems = 1;
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(result);
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(result,
- src_nelems);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(result);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(result,
+ src_nelems);
const py::ssize_t x_size = vectorX.get_size();
const py::ssize_t y_size = vectorY.get_size();
@@ -124,7 +125,7 @@ std::pair
throw py::value_error("Given arrays must be of the same type.");
}
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int type_id = array_types.typenum_to_lookup_id(vectorX_typenum);
dot_impl_fn_ptr_t dot_fn = dot_dispatch_vector[type_id];
@@ -162,7 +163,7 @@ std::pair
sycl::event dot_ev = dot_fn(exec_q, n, x_typeless_ptr, incx, y_typeless_ptr,
incy, r_typeless_ptr, depends);
- sycl::event args_ev = dpctl::utils::keep_args_alive(
+ sycl::event args_ev = dpnp::utils::keep_args_alive(
exec_q, {vectorX, vectorY, result}, {dot_ev});
return std::make_pair(args_ev, dot_ev);
diff --git a/dpnp/backend/extensions/blas/dotc.hpp b/dpnp/backend/extensions/blas/dotc.hpp
index 71ac1fd8df85..535f79156fb5 100644
--- a/dpnp/backend/extensions/blas/dotc.hpp
+++ b/dpnp/backend/extensions/blas/dotc.hpp
@@ -35,7 +35,7 @@
namespace dpnp::extensions::blas
{
namespace mkl_blas = oneapi::mkl::blas;
-namespace type_utils = dpctl::tensor::type_utils;
+namespace type_utils = dpnp::tensor::type_utils;
template
static sycl::event dotc_impl(sycl::queue &exec_q,
diff --git a/dpnp/backend/extensions/blas/dotu.hpp b/dpnp/backend/extensions/blas/dotu.hpp
index 7a1952234559..aafe9721339f 100644
--- a/dpnp/backend/extensions/blas/dotu.hpp
+++ b/dpnp/backend/extensions/blas/dotu.hpp
@@ -35,7 +35,7 @@
namespace dpnp::extensions::blas
{
namespace mkl_blas = oneapi::mkl::blas;
-namespace type_utils = dpctl::tensor::type_utils;
+namespace type_utils = dpnp::tensor::type_utils;
template
static sycl::event dotu_impl(sycl::queue &exec_q,
diff --git a/dpnp/backend/extensions/blas/gemm.cpp b/dpnp/backend/extensions/blas/gemm.cpp
index 86f751baf2e0..bf91f5cfb0bf 100644
--- a/dpnp/backend/extensions/blas/gemm.cpp
+++ b/dpnp/backend/extensions/blas/gemm.cpp
@@ -33,7 +33,7 @@
// utils extension header
#include "ext/common.hpp"
-// dpctl tensor headers
+// dpnp tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/output_validation.hpp"
#include "utils/type_utils.hpp"
@@ -45,7 +45,7 @@ namespace dpnp::extensions::blas
{
namespace mkl_blas = oneapi::mkl::blas;
namespace py = pybind11;
-namespace type_utils = dpctl::tensor::type_utils;
+namespace type_utils = dpnp::tensor::type_utils;
using ext::common::init_dispatch_table;
@@ -64,8 +64,8 @@ typedef sycl::event (*gemm_impl_fn_ptr_t)(sycl::queue &,
const bool,
const std::vector &);
-static gemm_impl_fn_ptr_t gemm_dispatch_table[dpctl_td_ns::num_types]
- [dpctl_td_ns::num_types];
+static gemm_impl_fn_ptr_t gemm_dispatch_table[dpnp_td_ns::num_types]
+ [dpnp_td_ns::num_types];
template
static sycl::event gemm_impl(sycl::queue &exec_q,
@@ -153,9 +153,9 @@ static sycl::event gemm_impl(sycl::queue &exec_q,
std::tuple
gemm(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &matrixB,
- const dpctl::tensor::usm_ndarray &resultC,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &matrixB,
+ const dpnp::tensor::usm_ndarray &resultC,
const std::vector &depends)
{
const int matrixA_nd = matrixA.get_ndim();
@@ -167,7 +167,7 @@ std::tuple
"Input and output matrices must be two-dimensional.");
}
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
if (overlap(matrixA, resultC)) {
throw py::value_error(
"The first input array and output array are overlapping "
@@ -179,7 +179,7 @@ std::tuple
"segments of memory");
}
- if (!dpctl::utils::queues_are_compatible(
+ if (!dpnp::utils::queues_are_compatible(
exec_q,
{matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) {
throw py::value_error(
@@ -206,9 +206,9 @@ std::tuple
}
const std::size_t src_nelems = m * n;
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(resultC);
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(resultC,
- src_nelems);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(resultC);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(resultC,
+ src_nelems);
const bool is_matrixA_f_contig = matrixA.is_f_contiguous();
const bool is_matrixB_f_contig = matrixB.is_f_contiguous();
@@ -300,7 +300,7 @@ std::tuple
throw py::value_error("matrixA and matrixB must be of the same type.");
}
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int matrixAB_type_id =
array_types.typenum_to_lookup_id(matrixA_typenum);
const int resultC_type_id =
@@ -322,7 +322,7 @@ std::tuple
a_typeless_ptr, lda, b_typeless_ptr, ldb,
r_typeless_ptr, ldc, is_row_major, depends);
- sycl::event args_ev = dpctl::utils::keep_args_alive(
+ sycl::event args_ev = dpnp::utils::keep_args_alive(
exec_q, {matrixA, matrixB, resultC}, {gemm_ev});
return std::make_tuple(args_ev, gemm_ev, is_row_major);
diff --git a/dpnp/backend/extensions/blas/gemm.hpp b/dpnp/backend/extensions/blas/gemm.hpp
index 997d515f98a0..8475e4120c61 100644
--- a/dpnp/backend/extensions/blas/gemm.hpp
+++ b/dpnp/backend/extensions/blas/gemm.hpp
@@ -31,22 +31,22 @@
#include
#include
-#include
+#include "dpnp4pybind11.hpp"
namespace dpnp::extensions::blas
{
extern std::tuple
gemm(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &matrixB,
- const dpctl::tensor::usm_ndarray &resultC,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &matrixB,
+ const dpnp::tensor::usm_ndarray &resultC,
const std::vector &depends);
extern std::tuple
gemm_batch(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &matrixB,
- const dpctl::tensor::usm_ndarray &resultC,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &matrixB,
+ const dpnp::tensor::usm_ndarray &resultC,
const std::vector &depends);
extern void init_gemm_dispatch_table(void);
diff --git a/dpnp/backend/extensions/blas/gemm_batch.cpp b/dpnp/backend/extensions/blas/gemm_batch.cpp
index d02b035922c0..f1bc9f30b5dd 100644
--- a/dpnp/backend/extensions/blas/gemm_batch.cpp
+++ b/dpnp/backend/extensions/blas/gemm_batch.cpp
@@ -33,7 +33,7 @@
// utils extension header
#include "ext/common.hpp"
-// dpctl tensor headers
+// dpnp tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/output_validation.hpp"
#include "utils/type_utils.hpp"
@@ -45,7 +45,7 @@ namespace dpnp::extensions::blas
{
namespace mkl_blas = oneapi::mkl::blas;
namespace py = pybind11;
-namespace type_utils = dpctl::tensor::type_utils;
+namespace type_utils = dpnp::tensor::type_utils;
using ext::common::init_dispatch_table;
@@ -70,7 +70,7 @@ typedef sycl::event (*gemm_batch_impl_fn_ptr_t)(
const std::vector &);
static gemm_batch_impl_fn_ptr_t
- gemm_batch_dispatch_table[dpctl_td_ns::num_types][dpctl_td_ns::num_types];
+ gemm_batch_dispatch_table[dpnp_td_ns::num_types][dpnp_td_ns::num_types];
template
static sycl::event gemm_batch_impl(sycl::queue &exec_q,
@@ -212,9 +212,9 @@ void standardize_strides_to_zero(std::vector &strides,
std::tuple
gemm_batch(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &matrixB,
- const dpctl::tensor::usm_ndarray &resultC,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &matrixB,
+ const dpnp::tensor::usm_ndarray &resultC,
const std::vector &depends = {})
{
const int matrixA_nd = matrixA.get_ndim();
@@ -225,7 +225,7 @@ std::tuple
throw py::value_error("The given arrays have incorrect dimensions.");
}
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
if (overlap(matrixA, resultC)) {
throw py::value_error("Input array 1 and output array are overlapping "
"segments of memory");
@@ -235,7 +235,7 @@ std::tuple
"segments of memory");
}
- if (!dpctl::utils::queues_are_compatible(
+ if (!dpnp::utils::queues_are_compatible(
exec_q,
{matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) {
throw py::value_error(
@@ -262,9 +262,9 @@ std::tuple
"the number of columns in result array.");
}
const std::int64_t src_nelems = batch_size * m * n;
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(resultC);
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(resultC,
- src_nelems);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(resultC);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(resultC,
+ src_nelems);
std::vector a_stride = matrixA.get_strides_vector();
std::vector b_stride = matrixB.get_strides_vector();
@@ -374,7 +374,7 @@ std::tuple
throw py::value_error("matrixA and matrixB must be of the same type.");
}
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int matrixAB_type_id =
array_types.typenum_to_lookup_id(matrixA_typenum);
const int resultC_type_id =
@@ -397,7 +397,7 @@ std::tuple
strideb, stridec, transA, transB, a_typeless_ptr,
b_typeless_ptr, r_typeless_ptr, is_row_major, depends);
- sycl::event args_ev = dpctl::utils::keep_args_alive(
+ sycl::event args_ev = dpnp::utils::keep_args_alive(
exec_q, {matrixA, matrixB, resultC}, {gemm_batch_ev});
return std::make_tuple(args_ev, gemm_batch_ev, is_row_major);
diff --git a/dpnp/backend/extensions/blas/gemv.cpp b/dpnp/backend/extensions/blas/gemv.cpp
index 0b6ae78bc76e..bb447c51997d 100644
--- a/dpnp/backend/extensions/blas/gemv.cpp
+++ b/dpnp/backend/extensions/blas/gemv.cpp
@@ -33,7 +33,7 @@
// utils extension header
#include "ext/common.hpp"
-// dpctl tensor headers
+// dpnp tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/output_validation.hpp"
#include "utils/type_utils.hpp"
@@ -45,7 +45,7 @@ namespace dpnp::extensions::blas
{
namespace mkl_blas = oneapi::mkl::blas;
namespace py = pybind11;
-namespace type_utils = dpctl::tensor::type_utils;
+namespace type_utils = dpnp::tensor::type_utils;
using ext::common::init_dispatch_vector;
@@ -62,7 +62,7 @@ typedef sycl::event (*gemv_impl_fn_ptr_t)(sycl::queue &,
const bool,
const std::vector &);
-static gemv_impl_fn_ptr_t gemv_dispatch_vector[dpctl_td_ns::num_types];
+static gemv_impl_fn_ptr_t gemv_dispatch_vector[dpnp_td_ns::num_types];
template
static sycl::event gemv_impl(sycl::queue &exec_q,
@@ -143,9 +143,9 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
std::pair
gemv(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &vectorX,
- const dpctl::tensor::usm_ndarray &vectorY,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &vectorX,
+ const dpnp::tensor::usm_ndarray &vectorY,
const bool transpose,
const std::vector &depends)
{
@@ -157,7 +157,7 @@ std::pair
throw py::value_error("The arrays have incorrect dimensions.");
}
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
if (overlap(matrixA, vectorY)) {
throw py::value_error("Input matrix and output vector are overlapping "
"segments of memory");
@@ -167,7 +167,7 @@ std::pair
"segments of memory");
}
- if (!dpctl::utils::queues_are_compatible(
+ if (!dpnp::utils::queues_are_compatible(
exec_q,
{matrixA.get_queue(), vectorX.get_queue(), vectorY.get_queue()})) {
throw py::value_error(
@@ -259,9 +259,9 @@ std::pair
#endif // USE_ONEMATH_CUBLAS
const std::int64_t lda = is_row_major ? n : m;
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(vectorY);
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(vectorY,
- src_nelems);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(vectorY);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(vectorY,
+ src_nelems);
const int matrixA_typenum = matrixA.get_typenum();
const int vectorX_typenum = vectorX.get_typenum();
@@ -272,7 +272,7 @@ std::pair
throw py::value_error("Given arrays must be of the same type.");
}
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int type_id = array_types.typenum_to_lookup_id(matrixA_typenum);
gemv_impl_fn_ptr_t gemv_fn = gemv_dispatch_vector[type_id];
@@ -303,7 +303,7 @@ std::pair
gemv_fn(exec_q, transA, m, n, a_typeless_ptr, lda, x_typeless_ptr, incx,
y_typeless_ptr, incy, is_row_major, depends);
- sycl::event args_ev = dpctl::utils::keep_args_alive(
+ sycl::event args_ev = dpnp::utils::keep_args_alive(
exec_q, {matrixA, vectorX, vectorY}, {gemv_ev});
return std::make_pair(args_ev, gemv_ev);
diff --git a/dpnp/backend/extensions/blas/gemv.hpp b/dpnp/backend/extensions/blas/gemv.hpp
index afe0c6387aa9..c3e1c503fde8 100644
--- a/dpnp/backend/extensions/blas/gemv.hpp
+++ b/dpnp/backend/extensions/blas/gemv.hpp
@@ -31,15 +31,15 @@
#include
#include
-#include
+#include "dpnp4pybind11.hpp"
namespace dpnp::extensions::blas
{
extern std::pair
gemv(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &vectorX,
- const dpctl::tensor::usm_ndarray &vectorY,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &vectorX,
+ const dpnp::tensor::usm_ndarray &vectorY,
const bool transpose,
const std::vector &depends);
diff --git a/dpnp/backend/extensions/blas/syrk.cpp b/dpnp/backend/extensions/blas/syrk.cpp
index 9668e72b57f6..ad16fc399370 100644
--- a/dpnp/backend/extensions/blas/syrk.cpp
+++ b/dpnp/backend/extensions/blas/syrk.cpp
@@ -34,7 +34,7 @@
// utils extension header
#include "ext/common.hpp"
-// dpctl tensor headers
+// dpnp tensor headers
#include "utils/memory_overlap.hpp"
#include "utils/output_validation.hpp"
#include "utils/type_utils.hpp"
@@ -48,7 +48,7 @@ namespace dpnp::extensions::blas
{
namespace mkl_blas = oneapi::mkl::blas;
namespace py = pybind11;
-namespace type_utils = dpctl::tensor::type_utils;
+namespace type_utils = dpnp::tensor::type_utils;
using ext::common::init_dispatch_vector;
@@ -63,7 +63,7 @@ typedef sycl::event (*syrk_impl_fn_ptr_t)(sycl::queue &,
const bool,
const std::vector &);
-static syrk_impl_fn_ptr_t syrk_dispatch_vector[dpctl_td_ns::num_types];
+static syrk_impl_fn_ptr_t syrk_dispatch_vector[dpnp_td_ns::num_types];
template
constexpr void copy_to_lower_triangle(T *res,
@@ -230,8 +230,8 @@ static sycl::event syrk_impl(sycl::queue &exec_q,
std::pair
syrk(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &resultC,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &resultC,
const std::vector &depends)
{
const int matrixA_nd = matrixA.get_ndim();
@@ -241,13 +241,13 @@ std::pair
throw py::value_error("The given arrays have incorrect dimensions.");
}
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
if (overlap(matrixA, resultC)) {
throw py::value_error("Input and output matrices are overlapping "
"segments of memory");
}
- if (!dpctl::utils::queues_are_compatible(
+ if (!dpnp::utils::queues_are_compatible(
exec_q, {matrixA.get_queue(), resultC.get_queue()})) {
throw py::value_error(
"USM allocations are not compatible with the execution queue.");
@@ -305,9 +305,9 @@ std::pair
const std::int64_t lda = is_row_major ? k : n;
const std::int64_t ldc = n;
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(resultC);
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(resultC,
- src_nelems);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(resultC);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(resultC,
+ src_nelems);
const int matrixA_typenum = matrixA.get_typenum();
const int resultC_typenum = resultC.get_typenum();
@@ -315,7 +315,7 @@ std::pair
throw py::value_error("Given arrays must be of the same type.");
}
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int type_id = array_types.typenum_to_lookup_id(matrixA_typenum);
syrk_impl_fn_ptr_t syrk_fn = syrk_dispatch_vector[type_id];
if (syrk_fn == nullptr) {
@@ -331,7 +331,7 @@ std::pair
r_typeless_ptr, ldc, is_row_major, depends);
sycl::event args_ev =
- dpctl::utils::keep_args_alive(exec_q, {matrixA, resultC}, {syrk_ev});
+ dpnp::utils::keep_args_alive(exec_q, {matrixA, resultC}, {syrk_ev});
return std::make_pair(args_ev, syrk_ev);
}
diff --git a/dpnp/backend/extensions/blas/syrk.hpp b/dpnp/backend/extensions/blas/syrk.hpp
index 580239b28008..9ffe1b024951 100644
--- a/dpnp/backend/extensions/blas/syrk.hpp
+++ b/dpnp/backend/extensions/blas/syrk.hpp
@@ -31,14 +31,14 @@
#include
#include
-#include
+#include "dpnp4pybind11.hpp"
namespace dpnp::extensions::blas
{
extern std::pair
syrk(sycl::queue &exec_q,
- const dpctl::tensor::usm_ndarray &matrixA,
- const dpctl::tensor::usm_ndarray &resultC,
+ const dpnp::tensor::usm_ndarray &matrixA,
+ const dpnp::tensor::usm_ndarray &resultC,
const std::vector &depends);
extern void init_syrk_dispatch_vector(void);
diff --git a/dpnp/backend/extensions/blas/types_matrix.hpp b/dpnp/backend/extensions/blas/types_matrix.hpp
index 463319a968af..5751b5634777 100644
--- a/dpnp/backend/extensions/blas/types_matrix.hpp
+++ b/dpnp/backend/extensions/blas/types_matrix.hpp
@@ -30,11 +30,11 @@
#include
-// dpctl tensor headers
+// dpnp tensor headers
#include "utils/type_dispatch.hpp"
-// dpctl namespace for operations with types
-namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
+// namespace for operations with types
+namespace dpnp_td_ns = dpnp::tensor::type_dispatch;
namespace dpnp::extensions::blas::types
{
@@ -48,11 +48,11 @@ namespace dpnp::extensions::blas::types
template
struct DotTypePairSupportFactory
{
- static constexpr bool is_defined = std::disjunction<
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- // fall-through
- dpctl_td_ns::NotDefinedEntry>::is_defined;
+ static constexpr bool is_defined =
+ std::disjunction,
+ dpnp_td_ns::TypePairDefinedEntry,
+ // fall-through
+ dpnp_td_ns::NotDefinedEntry>::is_defined;
};
/**
@@ -65,17 +65,17 @@ struct DotTypePairSupportFactory
template
struct DotcTypePairSupportFactory
{
- static constexpr bool is_defined = std::disjunction<
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- // fall-through
- dpctl_td_ns::NotDefinedEntry>::is_defined;
+ static constexpr bool is_defined =
+ std::disjunction,
+ T,
+ std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ T,
+ std::complex>,
+ // fall-through
+ dpnp_td_ns::NotDefinedEntry>::is_defined;
};
/**
@@ -88,17 +88,17 @@ struct DotcTypePairSupportFactory
template
struct DotuTypePairSupportFactory
{
- static constexpr bool is_defined = std::disjunction<
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- // fall-through
- dpctl_td_ns::NotDefinedEntry>::is_defined;
+ static constexpr bool is_defined =
+ std::disjunction,
+ T,
+ std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ T,
+ std::complex>,
+ // fall-through
+ dpnp_td_ns::NotDefinedEntry>::is_defined;
};
/**
@@ -114,23 +114,23 @@ struct GemmTypePairSupportFactory
{
static constexpr bool is_defined = std::disjunction<
#if !defined(USE_ONEMATH)
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
#endif // USE_ONEMATH
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- Tc,
- std::complex>,
- dpctl_td_ns::TypePairDefinedEntry,
- Tc,
- std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ Tc,
+ std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ Tc,
+ std::complex>,
// fall-through
- dpctl_td_ns::NotDefinedEntry>::is_defined;
+ dpnp_td_ns::NotDefinedEntry>::is_defined;
};
/**
@@ -146,23 +146,23 @@ struct GemmBatchTypePairSupportFactory
{
static constexpr bool is_defined = std::disjunction<
#if !defined(USE_ONEMATH)
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
#endif // USE_ONEMATH
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- Tc,
- std::complex>,
- dpctl_td_ns::TypePairDefinedEntry,
- Tc,
- std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ Tc,
+ std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ Tc,
+ std::complex>,
// fall-through
- dpctl_td_ns::NotDefinedEntry>::is_defined;
+ dpnp_td_ns::NotDefinedEntry>::is_defined;
};
/**
@@ -175,19 +175,19 @@ struct GemmBatchTypePairSupportFactory
template
struct GemvTypePairSupportFactory
{
- static constexpr bool is_defined = std::disjunction<
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- // fall-through
- dpctl_td_ns::NotDefinedEntry>::is_defined;
+ static constexpr bool is_defined =
+ std::disjunction,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ T,
+ std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ T,
+ std::complex>,
+ // fall-through
+ dpnp_td_ns::NotDefinedEntry>::is_defined;
};
/**
@@ -200,18 +200,18 @@ struct GemvTypePairSupportFactory
template
struct SyrkTypePairSupportFactory
{
- static constexpr bool is_defined = std::disjunction<
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- dpctl_td_ns::TypePairDefinedEntry,
- T,
- std::complex>,
- // fall-through
- dpctl_td_ns::NotDefinedEntry>::is_defined;
+ static constexpr bool is_defined =
+ std::disjunction,
+ dpnp_td_ns::TypePairDefinedEntry,
+ dpnp_td_ns::TypePairDefinedEntry,
+ T,
+ std::complex>,
+ dpnp_td_ns::TypePairDefinedEntry,
+ T,
+ std::complex>,
+ // fall-through
+ dpnp_td_ns::NotDefinedEntry>::is_defined;
};
} // namespace dpnp::extensions::blas::types
diff --git a/dpnp/backend/extensions/common/ext/common.hpp b/dpnp/backend/extensions/common/ext/common.hpp
index f0ce1722bfb1..6b9a42a87d8e 100644
--- a/dpnp/backend/extensions/common/ext/common.hpp
+++ b/dpnp/backend/extensions/common/ext/common.hpp
@@ -29,17 +29,19 @@
#pragma once
#include
+
#include
#include
+
#include
-// dpctl tensor headers
+// dpnp tensor headers
#include "utils/math_utils.hpp"
#include "utils/type_dispatch.hpp"
#include "utils/type_utils.hpp"
-namespace type_utils = dpctl::tensor::type_utils;
-namespace type_dispatch = dpctl::tensor::type_dispatch;
+namespace type_utils = dpnp::tensor::type_utils;
+namespace type_dispatch = dpnp::tensor::type_dispatch;
namespace ext::common
{
@@ -82,7 +84,7 @@ struct Less
bool operator()(const T &lhs, const T &rhs) const
{
if constexpr (type_utils::is_complex_v) {
- return dpctl::tensor::math_utils::less_complex(lhs, rhs);
+ return dpnp::tensor::math_utils::less_complex(lhs, rhs);
}
else {
return std::less{}(lhs, rhs);
@@ -208,8 +210,9 @@ sycl::nd_range make_ndrange(const sycl::range &global_range,
sycl::nd_range<1>
make_ndrange(size_t global_size, size_t local_range, size_t work_per_item);
-// This function is a copy from dpctl because it is not available in the public
-// headers of dpctl.
+// This function was a copy from dpctl because it was not available in the
+// public headers of dpctl.
+// TODO: consolidate with tensor post-migration
pybind11::dtype dtype_from_typenum(int dst_typenum);
template
+#include
+#include
+
#include "ext/common.hpp"
#include "utils/type_dispatch.hpp"
-#include
-namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
+namespace dpnp_td_ns = dpnp::tensor::type_dispatch;
namespace ext::common
{
@@ -89,36 +91,36 @@ inline size_t get_local_mem_size_in_bytes(const sycl::device &device,
inline pybind11::dtype dtype_from_typenum(int dst_typenum)
{
- dpctl_td_ns::typenum_t dst_typenum_t =
- static_cast(dst_typenum);
+ dpnp_td_ns::typenum_t dst_typenum_t =
+ static_cast(dst_typenum);
switch (dst_typenum_t) {
- case dpctl_td_ns::typenum_t::BOOL:
+ case dpnp_td_ns::typenum_t::BOOL:
return py::dtype("?");
- case dpctl_td_ns::typenum_t::INT8:
+ case dpnp_td_ns::typenum_t::INT8:
return py::dtype("i1");
- case dpctl_td_ns::typenum_t::UINT8:
+ case dpnp_td_ns::typenum_t::UINT8:
return py::dtype("u1");
- case dpctl_td_ns::typenum_t::INT16:
+ case dpnp_td_ns::typenum_t::INT16:
return py::dtype("i2");
- case dpctl_td_ns::typenum_t::UINT16:
+ case dpnp_td_ns::typenum_t::UINT16:
return py::dtype("u2");
- case dpctl_td_ns::typenum_t::INT32:
+ case dpnp_td_ns::typenum_t::INT32:
return py::dtype("i4");
- case dpctl_td_ns::typenum_t::UINT32:
+ case dpnp_td_ns::typenum_t::UINT32:
return py::dtype("u4");
- case dpctl_td_ns::typenum_t::INT64:
+ case dpnp_td_ns::typenum_t::INT64:
return py::dtype("i8");
- case dpctl_td_ns::typenum_t::UINT64:
+ case dpnp_td_ns::typenum_t::UINT64:
return py::dtype("u8");
- case dpctl_td_ns::typenum_t::HALF:
+ case dpnp_td_ns::typenum_t::HALF:
return py::dtype("f2");
- case dpctl_td_ns::typenum_t::FLOAT:
+ case dpnp_td_ns::typenum_t::FLOAT:
return py::dtype("f4");
- case dpctl_td_ns::typenum_t::DOUBLE:
+ case dpnp_td_ns::typenum_t::DOUBLE:
return py::dtype("f8");
- case dpctl_td_ns::typenum_t::CFLOAT:
+ case dpnp_td_ns::typenum_t::CFLOAT:
return py::dtype("c8");
- case dpctl_td_ns::typenum_t::CDOUBLE:
+ case dpnp_td_ns::typenum_t::CDOUBLE:
return py::dtype("c16");
default:
throw py::value_error("Unrecognized dst_typeid");
diff --git a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp
index 4c70d2d0d413..6a46c97293f3 100644
--- a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp
+++ b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp
@@ -36,7 +36,7 @@
#include "ext/validation_utils.hpp"
#include "utils/memory_overlap.hpp"
-namespace td_ns = dpctl::tensor::type_dispatch;
+namespace td_ns = dpnp::tensor::type_dispatch;
namespace common = ext::common;
namespace ext::validation
@@ -118,9 +118,9 @@ inline void check_no_overlap(const array_ptr &input,
return;
}
- const auto &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ const auto &overlap = dpnp::tensor::overlap::MemoryOverlap();
const auto &same_logical_tensors =
- dpctl::tensor::overlap::SameLogicalTensors();
+ dpnp::tensor::overlap::SameLogicalTensors();
if (overlap(*input, *output) && !same_logical_tensors(*input, *output)) {
throw py::value_error(name_of(input, names) +
diff --git a/dpnp/backend/extensions/common/ext/dispatch_table.hpp b/dpnp/backend/extensions/common/ext/dispatch_table.hpp
index 6655f054f355..44437fb1a40f 100644
--- a/dpnp/backend/extensions/common/ext/dispatch_table.hpp
+++ b/dpnp/backend/extensions/common/ext/dispatch_table.hpp
@@ -39,7 +39,7 @@
#include "ext/common.hpp"
-namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
+namespace dpnp_td_ns = dpnp::tensor::type_dispatch;
namespace py = pybind11;
namespace ext::common
@@ -69,9 +69,9 @@ template
constexpr bool one_of_v = one_of::value;
template
-using Table = FnT[dpctl_td_ns::num_types];
+using Table = FnT[dpnp_td_ns::num_types];
template
-using Table2 = Table[dpctl_td_ns::num_types];
+using Table2 = Table[dpnp_td_ns::num_types];
using TypeId = int32_t;
using TypesPair = std::pair;
@@ -119,7 +119,7 @@ struct TableBuilder
};
using type =
- dpctl_td_ns::DispatchVectorBuilder;
+ dpnp_td_ns::DispatchVectorBuilder;
};
template ;
+ dpnp_td_ns::DispatchTableBuilder;
};
template
@@ -166,7 +166,7 @@ class DispatchTable
FnT get_unsafe(int _typenum) const
{
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int type_id = array_types.typenum_to_lookup_id(_typenum);
return table[type_id];
@@ -177,7 +177,7 @@ class DispatchTable
auto fn = get_unsafe(_typenum);
if (fn == nullptr) {
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int _type_id = array_types.typenum_to_lookup_id(_typenum);
py::dtype _dtype = dtype_from_typenum(_type_id);
@@ -208,7 +208,7 @@ class DispatchTable
private:
void populate_supported_types()
{
- for (int i = 0; i < dpctl_td_ns::num_types; ++i) {
+ for (int i = 0; i < dpnp_td_ns::num_types; ++i) {
if (table[i] != nullptr) {
supported_types.emplace_back(dtype_from_typenum(i));
}
@@ -242,7 +242,7 @@ class DispatchTable2
FnT get_unsafe(int first_typenum, int second_typenum) const
{
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int first_type_id =
array_types.typenum_to_lookup_id(first_typenum);
const int second_type_id =
@@ -256,7 +256,7 @@ class DispatchTable2
auto fn = get_unsafe(first_typenum, second_typenum);
if (fn == nullptr) {
- auto array_types = dpctl_td_ns::usm_ndarray_types();
+ auto array_types = dpnp_td_ns::usm_ndarray_types();
const int first_type_id =
array_types.typenum_to_lookup_id(first_typenum);
const int second_type_id =
@@ -338,8 +338,8 @@ class DispatchTable2
SupportedTypesSet second_supported_types_set;
SupportedTypesSet2 all_supported_types_set;
- for (int i = 0; i < dpctl_td_ns::num_types; ++i) {
- for (int j = 0; j < dpctl_td_ns::num_types; ++j) {
+ for (int i = 0; i < dpnp_td_ns::num_types; ++i) {
+ for (int j = 0; j < dpnp_td_ns::num_types; ++j) {
if (table[i][j] != nullptr) {
all_supported_types_set.emplace(i, j);
first_supported_types_set.emplace(i);
diff --git a/dpnp/backend/extensions/common/ext/validation_utils.hpp b/dpnp/backend/extensions/common/ext/validation_utils.hpp
index d41db8d5ca5a..fa2f892369cc 100644
--- a/dpnp/backend/extensions/common/ext/validation_utils.hpp
+++ b/dpnp/backend/extensions/common/ext/validation_utils.hpp
@@ -32,13 +32,16 @@
#include
#include
-#include "dpctl4pybind11.hpp"
+#include "dpnp4pybind11.hpp"
+
+// dpnp tensor headers
+#include "utils/type_dispatch.hpp"
namespace ext::validation
{
-using array_ptr = const dpctl::tensor::usm_ndarray *;
+using array_ptr = const dpnp::tensor::usm_ndarray *;
using array_names = std::unordered_map;
-using dpctl::tensor::type_dispatch::typenum_t;
+using dpnp::tensor::type_dispatch::typenum_t;
std::string name_of(const array_ptr &arr, const array_names &names);
diff --git a/dpnp/backend/extensions/elementwise_functions/common.hpp b/dpnp/backend/extensions/elementwise_functions/common.hpp
index f3b15c8d6774..8cfdb6b46890 100644
--- a/dpnp/backend/extensions/elementwise_functions/common.hpp
+++ b/dpnp/backend/extensions/elementwise_functions/common.hpp
@@ -36,29 +36,28 @@
#include
-// dpctl tensor headers
+// dpnp tensor headers
#include "kernels/alignment.hpp"
#include "kernels/elementwise_functions/common.hpp"
#include "utils/sycl_utils.hpp"
namespace dpnp::extensions::py_internal::elementwise_common
{
-using dpctl::tensor::kernels::alignment_utils::
- disabled_sg_loadstore_wrapper_krn;
-using dpctl::tensor::kernels::alignment_utils::is_aligned;
-using dpctl::tensor::kernels::alignment_utils::required_alignment;
+using dpnp::tensor::kernels::alignment_utils::disabled_sg_loadstore_wrapper_krn;
+using dpnp::tensor::kernels::alignment_utils::is_aligned;
+using dpnp::tensor::kernels::alignment_utils::required_alignment;
-using dpctl::tensor::kernels::elementwise_common::select_lws;
+using dpnp::tensor::kernels::elementwise_common::select_lws;
-using dpctl::tensor::sycl_utils::sub_group_load;
-using dpctl::tensor::sycl_utils::sub_group_store;
+using dpnp::tensor::sycl_utils::sub_group_load;
+using dpnp::tensor::sycl_utils::sub_group_store;
/**
* @brief Functor for evaluation of a unary function with two output arrays on
* contiguous arrays.
*
* @note It extends UnaryContigFunctor from
- * dpctl::tensor::kernels::elementwise_common namespace.
+ * dpnp::tensor::kernels::elementwise_common namespace.
*/
template class UnaryTwoOutputsType,
@@ -601,7 +600,7 @@ sycl::event
* on strided data.
*
* @note It extends unary_strided_impl from
- * dpctl::tensor::kernels::elementwise_common namespace.
+ * dpnp::tensor::kernels::elementwise_common namespace.
*/
template class UnaryTwoOutputsType,
@@ -634,7 +633,7 @@ sycl::event unary_two_outputs_strided_impl(
using res1Ty = typename UnaryTwoOutputsType::value_type1;
using res2Ty = typename UnaryTwoOutputsType::value_type2;
using IndexerT =
- typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer;
+ typename dpnp::tensor::offset_utils::ThreeOffsets_StridedIndexer;
const IndexerT indexer{nd, arg_offset, res1_offset, res2_offset,
shape_and_strides};
@@ -657,7 +656,7 @@ sycl::event unary_two_outputs_strided_impl(
* on contiguous arrays.
*
* @note It extends binary_contig_impl from
- * dpctl::tensor::kernels::elementwise_common namespace.
+ * dpnp::tensor::kernels::elementwise_common namespace.
*/
template <
typename argTy1,
@@ -750,7 +749,7 @@ sycl::event
* on strided data.
*
* @note It extends binary_strided_impl from
- * dpctl::tensor::kernels::elementwise_common namespace.
+ * dpnp::tensor::kernels::elementwise_common namespace.
*/
template ::value_type2;
using IndexerT =
- typename dpctl::tensor::offset_utils::FourOffsets_StridedIndexer;
+ typename dpnp::tensor::offset_utils::FourOffsets_StridedIndexer;
const IndexerT indexer{nd, arg1_offset, arg2_offset,
res1_offset, res2_offset, shape_and_strides};
diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp
index 6a29c9a33c5a..adaf77a8970e 100644
--- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp
+++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp
@@ -30,21 +30,22 @@
#include
#include
+#include
#include
#include
#include
-#include
-
-#include "dpctl4pybind11.hpp"
#include
#include
-#include
+
+#include
+
+#include "dpnp4pybind11.hpp"
#include "elementwise_functions_type_utils.hpp"
#include "simplify_iteration_space.hpp"
-// dpctl tensor headers
+// dpnp tensor headers
#include "kernels/alignment.hpp"
#include "utils/memory_overlap.hpp"
#include "utils/offset_utils.hpp"
@@ -52,15 +53,15 @@
#include "utils/sycl_alloc_utils.hpp"
#include "utils/type_dispatch.hpp"
-static_assert(std::is_same_v);
+static_assert(std::is_same_v);
namespace dpnp::extensions::py_internal
{
namespace py = pybind11;
-namespace td_ns = dpctl::tensor::type_dispatch;
+namespace td_ns = dpnp::tensor::type_dispatch;
-using dpctl::tensor::kernels::alignment_utils::is_aligned;
-using dpctl::tensor::kernels::alignment_utils::required_alignment;
+using dpnp::tensor::kernels::alignment_utils::is_aligned;
+using dpnp::tensor::kernels::alignment_utils::required_alignment;
using type_utils::_result_typeid;
@@ -69,8 +70,8 @@ template
std::pair
- py_unary_ufunc(const dpctl::tensor::usm_ndarray &src,
- const dpctl::tensor::usm_ndarray &dst,
+ py_unary_ufunc(const dpnp::tensor::usm_ndarray &src,
+ const dpnp::tensor::usm_ndarray &dst,
sycl::queue &q,
const std::vector &depends,
//
@@ -94,12 +95,12 @@ std::pair
}
// check that queues are compatible
- if (!dpctl::utils::queues_are_compatible(q, {src, dst})) {
+ if (!dpnp::utils::queues_are_compatible(q, {src, dst})) {
throw py::value_error(
"Execution queue is not compatible with allocation queues");
}
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(dst);
// check that dimensions are the same
int src_nd = src.get_ndim();
@@ -126,12 +127,12 @@ std::pair
return std::make_pair(sycl::event(), sycl::event());
}
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
// check memory overlap
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
auto const &same_logical_tensors =
- dpctl::tensor::overlap::SameLogicalTensors();
+ dpnp::tensor::overlap::SameLogicalTensors();
if (overlap(src, dst) && !same_logical_tensors(src, dst)) {
throw py::value_error("Arrays index overlapping segments of memory");
}
@@ -160,7 +161,7 @@ std::pair
auto comp_ev = contig_fn(q, src_nelems, src_data, dst_data, depends);
sycl::event ht_ev =
- dpctl::utils::keep_args_alive(q, {src, dst}, {comp_ev});
+ dpnp::utils::keep_args_alive(q, {src, dst}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
}
@@ -205,7 +206,7 @@ std::pair
dst_data + dst_elem_size * dst_offset, depends);
sycl::event ht_ev =
- dpctl::utils::keep_args_alive(q, {src, dst}, {comp_ev});
+ dpnp::utils::keep_args_alive(q, {src, dst}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
}
@@ -219,7 +220,7 @@ std::pair
std::to_string(src_typeid));
}
- using dpctl::tensor::offset_utils::device_allocate_and_pack;
+ using dpnp::tensor::offset_utils::device_allocate_and_pack;
std::vector host_tasks{};
host_tasks.reserve(2);
@@ -236,14 +237,13 @@ std::pair
dst_data, dst_offset, depends, {copy_shape_ev});
// async free of shape_strides temporary
- sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
+ sycl::event tmp_cleanup_ev = dpnp::tensor::alloc_utils::async_smart_free(
q, {strided_fn_ev}, shape_strides_owner);
host_tasks.push_back(tmp_cleanup_ev);
return std::make_pair(
- dpctl::utils::keep_args_alive(q, {src, dst}, host_tasks),
- strided_fn_ev);
+ dpnp::utils::keep_args_alive(q, {src, dst}, host_tasks), strided_fn_ev);
}
/*! @brief Template implementing Python API for querying of type support by
@@ -252,7 +252,7 @@ template
py::object py_unary_ufunc_result_type(const py::dtype &input_dtype,
const output_typesT &output_types)
{
- int tn = input_dtype.num(); // NumPy type numbers are the same as in dpctl
+ int tn = input_dtype.num(); // NumPy type numbers are the same as in dpnp
int src_typeid = -1;
auto array_types = td_ns::usm_ndarray_types();
@@ -286,9 +286,9 @@ template
std::pair
- py_unary_two_outputs_ufunc(const dpctl::tensor::usm_ndarray &src,
- const dpctl::tensor::usm_ndarray &dst1,
- const dpctl::tensor::usm_ndarray &dst2,
+ py_unary_two_outputs_ufunc(const dpnp::tensor::usm_ndarray &src,
+ const dpnp::tensor::usm_ndarray &dst1,
+ const dpnp::tensor::usm_ndarray &dst2,
sycl::queue &q,
const std::vector &depends,
//
@@ -315,13 +315,13 @@ std::pair
}
// check that queues are compatible
- if (!dpctl::utils::queues_are_compatible(q, {src, dst1, dst2})) {
+ if (!dpnp::utils::queues_are_compatible(q, {src, dst1, dst2})) {
throw py::value_error(
"Execution queue is not compatible with allocation queues");
}
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst1);
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst2);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(dst1);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(dst2);
// check that dimensions are the same
int src_nd = src.get_ndim();
@@ -350,15 +350,13 @@ std::pair
return std::make_pair(sycl::event(), sycl::event());
}
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst1,
- src_nelems);
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst2,
- src_nelems);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(dst1, src_nelems);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(dst2, src_nelems);
// check memory overlap
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
auto const &same_logical_tensors =
- dpctl::tensor::overlap::SameLogicalTensors();
+ dpnp::tensor::overlap::SameLogicalTensors();
if ((overlap(src, dst1) && !same_logical_tensors(src, dst1)) ||
(overlap(src, dst2) && !same_logical_tensors(src, dst2)) ||
(overlap(dst1, dst2) && !same_logical_tensors(dst1, dst2))) {
@@ -396,7 +394,7 @@ std::pair
auto comp_ev =
contig_fn(q, src_nelems, src_data, dst1_data, dst2_data, depends);
sycl::event ht_ev =
- dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev});
+ dpnp::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
}
@@ -447,7 +445,7 @@ std::pair
dst2_data + dst2_elem_size * dst2_offset, depends);
sycl::event ht_ev =
- dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev});
+ dpnp::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
}
@@ -461,7 +459,7 @@ std::pair
std::to_string(src_typeid));
}
- using dpctl::tensor::offset_utils::device_allocate_and_pack;
+ using dpnp::tensor::offset_utils::device_allocate_and_pack;
std::vector host_tasks{};
host_tasks.reserve(2);
@@ -478,13 +476,13 @@ std::pair
dst1_offset, dst2_data, dst2_offset, depends, {copy_shape_ev});
// async free of shape_strides temporary
- sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
+ sycl::event tmp_cleanup_ev = dpnp::tensor::alloc_utils::async_smart_free(
q, {strided_fn_ev}, shape_strides_owner);
host_tasks.push_back(tmp_cleanup_ev);
return std::make_pair(
- dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, host_tasks),
+ dpnp::utils::keep_args_alive(q, {src, dst1, dst2}, host_tasks),
strided_fn_ev);
}
@@ -497,7 +495,7 @@ std::pair
py_unary_two_outputs_ufunc_result_type(const py::dtype &input_dtype,
const output_typesT &output_types)
{
- int tn = input_dtype.num(); // NumPy type numbers are the same as in dpctl
+ int tn = input_dtype.num(); // NumPy type numbers are the same as in dpnp
int src_typeid = -1;
auto array_types = td_ns::usm_ndarray_types();
@@ -549,9 +547,9 @@ template
std::pair py_binary_ufunc(
- const dpctl::tensor::usm_ndarray &src1,
- const dpctl::tensor::usm_ndarray &src2,
- const dpctl::tensor::usm_ndarray &dst, // dst = op(src1, src2), elementwise
+ const dpnp::tensor::usm_ndarray &src1,
+ const dpnp::tensor::usm_ndarray &src2,
+ const dpnp::tensor::usm_ndarray &dst, // dst = op(src1, src2), elementwise
sycl::queue &exec_q,
const std::vector &depends,
//
@@ -581,12 +579,12 @@ std::pair py_binary_ufunc(
}
// check that queues are compatible
- if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst})) {
+ if (!dpnp::utils::queues_are_compatible(exec_q, {src1, src2, dst})) {
throw py::value_error(
"Execution queue is not compatible with allocation queues");
}
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(dst);
// check shapes, broadcasting is assumed done by caller
// check that dimensions are the same
@@ -616,11 +614,11 @@ std::pair py_binary_ufunc(
return std::make_pair(sycl::event(), sycl::event());
}
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems);
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
auto const &same_logical_tensors =
- dpctl::tensor::overlap::SameLogicalTensors();
+ dpnp::tensor::overlap::SameLogicalTensors();
if ((overlap(src1, dst) && !same_logical_tensors(src1, dst)) ||
(overlap(src2, dst) && !same_logical_tensors(src2, dst))) {
throw py::value_error("Arrays index overlapping segments of memory");
@@ -652,7 +650,7 @@ std::pair py_binary_ufunc(
if (contig_fn != nullptr) {
auto comp_ev = contig_fn(exec_q, src_nelems, src1_data, 0,
src2_data, 0, dst_data, 0, depends);
- sycl::event ht_ev = dpctl::utils::keep_args_alive(
+ sycl::event ht_ev = dpnp::utils::keep_args_alive(
exec_q, {src1, src2, dst}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
@@ -696,7 +694,7 @@ std::pair py_binary_ufunc(
auto comp_ev = contig_fn(exec_q, src_nelems, src1_data,
src1_offset, src2_data, src2_offset,
dst_data, dst_offset, depends);
- sycl::event ht_ev = dpctl::utils::keep_args_alive(
+ sycl::event ht_ev = dpnp::utils::keep_args_alive(
exec_q, {src1, src2, dst}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
@@ -734,7 +732,7 @@ std::pair py_binary_ufunc(
depends);
return std::make_pair(
- dpctl::utils::keep_args_alive(
+ dpnp::utils::keep_args_alive(
exec_q, {src1, src2, dst}, host_tasks),
comp_ev);
}
@@ -766,7 +764,7 @@ std::pair py_binary_ufunc(
depends);
return std::make_pair(
- dpctl::utils::keep_args_alive(
+ dpnp::utils::keep_args_alive(
exec_q, {src1, src2, dst}, host_tasks),
comp_ev);
}
@@ -785,7 +783,7 @@ std::pair py_binary_ufunc(
" and src2_typeid=" + std::to_string(src2_typeid));
}
- using dpctl::tensor::offset_utils::device_allocate_and_pack;
+ using dpnp::tensor::offset_utils::device_allocate_and_pack;
auto ptr_sz_event_triple_ = device_allocate_and_pack(
exec_q, host_tasks, simplified_shape, simplified_src1_strides,
simplified_src2_strides, simplified_dst_strides);
@@ -799,13 +797,13 @@ std::pair py_binary_ufunc(
src2_data, src2_offset, dst_data, dst_offset, depends, {copy_shape_ev});
// async free of shape_strides temporary
- sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
+ sycl::event tmp_cleanup_ev = dpnp::tensor::alloc_utils::async_smart_free(
exec_q, {strided_fn_ev}, shape_strides_owner);
host_tasks.push_back(tmp_cleanup_ev);
return std::make_pair(
- dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, host_tasks),
+ dpnp::utils::keep_args_alive(exec_q, {src1, src2, dst}, host_tasks),
strided_fn_ev);
}
@@ -815,8 +813,8 @@ py::object py_binary_ufunc_result_type(const py::dtype &input1_dtype,
const py::dtype &input2_dtype,
const output_typesT &output_types_table)
{
- int tn1 = input1_dtype.num(); // NumPy type numbers are the same as in dpctl
- int tn2 = input2_dtype.num(); // NumPy type numbers are the same as in dpctl
+ int tn1 = input1_dtype.num(); // NumPy type numbers are the same as in dpnp
+ int tn2 = input2_dtype.num(); // NumPy type numbers are the same as in dpnp
int src1_typeid = -1;
int src2_typeid = -1;
@@ -855,10 +853,10 @@ template
std::pair
- py_binary_two_outputs_ufunc(const dpctl::tensor::usm_ndarray &src1,
- const dpctl::tensor::usm_ndarray &src2,
- const dpctl::tensor::usm_ndarray &dst1,
- const dpctl::tensor::usm_ndarray &dst2,
+ py_binary_two_outputs_ufunc(const dpnp::tensor::usm_ndarray &src1,
+ const dpnp::tensor::usm_ndarray &src2,
+ const dpnp::tensor::usm_ndarray &dst1,
+ const dpnp::tensor::usm_ndarray &dst2,
sycl::queue &exec_q,
const std::vector &depends,
//
@@ -888,14 +886,13 @@ std::pair
}
// check that queues are compatible
- if (!dpctl::utils::queues_are_compatible(exec_q,
- {src1, src2, dst1, dst2})) {
+ if (!dpnp::utils::queues_are_compatible(exec_q, {src1, src2, dst1, dst2})) {
throw py::value_error(
"Execution queue is not compatible with allocation queues");
}
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst1);
- dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst2);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(dst1);
+ dpnp::tensor::validation::CheckWritable::throw_if_not_writable(dst2);
// check shapes, broadcasting is assumed done by caller
// check that dimensions are the same
@@ -932,15 +929,13 @@ std::pair
return std::make_pair(sycl::event(), sycl::event());
}
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst1,
- src_nelems);
- dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst2,
- src_nelems);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(dst1, src_nelems);
+ dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(dst2, src_nelems);
// check memory overlap
- auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+ auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
auto const &same_logical_tensors =
- dpctl::tensor::overlap::SameLogicalTensors();
+ dpnp::tensor::overlap::SameLogicalTensors();
if ((overlap(src1, dst1) && !same_logical_tensors(src1, dst1)) ||
(overlap(src1, dst2) && !same_logical_tensors(src1, dst2)) ||
(overlap(src2, dst1) && !same_logical_tensors(src2, dst1)) ||
@@ -980,7 +975,7 @@ std::pair
auto comp_ev =
contig_fn(exec_q, src_nelems, src1_data, 0, src2_data, 0,
dst1_data, 0, dst2_data, 0, depends);
- sycl::event ht_ev = dpctl::utils::keep_args_alive(
+ sycl::event ht_ev = dpnp::utils::keep_args_alive(
exec_q, {src1, src2, dst1, dst2}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
@@ -1028,7 +1023,7 @@ std::pair
contig_fn(exec_q, src_nelems, src1_data, src1_offset, src2_data,
src2_offset, dst1_data, dst1_offset, dst2_data,
dst2_offset, depends);
- sycl::event ht_ev = dpctl::utils::keep_args_alive(
+ sycl::event ht_ev = dpnp::utils::keep_args_alive(
exec_q, {src1, src2, dst1, dst2}, {comp_ev});
return std::make_pair(ht_ev, comp_ev);
@@ -1045,7 +1040,7 @@ std::pair
" and src2_typeid=" + std::to_string(src2_typeid));
}
- using dpctl::tensor::offset_utils::device_allocate_and_pack;
+ using dpnp::tensor::offset_utils::device_allocate_and_pack;
auto ptr_sz_event_triple_ = device_allocate_and_pack(
exec_q, host_tasks, simplified_shape, simplified_src1_strides,
simplified_src2_strides, simplified_dst1_strides,
@@ -1061,11 +1056,11 @@ std::pair