diff --git a/CHANGELOG.md b/CHANGELOG.md
index b2590b227e5..6d6c5d82762 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -88,6 +88,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
 * Fixed test tolerance issues for float16 intermediate precision that became visible when testing against conda-forge's NumPy [#2828](https://github.com/IntelPython/dpnp/pull/2828)
 * Ensured device aware dtype handling in `dpnp.identity` and `dpnp.gradient` [#2835](https://github.com/IntelPython/dpnp/pull/2835)
 * Fixed `dpnp.tensor.round` to use device-aware output dtype for boolean input [#2851](https://github.com/IntelPython/dpnp/pull/2851)
+* Resolved a deadlock in `dpnp.linalg.qr` by releasing the GIL before the OneMKL `orgqr` call to prevent host task contention [#2850](https://github.com/IntelPython/dpnp/pull/2850)
 
 ### Security
 
diff --git a/dpnp/backend/extensions/lapack/orgqr.cpp b/dpnp/backend/extensions/lapack/orgqr.cpp
index 2297d759ea8..09c2523fd48 100644
--- a/dpnp/backend/extensions/lapack/orgqr.cpp
+++ b/dpnp/backend/extensions/lapack/orgqr.cpp
@@ -87,8 +87,17 @@ static sycl::event orgqr_impl(sycl::queue &exec_q,
 
     sycl::event orgqr_event;
     try {
+        // Release GIL to avoid serialization of host task submissions
+        // to the same queue in OneMKL
+        py::gil_scoped_release lock{};
+
         scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
 
+        // mkl_lapack::orgqr() is done through GPU-to-Host reverse offload:
+        // exec_q.submit([&](sycl::handler& cgh) {
+        //   cgh.depends_on(depends);
+        //   cgh.host_task([=]() { orgqr_host(...); });
+        // }).wait();
         orgqr_event = mkl_lapack::orgqr(
             exec_q,
             m,          // The number of rows in the matrix; (0 ≤ m).