From 957b54fbb79e5f29546d9514715fd470f4e1e9b2 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 2 Jul 2026 15:39:54 +0200 Subject: [PATCH] perf: drop explicit zeros from the constraint matrix (#814) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expressions that broadcast against a dense coordinate store one coefficient per pair, most of them structurally zero. Those explicit zeros were carried all the way into `matrices.A` and thus into every solver handoff. `highspy.Highs.addRows` (and the other direct backends' matrix loaders) scale with *stored* nnz, so the handoff spent most of its work describing zeros — e.g. sparse_network(250) stored 1.5M entries for 18k structural nonzeros (98.8% zeros). Prune them once, centrally, in `_stack` via `eliminate_zeros()`, so `A` and `indicator_A` — and hence HiGHS, gurobi, xpress, copt, mosek and the LP/MPS writers — all hand the solver only structural nonzeros. A zero coefficient never changes a constraint, so this is mathematically identical. Co-Authored-By: Claude Opus 4.8 (1M context) --- linopy/matrices.py | 14 ++++++++++++-- test/test_matrices.py | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/linopy/matrices.py b/linopy/matrices.py index 0feba9c90..9c65d06a2 100644 --- a/linopy/matrices.py +++ b/linopy/matrices.py @@ -22,10 +22,20 @@ def _stack(csrs: list) -> scipy.sparse.csr_array | None: - """Vertically stack CSR blocks, or None when there are none.""" + """ + Vertically stack CSR blocks, or None when there are none. + + Explicit zeros are dropped: expressions that broadcast against a dense + coordinate store one coefficient per pair, most of them zero, and a zero + coefficient never changes a constraint. Keeping them only inflates the + stored nnz handed to the solvers/writers (e.g. ``highspy.addRows`` scales + with stored nnz), so we prune them once, centrally, for every backend. + """ if not csrs: return None - return cast(scipy.sparse.csr_array, scipy.sparse.vstack(csrs, format="csr")) + stacked = cast(scipy.sparse.csr_array, scipy.sparse.vstack(csrs, format="csr")) + stacked.eliminate_zeros() + return stacked def _concat(arrays: list, dtype: type | None = None) -> ndarray: diff --git a/test/test_matrices.py b/test/test_matrices.py index 98a73564b..6da3eaf1d 100644 --- a/test/test_matrices.py +++ b/test/test_matrices.py @@ -68,6 +68,27 @@ def test_matrices_duplicated_variables() -> None: assert np.isin(np.unique(np.array(A)), [0.0, 2.0]).all() +def test_matrices_drops_explicit_zeros() -> None: + # https://github.com/PyPSA/linopy/issues/814 + # Expressions that broadcast against a dense coordinate store one coefficient + # per pair, most of them structurally zero. Those must not reach A, whose + # stored nnz drives the direct-solver handoff (e.g. highspy.addRows). + m = Model() + + x = m.add_variables(coords=[range(4)], name="x") + coeff = xr.DataArray( + np.eye(4), dims=["j", "i"], coords={"j": range(4), "i": range(4)} + ) + m.add_constraints((coeff * x.rename(dim_0="i")).sum("i") <= 1, name="c") + + A = m.matrices.A + assert A is not None + # 4 structural nonzeros on the diagonal; the 12 broadcast zeros are dropped. + assert A.nnz == 4 + assert not (A.data == 0).any() + assert np.array_equal(A.todense(), np.eye(4)) + + def test_matrices_float_c() -> None: # https://github.com/PyPSA/linopy/issues/200 m = Model()