Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .claude/sweep-security-state.json
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@
"severity_max": null,
"categories_found": [],
"notes": "Clean. Small (271 LOC) module computing 3x3 second-derivative stencil. Cat 1: only single output buffer matching input shape (np.empty at line 37, cupy.empty at line 101) -- bounded by caller, per audit guidance not a finding. Cat 2: _cpu numba kernel uses range(1, rows-1)/range(1, cols-1) with simple (y, x) indices; no flat indexing or queue arrays; numba range loops produce int64. Cat 3: division by cellsize*cellsize on line 44 -- cellsize comes from get_dataarray_resolution() (raster property, not user-direct); cellsize=0 is unrealistic and would produce inf consistently across backends. NaN inputs propagate correctly through float arithmetic. Cat 4: _run_gpu (line 79-86) has full bounds guard via 'i + di <= out.shape[0] - 1 and j + dj <= out.shape[1] - 1' which guarantees i < shape[0] and j < shape[1] before the out[i, j] write; no shared memory; out is pre-filled with NaN at line 102 so threads outside the guard correctly leave NaN. Cat 5: no file I/O. Cat 6: curvature() calls _validate_raster at line 253; all four backend paths explicitly cast to float32 (lines 51, 62, 97, 112) so dtype is normalized before any computation; tests cover int32/int64/uint32/uint64/float32/float64 across numpy/cupy/dask+numpy/dask+cupy."
},
"emerging_hotspots": {
"last_inspected": "2026-04-25",
"issue": 1274,
"severity_max": "HIGH",
"categories_found": [1],
"notes": "HIGH (fixed #1274): emerging_hotspots() public API only validated ndim and shape[0] >= 2. The numpy and cupy backends each materialised three full (T, H, W) cubes (a float32 input copy, gi_zscore float32, gi_bin int8) plus H*W temporaries with no memory check; a (100, 20000, 20000) input projected to ~480 GB. Fixed by adding _available_memory_bytes()/_check_memory(n_times, ny, nx) (12 bytes per cube cell budget) and calling it from the public API for non-dask inputs. Dask paths skip the guard because their map_blocks/map_overlap chunk functions do not materialise the full cube. MEDIUM (unfixed, Cat 6): public API does not call _validate_raster() so non-numeric dtypes fail later with a confusing error rather than a clean TypeError. No GPU kernels in this module (uses convolve_2d). No file I/O. Cat 3 statistical paths are robust: _mann_kendall_statistic_numpy guards var_s <= 0 before sqrt, both numpy and cupy backends raise ZeroDivisionError on global_std == 0, and _mk_pvalue handles z==0 explicitly."
}
}
}
58 changes: 58 additions & 0 deletions xrspatial/emerging_hotspots.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,57 @@
_MK_ALPHA = 0.05 # significance level for Mann-Kendall trend test


# ---------------------------------------------------------------------------
# Memory guard
# ---------------------------------------------------------------------------

# Peak working-memory footprint per cell of the (T, H, W) cube on the
# numpy/cupy backends:
#   data.astype(float32) copy : 4 bytes
#   gi_zscore (float32)       : 4 bytes
#   gi_bin (int8)             : 1 byte
# Plus 2-D temporaries (H*W) for convolved scratch, category, trend_z,
# trend_p, which are negligible relative to the cube for realistic T.
# Round up to 12 bytes per cube cell to cover the small temporaries.
_BYTES_PER_CUBE_CELL = 12


def _available_memory_bytes():
"""Best-effort estimate of available memory in bytes."""
try:
with open('/proc/meminfo', 'r') as f:
for line in f:
if line.startswith('MemAvailable:'):
return int(line.split()[1]) * 1024
except (OSError, ValueError, IndexError):
pass
try:
import psutil
return psutil.virtual_memory().available
except (ImportError, AttributeError):
pass
return 2 * 1024 ** 3


def _check_memory(n_times, ny, nx):
    """Raise MemoryError if the (T, H, W) working buffers would exceed 50% RAM.

    The numpy and cupy backends each materialise three full-cube arrays
    (a float32 input copy, gi_zscore float32, gi_bin int8) plus small
    H*W temporaries.  Budget ~12 bytes per cube cell.
    """
    cube_cells = int(n_times) * int(ny) * int(nx)
    required = cube_cells * _BYTES_PER_CUBE_CELL
    available = _available_memory_bytes()
    # Allow the job only while it fits in half of what looks available.
    if required <= 0.5 * available:
        return
    raise MemoryError(
        f"emerging_hotspots on a ({n_times}, {ny}, {nx}) cube requires "
        f"~{required / 1e9:.1f} GB of working memory but only "
        f"~{available / 1e9:.1f} GB is available. "
        f"Use a smaller raster or pass a dask-backed DataArray for "
        f"out-of-core processing."
    )


# ---------------------------------------------------------------------------
# Mann-Kendall helpers (Numba-JIT for use inside pixel loops)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -695,6 +746,13 @@ def emerging_hotspots(raster, kernel, boundary='nan'):

_validate_boundary(boundary)

# Memory guard for numpy/cupy backends only. The dask backends
# process per-chunk via map_blocks/map_overlap and do not need a
# whole-cube guard.
if da is None or not isinstance(raster.data, da.Array):
n_times, ny, nx = raster.shape
_check_memory(n_times, ny, nx)

mapper = ArrayTypeFunctionMapping(
numpy_func=partial(_emerging_hotspots_numpy, boundary=boundary),
cupy_func=partial(_emerging_hotspots_cupy, boundary=boundary),
Expand Down
84 changes: 84 additions & 0 deletions xrspatial/tests/test_emerging_hotspots.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,87 @@ def test_dask_cupy_matches_numpy(self):
dc_vals, ds_np[var].values, atol=1e-5,
err_msg=f"mismatch in {var}",
)


# ---------------------------------------------------------------------------
# Memory guard (issue #1274)
# ---------------------------------------------------------------------------

class TestMemoryGuard:
    """Memory guard on the public emerging_hotspots() API (#1274).

    The numpy and cupy backends materialise three full ``(T, H, W)``
    cubes (a float32 input copy, gi_zscore float32, gi_bin int8) plus
    small H*W temporaries. The public API estimates this footprint and
    raises ``MemoryError`` before the first allocation when it would
    exceed 50% of available RAM. Dask-backed inputs are not guarded
    because their map_blocks/map_overlap path processes per-chunk.
    """

    def _stub_available(self, monkeypatch, n_bytes):
        """Force _available_memory_bytes() to report exactly *n_bytes*."""
        import sys
        target = sys.modules['xrspatial.emerging_hotspots']
        monkeypatch.setattr(target, '_available_memory_bytes', lambda: n_bytes)

    def test_numpy_raises_when_budget_exceeded(self, monkeypatch):
        """numpy backend should refuse a cube too large for memory."""
        # A (5, 100, 100) cube needs ~600 KB of working memory, so
        # pretending only 1 KB is "available" must trip the guard.
        self._stub_available(monkeypatch, 1024)
        cube = np.random.default_rng(0).standard_normal((5, 100, 100))
        raster = _make_raster(cube.astype('f4'))
        with pytest.raises(MemoryError, match=r"emerging_hotspots"):
            emerging_hotspots(raster, _kernel_3x3())

    def test_numpy_passes_when_budget_ample(self, monkeypatch):
        """Small inputs should pass the guard with normal RAM headroom."""
        self._stub_available(monkeypatch, 64 * 1024 ** 3)
        cube = np.random.default_rng(0).standard_normal((5, 20, 20))
        raster = _make_raster(cube.astype('f4'))
        result = emerging_hotspots(raster, _kernel_3x3())
        assert result['category'].shape == (20, 20)

    def test_error_message_mentions_shape_and_gb(self, monkeypatch):
        """Error message should surface the cube shape and projected size."""
        self._stub_available(monkeypatch, 1024)
        cube = np.random.default_rng(0).standard_normal((4, 50, 50))
        raster = _make_raster(cube.astype('f4'))
        with pytest.raises(MemoryError, match=r"\(4, 50, 50\)"):
            emerging_hotspots(raster, _kernel_3x3())

    def test_cupy_raises_when_budget_exceeded(self, monkeypatch):
        """CuPy backend honours the same in-RAM guard."""
        cp = pytest.importorskip("cupy")
        self._stub_available(monkeypatch, 1024)
        host = np.random.default_rng(0).standard_normal((5, 100, 100))
        raster = _make_raster(cp.asarray(host.astype('f4')))
        with pytest.raises(MemoryError, match=r"emerging_hotspots"):
            emerging_hotspots(raster, _kernel_3x3())

    def test_dask_skips_in_ram_guard(self, monkeypatch):
        """Dask-backed inputs should not be blocked by the in-RAM guard.

        The numpy/cupy guard targets backends that materialise the full
        cube; dask paths process per-chunk via map_blocks/map_overlap.
        """
        dask_array = pytest.importorskip("dask.array")
        # Even with 1 byte "available", the dask path must pass the
        # public-API check because the guard is skipped for dask inputs.
        self._stub_available(monkeypatch, 1)
        host = np.random.default_rng(0).standard_normal((5, 20, 20))
        lazy = dask_array.from_array(host.astype('f4'), chunks=(5, 10, 10))
        raster = _make_raster(lazy)
        result = emerging_hotspots(raster, _kernel_3x3())
        # Materialise the lazy result to confirm the full pipeline runs.
        result = result.compute()
        assert result['category'].shape == (20, 20)
Loading