From 539e468fa4911548af49511af40c3d2a366a35b5 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Sat, 23 May 2026 20:11:42 +0200 Subject: [PATCH 1/9] Fix two pandas 3.0 incompatibilities (StringDtype groupby, Series positional indexing) pandas 3.0 introduced two changes that scmdata 0.18 trips on for any multi-scenario ScmRun: 1. Default StringDtype inference. String columns now come back as pd.StringDtype rather than object. RunGroupBy.__init__ called numpy.issubdtype(col.dtype, numpy.number) to detect numeric meta columns; on StringDtype this raises "TypeError: Cannot interpret '<StringDtype(...)>' as a data type". Route the check through pd.api.types.is_numeric_dtype instead, which returns False for StringDtype and True for numeric dtypes. 2. Removal of Series positional integer indexing. _xarray._many_to_one ended with checker.groupby(col2).count().max()[0]. max() on a DataFrame returns a label-indexed Series and pandas 3.0 removed positional integer indexing on those, so [0] raises 'KeyError: 0'. Use .iloc[0]: same semantics, explicit positional. Both calls are exercised by every multi-scenario ScmRun. The second in particular blocks ScmRun.to_nc entirely on pandas 3.0, so any downstream that streams scenarios to disk (e.g. openscm-runner's NetCDFChunkWriter) currently cannot run. The fixes are backward-compatible: pd.api.types.is_numeric_dtype and Series.iloc[0] have been pandas's canonical APIs since well before pandas 2.0. 
--- src/scmdata/_xarray.py | 5 ++++- src/scmdata/groupby.py | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/scmdata/_xarray.py b/src/scmdata/_xarray.py index c38e4830..a2510a37 100644 --- a/src/scmdata/_xarray.py +++ b/src/scmdata/_xarray.py @@ -198,7 +198,10 @@ def _many_to_one(df, col1, col2): # thanks https://stackoverflow.com/a/59091549 checker = df[[col1, col2]].drop_duplicates() - max_count = checker.groupby(col2).count().max()[0] + # ``.iloc[0]`` rather than ``[0]``: pandas 3.0 removed positional + # integer indexing on label-indexed Series, so ``[0]`` would raise + # ``KeyError: 0`` on the Series returned by the chained ``.max()``. + max_count = checker.groupby(col2).count().max().iloc[0] if max_count < 1: # pragma: no cover # emergency valve raise AssertionError diff --git a/src/scmdata/groupby.py b/src/scmdata/groupby.py index b261c5b4..83358205 100644 --- a/src/scmdata/groupby.py +++ b/src/scmdata/groupby.py @@ -57,8 +57,12 @@ def __init__( m = run.meta.reset_index(drop=True) self.na_fill_value = float(na_fill_value) - # Work around the bad handling of NaN values in groupbys - if any([np.issubdtype(m[c].dtype, np.number) for c in m]): + # Work around the bad handling of NaN values in groupbys. + # pd.api.types.is_numeric_dtype accepts every dtype scmdata + # ever emits; np.issubdtype(..., np.number) raises on + # pandas 3.0's default StringDtype with + # ``TypeError: Cannot interpret ''``. + if any([pd.api.types.is_numeric_dtype(m[c]) for c in m]): if (m == na_fill_value).any(axis=None): raise ValueError( "na_fill_value conflicts with data value. 
Choose a na_fill_value " From ebeb601a736f606ab2f9a719f49029177eac0f86 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Sat, 23 May 2026 20:12:18 +0200 Subject: [PATCH 2/9] Add changelog fragment for PR #321 --- changelog/321.fix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/321.fix.md diff --git a/changelog/321.fix.md b/changelog/321.fix.md new file mode 100644 index 00000000..c4388cf9 --- /dev/null +++ b/changelog/321.fix.md @@ -0,0 +1 @@ +Restored compatibility with pandas 3.0 by replacing two calls that pandas 3 no longer accepts: `numpy.issubdtype(col.dtype, numpy.number)` in `RunGroupBy.__init__` (raised on `StringDtype` meta columns) and `Series[0]` positional indexing in `_xarray._many_to_one` (raised `KeyError: 0`). The previously-failing `ScmRun.groupby` and `ScmRun.to_nc` paths now run on both pandas 2 and pandas 3. From 3b8096224dc9da7808e3620f35cd4dfd7ad37572 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Sun, 31 May 2026 01:28:41 +0200 Subject: [PATCH 3/9] Fix _df assignment under read-only DataFrame.values on pandas 3.x pandas 3.0 makes DataFrame.values return a read-only ndarray, so the existing self._df.values[:] = ... idiom in convert_unit, _binary_op and _unary_op raises ValueError: assignment destination is read-only. Switch to .iloc[:, :] = ..., which goes through pandas' indexer rather than the underlying ndarray and so isn't affected by the read-only change. _binary_op and _unary_op additionally wrap the right-hand side in np.asarray(..., dtype=float) to preserve the prior silent bool-to-float cast that comparison ops (lt, eq, ne, etc.) relied on; .iloc is dtype-strict where the old .values write was not, so the explicit cast keeps the historical semantics of the result frame. 
--- src/scmdata/run.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/scmdata/run.py b/src/scmdata/run.py index ef6affce..62be9471 100644 --- a/src/scmdata/run.py +++ b/src/scmdata/run.py @@ -745,10 +745,12 @@ def _perform_op(run: Self) -> Self: res_stacked = np.vstack(res) if use_pint: - run._df.values[:] = res_stacked.magnitude.T + run._df.iloc[:, :] = np.asarray( + res_stacked.magnitude.T, dtype=float + ) run["unit"] = str(res_stacked.units) else: - run._df.values[:] = res_stacked.T + run._df.iloc[:, :] = np.asarray(res_stacked.T, dtype=float) return run return self.copy().groupby("unit").apply(_perform_op) @@ -758,7 +760,7 @@ def _unary_op(self, f: Any, *args: Any, **kwargs: Any) -> Self: res = [f(v) for v in run.values] - run._df.values[:] = np.vstack(res).T + run._df.iloc[:, :] = np.asarray(np.vstack(res).T, dtype=float) return run def drop_meta(self, columns: Iterable[str] | str, inplace: bool = False) -> Self: @@ -2177,7 +2179,7 @@ def apply_units(group): orig_unit = group.get_unique_meta("unit", no_duplicates=True) uc = UnitConverter(orig_unit, unit, context=context) - group._df.values[:] = uc.convert_from(group._df.values) + group._df.iloc[:, :] = uc.convert_from(group._df.values) group["unit"] = unit return group From 6362496974dc8697d14afd65d282d2436d388e30 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Sun, 31 May 2026 01:30:27 +0200 Subject: [PATCH 4/9] Route _read_nc through CFDatetimeCoder to silence xarray 2025+ deprecation xarray 2025+ deprecates the bare use_cftime kwarg on `xr.load_dataset`, recommending instead that callers pass an `xr.coders.CFDatetimeCoder` via `decode_times`. The deprecation emits a FutureWarning on every `ScmRun.from_nc()` / `nc_to_run` call, which floods downstream notebook output. Prefer the new API when `xr.coders.CFDatetimeCoder` is available and fall back to the bare kwarg on older xarray (< 2024.09) where the new coder does not exist. 
--- src/scmdata/netcdf.py | 8 +++++++- tests/unit/test_netcdf.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/scmdata/netcdf.py b/src/scmdata/netcdf.py index 5a7bd8a1..68682df2 100644 --- a/src/scmdata/netcdf.py +++ b/src/scmdata/netcdf.py @@ -75,7 +75,13 @@ def _write_nc( def _read_nc(cls: BaseScmRun, fname: FilePath) -> BaseScmRun: - loaded = xr.load_dataset(fname, use_cftime=True) + try: + time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) + loaded = xr.load_dataset(fname, decode_times=time_coder) + except AttributeError: + # xarray older than 2024.09 has no ``xr.coders.CFDatetimeCoder``; + # fall back to the deprecated bare ``use_cftime`` kwarg. + loaded = xr.load_dataset(fname, use_cftime=True) dataframe = loaded.to_dataframe() dataframe = _reshape_to_scmrun_dataframe(dataframe, loaded) diff --git a/tests/unit/test_netcdf.py b/tests/unit/test_netcdf.py index b6f087c9..69c7fa9e 100644 --- a/tests/unit/test_netcdf.py +++ b/tests/unit/test_netcdf.py @@ -747,3 +747,31 @@ def test_run_to_nc_different_eras(scm_run, shift_times): res = nc_to_run(scm_run.__class__, out_fname) assert_scmdf_almost_equal(scm_run, res) + + +def test_nc_to_run_does_not_emit_use_cftime_futurewarning(scm_run): + # Regression test: xarray 2025+ deprecates the bare ``use_cftime`` + # kwarg on ``xr.load_dataset`` in favour of passing a + # ``CFDatetimeCoder`` via ``decode_times``. The deprecation + # previously fired a FutureWarning on every ``ScmRun.from_nc()`` / + # ``nc_to_run`` call. The fix routes through the new API when + # available. 
+ import warnings + + if not hasattr(xr, "coders") or not hasattr(xr.coders, "CFDatetimeCoder"): + pytest.skip("xarray too old for CFDatetimeCoder; legacy path is fine") + + with tempfile.TemporaryDirectory() as tempdir: + out_fname = join(tempdir, "out.nc") + run_to_nc(scm_run, out_fname, dimensions=("scenario",)) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + nc_to_run(scm_run.__class__, out_fname) + + use_cftime_warnings = [ + w for w in caught + if issubclass(w.category, FutureWarning) + and "use_cftime" in str(w.message) + ] + assert not use_cftime_warnings, [str(w.message) for w in use_cftime_warnings] From bb9a9beb9e3f463b6cb3da6e6985d51f30760290 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Sun, 31 May 2026 01:31:49 +0200 Subject: [PATCH 5/9] Expand PR #321 changelog to cover convert_unit, binary/unary ops, netcdf --- changelog/321.fix.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/changelog/321.fix.md b/changelog/321.fix.md index c4388cf9..04b6afb7 100644 --- a/changelog/321.fix.md +++ b/changelog/321.fix.md @@ -1 +1,8 @@ -Restored compatibility with pandas 3.0 by replacing two calls that pandas 3 no longer accepts: `numpy.issubdtype(col.dtype, numpy.number)` in `RunGroupBy.__init__` (raised on `StringDtype` meta columns) and `Series[0]` positional indexing in `_xarray._many_to_one` (raised `KeyError: 0`). The previously-failing `ScmRun.groupby` and `ScmRun.to_nc` paths now run on both pandas 2 and pandas 3. +Restored compatibility with pandas 3.0 and xarray 2025+ by patching four places where the older APIs are no longer accepted: + +- `RunGroupBy.__init__` previously called `numpy.issubdtype(col.dtype, numpy.number)` to detect numeric meta columns, which raises `TypeError` on `pandas.StringDtype` (the new default for inferred string columns). Routed through `pandas.api.types.is_numeric_dtype`. 
+- `_xarray._many_to_one` ended with `Series[0]` positional indexing, which pandas 3 has removed. Replaced with `.iloc[0]`. +- `ScmRun.convert_unit`, `_binary_op` and `_unary_op` wrote results via `self._df.values[:] = ...`. pandas 3 makes `DataFrame.values` return a read-only array, so the in-place write raises `ValueError`. Switched to `self._df.iloc[:, :] = ...`; the binary/unary op sites additionally cast their right-hand side to `float` so that comparison ops (`lt`, `eq`, `ne`, ...) which return `bool` arrays continue to land as `float64` in the underlying numeric block, preserving the prior silent-cast behaviour. +- `_read_nc` passed `use_cftime=True` to `xarray.load_dataset`, which xarray 2025+ deprecates in favour of `decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)`. The deprecation otherwise fires a `FutureWarning` on every `ScmRun.from_nc()` call. Routes through the new API where available and falls back to the bare kwarg on xarray older than 2024.09. + +The previously-failing `ScmRun.groupby`, `ScmRun.to_xarray`, `ScmRun.convert_unit`, `ScmRun.to_nc` / `ScmRun.from_nc`, and the arithmetic and comparison operators on `ScmRun` now all run on both pandas 2 and pandas 3. 
From 0eb5678e9d3afbaf083af26283b0355da6a3cd5e Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Mon, 1 Jun 2026 13:31:20 +0200 Subject: [PATCH 6/9] drop 3.9 support --- .github/workflows/ci.yaml | 14 +++++++------- .github/workflows/deploy.yaml | 2 +- .github/workflows/install-conda.yml | 2 +- .github/workflows/install.yaml | 2 +- .github/workflows/release.yaml | 2 +- .readthedocs.yaml | 2 +- pyproject.toml | 9 ++------- 7 files changed, 14 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f23b25b8..a4ed450f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,7 +15,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "docs" poetry-dependency-install-flags: "--all-extras --only 'main,dev'" # There are still a lot of mypy errors to resolve @@ -31,7 +31,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "docs" poetry-dependency-install-flags: "--all-extras --only 'main,docs,notebooks'" - name: docs @@ -43,7 +43,7 @@ jobs: fail-fast: false matrix: os: [ "ubuntu-latest" ] - python-version: [ "3.9", "3.10", "3.11" ] + python-version: [ "3.10", "3.11" ] runs-on: "${{ matrix.os }}" steps: - name: Check out repository @@ -88,7 +88,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10"] steps: - name: Checkout repository @@ -107,7 +107,7 @@ jobs: fail-fast: false matrix: os: [ "ubuntu-latest" ] - python-version: [ "3.9", "3.10", "3.11" ] + python-version: [ "3.10", "3.11" ] runs-on: "${{ matrix.os }}" steps: - name: Check out repository @@ -127,7 +127,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "check-build-${{ runner.os }}" run-poetry-install: false poetry-dependency-install-flags: "not used" @@ 
-145,7 +145,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "licence-check" poetry-dependency-install-flags: "--all-extras" - name: Check licences of dependencies diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 2b8e1aae..fd8b56cc 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -19,7 +19,7 @@ jobs: fetch-depth: 0 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "deploy" poetry-dependency-install-flags: "--all-extras" - name: Run tests diff --git a/.github/workflows/install-conda.yml b/.github/workflows/install-conda.yml index 4baa0dad..25050dd2 100644 --- a/.github/workflows/install-conda.yml +++ b/.github/workflows/install-conda.yml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] steps: - name: Setup conda diff --git a/.github/workflows/install.yaml b/.github/workflows/install.yaml index 278586ca..b51444c4 100644 --- a/.github/workflows/install.yaml +++ b/.github/workflows/install.yaml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: [ "3.9", "3.10", "3.11" ] + python-version: [ "3.10", "3.11" ] steps: - name: Set up Python "${{ matrix.python-version }}" id: setup-python diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b483deac..bec1c2f7 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -19,7 +19,7 @@ jobs: fetch-depth: 0 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "release" poetry-dependency-install-flags: "--all-extras" - name: Add version to environment diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9543d49f..2ae6ed65 100644 --- 
a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,7 +11,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.9" + python: "3.10" jobs: post_create_environment: - pip install poetry diff --git a/pyproject.toml b/pyproject.toml index 55ac095a..52d6e240 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,15 +20,13 @@ classifiers = [ "License :: OSI Approved :: BSD License", "Intended Audience :: Developers", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] [tool.poetry.dependencies] # TODO: get rid of this stupid upper level bound -python = ">=3.9,<4" +python = ">=3.10,<4" cftime = ">=1.5" numpy = "*" openscm-units = "*" @@ -39,10 +37,7 @@ pint-pandas = "*" python-dateutil = "*" tqdm = "*" six = "*" -xarray = [ - { version = "*", python = ">=3.10" }, - { version = "<=2025.03", python = ">=3.9,<3.10" }, -] +xarray = "*" nc-time-axis = { version = ">=1.2.0", optional = true } typing-extensions = "*" matplotlib = { version = ">=3.7.1", optional = true } From 49f51c5698aaea3c997db258a1dc92253f45da15 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Mon, 1 Jun 2026 14:06:22 +0200 Subject: [PATCH 7/9] add setuptools --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 52d6e240..af9bf99b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ pre-commit = "^3.3.1" towncrier = "^23.6.0" liccheck = "^0.9.1" pandas-stubs = "<3" +setuptools = "*" [tool.poetry.group.notebooks.dependencies] myst-nb = "^0.17.0" From 6203a78085c52dade66a6b239bc302c7d2bc8cbd Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Mon, 1 Jun 2026 17:52:31 +0200 Subject: [PATCH 8/9] pin setuptools <81 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index af9bf99b..bad04c6d 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -78,7 +78,7 @@ pre-commit = "^3.3.1" towncrier = "^23.6.0" liccheck = "^0.9.1" pandas-stubs = "<3" -setuptools = "*" +setuptools = "<81" # liccheck imports pkg_resources, removed in setuptools 81 [tool.poetry.group.notebooks.dependencies] myst-nb = "^0.17.0" From 35147dbd2403bfd9a15d93b936d29bb7d2ac1372 Mon Sep 17 00:00:00 2001 From: Ben Sanderson Date: Mon, 1 Jun 2026 18:04:05 +0200 Subject: [PATCH 9/9] license checking --- pyproject.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index bad04c6d..7449f528 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -246,12 +246,19 @@ authorized_licenses = [ "bsd", "bsd license", "BSD 3-Clause", + "BSD-2-Clause", + "BSD-3-Clause", "CC0", + "CC0-1.0", "apache", "apache 2.0", "apache software", "apache software license", "Apache License, Version 2.0", + "Apache-2.0", + # SPDX AND-expressions are not split by liccheck, so list as-is + "MPL-2.0 AND MIT", + "BSD-3-Clause AND 0BSD AND MIT AND Zlib AND CC0-1.0", "Historical Permission Notice and Disclaimer (HPND)", "isc license", "isc license (iscl)", @@ -262,7 +269,9 @@ authorized_licenses = [ "GNU Lesser General Public License v2 or later (LGPLv2+)", "mit", "mit license", + "MIT", "Mozilla Public License 2.0 (MPL 2.0)", + "MPL-2.0", "psf-2.0", "python software foundation", "python software foundation license",