diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f23b25b8..a4ed450f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,7 +15,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "docs" poetry-dependency-install-flags: "--all-extras --only 'main,dev'" # There are still a lot of mypy errors to resolve @@ -31,7 +31,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "docs" poetry-dependency-install-flags: "--all-extras --only 'main,docs,notebooks'" - name: docs @@ -43,7 +43,7 @@ jobs: fail-fast: false matrix: os: [ "ubuntu-latest" ] - python-version: [ "3.9", "3.10", "3.11" ] + python-version: [ "3.10", "3.11" ] runs-on: "${{ matrix.os }}" steps: - name: Check out repository @@ -88,7 +88,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9"] + python-version: ["3.10"] steps: - name: Checkout repository @@ -107,7 +107,7 @@ jobs: fail-fast: false matrix: os: [ "ubuntu-latest" ] - python-version: [ "3.9", "3.10", "3.11" ] + python-version: [ "3.10", "3.11" ] runs-on: "${{ matrix.os }}" steps: - name: Check out repository @@ -127,7 +127,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "check-build-${{ runner.os }}" run-poetry-install: false poetry-dependency-install-flags: "not used" @@ -145,7 +145,7 @@ jobs: uses: actions/checkout@v3 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "licence-check" poetry-dependency-install-flags: "--all-extras" - name: Check licences of dependencies diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 2b8e1aae..fd8b56cc 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -19,7 +19,7 @@ jobs: fetch-depth: 0 - uses: 
./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "deploy" poetry-dependency-install-flags: "--all-extras" - name: Run tests diff --git a/.github/workflows/install-conda.yml b/.github/workflows/install-conda.yml index 4baa0dad..25050dd2 100644 --- a/.github/workflows/install-conda.yml +++ b/.github/workflows/install-conda.yml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.10', '3.11'] steps: - name: Setup conda diff --git a/.github/workflows/install.yaml b/.github/workflows/install.yaml index 278586ca..b51444c4 100644 --- a/.github/workflows/install.yaml +++ b/.github/workflows/install.yaml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: [ "3.9", "3.10", "3.11" ] + python-version: [ "3.10", "3.11" ] steps: - name: Set up Python "${{ matrix.python-version }}" id: setup-python diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b483deac..bec1c2f7 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -19,7 +19,7 @@ jobs: fetch-depth: 0 - uses: ./.github/actions/setup with: - python-version: "3.9" + python-version: "3.10" venv-id: "release" poetry-dependency-install-flags: "--all-extras" - name: Add version to environment diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9543d49f..2ae6ed65 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -11,7 +11,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.9" + python: "3.10" jobs: post_create_environment: - pip install poetry diff --git a/changelog/321.fix.md b/changelog/321.fix.md new file mode 100644 index 00000000..04b6afb7 --- /dev/null +++ b/changelog/321.fix.md @@ -0,0 +1,8 @@ +Restored compatibility with pandas 3.0 and xarray 2025+ by patching four places where the older APIs are no longer 
accepted: + +- `RunGroupBy.__init__` previously called `numpy.issubdtype(col.dtype, numpy.number)` to detect numeric meta columns, which raises `TypeError` on `pandas.StringDtype` (the new default for inferred string columns). The check now goes through `pandas.api.types.is_numeric_dtype`. +- `_xarray._many_to_one` ended with `Series[0]` positional indexing, which pandas 3 has removed. It now uses `.iloc[0]`. +- `ScmRun.convert_unit`, `_binary_op` and `_unary_op` wrote results via `_df.values[:] = ...`. pandas 3 makes `DataFrame.values` return a read-only array, so the in-place write raises `ValueError`. They now assign via `_df.iloc[:, :] = ...`; the binary/unary op sites additionally cast their right-hand side to `float` so that comparison ops (`lt`, `eq`, `ne`, ...), which return `bool` arrays, continue to land as `float64` in the underlying numeric block, preserving the prior silent-cast behaviour. +- `_read_nc` passed `use_cftime=True` to `xarray.load_dataset`, which xarray 2025+ deprecates in favour of `decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)`. The deprecated kwarg otherwise fires a `FutureWarning` on every `ScmRun.from_nc()` call. `_read_nc` now uses the new API where available and falls back to the bare kwarg on xarray versions that do not provide `xr.coders.CFDatetimeCoder`. + +The previously-failing `ScmRun.groupby`, `ScmRun.to_xarray`, `ScmRun.convert_unit`, `ScmRun.to_nc` / `ScmRun.from_nc`, and the arithmetic and comparison operators on `ScmRun` now all run on both pandas 2 and pandas 3.
diff --git a/pyproject.toml b/pyproject.toml index 55ac095a..af9bf99b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,15 +20,13 @@ classifiers = [ "License :: OSI Approved :: BSD License", "Intended Audience :: Developers", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] [tool.poetry.dependencies] # TODO: get rid of this stupid upper level bound -python = ">=3.9,<4" +python = ">=3.10,<4" cftime = ">=1.5" numpy = "*" openscm-units = "*" @@ -39,10 +37,7 @@ pint-pandas = "*" python-dateutil = "*" tqdm = "*" six = "*" -xarray = [ - { version = "*", python = ">=3.10" }, - { version = "<=2025.03", python = ">=3.9,<3.10" }, -] +xarray = "*" nc-time-axis = { version = ">=1.2.0", optional = true } typing-extensions = "*" matplotlib = { version = ">=3.7.1", optional = true } @@ -83,6 +78,7 @@ pre-commit = "^3.3.1" towncrier = "^23.6.0" liccheck = "^0.9.1" pandas-stubs = "<3" +setuptools = "*" [tool.poetry.group.notebooks.dependencies] myst-nb = "^0.17.0" diff --git a/src/scmdata/_xarray.py b/src/scmdata/_xarray.py index c38e4830..a2510a37 100644 --- a/src/scmdata/_xarray.py +++ b/src/scmdata/_xarray.py @@ -198,7 +198,10 @@ def _many_to_one(df, col1, col2): # thanks https://stackoverflow.com/a/59091549 checker = df[[col1, col2]].drop_duplicates() - max_count = checker.groupby(col2).count().max()[0] + # ``.iloc[0]`` rather than ``[0]``: pandas 3.0 removed positional + # integer indexing on label-indexed Series, so ``[0]`` would raise + # ``KeyError: 0`` on the Series returned by the chained ``.max()``. 
+ max_count = checker.groupby(col2).count().max().iloc[0] if max_count < 1: # pragma: no cover # emergency valve raise AssertionError diff --git a/src/scmdata/groupby.py b/src/scmdata/groupby.py index b261c5b4..83358205 100644 --- a/src/scmdata/groupby.py +++ b/src/scmdata/groupby.py @@ -57,8 +57,12 @@ def __init__( m = run.meta.reset_index(drop=True) self.na_fill_value = float(na_fill_value) - # Work around the bad handling of NaN values in groupbys - if any([np.issubdtype(m[c].dtype, np.number) for c in m]): + # Work around the bad handling of NaN values in groupbys. + # pd.api.types.is_numeric_dtype accepts every dtype scmdata + # ever emits; np.issubdtype(..., np.number) raises on + # pandas 3.0's default StringDtype with + # ``TypeError: Cannot interpret ''``. + if any([pd.api.types.is_numeric_dtype(m[c]) for c in m]): if (m == na_fill_value).any(axis=None): raise ValueError( "na_fill_value conflicts with data value. Choose a na_fill_value " diff --git a/src/scmdata/netcdf.py b/src/scmdata/netcdf.py index 5a7bd8a1..68682df2 100644 --- a/src/scmdata/netcdf.py +++ b/src/scmdata/netcdf.py @@ -75,7 +75,13 @@ def _write_nc( def _read_nc(cls: BaseScmRun, fname: FilePath) -> BaseScmRun: - loaded = xr.load_dataset(fname, use_cftime=True) + try: + time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) + loaded = xr.load_dataset(fname, decode_times=time_coder) + except AttributeError: + # xarray older than 2024.09 has no ``xr.coders.CFDatetimeCoder``; + # fall back to the deprecated bare ``use_cftime`` kwarg. 
+ loaded = xr.load_dataset(fname, use_cftime=True) dataframe = loaded.to_dataframe() dataframe = _reshape_to_scmrun_dataframe(dataframe, loaded) diff --git a/src/scmdata/run.py b/src/scmdata/run.py index ef6affce..62be9471 100644 --- a/src/scmdata/run.py +++ b/src/scmdata/run.py @@ -745,10 +745,12 @@ def _perform_op(run: Self) -> Self: res_stacked = np.vstack(res) if use_pint: - run._df.values[:] = res_stacked.magnitude.T + run._df.iloc[:, :] = np.asarray( + res_stacked.magnitude.T, dtype=float + ) run["unit"] = str(res_stacked.units) else: - run._df.values[:] = res_stacked.T + run._df.iloc[:, :] = np.asarray(res_stacked.T, dtype=float) return run return self.copy().groupby("unit").apply(_perform_op) @@ -758,7 +760,7 @@ def _unary_op(self, f: Any, *args: Any, **kwargs: Any) -> Self: res = [f(v) for v in run.values] - run._df.values[:] = np.vstack(res).T + run._df.iloc[:, :] = np.asarray(np.vstack(res).T, dtype=float) return run def drop_meta(self, columns: Iterable[str] | str, inplace: bool = False) -> Self: @@ -2177,7 +2179,7 @@ def apply_units(group): orig_unit = group.get_unique_meta("unit", no_duplicates=True) uc = UnitConverter(orig_unit, unit, context=context) - group._df.values[:] = uc.convert_from(group._df.values) + group._df.iloc[:, :] = uc.convert_from(group._df.values) group["unit"] = unit return group diff --git a/tests/unit/test_netcdf.py b/tests/unit/test_netcdf.py index b6f087c9..69c7fa9e 100644 --- a/tests/unit/test_netcdf.py +++ b/tests/unit/test_netcdf.py @@ -747,3 +747,31 @@ def test_run_to_nc_different_eras(scm_run, shift_times): res = nc_to_run(scm_run.__class__, out_fname) assert_scmdf_almost_equal(scm_run, res) + + +def test_nc_to_run_does_not_emit_use_cftime_futurewarning(scm_run): + # Regression test: xarray 2025+ deprecates the bare ``use_cftime`` + # kwarg on ``xr.load_dataset`` in favour of passing a + # ``CFDatetimeCoder`` via ``decode_times``. 
The deprecation + # previously fired a FutureWarning on every ``ScmRun.from_nc()`` / + # ``nc_to_run`` call. The fix routes through the new API when + # available. + import warnings + + if not hasattr(xr, "coders") or not hasattr(xr.coders, "CFDatetimeCoder"): + pytest.skip("xarray too old for CFDatetimeCoder; legacy path is fine") + + with tempfile.TemporaryDirectory() as tempdir: + out_fname = join(tempdir, "out.nc") + run_to_nc(scm_run, out_fname, dimensions=("scenario",)) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + nc_to_run(scm_run.__class__, out_fname) + + use_cftime_warnings = [ + w for w in caught + if issubclass(w.category, FutureWarning) + and "use_cftime" in str(w.message) + ] + assert not use_cftime_warnings, [str(w.message) for w in use_cftime_warnings]