Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions changelog/321.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Restored compatibility with pandas 3.0 and xarray 2025+ by updating four areas of the code that relied on APIs which those releases have removed, made stricter, or deprecated:

- `RunGroupBy.__init__` previously called `numpy.issubdtype(col.dtype, numpy.number)` to detect numeric meta columns, which raises `TypeError` on `pandas.StringDtype` (the new default for inferred string columns). Routed through `pandas.api.types.is_numeric_dtype`.
- `_xarray._many_to_one` ended with `Series[0]` positional indexing, which pandas 3 has removed. Replaced with `.iloc[0]`.
- `ScmRun.convert_unit`, `_binary_op` and `_unary_op` wrote results via `self._df.values[:] = ...`. pandas 3 makes `DataFrame.values` return a read-only array, so the in-place write raises `ValueError`. Switched to `self._df.iloc[:, :] = ...`; the binary/unary op sites additionally cast their right-hand side to `float` so that comparison ops (`lt`, `eq`, `ne`, ...) which return `bool` arrays continue to land as `float64` in the underlying numeric block, preserving the prior silent-cast behaviour.
- `_read_nc` passed `use_cftime=True` to `xarray.load_dataset`, which xarray 2025+ deprecates in favour of `decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)`. Without this change, the deprecation fires a `FutureWarning` on every `ScmRun.from_nc()` call. `_read_nc` now routes through the new API where available and falls back to the bare kwarg on xarray older than 2024.09.

The previously-failing `ScmRun.groupby`, `ScmRun.to_xarray`, `ScmRun.convert_unit`, `ScmRun.to_nc` / `ScmRun.from_nc`, and the arithmetic and comparison operators on `ScmRun` now all run on both pandas 2 and pandas 3.
5 changes: 4 additions & 1 deletion src/scmdata/_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,10 @@ def _many_to_one(df, col1, col2):
# thanks https://stackoverflow.com/a/59091549
checker = df[[col1, col2]].drop_duplicates()

max_count = checker.groupby(col2).count().max()[0]
# ``.iloc[0]`` rather than ``[0]``: pandas 3.0 removed positional
# integer indexing on label-indexed Series, so ``[0]`` would raise
# ``KeyError: 0`` on the Series returned by the chained ``.max()``.
max_count = checker.groupby(col2).count().max().iloc[0]
if max_count < 1: # pragma: no cover # emergency valve
raise AssertionError

Expand Down
8 changes: 6 additions & 2 deletions src/scmdata/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,12 @@ def __init__(
m = run.meta.reset_index(drop=True)
self.na_fill_value = float(na_fill_value)

# Work around the bad handling of NaN values in groupbys
if any([np.issubdtype(m[c].dtype, np.number) for c in m]):
# Work around the bad handling of NaN values in groupbys.
# pd.api.types.is_numeric_dtype accepts every dtype scmdata
# ever emits; np.issubdtype(..., np.number) raises on
# pandas 3.0's default StringDtype with
# ``TypeError: Cannot interpret '<StringDtype(...)>'``.
if any([pd.api.types.is_numeric_dtype(m[c]) for c in m]):
if (m == na_fill_value).any(axis=None):
raise ValueError(
"na_fill_value conflicts with data value. Choose a na_fill_value "
Expand Down
8 changes: 7 additions & 1 deletion src/scmdata/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,13 @@ def _write_nc(


def _read_nc(cls: BaseScmRun, fname: FilePath) -> BaseScmRun:
loaded = xr.load_dataset(fname, use_cftime=True)
try:
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
loaded = xr.load_dataset(fname, decode_times=time_coder)
except AttributeError:
# xarray older than 2024.09 has no ``xr.coders.CFDatetimeCoder``;
# fall back to the deprecated bare ``use_cftime`` kwarg.
loaded = xr.load_dataset(fname, use_cftime=True)
dataframe = loaded.to_dataframe()

dataframe = _reshape_to_scmrun_dataframe(dataframe, loaded)
Expand Down
10 changes: 6 additions & 4 deletions src/scmdata/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,10 +745,12 @@ def _perform_op(run: Self) -> Self:
res_stacked = np.vstack(res)

if use_pint:
run._df.values[:] = res_stacked.magnitude.T
run._df.iloc[:, :] = np.asarray(
res_stacked.magnitude.T, dtype=float
)
run["unit"] = str(res_stacked.units)
else:
run._df.values[:] = res_stacked.T
run._df.iloc[:, :] = np.asarray(res_stacked.T, dtype=float)
return run

return self.copy().groupby("unit").apply(_perform_op)
Expand All @@ -758,7 +760,7 @@ def _unary_op(self, f: Any, *args: Any, **kwargs: Any) -> Self:

res = [f(v) for v in run.values]

run._df.values[:] = np.vstack(res).T
run._df.iloc[:, :] = np.asarray(np.vstack(res).T, dtype=float)
return run

def drop_meta(self, columns: Iterable[str] | str, inplace: bool = False) -> Self:
Expand Down Expand Up @@ -2177,7 +2179,7 @@ def apply_units(group):
orig_unit = group.get_unique_meta("unit", no_duplicates=True)
uc = UnitConverter(orig_unit, unit, context=context)

group._df.values[:] = uc.convert_from(group._df.values)
group._df.iloc[:, :] = uc.convert_from(group._df.values)
group["unit"] = unit

return group
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,3 +747,31 @@ def test_run_to_nc_different_eras(scm_run, shift_times):

res = nc_to_run(scm_run.__class__, out_fname)
assert_scmdf_almost_equal(scm_run, res)


def test_nc_to_run_does_not_emit_use_cftime_futurewarning(scm_run):
    # Regression guard for the netCDF read path: on xarray releases that
    # provide ``xr.coders.CFDatetimeCoder``, the bare ``use_cftime`` kwarg
    # to ``xr.load_dataset`` is deprecated and used to surface as a
    # FutureWarning on every ``nc_to_run`` / ``ScmRun.from_nc()`` call.
    # Reading a freshly written file must therefore record no such warning.
    import warnings

    # Older xarray only has the legacy kwarg, so there is nothing to check.
    coders = getattr(xr, "coders", None)
    if coders is None or not hasattr(coders, "CFDatetimeCoder"):
        pytest.skip("xarray too old for CFDatetimeCoder; legacy path is fine")

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(scm_run, out_fname, dimensions=("scenario",))

        # Record every warning (no de-duplication) raised while reading back.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            nc_to_run(scm_run.__class__, out_fname)

    # Only FutureWarnings that mention ``use_cftime`` count as a regression;
    # unrelated warnings from the read are ignored.
    offending_messages = []
    for recorded in caught:
        if not issubclass(recorded.category, FutureWarning):
            continue
        text = str(recorded.message)
        if "use_cftime" in text:
            offending_messages.append(text)

    assert not offending_messages, offending_messages