openscm · benmsanderson · May 23, 2026 · May 23, 2026 · May 30, 2026 · May 30, 2026
diff --git a/changelog/321.fix.md b/changelog/321.fix.md
@@ -0,0 +1,8 @@
+Restored compatibility with pandas 3.0 and xarray 2025+ by updating four usages of older APIs that have since been removed, changed or deprecated:
+
+- `RunGroupBy.__init__` previously called `numpy.issubdtype(col.dtype, numpy.number)` to detect numeric meta columns, which raises `TypeError` on `pandas.StringDtype` (the new default for inferred string columns). Routed through `pandas.api.types.is_numeric_dtype`.
+- `_xarray._many_to_one` ended with `Series[0]` positional indexing, which pandas 3 has removed. Replaced with `.iloc[0]`.
+- `ScmRun.convert_unit`, `_binary_op` and `_unary_op` wrote results via `self._df.values[:] = ...`. pandas 3 makes `DataFrame.values` return a read-only array, so the in-place write raises `ValueError`. Switched to `self._df.iloc[:, :] = ...`; the binary/unary op sites additionally cast their right-hand side to `float` so that comparison ops (`lt`, `eq`, `ne`, ...) which return `bool` arrays continue to land as `float64` in the underlying numeric block, preserving the prior silent-cast behaviour.
+- `_read_nc` passed `use_cftime=True` to `xarray.load_dataset`, which xarray 2025+ deprecates in favour of `decode_times=xr.coders.CFDatetimeCoder(use_cftime=True)`; left as-is, the deprecation fires a `FutureWarning` on every `ScmRun.from_nc()` call. Routed through the new API where available, with a fallback to the bare kwarg on xarray older than 2024.09.
+
+The previously failing `ScmRun.groupby`, `ScmRun.to_xarray`, `ScmRun.convert_unit`, `ScmRun.to_nc` / `ScmRun.from_nc`, and the arithmetic and comparison operators on `ScmRun` now all run on both pandas 2 and pandas 3.
diff --git a/src/scmdata/_xarray.py b/src/scmdata/_xarray.py
@@ -198,7 +198,10 @@ def _many_to_one(df, col1, col2):
     # thanks https://stackoverflow.com/a/59091549
     checker = df[[col1, col2]].drop_duplicates()
 
-    max_count = checker.groupby(col2).count().max()[0]
+    # ``.iloc[0]`` rather than ``[0]``: pandas 3.0 removed positional
+    # integer indexing on label-indexed Series, so ``[0]`` would raise
+    # ``KeyError: 0`` on the Series returned by the chained ``.max()``.
+    max_count = checker.groupby(col2).count().max().iloc[0]
     if max_count < 1:  # pragma: no cover # emergency valve
         raise AssertionError
 

diff --git a/src/scmdata/groupby.py b/src/scmdata/groupby.py
@@ -57,8 +57,12 @@ def __init__(
         m = run.meta.reset_index(drop=True)
         self.na_fill_value = float(na_fill_value)
 
-        # Work around the bad handling of NaN values in groupbys
-        if any([np.issubdtype(m[c].dtype, np.number) for c in m]):
+        # Work around the bad handling of NaN values in groupbys.
+        # pd.api.types.is_numeric_dtype accepts every dtype scmdata
+        # ever emits; np.issubdtype(..., np.number) raises on
+        # pandas 3.0's default StringDtype with
+        # ``TypeError: Cannot interpret '<StringDtype(...)>'``.
+        if any([pd.api.types.is_numeric_dtype(m[c]) for c in m]):
             if (m == na_fill_value).any(axis=None):
                 raise ValueError(
                     "na_fill_value conflicts with data value. Choose a na_fill_value "

diff --git a/src/scmdata/netcdf.py b/src/scmdata/netcdf.py
@@ -75,7 +75,13 @@ def _write_nc(
 
 
 def _read_nc(cls: BaseScmRun, fname: FilePath) -> BaseScmRun:
-    loaded = xr.load_dataset(fname, use_cftime=True)
+    try:
+        time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
+        loaded = xr.load_dataset(fname, decode_times=time_coder)
+    except AttributeError:
+        # xarray older than 2024.09 has no ``xr.coders.CFDatetimeCoder``;
+        # fall back to the deprecated bare ``use_cftime`` kwarg.
+        loaded = xr.load_dataset(fname, use_cftime=True)
     dataframe = loaded.to_dataframe()
 
     dataframe = _reshape_to_scmrun_dataframe(dataframe, loaded)

diff --git a/src/scmdata/run.py b/src/scmdata/run.py
@@ -745,10 +745,12 @@ def _perform_op(run: Self) -> Self:
             res_stacked = np.vstack(res)
 
             if use_pint:
-                run._df.values[:] = res_stacked.magnitude.T
+                run._df.iloc[:, :] = np.asarray(
+                    res_stacked.magnitude.T, dtype=float
+                )
                 run["unit"] = str(res_stacked.units)
             else:
-                run._df.values[:] = res_stacked.T
+                run._df.iloc[:, :] = np.asarray(res_stacked.T, dtype=float)
             return run
 
         return self.copy().groupby("unit").apply(_perform_op)
@@ -758,7 +760,7 @@ def _unary_op(self, f: Any, *args: Any, **kwargs: Any) -> Self:
 
         res = [f(v) for v in run.values]
 
-        run._df.values[:] = np.vstack(res).T
+        run._df.iloc[:, :] = np.asarray(np.vstack(res).T, dtype=float)
         return run
 
     def drop_meta(self, columns: Iterable[str] | str, inplace: bool = False) -> Self:
@@ -2177,7 +2179,7 @@ def apply_units(group):
             orig_unit = group.get_unique_meta("unit", no_duplicates=True)
             uc = UnitConverter(orig_unit, unit, context=context)
 
-            group._df.values[:] = uc.convert_from(group._df.values)
+            group._df.iloc[:, :] = uc.convert_from(group._df.values)
             group["unit"] = unit
 
             return group

diff --git a/tests/unit/test_netcdf.py b/tests/unit/test_netcdf.py
@@ -747,3 +747,31 @@ def test_run_to_nc_different_eras(scm_run, shift_times):
 
         res = nc_to_run(scm_run.__class__, out_fname)
     assert_scmdf_almost_equal(scm_run, res)
+
+
+def test_nc_to_run_does_not_emit_use_cftime_futurewarning(scm_run):
+    # Regression test: on xarray versions that provide
+    # ``xr.coders.CFDatetimeCoder``, reading a file back with ``nc_to_run``
+    # must not emit a ``FutureWarning`` that mentions the deprecated bare
+    # ``use_cftime`` kwarg.
+    import warnings
+
+    if not hasattr(xr, "coders") or not hasattr(xr.coders, "CFDatetimeCoder"):
+        pytest.skip("xarray too old for CFDatetimeCoder; legacy path is fine")
+
+    with tempfile.TemporaryDirectory() as tempdir:
+        out_fname = join(tempdir, "out.nc")
+        run_to_nc(scm_run, out_fname, dimensions=("scenario",))
+
+        with warnings.catch_warnings(record=True) as caught:
+            # Record every warning raised while reading the file back.
+            warnings.simplefilter("always")
+            nc_to_run(scm_run.__class__, out_fname)
+
+        # Unrelated warnings are ignored; only ``use_cftime`` ones fail the test.
+        use_cftime_warnings = [
+            w for w in caught
+            if issubclass(w.category, FutureWarning)
+            and "use_cftime" in str(w.message)
+        ]
+    assert not use_cftime_warnings, [str(w.message) for w in use_cftime_warnings]