Implement compatibility with pandas 2.0 #739

Merged 12 commits on Apr 14, 2023
2 changes: 2 additions & 0 deletions RELEASE_NOTES.md
@@ -1,5 +1,7 @@
 # Next Release

+- [#738](https://github.com/IAMconsortium/pyam/pull/738) Ensure compatibility with **pandas v2.0**
+
 # Release v1.8.0

 ## Highlights
426 changes: 21 additions & 405 deletions docs/tutorials/quantiles.ipynb

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pyam/figures.py
@@ -51,7 +51,10 @@ def sankey(df, mapping):
         mapping, orient="index", columns=["source", "target"]
     ).merge(df._data, how="left", left_index=True, right_on="variable")
     label_mapping = dict(
-        [(label, i) for i, label in enumerate(set(_df["source"].append(_df["target"])))]
+        [
+            (label, i)
+            for i, label in enumerate(set(pd.concat([_df["source"], _df["target"]])))
+        ]
     )
     _df.replace(label_mapping, inplace=True)
     region = get_index_levels(_df, "region")[0]
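This hunk is needed because `Series.append` was deprecated in pandas 1.4 and removed in pandas 2.0. A minimal sketch of the replacement pattern, with illustrative data (the labels below are not from the pyam test suite):

```python
import pandas as pd

source = pd.Series(["Coal", "Gas"])
target = pd.Series(["Electricity", "Electricity"])

# pandas < 2.0: labels = set(source.append(target))  # AttributeError in 2.0
labels = set(pd.concat([source, target]))

# enumerate the unique labels, as sankey() does to build its node mapping
label_mapping = {label: i for i, label in enumerate(labels)}
```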
2 changes: 1 addition & 1 deletion pyam/iiasa.py
@@ -526,7 +526,7 @@ def query(self, default_only=True, meta=True, **kwargs):
         if islistable(meta):
             # always merge 'version' (even if not requested explicitly)
             # 'run_id' is required to determine `_args`, dropped later
-            _meta = _meta[set(meta).union(["version", "run_id"])]
+            _meta = _meta[list(set(meta).union(["version", "run_id"]))]
         else:
             _meta = self._query_index(default_only=default_only).set_index(META_IDX)

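Context for this fix: pandas 2.0 rejects a set passed as a column indexer with a `TypeError` (older versions only warned), so the union of requested meta columns must be converted to a list first. A small sketch with made-up column data:

```python
import pandas as pd

_meta = pd.DataFrame({"version": [1], "run_id": [7], "category": ["a"]})
meta = ["category"]

cols = set(meta).union(["version", "run_id"])
# pandas 2.0: _meta[cols] raises TypeError ("Passing a set as an indexer ...")
_meta = _meta[list(cols)]
```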
18 changes: 9 additions & 9 deletions pyam/statistics.py
@@ -198,10 +198,11 @@ def add(self, data, header, row=None, subheader=None):
                 else (levels + [[row]], [[0]] * (self.idx_depth + 1))
             )
             _stats_f.index = pd.MultiIndex(levels=lvls, codes=lbls)
-            _stats = _stats_f if _stats is None else _stats.append(_stats_f)
+            _stats = _stats_f if _stats is None else pd.concat([_stats, _stats_f])

         # add header
         _stats = pd.concat([_stats], keys=[header], names=[""], axis=1)
+        _stats.index.names = [None] * len(_stats.index.names)
         subheader = _stats.columns.get_level_values(1).unique()
         self._add_to_header(header, subheader)

@@ -272,15 +273,14 @@ def format_rows(
            legend = "{} ({})".format(
                center, "max, min" if fullrange is True else "interquartile range"
            )
-           index = row.index.droplevel(2).drop_duplicates()
-           count_arg = dict(tuples=[("count", "")], names=[None, legend])
+
+           row_index = row.index.droplevel(2).drop_duplicates()
+           ret_index = pd.MultiIndex.from_tuples([("count", "")]).append(row_index)
+           ret_index.names = [None, legend]
        else:
-           msg = "displaying multiple range formats simultaneously not supported"
-           raise NotImplementedError(msg)
+           raise ValueError("Use either fullrange or interquartile range.")

-       ret = pd.Series(
-           index=pd.MultiIndex.from_tuples(**count_arg).append(index), dtype=float
-       )
+       ret = pd.Series(index=ret_index, dtype=float)

        row = row.sort_index()
        center = "50%" if center == "median" else center
@@ -295,7 +295,7 @@
            upper, lower = ("max", "min") if fullrange is True else ("75%", "25%")

        # format `describe()` columns to string output
-       for i in index:
+       for i in row_index:
            x = row.loc[i]
            _count = x["count"]
            if np.isnan(_count) or _count == 0:
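`DataFrame.append` was removed in pandas 2.0 along with `Series.append`, so the statistics table is now stacked with `pd.concat`; the extra line resets the index names that the header concat would otherwise carry along. A sketch with illustrative data:

```python
import pandas as pd

part_a = pd.DataFrame({"count": [3.0]}, index=pd.MultiIndex.from_tuples([("scen", "a")]))
part_b = pd.DataFrame({"count": [2.0]}, index=pd.MultiIndex.from_tuples([("scen", "b")]))

# pandas < 2.0: stats = part_a.append(part_b)  # AttributeError in 2.0
stats = pd.concat([part_a, part_b])

# prepend a column level as the header, then drop the index names,
# mirroring the two lines touched in Statistics.add above
stats = pd.concat([stats], keys=["primary"], names=[""], axis=1)
stats.index.names = [None] * len(stats.index.names)
```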
3 changes: 2 additions & 1 deletion pyam/utils.py
@@ -401,7 +401,8 @@ def format_data(df, index, **kwargs):
        df = _format_from_legacy_database(df)

    # replace missing units by an empty string for user-friendly filtering
-   df = df.assign(unit=df["unit"].fillna(""))
+   if "unit" in df.columns:
+       df = df.assign(unit=df["unit"].fillna(""))

    df, time_col, extra_cols = _format_data_to_series(df, index)

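A small sketch of why the guard is needed, using an illustrative frame that lacks a "unit" column (column contents below are made up):

```python
import pandas as pd

df = pd.DataFrame({"variable": ["Primary Energy"], "value": [1.0]})

# without the guard, df["unit"] raises KeyError for unit-less input
if "unit" in df.columns:
    df = df.assign(unit=df["unit"].fillna(""))
```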
2 changes: 1 addition & 1 deletion setup.cfg
@@ -66,7 +66,7 @@ optional_io_formats =
 tutorials =
     pypandoc
     nbformat
-    nbconvert
+    nbconvert >= 7.3
     jupyter_client
     ipykernel
 docs =
6 changes: 3 additions & 3 deletions tests/test_core.py
@@ -86,8 +86,8 @@ def test_init_df_with_float_cols_raises(test_pd_df):


 def test_init_df_with_duplicates_raises(test_df):
-    _df = test_df.timeseries()
-    _df = _df.append(_df.iloc[0]).reset_index()
+    _df = test_df.timeseries().reset_index()
+    _df = pd.concat([_df, _df.iloc[0].to_frame().T])
     match = "0 model_a scen_a World Primary Energy EJ/yr"
     with pytest.raises(ValueError, match=match):
         IamDataFrame(_df)
@@ -604,7 +604,7 @@ def test_interpolate_datetimes(test_df):
     test_df.interpolate(some_date, inplace=True)
     obs = test_df.filter(time=some_date).data["value"].reset_index(drop=True)
     exp = pd.Series([3, 1.5, 4], name="value")
-    pd.testing.assert_series_equal(obs, exp, check_less_precise=True)
+    pd.testing.assert_series_equal(obs, exp, rtol=0.01)
     # redo the interpolation and check that no duplicates are added
     test_df.interpolate(some_date, inplace=True)
     assert not test_df.filter()._data.index.duplicated().any()
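Two pandas 2.0 removals show up in these tests: `DataFrame.append` (replaced by `pd.concat`, with `.to_frame().T` turning the duplicated row back into a one-row frame) and the `check_less_precise` argument of `assert_series_equal`, which is replaced by an explicit tolerance. A sketch of the latter, with illustrative values:

```python
import pandas as pd

obs = pd.Series([3.0, 1.5001, 4.0], name="value")
exp = pd.Series([3.0, 1.5, 4.0], name="value")

# pandas < 2.0: pd.testing.assert_series_equal(obs, exp, check_less_precise=True)
pd.testing.assert_series_equal(obs, exp, rtol=0.01)  # passes within 1% tolerance
```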
13 changes: 8 additions & 5 deletions tests/test_filter.py
@@ -55,7 +55,8 @@ def test_filter_mixed_time_domain(test_df_mixed, arg_year, arg_time):
     assert obs.time_col == "year"
     assert obs.time_domain == "year"
     assert obs.year == [2005]
-    pdt.assert_index_equal(obs.time, pd.Int64Index([2005], name="time"))
+    pdt.assert_index_equal(obs.time, pd.Index([2005], name="time"))
+    assert obs.time.dtype == "int64"


 def test_filter_time_domain_raises(test_df_year):
@@ -106,10 +107,12 @@ def test_filter_day(test_df, test_day):

 def test_filter_with_numpy_64_date_vals(test_df):
     dates = test_df[test_df.time_col].unique()
-    key = "year" if test_df.time_col == "year" else "time"
-    res_0 = test_df.filter(**{key: dates[0]})
-    res = test_df.filter(**{key: dates})
-    assert np.equal(res_0.data[res_0.time_col].values, dates[0]).all()
+    res_0 = test_df.filter(**{test_df.time_col: dates[0]})
+    res = test_df.filter(**{test_df.time_col: dates})
+    if test_df.time_col == "year":
+        assert res_0.data[res_0.time_col].values[0] == dates[0]
+    else:
+        assert res_0.data[res_0.time_col].values[0] == np.datetime64(dates[0])
     assert res.equals(test_df)

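The expected index changes because `pd.Int64Index` (with its `UInt64Index` and `Float64Index` siblings) was removed in pandas 2.0; a plain `pd.Index` now carries the dtype, which the added assertion checks explicitly. A minimal sketch:

```python
import pandas as pd
import pandas.testing as pdt

# pandas < 2.0: expected = pd.Int64Index([2005], name="time")
expected = pd.Index([2005], name="time")

assert expected.dtype == "int64"
pdt.assert_index_equal(expected, pd.Index([2005], name="time"))
```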
2 changes: 1 addition & 1 deletion tests/test_statistics.py
@@ -9,6 +9,7 @@ def stats_add(stats, plot_df):
     # test describing as pd.DataFrame
     primary = plot_df.filter(variable="Primary Energy", year=2005).timeseries()
     stats.add(data=primary, header="primary")
+
     # test describing as unamed pd.Series with `subheader` arg
     coal = plot_df.filter(variable="Primary Energy|Coal").timeseries()[2010]
     coal.name = None
@@ -48,7 +49,6 @@ def test_statistics(plot_df):
     idx = pd.MultiIndex(
         levels=[["category", "scen"], ["b", "a", "test"]],
         codes=[[0, 0, 1], [0, 1, 2]],
-        names=["", ""],
     )
     cols = pd.MultiIndex(
         levels=[["count", "primary", "coal"], ["", 2005]],
2 changes: 1 addition & 1 deletion tests/test_time.py
@@ -39,7 +39,7 @@ def get_subannual_df(date1, date2):
 @pytest.mark.parametrize(
     "time, domain, index",
     [
-        (TEST_YEARS, "year", pd.Int64Index([2005, 2010], name="time")),
+        (TEST_YEARS, "year", pd.Index([2005, 2010], name="time")),
         (TEST_DTS, "datetime", pd.DatetimeIndex(TEST_DTS, name="time")),
         (TEST_TIME_STR, "datetime", pd.DatetimeIndex(TEST_DTS, name="time")),
         (TEST_TIME_STR_HR, "datetime", pd.DatetimeIndex(TEST_TIME_STR_HR, name="time")),
150 changes: 29 additions & 121 deletions tests/test_tutorials.py
@@ -1,138 +1,46 @@
-import io
-import os
-import subprocess
-import sys
 import pytest

-from .conftest import here, IIASA_UNAVAILABLE
-
 try:
     import nbformat
-except:
+    from nbconvert.preprocessors import ExecutePreprocessor
+except ModuleNotFoundError:
     pytest.skip(
         "Missing Jupyter Notebook and related dependencies", allow_module_level=True
     )

-tut_path = os.path.join(here, "..", "docs", "tutorials")
-
-
-# taken from the excellent example here:
-# https://blog.thedataincubator.com/2016/06/testing-jupyter-notebooks/
-
-
-def _notebook_run(path, kernel=None, timeout=60, capsys=None):
-    """Execute a notebook via nbconvert and collect output.
-    :returns (parsed nb object, execution errors)
-    """
-    major_version = sys.version_info[0]
-    dirname, __ = os.path.split(path)
-    os.chdir(dirname)
-    fname = os.path.join(here, "test.ipynb")
-    args = [
-        "jupyter",
-        "nbconvert",
-        "--to",
-        "notebook",
-        "--execute",
-        "--ExecutePreprocessor.timeout={}".format(timeout),
-        "--output",
-        fname,
-        path,
-    ]
-    subprocess.check_call(args)
-
-    nb = nbformat.read(io.open(fname, encoding="utf-8"), nbformat.current_nbformat)
-
-    errors = [
-        output
-        for cell in nb.cells
-        if "outputs" in cell
-        for output in cell["outputs"]
-        if output.output_type == "error"
-    ]
-
-    # removing files fails on CI (GitHub Actions) on Windows & py3.8
-    try:
-        os.remove(fname)
-    except PermissionError:
-        pass
-
-    return nb, errors
-
-
-def test_pyam_first_steps(capsys):
-    fname = os.path.join(tut_path, "pyam_first_steps.ipynb")
-    nb, errors = _notebook_run(fname, capsys=capsys)
-    assert errors == []
-    assert os.path.exists(os.path.join(tut_path, "tutorial_export.xlsx"))
-
-    def has_log_output(cell):
-        return cell["cell_type"] == "code" and any(
-            "Running in a notebook" in output.get("text", "")
-            for output in cell["outputs"]
-        )
-
-    assert any(has_log_output(cell) for cell in nb["cells"])
-
-
-def test_data_table_formats():
-    fname = os.path.join(tut_path, "data_table_formats.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
-
-
-def test_unit_conversion():
-    fname = os.path.join(tut_path, "unit_conversion.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
-
-
-def test_aggregating_downscaling_consistency():
-    fname = os.path.join(tut_path, "aggregating_downscaling_consistency.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
-
-
-def test_subannual_time_resolution():
-    fname = os.path.join(tut_path, "subannual_time_resolution.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
-
-
-def test_pyam_logo():
-    fname = os.path.join(tut_path, "pyam_logo.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
+from .conftest import here, IIASA_UNAVAILABLE
+
+nb_path = here.parent / "docs" / "tutorials"

-def test_ipcc_colors():
-    fname = os.path.join(tut_path, "ipcc_colors.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
+def _run_notebook(file, timeout=30):
+    """Execute a notebook file"""
+    with open(nb_path / f"{file}.ipynb") as f:
+        nb = nbformat.read(f, as_version=4)

-def test_legends():
-    fname = os.path.join(tut_path, "legends.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
+    ep = ExecutePreprocessor(timeout=timeout)
+    ep.preprocess(nb, {"metadata": {"path": nb_path}})


-def test_ops():
-    fname = os.path.join(tut_path, "algebraic_operations.ipynb")
-    nb, errors = _notebook_run(fname)
-    assert errors == []
+@pytest.mark.parametrize(
+    "file",
+    [
+        "pyam_first_steps",
+        "data_table_formats",
+        "unit_conversion",
+        "aggregating_downscaling_consistency",
+        "subannual_time_resolution",
+        "pyam_logo",
+        "ipcc_colors",
+        "legends",
+        "algebraic_operations",
+        "aggregating_variables_and_plotting_with_negative_values",
+    ],
+)
+def test_tutorial_notebook(file):
+    _run_notebook(file)


 @pytest.mark.skipif(IIASA_UNAVAILABLE, reason="IIASA database API unavailable")
-def test_iiasa_dbs():
-    fname = os.path.join(tut_path, "iiasa_dbs.ipynb")
-    nb, errors = _notebook_run(fname, timeout=600)
-    assert errors == []
-
-
-def test_aggregating_variables_and_plotting_with_negative_values():
-    fname = os.path.join(
-        tut_path, "aggregating_variables_and_plotting_with_negative_values.ipynb"
-    )
-    nb, errors = _notebook_run(fname)
-    assert errors == []
+def test_tutorial_iiasa_dbs():
+    _run_notebook("iiasa_dbs")