diff --git a/calliope/backend/pyomo/interface.py b/calliope/backend/pyomo/interface.py index 40a62f599..19f755ea7 100644 --- a/calliope/backend/pyomo/interface.py +++ b/calliope/backend/pyomo/interface.py @@ -5,7 +5,7 @@ from calliope.backend import run as backend_run from calliope.backend.pyomo import model as run_pyomo -from calliope.core.util.dataset import reorganise_dataset_dimensions +from calliope.core.util.dataset import reorganise_xarray_dimensions from calliope.core.util.logging import log_time from calliope import exceptions from calliope.core.attrdict import AttrDict @@ -24,7 +24,7 @@ def access_pyomo_model_inputs(backend_model): if isinstance(i, po.base.param.IndexedParam) } - return reorganise_dataset_dimensions(xr.Dataset(all_params)) + return reorganise_xarray_dimensions(xr.Dataset(all_params)) def update_pyomo_param(backend_model, param, index, value): diff --git a/calliope/backend/pyomo/model.py b/calliope/backend/pyomo/model.py index 977a26bd7..8359ea65f 100644 --- a/calliope/backend/pyomo/model.py +++ b/calliope/backend/pyomo/model.py @@ -23,7 +23,7 @@ from calliope.backend.pyomo.util import get_var from calliope.core.util.tools import load_function from calliope.core.util.logging import LogWriter, logger -from calliope.core.util.dataset import reorganise_dataset_dimensions +from calliope.core.util.dataset import reorganise_xarray_dimensions from calliope import exceptions from calliope.core.attrdict import AttrDict @@ -228,10 +228,10 @@ def get_result_array(backend_model, model_data): i.name not in model_data.data_vars.keys() } - results = reorganise_dataset_dimensions(xr.Dataset(all_variables)) + results = reorganise_xarray_dimensions(xr.Dataset(all_variables)) if all_params: - additional_inputs = reorganise_dataset_dimensions(xr.Dataset(all_params)) + additional_inputs = reorganise_xarray_dimensions(xr.Dataset(all_params)) for var in additional_inputs.data_vars: additional_inputs[var].attrs['is_result'] = 0 
model_data.update(additional_inputs) diff --git a/calliope/core/io.py b/calliope/core/io.py index d94880def..460990b96 100644 --- a/calliope/core/io.py +++ b/calliope/core/io.py @@ -95,7 +95,7 @@ def save_csv(model_data, path, dropna=True): for var in data_vars: in_out = 'results' if model_data[var].attrs['is_result'] else 'inputs' out_path = os.path.join(path, '{}_{}.csv'.format(in_out, var)) - series = split_loc_techs(model_data[var], as_='Series') + series = split_loc_techs(model_data[var], return_as='Series') if dropna: series = series.dropna() series.to_csv(out_path) diff --git a/calliope/core/model.py b/calliope/core/model.py index 0bd733553..c404be72c 100644 --- a/calliope/core/model.py +++ b/calliope/core/model.py @@ -242,7 +242,7 @@ def run(self, force_rerun=False, **kwargs): self.backend = interface(self) - def get_formatted_array(self, var): + def get_formatted_array(self, var, index_format='index'): """ Return an xr.DataArray with locs, techs, and carriers as separate dimensions. @@ -251,12 +251,23 @@ def get_formatted_array(self, var): ---------- var : str Decision variable for which to return a DataArray. - + index_format : str, default = 'index' + 'index' to return the `loc_tech(_carrier)` dimensions as individual + indexes, 'multiindex' to return them as a MultiIndex. The latter + has the benefit of having a smaller memory footprint, but you cannot + undertake dimension specific operations (e.g. 
formatted_array.sum('locs')) """ if var not in self._model_data.data_vars: raise KeyError("Variable {} not in Model data".format(var)) - return split_loc_techs(self._model_data[var]) + if index_format not in ['index', 'multiindex']: + raise ValueError("Argument 'index_format' must be one of 'index' or 'multiindex'") + elif index_format == 'index': + return_as = 'DataArray' + elif index_format == 'multiindex': + return_as = 'MultiIndex DataArray' + + return split_loc_techs(self._model_data[var], return_as=return_as) def to_netcdf(self, path): """ diff --git a/calliope/core/preprocess/time.py b/calliope/core/preprocess/time.py index ddf244d64..3e3bdd49d 100644 --- a/calliope/core/preprocess/time.py +++ b/calliope/core/preprocess/time.py @@ -16,7 +16,7 @@ from calliope.core.attrdict import AttrDict from calliope.core.util.tools import plugin_load from calliope.core.preprocess import checks -from calliope.core.util.dataset import reorganise_dataset_dimensions +from calliope.core.util.dataset import reorganise_xarray_dimensions def apply_time_clustering(model_data, model_run): @@ -243,7 +243,7 @@ def final_timedimension_processing(model_data): model_data, final_check_comments, warns, errors = checks.check_model_data(model_data) exceptions.print_warnings_and_raise_errors(warnings=warns, errors=errors) - model_data = reorganise_dataset_dimensions(model_data) + model_data = reorganise_xarray_dimensions(model_data) model_data = add_max_demand_timesteps(model_data) return model_data diff --git a/calliope/core/util/dataset.py b/calliope/core/util/dataset.py index 72c59cc53..c079c6937 100644 --- a/calliope/core/util/dataset.py +++ b/calliope/core/util/dataset.py @@ -5,7 +5,6 @@ """ from calliope import exceptions - import xarray as xr import pandas as pd @@ -54,7 +53,32 @@ def get_loc_techs(loc_techs, tech=None, loc=None): return relevant_loc_techs -def split_loc_techs(data_var, as_='DataArray'): +def reorganise_xarray_dimensions(data): + """ + Reorganise Dataset or DataArray 
dimensions to be alphabetical *except* + `timesteps`, which must always come last in any DataArray's dimensions + """ + + if not (isinstance(data, xr.Dataset) or isinstance(data, xr.DataArray)): + raise TypeError('Must provide either xarray Dataset or DataArray to be reorganised') + + steps = [i for i in ['datesteps', 'timesteps'] if i in data.dims] + + if isinstance(data, xr.Dataset): + new_dims = ( + sorted(list(set(data.dims.keys()) - set(steps))) + ) + steps + elif isinstance(data, xr.DataArray): + new_dims = ( + sorted(list(set(data.dims) - set(steps))) + ) + steps + + updated_data = data.transpose(*new_dims).reindex({k: data[k] for k in new_dims}) + + return updated_data + + +def split_loc_techs(data_var, return_as='DataArray'): """ Get a DataArray with locations technologies, and possibly carriers split into separate coordinates. @@ -63,9 +87,10 @@ def split_loc_techs(data_var, as_='DataArray'): ---------- data_var : xarray DataArray Variable from Calliope model_data, to split loc_techs dimension - as_ : string - 'DataArray' to return xarray DataArray or 'Series' to return pandas - Series with dimensions as a MultiIndex + return_as : string + 'DataArray' to return xarray DataArray, 'MultiIndex DataArray' to return + xarray DataArray with loc_techs as a MultiIndex, + or 'Series' to return pandas Series with dimensions as a MultiIndex Returns ------- @@ -76,68 +101,46 @@ def split_loc_techs(data_var, as_='DataArray'): loc_tech_dim = [i for i in data_var.dims if 'loc_tech' in i] if not loc_tech_dim: loc_tech_dim = [i for i in data_var.dims if 'loc_carrier' in i] - non_loc_tech_dims = list(set(data_var.dims).difference(loc_tech_dim)) if not loc_tech_dim: - if as_ == 'Series': + if return_as == 'Series': return data_var.to_series() - elif as_ == 'DataArray': + elif return_as in ['DataArray', 'MultiIndex DataArray']: return data_var else: - raise ValueError('`as_` must be `DataArray` or `Series`, ' - 'but `{}` given'.format(as_)) + raise ValueError('`return_as` 
must be `DataArray`, `Series`, or ' +                             '`MultiIndex DataArray`, but `{}` given'.format(return_as)) elif len(loc_tech_dim) > 1: e = exceptions.ModelError - raise e("Cannot split loc_techs or loc_techs_carrier dimension " + raise e("Cannot split loc_techs or loc_tech_carriers dimension " "for DataArray {}".format(data_var.name)) loc_tech_dim = loc_tech_dim[0] # xr.Datarray -> pd.Series allows for string operations - data_var_df = data_var.to_series().unstack(non_loc_tech_dims) - index_list = data_var_df.index.str.split('::').tolist() + data_var_idx = data_var[loc_tech_dim].to_index() + index_list = data_var_idx.str.split('::').tolist() # carrier_prod, carrier_con, and carrier_export will return an index_list # of size 3, all others will be an index list of size 2 possible_names = ['loc', 'tech', 'carrier'] names = [i + 's' for i in possible_names if i in loc_tech_dim] - data_var_df.index = pd.MultiIndex.from_tuples(index_list, names=names) + data_var_midx = pd.MultiIndex.from_tuples(index_list, names=names) - # If there were no other dimensions other than loc_techs(_carriers) then - # nothing was unstacked on creating data_var_df, so nothing is stacked now - if isinstance(data_var_df, pd.Series): - data_var_series = data_var_df - else: - data_var_series = data_var_df.stack(non_loc_tech_dims) - - if as_ == "Series": - return data_var_series - - elif as_ == "DataArray": - updated_data_var = xr.DataArray.from_series(data_var_series) - updated_data_var.attrs = data_var.attrs - updated_data_var.name = data_var.name + # Replace the DataArray loc_tech_dim with this new MultiIndex + updated_data_var = data_var.copy() + updated_data_var.coords[loc_tech_dim] = data_var_midx + if return_as == 'MultiIndex DataArray': return updated_data_var - else: - raise ValueError('`as_` must be `DataArray` or `Series`, ' - 'but `{}` given'.format(as_)) + elif return_as == "Series": + return reorganise_xarray_dimensions(updated_data_var.unstack()).to_series() + elif return_as == "DataArray": + 
return reorganise_xarray_dimensions(updated_data_var.unstack()) -def reorganise_dataset_dimensions(dataset): - """ - Reorganise the Dataset dimensions to be alphabetical *except* - `timesteps`, which must always come last in any DataArray's dimensions - """ - steps = ['datesteps', 'timesteps'] if 'datesteps' in dataset.dims else ['timesteps'] - - new_dims = ( - sorted(list(set(dataset.dims.keys()) - set(steps))) - ) + steps - - updated_dataset = dataset.transpose(*new_dims).reindex( - {k:dataset[k] for k in new_dims}) - - return updated_dataset + else: + raise ValueError('`return_as` must be `DataArray`, `Series`, or ' + '`MultiIndex DataArray`, but `{}` given'.format(return_as)) diff --git a/calliope/test/test_constraint_results.py b/calliope/test/test_constraint_results.py index a998bfc92..5064ed7ad 100644 --- a/calliope/test/test_constraint_results.py +++ b/calliope/test/test_constraint_results.py @@ -34,8 +34,8 @@ def test_group_cap_max(self): model.run() cap_share = ( - model.get_formatted_array('energy_cap').to_pandas().loc[:, ['cold_fusion', 'csp']].sum().sum() / - model.get_formatted_array('energy_cap').to_pandas().loc[:, ['ccgt', 'cold_fusion', 'csp']].sum().sum() + model.get_formatted_array('energy_cap').loc[{'techs': ['cold_fusion', 'csp']}].sum() / + model.get_formatted_array('energy_cap').loc[{'techs': ['ccgt', 'cold_fusion', 'csp']}].sum() ) assert cap_share == approx(0.2) @@ -184,13 +184,10 @@ class TestGroupConstraints: def test_no_group_constraint(self): model = build_model(model_file="group_constraints.yaml") model.run() - expensive_generation = (model.get_formatted_array("carrier_prod") - .to_dataframe() - .reset_index() - .groupby("techs") - .carrier_prod - .sum() - .loc["expensive_supply"]) + expensive_generation = ( + model.get_formatted_array("carrier_prod") + .loc[{'techs': 'expensive_supply'}].sum().item() + ) assert expensive_generation == 0 def test_switched_off_group_constraint(self): @@ -199,13 +196,10 @@ def 
test_switched_off_group_constraint(self): scenario="switching_off_group_constraint" ) model.run() - expensive_generation = (model.get_formatted_array("carrier_prod") - .to_dataframe() - .reset_index() - .groupby("techs") - .carrier_prod - .sum() - .loc["expensive_supply"]) + expensive_generation = ( + model.get_formatted_array("carrier_prod") + .loc[{'techs': 'expensive_supply'}].sum().item() + ) assert expensive_generation == 0 @pytest.mark.xfail(reason="Check not yet implemented.") @@ -285,12 +279,10 @@ def test_location_specific_demand_share_max_constraint(self): scenario='demand_share_max_location_0' ) model.run() - generation = (model.get_formatted_array("carrier_prod") - .sum(dim='timesteps') - .to_dataframe()["carrier_prod"]) + generation = model.get_formatted_array("carrier_prod").sum(dim='timesteps').loc[{'carriers': "electricity"}] demand0 = -model.get_formatted_array("carrier_con").loc[{'locs': '0'}].sum().item() - cheap_generation0 = generation.loc[("0", "cheap_elec_supply", "electricity")] - expensive_generation1 = generation.loc[("1", "expensive_elec_supply", "electricity")] + cheap_generation0 = generation.loc[{'locs': "0", 'techs': "cheap_elec_supply"}].item() + expensive_generation1 = generation.loc[{'locs': "1", 'techs': "expensive_elec_supply"}].item() assert round(cheap_generation0 / demand0, 5) <= 0.3 assert expensive_generation1 == 0 @@ -300,12 +292,10 @@ def test_location_specific_demand_share_min_constraint(self): scenario='demand_share_min_location_0' ) model.run() - generation = (model.get_formatted_array("carrier_prod") - .sum(dim='timesteps') - .to_dataframe()["carrier_prod"]) + generation = model.get_formatted_array("carrier_prod").sum(dim='timesteps').loc[{'carriers': "electricity"}] demand0 = -model.get_formatted_array("carrier_con").loc[{'locs': '0'}].sum().item() - expensive_generation0 = generation.loc[("0", "expensive_elec_supply", "electricity")] - expensive_generation1 = generation.loc[("1", "expensive_elec_supply", 
"electricity")] + expensive_generation0 = generation.loc[{'locs': "0", 'techs': "expensive_elec_supply"}].item() + expensive_generation1 = generation.loc[{'locs': "1", 'techs': "expensive_elec_supply"}].item() assert round(expensive_generation0 / demand0, 5) >= 0.6 assert expensive_generation1 == 0 @@ -413,12 +403,7 @@ def test_no_energy_cap_share_constraint(self): model = build_model(model_file='resource_area.yaml') model.run() cheap_resource_area = (model.get_formatted_array("resource_area") - .to_dataframe() - .reset_index() - .groupby("techs") - .resource_area - .sum() - .loc["cheap_supply"]) + .loc[{'techs': "cheap_supply"}].sum()).item() assert cheap_resource_area == 40 def test_systemwide_resource_area_max_constraint(self): @@ -428,12 +413,7 @@ def test_systemwide_resource_area_max_constraint(self): ) model.run() cheap_resource_area = (model.get_formatted_array("resource_area") - .to_dataframe() - .reset_index() - .groupby("techs") - .resource_area - .sum() - .loc["cheap_supply"]) + .loc[{'techs': "cheap_supply"}].sum()).item() assert cheap_resource_area == 20 def test_systemwide_resource_area_min_constraint(self): @@ -442,14 +422,9 @@ def test_systemwide_resource_area_min_constraint(self): scenario='resource_area_min_systemwide' ) model.run() - resource_area = (model.get_formatted_array("resource_area") - .to_dataframe() - .reset_index() - .groupby("techs") - .resource_area - .sum()) - assert resource_area["cheap_supply"] == 0 - assert resource_area["expensive_supply"] == 20 + resource_area = model.get_formatted_array("resource_area") + assert resource_area.loc[{'techs': "cheap_supply"}].sum().item() == 0 + assert resource_area.loc[{'techs': "expensive_supply"}].sum().item() == 20 def test_location_specific_resource_area_max_constraint(self): model = build_model( @@ -457,10 +432,9 @@ def test_location_specific_resource_area_max_constraint(self): scenario='resource_area_max_location_0' ) model.run() - resource_area = 
(model.get_formatted_array("resource_area") - .to_dataframe()["resource_area"]) - cheap_resource_area0 = resource_area.loc[("0", "cheap_supply")] - cheap_resource_area1 = resource_area.loc[("1", "cheap_supply")] + resource_area = model.get_formatted_array("resource_area") + cheap_resource_area0 = resource_area.loc[{'locs': "0", 'techs': 'cheap_supply'}].item() + cheap_resource_area1 = resource_area.loc[{'locs': "1", 'techs': 'cheap_supply'}].item() assert cheap_resource_area0 == 10 assert cheap_resource_area1 == 20 @@ -470,10 +444,9 @@ def test_location_specific_resource_area_min_constraint(self): scenario='resource_area_min_location_0' ) model.run() - resource_area = (model.get_formatted_array("resource_area") - .to_dataframe()["resource_area"]) - expensive_resource_area0 = resource_area.loc[("0", "expensive_supply")] - expensive_resource_area1 = resource_area.loc[("1", "expensive_supply")] + resource_area = model.get_formatted_array("resource_area") + expensive_resource_area0 = resource_area.loc[{'locs': "0", 'techs': "expensive_supply"}].item() + expensive_resource_area1 = resource_area.loc[{'locs': "1", 'techs': "expensive_supply"}].item() assert expensive_resource_area0 == 10 assert expensive_resource_area1 == 0 @@ -630,12 +603,7 @@ def test_no_supply_share_constraint(self): model = build_model(model_file='supply_share.yaml') model.run() expensive_generation = (model.get_formatted_array("carrier_prod") - .to_dataframe() - .reset_index() - .groupby("techs") - .carrier_prod - .sum() - .loc["expensive_supply"]) + .loc[{'techs': "expensive_supply"}].sum()).item() assert expensive_generation == 0 def test_systemwide_supply_share_max_constraint(self): @@ -677,11 +645,10 @@ def test_location_specific_supply_share_max_constraint(self): ) model.run() generation = (model.get_formatted_array("carrier_prod") - .sum(dim='timesteps') - .to_dataframe()["carrier_prod"]) - cheap_generation0 = generation.loc[("0", "cheap_supply", "electricity")] - expensive_generation0 = 
generation.loc[("0", "expensive_supply", "electricity")] - expensive_generation1 = generation.loc[("1", "expensive_supply", "electricity")] + .sum(dim='timesteps').loc[{'carriers': 'electricity'}]) + cheap_generation0 = generation.loc[{'locs': "0", 'techs': "cheap_supply"}].item() + expensive_generation0 = generation.loc[{'locs': "0", 'techs': "expensive_supply"}].item() + expensive_generation1 = generation.loc[{'locs': "1", 'techs': "expensive_supply"}].item() assert round(cheap_generation0 / (cheap_generation0 + expensive_generation0), 5) <= 0.4 assert expensive_generation1 == 0 @@ -692,11 +659,10 @@ def test_location_specific_supply_share_min_constraint(self): ) model.run() generation = (model.get_formatted_array("carrier_prod") - .sum(dim='timesteps') - .to_dataframe()["carrier_prod"]) - cheap_generation0 = generation.loc[("0", "cheap_supply", "electricity")] - expensive_generation0 = generation.loc[("0", "expensive_supply", "electricity")] - expensive_generation1 = generation.loc[("1", "expensive_supply", "electricity")] + .sum(dim='timesteps').loc[{'carriers': 'electricity'}]) + cheap_generation0 = generation.loc[{'locs': "0", 'techs': "cheap_supply"}].item() + expensive_generation0 = generation.loc[{'locs': "0", 'techs': "expensive_supply"}].item() + expensive_generation1 = generation.loc[{'locs': "1", 'techs': "expensive_supply"}].item() assert round(expensive_generation0 / (cheap_generation0 + expensive_generation0), 5) >= 0.6 assert expensive_generation1 == 0 @@ -707,12 +673,7 @@ def test_no_energy_cap_share_constraint(self): model = build_model(model_file='energy_cap_share.yaml') model.run() expensive_capacity = (model.get_formatted_array("energy_cap") - .to_dataframe() - .reset_index() - .groupby("techs") - .energy_cap - .sum() - .loc["expensive_supply"]) + .loc[{'techs': "expensive_supply"}].sum()) assert expensive_capacity == 0 def test_systemwide_energy_cap_share_max_constraint(self): @@ -755,11 +716,10 @@ def 
test_location_specific_energy_cap_share_max_constraint(self): scenario='energy_cap_share_max_location_0' ) model.run() - capacity = (model.get_formatted_array("energy_cap") - .to_dataframe()["energy_cap"]) - cheap_capacity0 = capacity.loc[("0", "cheap_supply")] - expensive_capacity0 = capacity.loc[("0", "expensive_supply")] - expensive_capacity1 = capacity.loc[("1", "expensive_supply")] + capacity = model.get_formatted_array("energy_cap") + cheap_capacity0 = capacity.loc[{'locs': "0", 'techs': "cheap_supply"}].item() + expensive_capacity0 = capacity.loc[{'locs': "0", 'techs': "expensive_supply"}].item() + expensive_capacity1 = capacity.loc[{'locs': "1", 'techs': "expensive_supply"}].item() assert cheap_capacity0 / (cheap_capacity0 + expensive_capacity0) <= 0.4 assert expensive_capacity1 == 0 @@ -769,11 +729,10 @@ def test_location_specific_energy_cap_share_min_constraint(self): scenario='energy_cap_share_min_location_0' ) model.run() - capacity = (model.get_formatted_array("energy_cap") - .to_dataframe()["energy_cap"]) - cheap_capacity0 = capacity.loc[("0", "cheap_supply")] - expensive_capacity0 = capacity.loc[("0", "expensive_supply")] - expensive_capacity1 = capacity.loc[("1", "expensive_supply")] + capacity = model.get_formatted_array("energy_cap") + cheap_capacity0 = capacity.loc[{'locs': "0", 'techs': "cheap_supply"}].item() + expensive_capacity0 = capacity.loc[{'locs': "0", 'techs': "expensive_supply"}].item() + expensive_capacity1 = capacity.loc[{'locs': "1", 'techs': "expensive_supply"}].item() assert expensive_capacity0 / (cheap_capacity0 + expensive_capacity0) >= 0.6 assert expensive_capacity1 == 0 @@ -784,12 +743,7 @@ def test_no_energy_cap_constraint(self): model = build_model(model_file='energy_cap.yaml') model.run() expensive_capacity = (model.get_formatted_array("energy_cap") - .to_dataframe() - .reset_index() - .groupby("techs") - .energy_cap - .sum() - .loc["expensive_supply"]) + .loc[{'techs': "expensive_supply"}].sum()).item() assert 
expensive_capacity == 0 def test_systemwide_energy_cap_max_constraint(self): @@ -799,12 +753,7 @@ def test_systemwide_energy_cap_max_constraint(self): ) model.run() cheap_capacity = (model.get_formatted_array("energy_cap") - .to_dataframe() - .reset_index() - .groupby("techs") - .energy_cap - .sum() - .loc["cheap_supply"]) + .loc[{'techs': "cheap_supply"}].sum()).item() assert round(cheap_capacity, 5) <= 14 def test_systemwide_energy_cap_min_constraint(self): @@ -814,12 +763,7 @@ def test_systemwide_energy_cap_min_constraint(self): ) model.run() expensive_capacity = (model.get_formatted_array("energy_cap") - .to_dataframe() - .reset_index() - .groupby("techs") - .energy_cap - .sum() - .loc["expensive_supply"]) + .loc[{'techs': "expensive_supply"}].sum()).item() assert round(expensive_capacity, 5) >= 6 def test_location_specific_energy_cap_max_constraint(self): @@ -828,10 +772,9 @@ def test_location_specific_energy_cap_max_constraint(self): scenario='energy_cap_max_location_0' ) model.run() - capacity = (model.get_formatted_array("energy_cap") - .to_dataframe()["energy_cap"]) - cheap_capacity0 = capacity.loc[("0", "cheap_supply")] - expensive_capacity1 = capacity.loc[("1", "expensive_supply")] + capacity = model.get_formatted_array("energy_cap") + cheap_capacity0 = capacity.loc[{'locs': "0", 'techs': "cheap_supply"}].item() + expensive_capacity1 = capacity.loc[{'locs': "1", 'techs': "expensive_supply"}].item() assert round(cheap_capacity0, 5) <= 4 assert expensive_capacity1 == 0 @@ -841,10 +784,9 @@ def test_location_specific_energy_cap_min_constraint(self): scenario='energy_cap_min_location_0' ) model.run() - capacity = (model.get_formatted_array("energy_cap") - .to_dataframe()["energy_cap"]) - expensive_capacity0 = capacity.loc[("0", "expensive_supply")] - expensive_capacity1 = capacity.loc[("1", "expensive_supply")] + capacity = model.get_formatted_array("energy_cap") + expensive_capacity0 = capacity.loc[{'locs': "0", 'techs': "expensive_supply"}].item() + 
expensive_capacity1 = capacity.loc[{'locs': "1", 'techs': "expensive_supply"}].item() assert round(expensive_capacity0, 5) >= 6 assert expensive_capacity1 == 0 diff --git a/calliope/test/test_core_model.py b/calliope/test/test_core_model.py index de602635c..6f5ccb988 100644 --- a/calliope/test/test_core_model.py +++ b/calliope/test/test_core_model.py @@ -2,9 +2,10 @@ import pytest import tempfile +import pandas as pd import calliope - +from calliope.test.common.util import check_error_or_warning class TestModel: @pytest.fixture(scope="module") @@ -34,3 +35,30 @@ def test_info_minimal_model(self): model = calliope.Model(model_location) model.info() + + def test_get_formatted_array_index(self, national_scale_example): + array = national_scale_example.get_formatted_array('resource', index_format='index') + + assert array.dims == ('locs', 'techs', 'timesteps') + + def test_get_formatted_array_multiindex(self, national_scale_example): + array = national_scale_example.get_formatted_array('resource', index_format='multiindex') + + assert array.dims == ('loc_techs_finite_resource', 'timesteps') + assert isinstance(array.loc_techs_finite_resource.to_index(), pd.MultiIndex) + + def test_get_formatted_array_unknown_format(self, national_scale_example): + with pytest.raises(ValueError) as excinfo: + national_scale_example.get_formatted_array('resource', index_format='foo') + + assert check_error_or_warning( + excinfo, "Argument 'index_format' must be one of 'index' or 'multiindex'" + ) + + def test_get_formatted_array_unknown_var(self, national_scale_example): + with pytest.raises(KeyError) as excinfo: + national_scale_example.get_formatted_array('foo') + + assert check_error_or_warning( + excinfo, "Variable foo not in Model data" + ) diff --git a/calliope/test/test_core_util.py b/calliope/test/test_core_util.py index 18377201c..e6b6a1c97 100644 --- a/calliope/test/test_core_util.py +++ b/calliope/test/test_core_util.py @@ -6,6 +6,7 @@ import tempfile import xarray as xr 
+import pandas as pd from calliope.core.util import dataset, observed_dict @@ -13,11 +14,13 @@ memoize, \ memoize_instancemethod +from calliope import exceptions from calliope.core.util.logging import log_time from calliope.core.util.generate_runs import generate_runs from calliope.test.common.util import ( python36_or_higher, - check_error_or_warning + check_error_or_warning, + build_test_model ) _MODEL_NATIONAL = os.path.join( @@ -40,6 +43,22 @@ def loc_techs(self): ] return loc_techs + @pytest.fixture() + def example_dataarray(self): + return xr.DataArray( + [[[0], [1], [2]], [[3], [4], [5]]], dims=('timesteps', 'loc_techs_bar', 'costs'), + coords={'timesteps': ['foo', 'bar'], 'loc_techs_bar': ['1::foo', '2::bar', '3::baz'], + 'costs': ['foo']} + ) + + @pytest.fixture() + def example_one_dim_dataarray(self): + return xr.DataArray([0, 1, 2], dims=('timesteps'), coords={'timesteps': ['foo', 'bar', 'baz']}) + + @pytest.fixture() + def example_dataset(self, example_dataarray): + return xr.Dataset({'foo': example_dataarray, 'bar': example_dataarray.squeeze()}) + def test_get_loc_techs_tech(self, loc_techs): loc_techs = dataset.get_loc_techs(loc_techs, tech='csp') assert loc_techs == [ @@ -57,6 +76,70 @@ def test_get_loc_techs_loc_and_tech(self, loc_techs): loc_techs, tech='demand_power', loc='region1') assert loc_techs == ['region1::demand_power'] + def test_split_loc_tech_to_dataarray(self, example_dataarray): + formatted_array = dataset.split_loc_techs(example_dataarray) + assert isinstance(formatted_array, xr.DataArray) + assert formatted_array.dims == ('costs', 'locs', 'techs', 'timesteps') + + def test_split_loc_tech_to_series(self, example_dataarray): + formatted_series = dataset.split_loc_techs(example_dataarray, return_as='Series') + assert isinstance(formatted_series, pd.Series) + assert formatted_series.index.names == ['costs', 'locs', 'techs', 'timesteps'] + + def test_split_loc_tech_to_multiindex_dataarray(self, example_dataarray): + formatted_array = 
dataset.split_loc_techs(example_dataarray, return_as='MultiIndex DataArray') + assert isinstance(formatted_array, xr.DataArray) + assert formatted_array.dims == ('timesteps', 'loc_techs_bar', 'costs') + assert isinstance(formatted_array.loc_techs_bar.to_index(), pd.MultiIndex) + + def test_split_loc_tech_too_many_loc_tech_dims(self, example_dataarray): + _array = example_dataarray.rename({'costs': 'loc_techs_2'}) + with pytest.raises(exceptions.ModelError) as excinfo: + dataset.split_loc_techs(_array) + assert check_error_or_warning( + excinfo, 'Cannot split loc_techs or loc_tech_carriers dimension' + ) + + def test_split_loc_tech_one_dim_to_dataarray(self, example_one_dim_dataarray): + formatted_array = dataset.split_loc_techs(example_one_dim_dataarray) + assert isinstance(formatted_array, xr.DataArray) + assert formatted_array.dims == ('timesteps',) + + def test_split_loc_tech_one_dim_to_series(self, example_one_dim_dataarray): + formatted_series = dataset.split_loc_techs(example_one_dim_dataarray, return_as='Series') + assert isinstance(formatted_series, pd.Series) + assert formatted_series.index.names == ['timesteps'] + + def test_split_loc_tech_one_dim_to_multiindex_dataarray(self, example_one_dim_dataarray): + formatted_array = dataset.split_loc_techs(example_one_dim_dataarray, return_as='MultiIndex DataArray') + assert isinstance(formatted_array, xr.DataArray) + assert formatted_array.dims == ('timesteps',) + + def test_split_loc_tech_unknown_output(self, example_dataarray, example_one_dim_dataarray): + for array in [example_dataarray, example_one_dim_dataarray]: + with pytest.raises(ValueError) as excinfo: + dataset.split_loc_techs(array, return_as='foo') + assert check_error_or_warning( + excinfo, + '`return_as` must be `DataArray`, `Series`, or `MultiIndex DataArray`' + ) + + def test_reorganise_dataset_dimensions(self, example_dataset): + reorganised_dataset = dataset.reorganise_xarray_dimensions(example_dataset) + dataset_dims = [i for i in 
reorganised_dataset.dims.keys()] + assert dataset_dims == ['costs', 'loc_techs_bar', 'timesteps'] + + def test_reorganise_dataarray_dimensions(self, example_dataarray): + reorganised_dataset = dataset.reorganise_xarray_dimensions(example_dataarray) + assert reorganised_dataset.dims == ('costs', 'loc_techs_bar', 'timesteps') + + def test_fail_reorganise_dimensions(self): + with pytest.raises(TypeError) as excinfo: + dataset.reorganise_xarray_dimensions(['timesteps', 'loc_techs_bar', 'costs']) + assert check_error_or_warning( + excinfo, 'Must provide either xarray Dataset or DataArray to be reorganised' + ) + class TestMemoization: @memoize_instancemethod diff --git a/calliope/test/test_example_models.py b/calliope/test/test_example_models.py index 6f9ff21a9..822c82687 100644 --- a/calliope/test/test_example_models.py +++ b/calliope/test/test_example_models.py @@ -55,10 +55,10 @@ def example_tester(self, solver='cbc', solver_io=None): assert float(model.results.cost.sum()) == approx(38988.7442) assert float( - model.results.systemwide_levelised_cost.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_levelised_cost.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.063543, abs=0.000001) assert float( - model.results.systemwide_capacity_factor.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_capacity_factor.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.2642256, abs=0.000001) def test_nationalscale_example_results_cbc(self): @@ -173,10 +173,10 @@ def example_tester(self, solver='cbc', solver_io=None): assert float(model.results.cost.sum()) == approx(37344.221869) assert float( - model.results.systemwide_levelised_cost.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_levelised_cost.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.063543, abs=0.000001) assert float( - 
model.results.systemwide_capacity_factor.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_capacity_factor.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.25, abs=0.000001) def test_nationalscale_resampled_example_results_cbc(self): @@ -221,12 +221,12 @@ def example_tester_closest(self, solver='cbc', solver_io=None): # Full 1-hourly model run: 0.296973 assert float( - model.results.systemwide_levelised_cost.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_levelised_cost.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.111456, abs=0.000001) # Full 1-hourly model run: 0.064362 assert float( - model.results.systemwide_capacity_factor.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_capacity_factor.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.074809, abs=0.000001) def example_tester_mean(self, solver='cbc', solver_io=None): @@ -236,7 +236,7 @@ def example_tester_mean(self, solver='cbc', solver_io=None): # Full 1-hourly model run: 0.296973 assert float( - model.results.systemwide_levelised_cost.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_levelised_cost.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.126099, abs=0.000001) # Full 1-hourly model run: 0.064362 @@ -252,12 +252,12 @@ def example_tester_storage_inter_cluster(self): # Full 1-hourly model run: 0.296973 assert float( - model.results.systemwide_levelised_cost.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_levelised_cost.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.115866, abs=0.000001) # Full 1-hourly model run: 0.064362 assert float( - model.results.systemwide_capacity_factor.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_capacity_factor.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.074167, 
abs=0.000001) def test_nationalscale_clustered_example_closest_results_cbc(self): @@ -288,12 +288,12 @@ def test_storage_inter_cluster_cyclic(self): # Full 1-hourly model run: 0.296973 assert float( - model.results.systemwide_levelised_cost.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_levelised_cost.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.133111, abs=0.000001) # Full 1-hourly model run: 0.064362 assert float( - model.results.systemwide_capacity_factor.loc[dict(carriers='power')].to_pandas().T['battery'] + model.results.systemwide_capacity_factor.loc[{'carriers': 'power', 'techs': 'battery'}].item() ) == approx(0.071411, abs=0.000001) def test_storage_inter_cluster_no_storage(self): diff --git a/changelog.rst b/changelog.rst index 79e95612d..b2064ac2e 100644 --- a/changelog.rst +++ b/changelog.rst @@ -26,6 +26,8 @@ Release History |new| The ratio of energy capacity and storage capacity can be constrained with a new `energy_cap_per_storage_cap_min` constraint. +|changed| `get_formatted_array` improved in both speed and memory consumption. + |changed| Error on defining a technology in both directions of the same link. |changed| Any inexistent locations and / or technologies defined in model-wide (group) constraints will be caught and filtered out, raising a warning of their existence in the process.