Speed up get_formatted_array #231

Merged · 6 commits · May 14, 2019
4 changes: 2 additions & 2 deletions calliope/backend/pyomo/interface.py
@@ -5,7 +5,7 @@
from calliope.backend import run as backend_run
from calliope.backend.pyomo import model as run_pyomo

from calliope.core.util.dataset import reorganise_dataset_dimensions
from calliope.core.util.dataset import reorganise_xarray_dimensions
from calliope.core.util.logging import log_time
from calliope import exceptions
from calliope.core.attrdict import AttrDict
@@ -24,7 +24,7 @@ def access_pyomo_model_inputs(backend_model):
if isinstance(i, po.base.param.IndexedParam)
}

return reorganise_dataset_dimensions(xr.Dataset(all_params))
return reorganise_xarray_dimensions(xr.Dataset(all_params))


def update_pyomo_param(backend_model, param, index, value):
6 changes: 3 additions & 3 deletions calliope/backend/pyomo/model.py
@@ -23,7 +23,7 @@
from calliope.backend.pyomo.util import get_var
from calliope.core.util.tools import load_function
from calliope.core.util.logging import LogWriter, logger
from calliope.core.util.dataset import reorganise_dataset_dimensions
from calliope.core.util.dataset import reorganise_xarray_dimensions
from calliope import exceptions
from calliope.core.attrdict import AttrDict

@@ -228,10 +228,10 @@ def get_result_array(backend_model, model_data):
i.name not in model_data.data_vars.keys()
}

results = reorganise_dataset_dimensions(xr.Dataset(all_variables))
results = reorganise_xarray_dimensions(xr.Dataset(all_variables))

if all_params:
additional_inputs = reorganise_dataset_dimensions(xr.Dataset(all_params))
additional_inputs = reorganise_xarray_dimensions(xr.Dataset(all_params))
for var in additional_inputs.data_vars:
additional_inputs[var].attrs['is_result'] = 0
model_data.update(additional_inputs)
2 changes: 1 addition & 1 deletion calliope/core/io.py
@@ -95,7 +95,7 @@ def save_csv(model_data, path, dropna=True):
for var in data_vars:
in_out = 'results' if model_data[var].attrs['is_result'] else 'inputs'
out_path = os.path.join(path, '{}_{}.csv'.format(in_out, var))
series = split_loc_techs(model_data[var], as_='Series')
series = split_loc_techs(model_data[var], return_as='Series')
if dropna:
series = series.dropna()
series.to_csv(out_path)
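For reference, a minimal usage sketch of this function after the rename (the output directory name and the assumption that a solved Calliope model object called `model` exists are illustrative, not part of this diff):

from calliope.core.io import save_csv

# Assumed: `model` is a built and solved Calliope model.
# Writes one CSV per variable, e.g. out_csvs/results_energy_cap.csv
# for result variables and out_csvs/inputs_resource.csv for inputs.
save_csv(model._model_data, 'out_csvs')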
17 changes: 14 additions & 3 deletions calliope/core/model.py
@@ -242,7 +242,7 @@ def run(self, force_rerun=False, **kwargs):

self.backend = interface(self)

def get_formatted_array(self, var):
def get_formatted_array(self, var, index_format='index'):
"""
Return an xr.DataArray with locs, techs, and carriers as
separate dimensions.
@@ -251,12 +251,23 @@ def get_formatted_array(self, var):
----------
var : str
Decision variable for which to return a DataArray.

index_format : str, default = 'index'
'index' to return the `loc_tech(_carrier)` dimensions as individual
indexes, 'multiindex' to return them as a MultiIndex. The latter
has the benefit of a smaller memory footprint, but does not allow
dimension-specific operations (e.g. formatted_array.sum('locs')).
"""
if var not in self._model_data.data_vars:
raise KeyError("Variable {} not in Model data".format(var))

return split_loc_techs(self._model_data[var])
if index_format not in ['index', 'multiindex']:
raise ValueError("Argument 'index_format' must be one of 'index' or 'multiindex'")
elif index_format == 'index':
return_as = 'DataArray'
elif index_format == 'multiindex':
return_as = 'MultiIndex DataArray'

return split_loc_techs(self._model_data[var], return_as=return_as)

def to_netcdf(self, path):
"""
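A hedged usage sketch of the new keyword argument (assumes a built and solved Calliope model named `model`; 'energy_cap' is just an illustrative variable name):

# Default: 'loc_techs' is split into separate 'locs' and 'techs' dimensions,
# so dimension-specific operations are available.
caps = model.get_formatted_array('energy_cap')
caps.sum('locs')

# MultiIndex variant: smaller memory footprint, but the loc_tech(_carrier)
# dimension stays fused as a pandas MultiIndex.
caps_mi = model.get_formatted_array('energy_cap', index_format='multiindex')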
4 changes: 2 additions & 2 deletions calliope/core/preprocess/time.py
@@ -16,7 +16,7 @@
from calliope.core.attrdict import AttrDict
from calliope.core.util.tools import plugin_load
from calliope.core.preprocess import checks
from calliope.core.util.dataset import reorganise_dataset_dimensions
from calliope.core.util.dataset import reorganise_xarray_dimensions


def apply_time_clustering(model_data, model_run):
@@ -243,7 +243,7 @@ def final_timedimension_processing(model_data):
model_data, final_check_comments, warns, errors = checks.check_model_data(model_data)
exceptions.print_warnings_and_raise_errors(warnings=warns, errors=errors)

model_data = reorganise_dataset_dimensions(model_data)
model_data = reorganise_xarray_dimensions(model_data)
model_data = add_max_demand_timesteps(model_data)

return model_data
95 changes: 49 additions & 46 deletions calliope/core/util/dataset.py
@@ -5,7 +5,6 @@
"""

from calliope import exceptions

import xarray as xr
import pandas as pd

@@ -54,7 +53,32 @@ def get_loc_techs(loc_techs, tech=None, loc=None):
return relevant_loc_techs


def split_loc_techs(data_var, as_='DataArray'):
def reorganise_xarray_dimensions(data):
"""
Reorganise Dataset or DataArray dimensions to be alphabetical *except*
`datesteps` and `timesteps`, which must always come last in any DataArray's dimensions
"""

if not (isinstance(data, xr.Dataset) or isinstance(data, xr.DataArray)):
raise TypeError('Must provide either xarray Dataset or DataArray to be reorganised')

steps = [i for i in ['datesteps', 'timesteps'] if i in data.dims]

if isinstance(data, xr.Dataset):
new_dims = (
sorted(list(set(data.dims.keys()) - set(steps)))
) + steps
elif isinstance(data, xr.DataArray):
new_dims = (
sorted(list(set(data.dims) - set(steps)))
) + steps

updated_data = data.transpose(*new_dims).reindex({k: data[k] for k in new_dims})

return updated_data
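A minimal sketch of the behaviour this function is meant to have, using a toy DataArray (the dimension sizes and coordinate values below are illustrative):

import numpy as np
import xarray as xr
from calliope.core.util.dataset import reorganise_xarray_dimensions

# Toy array with dimensions deliberately out of order
arr = xr.DataArray(
    np.zeros((2, 3, 4)),
    dims=('timesteps', 'techs', 'locs'),
    coords={'timesteps': [0, 1], 'techs': ['a', 'b', 'c'], 'locs': ['w', 'x', 'y', 'z']},
)
print(reorganise_xarray_dimensions(arr).dims)
# expected: ('locs', 'techs', 'timesteps') -- alphabetical, with timesteps last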


def split_loc_techs(data_var, return_as='DataArray'):
"""
Get a DataArray with locations, technologies, and possibly carriers
split into separate coordinates.
@@ -63,9 +87,10 @@ def split_loc_techs(data_var, as_='DataArray'):
----------
data_var : xarray DataArray
Variable from Calliope model_data, to split loc_techs dimension
as_ : string
'DataArray' to return xarray DataArray or 'Series' to return pandas
Series with dimensions as a MultiIndex
return_as : string
'DataArray' to return xarray DataArray, 'MultiIndex DataArray' to return
xarray DataArray with loc_techs as a MultiIndex,
or 'Series' to return pandas Series with dimensions as a MultiIndex

Returns
-------
@@ -76,68 +101,46 @@
loc_tech_dim = [i for i in data_var.dims if 'loc_tech' in i]
if not loc_tech_dim:
loc_tech_dim = [i for i in data_var.dims if 'loc_carrier' in i]
non_loc_tech_dims = list(set(data_var.dims).difference(loc_tech_dim))

if not loc_tech_dim:
if as_ == 'Series':
if return_as == 'Series':
return data_var.to_series()
elif as_ == 'DataArray':
elif return_as in ['DataArray', 'MultiIndex DataArray']:
return data_var
else:
raise ValueError('`as_` must be `DataArray` or `Series`, '
'but `{}` given'.format(as_))
raise ValueError('`return_as` must be `DataArray`, `Series`, or '
'`MultiIndex DataArray`, but `{}` given'.format(return_as))

elif len(loc_tech_dim) > 1:
e = exceptions.ModelError
raise e("Cannot split loc_techs or loc_techs_carrier dimension "
raise e("Cannot split loc_techs or loc_tech_carriers dimension "
"for DataArray {}".format(data_var.name))

loc_tech_dim = loc_tech_dim[0]
# xr.DataArray -> pd.Series allows for string operations
data_var_df = data_var.to_series().unstack(non_loc_tech_dims)
index_list = data_var_df.index.str.split('::').tolist()
data_var_idx = data_var[loc_tech_dim].to_index()
index_list = data_var_idx.str.split('::').tolist()

# carrier_prod, carrier_con, and carrier_export will return an index list
# of length 3; all others will return an index list of length 2
possible_names = ['loc', 'tech', 'carrier']
names = [i + 's' for i in possible_names if i in loc_tech_dim]

data_var_df.index = pd.MultiIndex.from_tuples(index_list, names=names)
data_var_midx = pd.MultiIndex.from_tuples(index_list, names=names)

# If there were no other dimensions other than loc_techs(_carriers) then
# nothing was unstacked on creating data_var_df, so nothing is stacked now
if isinstance(data_var_df, pd.Series):
data_var_series = data_var_df
else:
data_var_series = data_var_df.stack(non_loc_tech_dims)

if as_ == "Series":
return data_var_series

elif as_ == "DataArray":
updated_data_var = xr.DataArray.from_series(data_var_series)
updated_data_var.attrs = data_var.attrs
updated_data_var.name = data_var.name
# Replace the DataArray loc_tech_dim with this new MultiIndex
updated_data_var = data_var.copy()
updated_data_var.coords[loc_tech_dim] = data_var_midx

if return_as == 'MultiIndex DataArray':
return updated_data_var

else:
raise ValueError('`as_` must be `DataArray` or `Series`, '
'but `{}` given'.format(as_))
elif return_as == "Series":
return reorganise_xarray_dimensions(updated_data_var.unstack()).to_series()

elif return_as == "DataArray":
return reorganise_xarray_dimensions(updated_data_var.unstack())

def reorganise_dataset_dimensions(dataset):
"""
Reorganise the Dataset dimensions to be alphabetical *except*
`timesteps`, which must always come last in any DataArray's dimensions
"""
steps = ['datesteps', 'timesteps'] if 'datesteps' in dataset.dims else ['timesteps']

new_dims = (
sorted(list(set(dataset.dims.keys()) - set(steps)))
) + steps

updated_dataset = dataset.transpose(*new_dims).reindex(
{k:dataset[k] for k in new_dims})

return updated_dataset
else:
raise ValueError('`return_as` must be `DataArray`, `Series`, or '
'`MultiIndex DataArray`, but `{}` given'.format(return_as))
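And a quick sketch of the three return_as options on a toy variable (the 'region::tech' coordinate values and the variable name below are illustrative, not taken from a real model):

import numpy as np
import xarray as xr
from calliope.core.util.dataset import split_loc_techs

# Toy variable with the usual 'loc::tech' string coordinate
arr = xr.DataArray(
    np.arange(3.0),
    dims='loc_techs',
    coords={'loc_techs': ['region1::pv', 'region1::wind', 'region2::pv']},
    name='energy_cap',
)

print(split_loc_techs(arr).dims)                              # -> ('locs', 'techs')
print(split_loc_techs(arr, return_as='Series').index.names)   # -> ['locs', 'techs']
midx = split_loc_techs(arr, return_as='MultiIndex DataArray')
print(midx.indexes['loc_techs'].names)                        # -> ['locs', 'techs']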