From d9a701c42432303b97fefb42ac9faa6b18fe59d8 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:13:47 +0100 Subject: [PATCH 001/111] add /app to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index de278369..94da1122 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ secrets_emhass.yaml .vscode/launch.json .vscode/settings.json .vscode/tasks.json +**/app # Byte-compiled / optimized / DLL files __pycache__/ From c8b71e11ce0aef8ee3315fc758285cfed7de3c55 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 002/111] Add csv-prediction --- src/emhass/command_line.py | 46 ++++++++++++ src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ src/emhass/retrieve_hass.py | 10 +++ src/emhass/utils.py | 25 +++++++ src/emhass/web_server.py | 6 ++ 5 files changed, 226 insertions(+) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index f05aa42e..4d4f75c9 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,6 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization +from emhass.csv_predictor import CsvPredictor from emhass import utils @@ -153,6 +154,12 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() + elif set_type == "csv-predict": + df_input_data, df_input_data_dayahead = None, None + P_PV_forecast, P_load_forecast = None, None + days_list = None + params = json.loads(params) + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -433,6 +440,45 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf +def csv_predict(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
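+
+    A hypothetical runtime payload that would populate ``passed_data`` for this
+    action (the keys mirror those read in the body below; all values are
+    illustrative only, not defaults):
+
+    .. code-block:: python
+
+        runtimeparams = {"model_type": "heating_hours", "csv_file": "prediction.csv",
+                         "sklearn_model": "LinearRegression", "perform_backtest": False,
+                         "independent_variables": ["solar", "degree_days"],
+                         "dependent_variable": "hours", "new_values": [2.24, 5.68],
+                         "csv_predict_entity_id": "sensor.csv_predictor",
+                         "csv_predict_unit_of_measurement": "h",
+                         "csv_predict_friendly_name": "Csv predictor"}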
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + model_type = input_data_dict['params']['passed_data']['model_type'] + csv_file = input_data_dict['params']['passed_data']['csv_file'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + new_values = input_data_dict['params']['passed_data']['new_values'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # Fit the ML model + prediction = csv.predict(perform_backtest=perform_backtest) + + csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] + csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] + csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + # Publish Load forecast + idx = 0 + input_data_dict['rh'].post_data(prediction, idx, + csv_predict_entity_id, + csv_predict_unit_of_measurement, + csv_predict_friendly_name, + type_var = 'csv_predictor') + return prediction + def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. 
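+
+        A minimal instantiation sketch, with purely illustrative argument values
+        matching the signature above:
+
+        >>> predictor = CsvPredictor(data, "heating_hours", "prediction.csv",
+        ...                          ["solar", "degree_days"], "hours",
+        ...                          "LinearRegression", [2.24, 5.68], "/app", logger)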
+ + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. + :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. 
+ + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index 5aeba613..b4708f78 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -303,6 +303,8 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, state = np.round(data_df.loc[data_df.index[idx]],4) elif type_var == 'optim_status': state = data_df.loc[data_df.index[idx]] + elif type_var == 'csv_predictor': + state = data_df[idx] else: state = np.round(data_df.loc[data_df.index[idx]],2) if type_var == 'power': @@ -334,6 +336,14 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, "friendly_name": friendly_name } } + elif type_var == 'csv_predictor': + data = { + "state": state, + "attributes": { + "unit_of_measurement": unit_of_measurement, + "friendly_name": friendly_name + } + } else: data = { "state": "{:.2f}".format(state), diff --git a/src/emhass/utils.py b/src/emhass/utils.py index fb3b8a9c..22043d54 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,6 +155,16 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) + if set_type == "csv-predict": + csv_file = runtimeparams['csv_file'] + independent_variables = runtimeparams['independent_variables'] + dependent_variable = runtimeparams['dependent_variable'] + new_values = runtimeparams['new_values'] + params['passed_data']['csv_file'] = csv_file + params['passed_data']['independent_variables'] = independent_variables + params['passed_data']['dependent_variable'] = dependent_variable + params['passed_data']['new_values'] = new_values + # Treating special data passed for MPC control case if set_type == 'naive-mpc-optim': if 'prediction_horizon' not in runtimeparams.keys(): @@ -320,6 +330,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = 
runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name + if 'csv_predict_entity_id' not in runtimeparams.keys(): + csv_predict_entity_id = "sensor.csv_predictor" + else: + csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] + params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id + if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): + csv_predict_unit_of_measurement = None + else: + csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] + params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement + if 'csv_predict_friendly_name' not in runtimeparams.keys(): + csv_predict_friendly_name = "Csv predictor" + else: + csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] + params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 3f7099e2..40274095 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,6 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import csv_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -189,6 +190,11 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) + elif action_name == 'csv-predict': + app.logger.info(" >> Performing a csv predict...") + csv_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-predict executed... \n' + return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' From 0e2f535a31e51b718724adce548dda48e340e9a4 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 003/111] cleanup --- src/emhass/command_line.py | 12 ++++++---- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 4d4f75c9..b255872e 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -453,19 +453,21 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] + # data = copy.deepcopy(input_data_dict['df_input_data']) + # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) # Fit the ML model - prediction = csv.predict(perform_backtest=perform_backtest) + prediction = csv.predict() + # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. 
:type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From 61f64fd3f41e458db6c6fa9f8af7083c8416663f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 004/111] more cleanup --- src/emhass/command_line.py | 17 +++---- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 2 files changed, 40 insertions(+), 69 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index b255872e..60c342b6 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -441,8 +441,8 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: - """Perform a forecast model fit from training data retrieved from Home Assistant. + debug: Optional[bool] = False) -> np.ndarray: + """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict @@ -450,29 +450,24 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ - # data = copy.deepcopy(input_data_dict['df_input_data']) - # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) - # Fit the ML model + # Predict from csv file prediction = csv.predict() - # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] - # Publish Load forecast + # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, csv_predict_entity_id, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 11b3a3d2d3ca77ab3e13a322928e594c5d527ace Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 005/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From 779c6b8eaabdaecb5a184d1c02eb3720830f90db Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 006/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
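+
+        Note that no pre-trained model is loaded: this method fits the configured
+        `sklearn_model` on the CSV data and then evaluates it on `self.new_values`,
+        e.g. (the return value shown is illustrative only):
+
+        >>> prediction = predictor.predict()
+        >>> prediction  # doctest: +SKIP
+        array([3.58])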
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray: data = self.load_data() if data is not None: X, y = self.prepare_data(data) - + if self.sklearn_model == 'LinearRegression': base_model = LinearRegression() elif self.sklearn_model == 'ElasticNet': @@ -127,9 +126,5 @@ def predict(self) -> np.ndarray: self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) - + return prediction - - - - \ No newline at end of file From 2d0c460e02649366d55a4280eb31b6b3a1a28bd2 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 007/111] Use gridsearchcv and split up fit and predict --- src/emhass/command_line.py | 87 +++++++++++++++--- src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++------------- src/emhass/utils.py | 16 +++- src/emhass/web_server.py | 11 ++- 4 files changed, 210 insertions(+), 77 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 60c342b6..e2b86335 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -154,7 +154,36 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - elif set_type == "csv-predict": + + elif set_type == "csv-model-fit": + + df_input_data_dayahead = None + P_PV_forecast, P_load_forecast = None, None + params = json.loads(params) + days_list = None + csv_file = params['passed_data']['csv_file'] + independent_variables = params['passed_data']['independent_variables'] + dependent_variable = params['passed_data']['dependent_variable'] + timestamp = params['passed_data']['timestamp'] + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) + + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(independent_variables) + required_columns.append(dependent_variable) + if timestamp is not None: + required_columns.append(timestamp) + + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + elif set_type == "csv-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -440,7 +469,41 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_predict(input_data_dict: dict, logger: logging.Logger, +def csv_model_fit(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + # csv_file = input_data_dict['params']['passed_data']['csv_file'] + model_type = input_data_dict['params']['passed_data']['model_type'] + # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + timestamp = input_data_dict['params']['passed_data']['timestamp'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + date_features = input_data_dict['params']['passed_data']['date_features'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + # Fit the ML model + df_pred = csv.fit(date_features=date_features) + # Save model + if not debug: + filename = model_type+'_csv.pkl' + with open(pathlib.Path(root) / filename, 'wb') as outp: + pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + # return df_pred, csv + +def csv_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> np.ndarray: """Perform a prediction from csv file. @@ -453,16 +516,20 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - csv_file = input_data_dict['params']['passed_data']['csv_file'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - new_values = input_data_dict['params']['passed_data']['new_values'] + model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - # The ML forecaster object - csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + filename = model_type+'_csv.pkl' + filename_path = pathlib.Path(root) / filename + if not debug: + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + csv = pickle.load(inp) + else: + logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + return + new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict() + prediction = csv.predict(new_values) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
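+
+        A minimal instantiation sketch under this new signature (argument values
+        are illustrative only):
+
+        >>> predictor = CsvPredictor(data, "heating_hours",
+        ...                          ["solar", "degree_days"], "hours",
+        ...                          "timestamp", logger)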
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - self.csv_file = csv_file + self.data = data self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp - def load_data(self) -> pd.DataFrame: - """Load the data.""" - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(inp) - else: - self.logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + self.csv_file + " was not found.") - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: + :param data: The input DataFrame + :type data: pd.DataFrame + :return: The DataFrame with the added features + :rtype: pd.DataFrame + """ + df = copy.deepcopy(data) + df['timestamp']= pd.to_datetime(df['timestamp']) + if 'year' in date_features: + df['year'] = [i.month for i in df['timestamp']] + if 'month' in date_features: + df['month'] = [i.month for i in df['timestamp']] + if 'day_of_week' in date_features: + df['day_of_week'] = [i.dayofweek for i in df['timestamp']] + if 'day_of_year' in date_features: + df['day_of_year'] = [i.dayofyear for i in df['timestamp']] + if 'day' in date_features: + df['day'] = [i.day for i in df['timestamp']] + if 'hour' in date_features: + df['hour'] = [i.day for i in df['timestamp']] + + return df + + def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Prepare the data. + Fit the model using the provided data. :param data: Input Data :type data: pd.DataFrame - :return: A tuple containing the train data. 
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 22043d54..ddc834ae 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,14 +155,26 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-predict": + if set_type == "csv-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] - new_values = runtimeparams['new_values'] params['passed_data']['csv_file'] = csv_file params['passed_data']['independent_variables'] = independent_variables params['passed_data']['dependent_variable'] = dependent_variable + if 'timestamp' not in runtimeparams.keys(): + params['passed_data']['timestamp'] = None + else: + timestamp = runtimeparams['timestamp'] + params['passed_data']['timestamp'] = timestamp + if 'date_features' not in runtimeparams.keys(): + params['passed_data']['date_features'] = [] + else: + date_features = runtimeparams['date_features'] + params['passed_data']['date_features'] = date_features + + if set_type == "csv-model-predict": + new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values # Treating special data passed for MPC control case diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 40274095..8db64091 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_predict +from emhass.command_line import csv_model_fit, csv_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -190,9 +190,14 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-predict': + elif action_name == 'csv-model-fit': + app.logger.info(" >> Performing a csv fit...") + 
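+        # This branch would be exercised with e.g. (hypothetical call; the
+        # endpoint path, port and payload values are illustrative only):
+        #   curl -i -H "Content-Type: application/json" -X POST \
+        #     -d '{"csv_file": "prediction.csv", "independent_variables": ["solar", "degree_days"], "dependent_variable": "hours"}' \
+        #     http://localhost:5000/action/csv-model-fit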
csv_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-fit executed... \n' + return make_response(msg, 201) + elif action_name == 'csv-model-predict': app.logger.info(" >> Performing a csv predict...") - csv_predict(input_data_dict, app.logger) + csv_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action csv-predict executed... \n' return make_response(msg, 201) else: From 19337101bbba1a134379b26a20edf565e7b44606 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 008/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. From 0bf50a2b3c7cac5166c7b939668c5bd5aefb4105 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 009/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From ac45455f57eabe4205da8043481c1f0710dd7703 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 010/111] cleanup + docstrings --- src/emhass/command_line.py | 17 ++++---------- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 34 
deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index e2b86335..33af80c0 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -470,7 +470,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -479,32 +479,26 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ data = copy.deepcopy(input_data_dict['df_input_data']) - # csv_file = input_data_dict['params']['passed_data']['csv_file'] model_type = input_data_dict['params']['passed_data']['model_type'] - # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The ML forecaster object + # The CSV forecaster object csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - df_pred = csv.fit(date_features=date_features) + csv.fit(date_features=date_features) # Save model if not debug: filename = model_type+'_csv.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) - # return df_pred, csv def csv_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> np.ndarray: + debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -513,8 +507,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The np.ndarray containing the predicted value. - :rtype: np.ndarray """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] @@ -541,7 +533,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, csv_predict_unit_of_measurement, csv_predict_friendly_name, type_var = 'csv_predictor') - return prediction def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1e46927d..57d61791 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -25,32 +25,30 @@ class CsvPredictor: This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. 
-    It exposes one main method:
+    It exposes two main methods:
 
-    - `predict`: to obtain a forecast from a csv file.
+    - `fit`: to train a model with the passed data.
+
+    - `predict`: to obtain a forecast from a pre-trained model.
 
     """
 
    def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str,
                 logger: logging.Logger) -> None:
        r"""Define constructor for the forecast class.
 
-        :param csv_file: The name of the csv file to retrieve data from. \
-            Example: `input_train_data.csv`.
-        :type csv_file: str
+        :param data: The data that will be used for train/test
+        :type data: pd.DataFrame
+        :param model_type: A unique name defining this model and useful to identify \
+            what it will be used for.
+        :type model_type: str
         :param independent_variables: A list of independent variables. \
             Example: [`solar`, `degree_days`].
         :type independent_variables: list
         :param dependent_variable: The dependent variable (to be predicted). \
             Example: `hours`.
         :type dependent_variable: str
-        :param sklearn_model: The `scikit-learn` model that will be used. For now only \
-            this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`.
-        :type sklearn_model: str
-        :param new_values: The new values for the independent variables(in the same order as the independent variables list). \
-            Example: [2.24, 5.68].
-        :type new_values: list
-        :param root: The parent folder of the path where the config.yaml file is located
-        :type root: str
+        :param timestamp: If defined, the column key that has to be used as the timestamp.
+        :type timestamp: str
         :param logger: The passed logger object
         :type logger: logging.Logger
         """
@@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent
         self.dependent_variable = dependent_variable
         self.timestamp = timestamp
         self.model_type = model_type
         self.logger = logger
-        self.is_tuned = False
         self.data.sort_index(inplace=True)
         self.data = self.data[~self.data.index.duplicated(keep='first')]
 
     @staticmethod
-    def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame:
+    def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame:
         """Add date features from the input DataFrame timestamp
 
         :param data: The input DataFrame
         :type data: pd.DataFrame
+        :param timestamp: The column containing the timestamp
+        :type timestamp: str
         :return: The DataFrame with the added features
         :rtype: pd.DataFrame
         """
         df = copy.deepcopy(data)
-        df['timestamp']= pd.to_datetime(df['timestamp'])
+        df[timestamp] = pd.to_datetime(df['timestamp'])
         if 'year' in date_features:
-            df['year'] = [i.month for i in df['timestamp']]
+            df['year'] = [i.year for i in df['timestamp']]
         if 'month' in date_features:
             df['month'] = [i.month for i in df['timestamp']]
         if 'day_of_week' in date_features:
@@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None:
         """
         Fit the model using the provided data.
 
-        :param data: Input Data
-        :type data: pd.DataFrame
+        :param date_features: A list of 'date_features' to take into account when fitting the model.
+        :type date_features: list
         """
-        self.logger.info("Performing a forecast model fit for "+self.model_type)
+        self.logger.info("Performing a csv model fit for "+self.model_type)
         self.data_exo = pd.DataFrame(self.data)
         self.data_exo[self.independent_variables] = self.data[self.independent_variables]
         self.data_exo[self.dependent_variable] = self.data[self.dependent_variable]
@@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None:
         self.data_exo.reset_index(drop=True, inplace=True)
         if len(date_features) > 0:
             if self.timestamp is not None:
-                self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features)
+                self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp)
             else:
                 self.logger.error("If no timestamp is provided, date_features can't be used; continuing without date_features.")
@@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None:
     def predict(self, new_values:list) -> np.ndarray:
         r"""The predict method to generate a forecast from a csv file.
+
+        :param new_values: The new values for the independent variables (in the same order as the independent variables list). \
+            Example: [2.24, 5.68].
+        :type new_values: list
         :return: The np.ndarray containing the predicted value.
         :rtype: np.ndarray
         """

From f3574c8db0ff4034ddb1af603df57c782c13f203 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Mon, 11 Mar 2024 09:59:27 +0100
Subject: [PATCH 011/111] add other regression methods

---
 src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------
 1 file changed, 59 insertions(+), 28 deletions(-)

diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py
index 57d61791..2b6fb86a 100644
--- a/src/emhass/csv_predictor.py
+++ b/src/emhass/csv_predictor.py
@@ -9,9 +9,10 @@
 import pandas as pd
 import numpy as np
 
+from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor
 from sklearn.metrics import r2_score
 
-from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Lasso, LinearRegression, Ridge
 from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
@@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None:
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         self.steps = len(X_test)
 
-        # Define the model
-        self.model = Pipeline([
-            ('scaler', StandardScaler()),
-            ('regressor', LinearRegression())
-        ])
-        # Define the parameters to tune
-        param_grid = {
-            'regressor__fit_intercept': [True, False],
-            'regressor__positive': [True, False],
-        }
-
-        # Create a grid search object
-        self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1)
-        # Fit the grid search object to the data
-        self.logger.info("Fitting the model...")
-        start_time = time.time()
-        self.grid_search.fit(X_train.values, y_train.values)
-        self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}")
-
-        self.model = self.grid_search.best_estimator_
-
-
-        # Make predictions
-        predictions = self.model.predict(X_test.values)
-        predictions = pd.Series(predictions, index=X_test.index)
-        pred_metric = r2_score(y_test,predictions)
-        self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}")
+        regression_methods = [
+            ('Linear Regression', LinearRegression(), {}),
+            ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1,
1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid = { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From b9fa914ca93733156d802ac17c95228a3276562c Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:10:15 +0100 Subject: [PATCH 012/111] add --editable --- .vscode/tasks.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 11a92388..0b25f4f1 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,7 +9,11 @@ "isDefault": true }, "args": [ - "install", "--no-deps", "--force-reinstall", "." + "install", + "--no-deps", + "--force-reinstall", + "--editable", + "." 
], "presentation": { "echo": true, From f7fc59ffeecf862601d0dd896e73da4eaf6c6739 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:17 +0100 Subject: [PATCH 013/111] Add sklearn model --- src/emhass/command_line.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 33af80c0..ae98aa59 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -482,13 +482,14 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, """ data = copy.deepcopy(input_data_dict['df_input_data']) model_type = input_data_dict['params']['passed_data']['model_type'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] # The CSV forecaster object - csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model csv.fit(date_features=date_features) # Save model From 7177ad2cdcba20c2982dcf1a5d86fccc2c11bad0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 014/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
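The param grids in this patch follow the naming convention implied by scikit-learn's make_pipeline: each pipeline step is keyed by the lowercased class name of the estimator, so a hyperparameter is addressed as '<step>__<parameter>' (hence 'ridge__alpha', 'adaboostregressor__n_estimators', and so on). A minimal sketch of that convention, with made-up training data (the feature count and coefficients are assumptions for illustration only):

    # The Ridge() step created by make_pipeline() is auto-named 'ridge',
    # so its alpha hyperparameter is addressed as 'ridge__alpha'.
    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X = np.random.rand(100, 2)  # e.g. degree days and solar radiation
    y = X @ np.array([1.5, -0.7]) + 0.1 * np.random.rand(100)
    pipe = make_pipeline(StandardScaler(), Ridge())
    grid = GridSearchCV(pipe, {'ridge__alpha': [0.1, 1.0, 10.0]},
                        scoring='neg_mean_squared_error', cv=5)
    grid.fit(X, y)
    print(grid.best_params_)  # e.g. {'ridge__alpha': 0.1}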
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}") + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 11664bf628cc1910f745c0d193a6125fff888960 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:42:27 +0100 Subject: [PATCH 015/111] change to MLRegressor --- src/emhass/command_line.py | 40 +++++++++---------- ...ictor.py => machine_learning_regressor.py} | 4 +- src/emhass/utils.py | 28 ++++++------- src/emhass/web_server.py | 18 ++++----- 4 files changed, 45 insertions(+), 45 deletions(-) rename src/emhass/{csv_predictor.py => machine_learning_regressor.py} (98%) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index ae98aa59..17be0098 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,7 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization -from emhass.csv_predictor import CsvPredictor +from emhass.machine_learning_regressor import MLRegressor from emhass import utils @@ -155,7 +155,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, return False df_input_data = rh.df_final.copy() - elif set_type == "csv-model-fit": + elif set_type == "regressor-model-fit": df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None @@ -183,7 +183,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - elif set_type == "csv-model-predict": + elif set_type == "regressor-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -469,7 +469,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_model_fit(input_data_dict: dict, logger: logging.Logger, +def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. @@ -488,17 +488,17 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The CSV forecaster object - csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + # The MLRegressor object + mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - csv.fit(date_features=date_features) + mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: - pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def csv_model_predict(input_data_dict: dict, logger: logging.Logger, +def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. 
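After this rename, the two actions are served by the Flask web server as regressor-model-fit and regressor-model-predict. A hypothetical fit call from Python, assuming a standalone EMHASS server on localhost:5000 and an illustrative CSV next to the configuration (the file name, column names and model_type below are made-up values, not part of the patch):

    import requests

    runtimeparams = {
        "csv_file": "prediction.csv",
        "independent_variables": ["degreeday", "solar"],
        "dependent_variable": "hours",
        "timestamp": "timestamp",
        "model_type": "hours_degreeday",
        "sklearn_model": "RidgeRegression",
    }
    # POST the runtime parameters to the renamed action endpoint
    response = requests.post(
        "http://localhost:5000/action/regressor-model-fit", json=runtimeparams
    )
    print(response.status_code, response.text)  # expect 201 and the "executed" message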
@@ -511,29 +511,29 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
     """
     model_type = input_data_dict['params']['passed_data']['model_type']
     root = input_data_dict['root']
-    filename = model_type+'_csv.pkl'
+    filename = model_type+'_mlr.pkl'
     filename_path = pathlib.Path(root) / filename
     if not debug:
         if filename_path.is_file():
             with open(filename_path, 'rb') as inp:
-                csv = pickle.load(inp)
+                mlr = pickle.load(inp)
         else:
             logger.error("The ML regressor file was not found, please run a model fit method before this predict method")
             return
     new_values = input_data_dict['params']['passed_data']['new_values']
     # Predict from csv file
-    prediction = csv.predict(new_values)
+    prediction = mlr.predict(new_values)
 
-    csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id']
-    csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement']
-    csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name']
+    mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id']
+    mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement']
+    mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name']
     # Publish prediction
     idx = 0
     input_data_dict['rh'].post_data(prediction, idx,
-                                    csv_predict_entity_id,
-                                    csv_predict_unit_of_measurement,
-                                    csv_predict_friendly_name,
-                                    type_var = 'csv_predictor')
+                                    mlr_predict_entity_id,
+                                    mlr_predict_unit_of_measurement,
+                                    mlr_predict_friendly_name,
+                                    type_var = 'mlregressor')
 
 def publish_data(input_data_dict: dict, logger: logging.Logger,
     save_data_to_file: Optional[bool] = False,

diff --git a/src/emhass/csv_predictor.py b/src/emhass/machine_learning_regressor.py
similarity index 98%
rename from src/emhass/csv_predictor.py
rename to src/emhass/machine_learning_regressor.py
index 3ffeba27..d70df3ec 100644
--- a/src/emhass/csv_predictor.py
+++ b/src/emhass/machine_learning_regressor.py
@@ -20,7 +20,7 @@
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-class CsvPredictor:
+class MLRegressor:
     r"""
     A forecaster class using machine learning models.
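On the predict side, regressor_model_predict above unpickles the fitted pipeline and feeds it the new feature values. A sketch of that same flow outside the web server, assuming a model previously saved by regressor-model-fit (the path and values here are illustrative, not part of the patch):

    import pickle
    import pathlib

    root = pathlib.Path("/app/data")  # assumed location of the saved model
    with open(root / "hours_degreeday_mlr.pkl", "rb") as inp:
        mlr = pickle.load(inp)
    # new_values must follow the order of the independent variables list,
    # here [degreeday, solar]:
    prediction = mlr.predict([12.4, 4.2])
    print(prediction)  # np.ndarray containing the predicted value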
@@ -115,7 +115,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") diff --git a/src/emhass/utils.py b/src/emhass/utils.py index ddc834ae..5f9f249b 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,7 +155,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-model-fit": + if set_type == "regressor-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] @@ -173,7 +173,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic date_features = runtimeparams['date_features'] params['passed_data']['date_features'] = date_features - if set_type == "csv-model-predict": + if set_type == "regressor-model-predict": new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values @@ -342,21 +342,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'csv_predict_entity_id' not in runtimeparams.keys(): - csv_predict_entity_id = "sensor.csv_predictor" + if 'mlr_predict_entity_id' not in runtimeparams.keys(): + mlr_predict_entity_id = "sensor.mlr_predict" else: - csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] - params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id - if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): - csv_predict_unit_of_measurement = None + mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] + params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id + if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = None else: - csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] - params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement - if 'csv_predict_friendly_name' not in runtimeparams.keys(): - csv_predict_friendly_name = "Csv predictor" + mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] + params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement + if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_friendly_name = "mlr predictor" else: - csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] - params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff 
--git a/src/emhass/web_server.py b/src/emhass/web_server.py index 8db64091..cdb98b00 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_model_fit, csv_model_predict +from emhass.command_line import regressor_model_fit, regressor_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -190,15 +190,15 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-model-fit': - app.logger.info(" >> Performing a csv fit...") - csv_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-fit executed... \n' + elif action_name == 'regressor-model-fit': + app.logger.info(" >> Performing a regressor fit...") + regressor_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-fit executed... \n' return make_response(msg, 201) - elif action_name == 'csv-model-predict': - app.logger.info(" >> Performing a csv predict...") - csv_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-predict executed... \n' + elif action_name == 'regressor-model-predict': + app.logger.info(" >> Performing a regressor predict...") + regressor_model_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 5b168cd68c3a978a4d4e1c2a008185122db6b1ef Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 016/111] change naming and some formatting --- src/emhass/command_line.py | 1019 +++++++++++------- src/emhass/machine_learning_regressor.py | 285 +++-- src/emhass/retrieve_hass.py | 372 +++++-- src/emhass/utils.py | 1238 ++++++++++++++-------- 4 files changed, 1882 insertions(+), 1032 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 17be0098..b4a9050c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -8,14 +8,15 @@ import json import copy import pickle -import time -import numpy as np -import pandas as pd from datetime import datetime, timezone from typing import Optional, Tuple +from importlib.metadata import version +import numpy as np +import pandas as pd + from distutils.util import strtobool -from importlib.metadata import version + from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster @@ -24,12 +25,19 @@ from emhass import utils -def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, - params: str, runtimeparams: str, set_type: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> dict: +def set_input_data_dict( + config_path: pathlib.Path, + base_path: str, + costfun: str, + params: str, + runtimeparams: str, + set_type: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, +) -> dict: """ Set up some of the data needed for the different actions. 
- + :param config_path: The complete absolute path where the config.yaml file is located :type config_path: pathlib.Path :param base_path: The parent folder of the config_path @@ -53,118 +61,196 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.info("Setting up needed data") # Parsing yaml retrieve_hass_conf, optim_conf, plant_conf = utils.get_yaml_parse( - config_path, use_secrets=not(get_data_from_file), params=params) + config_path, use_secrets=not (get_data_from_file), params=params + ) # Treat runtimeparams params, retrieve_hass_conf, optim_conf, plant_conf = utils.treat_runtimeparams( - runtimeparams, params, retrieve_hass_conf, - optim_conf, plant_conf, set_type, logger) + runtimeparams, + params, + retrieve_hass_conf, + optim_conf, + plant_conf, + set_type, + logger, + ) # Define main objects - rh = RetrieveHass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'], - retrieve_hass_conf['freq'], retrieve_hass_conf['time_zone'], - params, base_path, logger, get_data_from_file=get_data_from_file) - fcst = Forecast(retrieve_hass_conf, optim_conf, plant_conf, - params, base_path, logger, get_data_from_file=get_data_from_file) - opt = Optimization(retrieve_hass_conf, optim_conf, plant_conf, - fcst.var_load_cost, fcst.var_prod_price, - costfun, base_path, logger) + rh = RetrieveHass( + retrieve_hass_conf["hass_url"], + retrieve_hass_conf["long_lived_token"], + retrieve_hass_conf["freq"], + retrieve_hass_conf["time_zone"], + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + fcst = Forecast( + retrieve_hass_conf, + optim_conf, + plant_conf, + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + opt = Optimization( + retrieve_hass_conf, + optim_conf, + plant_conf, + fcst.var_load_cost, + fcst.var_prod_price, + costfun, + base_path, + logger, + ) # Perform setup based on type of action if set_type == "perfect-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: - days_list = utils.get_days_list(retrieve_hass_conf['days_to_retrieve']) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): - return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + days_list = utils.get_days_list(retrieve_hass_conf["days_to_retrieve"]) + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): + return False + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # What we don't need for this type of action P_PV_forecast, P_load_forecast, df_input_data_dayahead = None, None, None elif set_type == "dayahead-optim": # Get PV and load 
forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) P_PV_forecast = fcst.get_power_from_weather(df_weather) P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method']) if isinstance(P_load_forecast,bool) and not P_load_forecast: logger.error("Unable to get sensor power photovoltaics, or sensor power load no var loads. Check HA sensors and their daily data") return False - df_input_data_dayahead = pd.DataFrame(np.transpose(np.vstack([P_PV_forecast.values,P_load_forecast.values])), - index=P_PV_forecast.index, - columns=['P_PV_forecast', 'P_load_forecast']) + df_input_data_dayahead = pd.DataFrame( + np.transpose(np.vstack([P_PV_forecast.values, P_load_forecast.values])), + index=P_PV_forecast.index, + columns=["P_PV_forecast", "P_load_forecast"], + ) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] # What we don't need for this type of action df_input_data, days_list = None, None elif set_type == "naive-mpc-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: days_list = utils.get_days_list(1) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # Get PV and load forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) - P_PV_forecast = fcst.get_power_from_weather(df_weather, set_mix_forecast=True, df_now=df_input_data) - P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method'], set_mix_forecast=True, df_now=df_input_data) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) + 
P_PV_forecast = fcst.get_power_from_weather( + df_weather, set_mix_forecast=True, df_now=df_input_data + ) + P_load_forecast = fcst.get_load_forecast( + method=optim_conf["load_forecast_method"], + set_mix_forecast=True, + df_now=df_input_data, + ) df_input_data_dayahead = pd.concat([P_PV_forecast, P_load_forecast], axis=1) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) - df_input_data_dayahead.columns = ['P_PV_forecast', 'P_load_forecast'] + df_input_data_dayahead.columns = ["P_PV_forecast", "P_load_forecast"] params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] - elif set_type == "forecast-model-fit" or set_type == "forecast-model-predict" or set_type == "forecast-model-tune": + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] + elif ( + set_type == "forecast-model-fit" + or set_type == "forecast-model-predict" + or set_type == "forecast-model-tune" + ): df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) # Retrieve data from hass - days_to_retrieve = params['passed_data']['days_to_retrieve'] - model_type = params['passed_data']['model_type'] - var_model = params['passed_data']['var_model'] + days_to_retrieve = params["passed_data"]["days_to_retrieve"] + model_type = params["passed_data"]["model_type"] + var_model = params["passed_data"]["var_model"] if get_data_from_file: days_list = None - filename = 'data_train_'+model_type+'.pkl' - data_path = pathlib.Path(base_path) / 'data' / filename - with open(data_path, 'rb') as inp: + filename = "data_train_" + model_type + ".pkl" + data_path = pathlib.Path(base_path) / "data" / filename + with open(data_path, "rb") as inp: df_input_data, _ = pickle.load(inp) - df_input_data = df_input_data[df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve):] + df_input_data = df_input_data[ + df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve) : + ] else: days_list = utils.get_days_list(days_to_retrieve) var_list = [var_model] if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - + elif set_type == "regressor-model-fit": - + df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params['passed_data']['csv_file'] - independent_variables = params['passed_data']['independent_variables'] - dependent_variable = params['passed_data']['dependent_variable'] - timestamp = params['passed_data']['timestamp'] + csv_file = params["passed_data"]["csv_file"] + features = params["passed_data"]["features"] + target = params["passed_data"]["target"] + timestamp = params["passed_data"]["timestamp"] filename_path = pathlib.Path(base_path) / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -173,8 +259,8 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.error("The cvs file was not found.") 
raise ValueError("The CSV file " + csv_file + " was not found.") required_columns = [] - required_columns.extend(independent_variables) - required_columns.append(dependent_variable) + required_columns.extend(features) + required_columns.append(target) if timestamp is not None: required_columns.append(timestamp) @@ -188,39 +274,46 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, P_PV_forecast, P_load_forecast = None, None days_list = None params = json.loads(params) - + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None else: - logger.error("The passed action argument and hence the set_type parameter for setup is not valid") + logger.error( + "The passed action argument and hence the set_type parameter for setup is not valid" + ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None # The input data dictionnary to return input_data_dict = { - 'root': base_path, - 'retrieve_hass_conf': retrieve_hass_conf, - 'rh': rh, - 'opt': opt, - 'fcst': fcst, - 'df_input_data': df_input_data, - 'df_input_data_dayahead': df_input_data_dayahead, - 'P_PV_forecast': P_PV_forecast, - 'P_load_forecast': P_load_forecast, - 'costfun': costfun, - 'params': params, - 'days_list': days_list + "root": base_path, + "retrieve_hass_conf": retrieve_hass_conf, + "rh": rh, + "opt": opt, + "fcst": fcst, + "df_input_data": df_input_data, + "df_input_data_dayahead": df_input_data_dayahead, + "P_PV_forecast": P_PV_forecast, + "P_load_forecast": P_load_forecast, + "costfun": costfun, + "params": params, + "days_list": days_list, } return input_data_dict - -def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = True, debug: Optional[bool] = False) -> pd.DataFrame: + + +def perfect_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = True, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the perfect forecast optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -235,26 +328,38 @@ def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing perfect forecast optimization") # Load cost and prod price forecast - df_input_data = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) - opt_res = input_data_dict['opt'].perform_perfect_forecast_optim(df_input_data, input_data_dict['days_list']) + df_input_data = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res = input_data_dict["opt"].perform_perfect_forecast_optim( + df_input_data, input_data_dict["days_list"] + ) # Save CSV file for analysis if save_data_to_file: - filename = 'opt_res_perfect_optim_'+input_data_dict['costfun']+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res - -def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + + +def dayahead_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the day-ahead optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -269,29 +374,43 @@ def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing day-ahead forecast optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, - method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) - opt_res_dayahead = input_data_dict['opt'].perform_dayahead_forecast_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res_dayahead = input_data_dict["opt"].perform_dayahead_forecast_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_dayahead.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_dayahead.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_dayahead -def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + +def naive_mpc_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the naive Model Predictive Controller optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -306,33 +425,50 @@ def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing naive MPC optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) # The specifics params for the MPC at runtime - prediction_horizon = input_data_dict['params']['passed_data']['prediction_horizon'] - soc_init = input_data_dict['params']['passed_data']['soc_init'] - soc_final = input_data_dict['params']['passed_data']['soc_final'] - def_total_hours = input_data_dict['params']['passed_data']['def_total_hours'] - def_start_timestep = input_data_dict['params']['passed_data']['def_start_timestep'] - def_end_timestep = input_data_dict['params']['passed_data']['def_end_timestep'] - opt_res_naive_mpc = input_data_dict['opt'].perform_naive_mpc_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast'], - prediction_horizon, soc_init, soc_final, def_total_hours, def_start_timestep, def_end_timestep) + prediction_horizon = input_data_dict["params"]["passed_data"]["prediction_horizon"] + soc_init = input_data_dict["params"]["passed_data"]["soc_init"] + soc_final = input_data_dict["params"]["passed_data"]["soc_final"] + def_total_hours = input_data_dict["params"]["passed_data"]["def_total_hours"] + def_start_timestep = input_data_dict["params"]["passed_data"]["def_start_timestep"] + def_end_timestep = input_data_dict["params"]["passed_data"]["def_end_timestep"] + opt_res_naive_mpc = input_data_dict["opt"].perform_naive_mpc_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + prediction_horizon, + soc_init, + soc_final, + def_total_hours, + def_start_timestep, + def_end_timestep, + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_naive_mpc_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_naive_mpc.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_naive_mpc.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_naive_mpc -def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, 
MLForecaster]: + +def forecast_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -344,29 +480,37 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - var_model = input_data_dict['params']['passed_data']['var_model'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - num_lags = input_data_dict['params']['passed_data']['num_lags'] - split_date_delta = input_data_dict['params']['passed_data']['split_date_delta'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + var_model = input_data_dict["params"]["passed_data"]["var_model"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + num_lags = input_data_dict["params"]["passed_data"]["num_lags"] + split_date_delta = input_data_dict["params"]["passed_data"]["split_date_delta"] + perform_backtest = input_data_dict["params"]["passed_data"]["perform_backtest"] + root = input_data_dict["root"] # The ML forecaster object - mlf = MLForecaster(data, model_type, var_model, sklearn_model, num_lags, root, logger) + mlf = MLForecaster( + data, model_type, var_model, sklearn_model, num_lags, root, logger + ) # Fit the ML model - df_pred, df_pred_backtest = mlf.fit(split_date_delta=split_date_delta, - perform_backtest=perform_backtest) + df_pred, df_pred_backtest = mlf.fit( + split_date_delta=split_date_delta, perform_backtest=perform_backtest + ) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred, df_pred_backtest, mlf -def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, - use_last_window: Optional[bool] = True, debug: Optional[bool] = False, - mlf: Optional[MLForecaster] = None) -> pd.DataFrame: + +def forecast_model_predict( + input_data_dict: dict, + logger: logging.Logger, + use_last_window: Optional[bool] = True, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> pd.DataFrame: r"""Perform a forecast model predict using a previously trained skforecast model. 
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -388,52 +532,79 @@ def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, :rtype: pd.DataFrame """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return # Make predictions if use_last_window: - data_last_window = copy.deepcopy(input_data_dict['df_input_data']) + data_last_window = copy.deepcopy(input_data_dict["df_input_data"]) else: data_last_window = None predictions = mlf.predict(data_last_window) # Publish data to a Home Assistant sensor - model_predict_publish = input_data_dict['params']['passed_data']['model_predict_publish'] - model_predict_entity_id = input_data_dict['params']['passed_data']['model_predict_entity_id'] - model_predict_unit_of_measurement = input_data_dict['params']['passed_data']['model_predict_unit_of_measurement'] - model_predict_friendly_name = input_data_dict['params']['passed_data']['model_predict_friendly_name'] - publish_prefix = input_data_dict['params']['passed_data']['publish_prefix'] + model_predict_publish = input_data_dict["params"]["passed_data"][ + "model_predict_publish" + ] + model_predict_entity_id = input_data_dict["params"]["passed_data"][ + "model_predict_entity_id" + ] + model_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "model_predict_unit_of_measurement" + ] + model_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "model_predict_friendly_name" + ] + publish_prefix = input_data_dict["params"]["passed_data"]["publish_prefix"] if model_predict_publish is True: # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = predictions.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = predictions.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = predictions.index.get_indexer([now_precise], method="ffill")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = predictions.index.get_indexer([now_precise], method="bfill")[ + 0 + ] if idx_closest == -1: - idx_closest = 
predictions.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] # Publish Load forecast - input_data_dict['rh'].post_data(predictions, idx_closest, - model_predict_entity_id, - model_predict_unit_of_measurement, - model_predict_friendly_name, - type_var = 'mlforecaster', - publish_prefix=publish_prefix) + input_data_dict["rh"].post_data( + predictions, + idx_closest, + model_predict_entity_id, + model_predict_unit_of_measurement, + model_predict_friendly_name, + type_var="mlforecaster", + publish_prefix=publish_prefix, + ) return predictions -def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False, mlf: Optional[MLForecaster] = None - ) -> Tuple[pd.DataFrame, MLForecaster]: + +def forecast_model_tune( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> Tuple[pd.DataFrame, MLForecaster]: """Tune a forecast model hyperparameters using bayesian optimization. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -449,28 +620,32 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, :rtype: pd.DataFrame """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this tune method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this tune method" + ) return None, None # Tune the model df_pred_optim = mlf.tune(debug=debug) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. 
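The regressor fit entry point defined above takes everything it needs from params['passed_data']; an illustrative payload, where the CSV name, column names and model choice are assumptions:

# Illustrative inputs for regressor_model_fit (all concrete values are assumptions).
import pandas as pd

df = pd.read_csv("heating_prediction.csv")  # assumed training data with the columns below
passed_data = {
    "model_type": "heating_hours_degreeday",  # free-form tag, reused for the pickle filename
    "sklearn_model": "LinearRegression",  # one of the models handled by MLRegressor
    "features": ["degreeday", "solar"],  # regressor inputs
    "target": "hours",  # column to predict
    "timestamp": "timestamp",  # optional timestamp column
    "date_features": ["month", "day_of_week"],  # optional derived calendar features
}
input_data_dict = {
    "params": {"passed_data": passed_data},
    "df_input_data": df,
    "root": "/app/data",  # assumed
}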
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -480,26 +655,30 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - timestamp = input_data_dict['params']['passed_data']['timestamp'] - date_features = input_data_dict['params']['passed_data']['date_features'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + features = input_data_dict["params"]["passed_data"]["features"] + target = input_data_dict["params"]["passed_data"]["target"] + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + date_features = input_data_dict["params"]["passed_data"]["date_features"] + root = input_data_dict["root"] # The MLRegressor object - mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + mlr = MLRegressor( + data, model_type, sklearn_model, features, target, timestamp, logger + ) # Fit the ML model mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_mlr.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlr.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_predict( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a prediction from csv file. 
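The predict path below hands params['passed_data']['new_values'] directly to MLRegressor.predict, so the list must follow the fit-time feature order; a short sketch with illustrative numbers:

# new_values must be ordered like the features list used at fit time (values assumed).
import numpy as np

features = ["degreeday", "solar"]  # assumed fit-time order
new_values = [12.4, 4.2]  # degreeday first, then solar
X_new = np.array([new_values])  # shape (1, n_features), as built inside MLRegressor.predict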
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -509,38 +688,53 @@ def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlr.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlr.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlr = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML regressor file was not found, please run a model fit method before this predict method" + ) return - new_values = input_data_dict['params']['passed_data']['new_values'] + new_values = input_data_dict["params"]["passed_data"]["new_values"] # Predict from csv file prediction = mlr.predict(new_values) - mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] - mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] - mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ + "mlr_predict_entity_id" + ] + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "mlr_predict_unit_of_measurement" + ] + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "mlr_predict_friendly_name" + ] # Publish prediction idx = 0 - input_data_dict['rh'].post_data(prediction, idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var = 'mlregressor') - -def publish_data(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, - opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + + +def publish_data( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + opt_res_latest: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: """ Publish the data obtained from the optimization results. 
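publish_data below snaps the current time onto the results index with DatetimeIndex.get_indexer; a self-contained sketch of how the three method_ts_round options behave:

import pandas as pd

idx = pd.date_range("2024-01-07 00:00", periods=4, freq="30min")
now = pd.Timestamp("2024-01-07 00:40")
print(idx.get_indexer([now], method="nearest")[0])  # 1 -> 00:30, the 'nearest' option
print(idx.get_indexer([now], method="ffill")[0])  # 1 -> 00:30, the 'first' option
print(idx.get_indexer([now], method="bfill")[0])  # 2 -> 01:00, the 'last' option
# A target outside the index returns -1, hence the extra fallback to method="nearest" in the code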
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -554,166 +748,245 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, logger.info("Publishing data to HASS instance") # Check if a day ahead optimization has been performed (read CSV file) if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: - filename = 'opt_res_latest.csv' + filename = "opt_res_latest.csv" if opt_res_latest is None: - if not os.path.isfile(pathlib.Path(input_data_dict['root']) / filename): + if not os.path.isfile(pathlib.Path(input_data_dict["root"]) / filename): logger.error("File not found error, run an optimization task first.") return else: - opt_res_latest = pd.read_csv(pathlib.Path(input_data_dict['root']) / filename, index_col='timestamp') + opt_res_latest = pd.read_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_col="timestamp" + ) opt_res_latest.index = pd.to_datetime(opt_res_latest.index) - opt_res_latest.index.freq = input_data_dict['retrieve_hass_conf']['freq'] + opt_res_latest.index.freq = input_data_dict["retrieve_hass_conf"]["freq"] # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="ffill")[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="bfill")[0] if idx_closest == -1: - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] # Publish the data - params = json.loads(input_data_dict['params']) - publish_prefix = params['passed_data']['publish_prefix'] + params = json.loads(input_data_dict["params"]) + publish_prefix = params["passed_data"]["publish_prefix"] # Publish PV forecast - custom_pv_forecast_id = params['passed_data']['custom_pv_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_PV'], idx_closest, - custom_pv_forecast_id["entity_id"], - custom_pv_forecast_id["unit_of_measurement"], - custom_pv_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) + custom_pv_forecast_id = params["passed_data"]["custom_pv_forecast_id"] + 
input_data_dict["rh"].post_data( + opt_res_latest["P_PV"], + idx_closest, + custom_pv_forecast_id["entity_id"], + custom_pv_forecast_id["unit_of_measurement"], + custom_pv_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) # Publish Load forecast - custom_load_forecast_id = params['passed_data']['custom_load_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_Load'], idx_closest, - custom_load_forecast_id["entity_id"], - custom_load_forecast_id["unit_of_measurement"], - custom_load_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = ['P_PV', 'P_Load'] + custom_load_forecast_id = params["passed_data"]["custom_load_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_Load"], + idx_closest, + custom_load_forecast_id["entity_id"], + custom_load_forecast_id["unit_of_measurement"], + custom_load_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = ["P_PV", "P_Load"] # Publish deferrable loads - custom_deferrable_forecast_id = params['passed_data']['custom_deferrable_forecast_id'] - for k in range(input_data_dict['opt'].optim_conf['num_def_loads']): + custom_deferrable_forecast_id = params["passed_data"][ + "custom_deferrable_forecast_id" + ] + for k in range(input_data_dict["opt"].optim_conf["num_def_loads"]): if "P_deferrable{}".format(k) not in opt_res_latest.columns: - logger.error("P_deferrable{}".format(k)+" was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + logger.error( + "P_deferrable{}".format(k) + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + ) else: - input_data_dict['rh'].post_data(opt_res_latest["P_deferrable{}".format(k)], idx_closest, - custom_deferrable_forecast_id[k]["entity_id"], - custom_deferrable_forecast_id[k]["unit_of_measurement"], - custom_deferrable_forecast_id[k]["friendly_name"], - type_var = 'deferrable', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_deferrable{}".format(k)] + input_data_dict["rh"].post_data( + opt_res_latest["P_deferrable{}".format(k)], + idx_closest, + custom_deferrable_forecast_id[k]["entity_id"], + custom_deferrable_forecast_id[k]["unit_of_measurement"], + custom_deferrable_forecast_id[k]["friendly_name"], + type_var="deferrable", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_deferrable{}".format(k)] # Publish battery power - if input_data_dict['opt'].optim_conf['set_use_battery']: - if 'P_batt' not in opt_res_latest.columns: - logger.error("P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + if input_data_dict["opt"].optim_conf["set_use_battery"]: + if "P_batt" not in opt_res_latest.columns: + logger.error( + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." 
+ ) else: - custom_batt_forecast_id = params['passed_data']['custom_batt_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_batt'], idx_closest, - custom_batt_forecast_id["entity_id"], - custom_batt_forecast_id["unit_of_measurement"], - custom_batt_forecast_id["friendly_name"], - type_var = 'batt', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_batt"] - custom_batt_soc_forecast_id = params['passed_data']['custom_batt_soc_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['SOC_opt']*100, idx_closest, - custom_batt_soc_forecast_id["entity_id"], - custom_batt_soc_forecast_id["unit_of_measurement"], - custom_batt_soc_forecast_id["friendly_name"], - type_var = 'SOC', - publish_prefix = publish_prefix) - cols_published = cols_published+["SOC_opt"] + custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_batt"], + idx_closest, + custom_batt_forecast_id["entity_id"], + custom_batt_forecast_id["unit_of_measurement"], + custom_batt_forecast_id["friendly_name"], + type_var="batt", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_batt"] + custom_batt_soc_forecast_id = params["passed_data"][ + "custom_batt_soc_forecast_id" + ] + input_data_dict["rh"].post_data( + opt_res_latest["SOC_opt"] * 100, + idx_closest, + custom_batt_soc_forecast_id["entity_id"], + custom_batt_soc_forecast_id["unit_of_measurement"], + custom_batt_soc_forecast_id["friendly_name"], + type_var="SOC", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["SOC_opt"] # Publish grid power - custom_grid_forecast_id = params['passed_data']['custom_grid_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_grid'], idx_closest, - custom_grid_forecast_id["entity_id"], - custom_grid_forecast_id["unit_of_measurement"], - custom_grid_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_grid"] + custom_grid_forecast_id = params["passed_data"]["custom_grid_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_grid"], + idx_closest, + custom_grid_forecast_id["entity_id"], + custom_grid_forecast_id["unit_of_measurement"], + custom_grid_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_grid"] # Publish total value of cost function - custom_cost_fun_id = params['passed_data']['custom_cost_fun_id'] - col_cost_fun = [i for i in opt_res_latest.columns if 'cost_fun_' in i] - input_data_dict['rh'].post_data(opt_res_latest[col_cost_fun], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'cost_fun', - publish_prefix = publish_prefix) + custom_cost_fun_id = params["passed_data"]["custom_cost_fun_id"] + col_cost_fun = [i for i in opt_res_latest.columns if "cost_fun_" in i] + input_data_dict["rh"].post_data( + opt_res_latest[col_cost_fun], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="cost_fun", + publish_prefix=publish_prefix, + ) # Publish the optimization status - custom_cost_fun_id = params['passed_data']['custom_optim_status_id'] + custom_cost_fun_id = params["passed_data"]["custom_optim_status_id"] if "optim_status" not in opt_res_latest: - opt_res_latest["optim_status"] = 'Optimal' - logger.warning("no optim_status 
in opt_res_latest, run an optimization task first") - input_data_dict['rh'].post_data(opt_res_latest['optim_status'], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'optim_status', - publish_prefix = publish_prefix) - cols_published = cols_published+["optim_status"] + opt_res_latest["optim_status"] = "Optimal" + logger.warning( + "no optim_status in opt_res_latest, run an optimization task first" + ) + input_data_dict["rh"].post_data( + opt_res_latest["optim_status"], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="optim_status", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["optim_status"] # Publish unit_load_cost - custom_unit_load_cost_id = params['passed_data']['custom_unit_load_cost_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_load_cost'], idx_closest, - custom_unit_load_cost_id["entity_id"], - custom_unit_load_cost_id["unit_of_measurement"], - custom_unit_load_cost_id["friendly_name"], - type_var = 'unit_load_cost', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_load_cost"] + custom_unit_load_cost_id = params["passed_data"]["custom_unit_load_cost_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_load_cost"], + idx_closest, + custom_unit_load_cost_id["entity_id"], + custom_unit_load_cost_id["unit_of_measurement"], + custom_unit_load_cost_id["friendly_name"], + type_var="unit_load_cost", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_load_cost"] # Publish unit_prod_price - custom_unit_prod_price_id = params['passed_data']['custom_unit_prod_price_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_prod_price'], idx_closest, - custom_unit_prod_price_id["entity_id"], - custom_unit_prod_price_id["unit_of_measurement"], - custom_unit_prod_price_id["friendly_name"], - type_var = 'unit_prod_price', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_prod_price"] + custom_unit_prod_price_id = params["passed_data"]["custom_unit_prod_price_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_prod_price"], + idx_closest, + custom_unit_prod_price_id["entity_id"], + custom_unit_prod_price_id["unit_of_measurement"], + custom_unit_prod_price_id["friendly_name"], + type_var="unit_prod_price", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_prod_price"] # Create a DF resuming what has been published opt_res = opt_res_latest[cols_published].loc[[opt_res_latest.index[idx_closest]]] return opt_res - - + + def main(): r"""Define the main command line entry function. This function may take several arguments as inputs. 
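Both boolean flags below are parsed with strtobool; assuming the module-level import is distutils.util.strtobool, which matches the type=strtobool usage, a small sketch of the accepted strings:

from distutils.util import strtobool  # assumed to be the import used by command_line.py

# Returns 1 for "y", "yes", "t", "true", "on", "1" and 0 for "n", "no", "f",
# "false", "off", "0" (case-insensitive); any other string raises ValueError.
print(bool(strtobool("True")))  # True
print(bool(strtobool("0")))  # False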
You can type `emhass --help` to see the list of options: - + - action: Set the desired action, options are: perfect-optim, dayahead-optim, naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune - + - config: Define path to the config.yaml file - + - costfun: Define the type of cost function, options are: profit, cost, self-consumption - + - log2file: Define if we should log to a file or not - + - params: Configuration parameters passed from data/options.json if using the add-on - + - runtimeparams: Pass runtime optimization parameters as dictionnary - + - debug: Use True for testing purposes - + """ # Parsing arguments parser = argparse.ArgumentParser() - parser.add_argument('--action', type=str, help='Set the desired action, options are: perfect-optim, dayahead-optim,\ - naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune') - parser.add_argument('--config', type=str, help='Define path to the config.yaml file') - parser.add_argument('--costfun', type=str, default='profit', help='Define the type of cost function, options are: profit, cost, self-consumption') - parser.add_argument('--log2file', type=strtobool, default='False', help='Define if we should log to a file or not') - parser.add_argument('--params', type=str, default=None, help='Configuration parameters passed from data/options.json') - parser.add_argument('--runtimeparams', type=str, default=None, help='Pass runtime optimization parameters as dictionnary') - parser.add_argument('--debug', type=strtobool, default='False', help='Use True for testing purposes') + parser.add_argument( + "--action", + type=str, + help="Set the desired action, options are: perfect-optim, dayahead-optim,\ + naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune", + ) + parser.add_argument( + "--config", type=str, help="Define path to the config.yaml file" + ) + parser.add_argument( + "--costfun", + type=str, + default="profit", + help="Define the type of cost function, options are: profit, cost, self-consumption", + ) + parser.add_argument( + "--log2file", + type=strtobool, + default="False", + help="Define if we should log to a file or not", + ) + parser.add_argument( + "--params", + type=str, + default=None, + help="Configuration parameters passed from data/options.json", + ) + parser.add_argument( + "--runtimeparams", + type=str, + default=None, + help="Pass runtime optimization parameters as dictionnary", + ) + parser.add_argument( + "--debug", type=strtobool, default="False", help="Use True for testing purposes" + ) args = parser.parse_args() # The path to the configuration files config_path = pathlib.Path(args.config) @@ -722,39 +995,56 @@ def main(): logger, ch = utils.get_logger(__name__, base_path, save_to_file=bool(args.log2file)) # Additionnal argument try: - parser.add_argument('--version', action='version', version='%(prog)s '+version('emhass')) + parser.add_argument( + "--version", action="version", version="%(prog)s " + version("emhass") + ) args = parser.parse_args() except Exception: - logger.info("Version not found for emhass package. Or importlib exited with PackageNotFoundError.") + logger.info( + "Version not found for emhass package. Or importlib exited with PackageNotFoundError." 
+ ) # Setup parameters - input_data_dict = set_input_data_dict(config_path, base_path, - args.costfun, args.params, args.runtimeparams, args.action, - logger, args.debug) + input_data_dict = set_input_data_dict( + config_path, + base_path, + args.costfun, + args.params, + args.runtimeparams, + args.action, + logger, + args.debug, + ) # Perform selected action - if args.action == 'perfect-optim': + if args.action == "perfect-optim": opt_res = perfect_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'dayahead-optim': + elif args.action == "dayahead-optim": opt_res = dayahead_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'naive-mpc-optim': + elif args.action == "naive-mpc-optim": opt_res = naive_mpc_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'forecast-model-fit': - df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) + elif args.action == "forecast-model-fit": + df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit( + input_data_dict, logger, debug=args.debug + ) opt_res = None - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred = forecast_model_predict(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred = forecast_model_predict( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'publish-data': + elif args.action == "publish-data": opt_res = publish_data(input_data_dict, logger) else: logger.error("The passed action argument is not valid") @@ -763,15 +1053,20 @@ def main(): # Flush the logger ch.close() logger.removeHandler(ch) - if args.action == 'perfect-optim' or args.action == 'dayahead-optim' or \ - args.action == 'naive-mpc-optim' or args.action == 'publish-data': + if ( + args.action == "perfect-optim" + or args.action == "dayahead-optim" + or args.action == "naive-mpc-optim" + or args.action == "publish-data" + ): return opt_res - elif args.action == 'forecast-model-fit': + elif args.action == "forecast-model-fit": return df_fit_pred, df_fit_pred_backtest, mlf - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": return df_pred - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": return df_pred_optim, mlf -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index d70df3ec..80ddd74f 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -9,8 +9,12 @@ import pandas as pd import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score +from sklearn.ensemble import ( + AdaBoostRegressor, + GradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.metrics import r2_score from sklearn.linear_model 
import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split @@ -20,21 +24,31 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) + class MLRegressor: r""" A forecaster class using machine learning models. - + This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - + It exposes two main methods: - + - `fit`: to train a model with the passed data. - + - `predict`: to obtain a forecast from a pre-trained model. - + """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: + + def __init__( + self, + data, + model_type: str, + sklearn_model: str, + features: list, + target: str, + timestamp: str, + logger: logging.Logger, + ) -> None: r"""Define constructor for the forecast class. :param data: The data that will be used for train/test @@ -42,33 +56,35 @@ def __init__(self, data, model_type: str, sklearn_model: str, independent_variab :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param independent_variables: A list of independent variables. \ + :param features: A list of features. \ Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ + :type features: list + :param target: The target(to be predicted). \ Example: `hours`. - :type dependent_variable: str + :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable + self.features = features + self.target = target self.timestamp = timestamp self.model_type = model_type self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None self.model = None - self.grid_search =None - + self.grid_search = None + @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: + def add_date_features( + data: pd.DataFrame, date_features: list, timestamp: str + ) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame @@ -79,179 +95,162 @@ def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) - :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] + df[timestamp] = pd.to_datetime(df["timestamp"]) + if "year" in date_features: + df["year"] = [i.year for i in df["timestamp"]] + if "month" in date_features: + df["month"] = [i.month for i in df["timestamp"]] + if 
"day_of_week" in date_features: + df["day_of_week"] = [i.dayofweek for i in df["timestamp"]] + if "day_of_year" in date_features: + df["day_of_year"] = [i.dayofyear for i in df["timestamp"]] + if "day" in date_features: + df["day"] = [i.day for i in df["timestamp"]] + if "hour" in date_features: + df["hour"] = [i.day for i in df["timestamp"]] return df def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - + :param date_features: A list of 'date_features' to take into account when fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for " + self.model_type) self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + self.data_exo[self.features] = self.data[self.features] + self.data_exo[self.target] = self.data[self.target] keep_columns = [] - keep_columns.extend(self.independent_variables) + keep_columns.extend(self.features) if self.timestamp is not None: keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) + keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features( + self.data_exo, date_features, self.timestamp + ) else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") + self.logger.error( + "If no timestamp provided, you can't use date_features, going further without date_features." 
+ ) - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + y = self.data_exo[self.target] + self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + self.data_exo = self.data_exo.drop(self.timestamp, axis=1) X = self.data_exo - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 + ) self.steps = len(X_test) regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = 
regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] + + if self.sklearn_model == "LinearRegression": + base_model = regression_methods["LinearRegression"]["model"] + param_grid = regression_methods["LinearRegression"]["param_grid"] + elif self.sklearn_model == "RidgeRegression": + base_model = regression_methods["RidgeRegression"]["model"] + param_grid = regression_methods["RidgeRegression"]["param_grid"] + elif self.sklearn_model == "LassoRegression": + base_model = regression_methods["LassoRegression"]["model"] + param_grid = regression_methods["LassoRegression"]["param_grid"] + elif self.sklearn_model == "RandomForestRegression": + base_model = regression_methods["RandomForestRegression"]["model"] + param_grid = regression_methods["RandomForestRegression"]["param_grid"] + elif self.sklearn_model == "GradientBoostingRegression": + base_model = regression_methods["GradientBoostingRegression"]["model"] + param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] + elif self.sklearn_model == "AdaBoostRegression": + base_model = regression_methods["AdaBoostRegression"]["model"] + param_grid = regression_methods["AdaBoostRegression"]["param_grid"] else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } + self.logger.error( + "Passed sklearn model " + self.sklearn_model + " is not valid" + ) + + self.model = make_pipeline(StandardScaler(), base_model) # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) - + self.grid_search = GridSearchCV( + self.model, + param_grid, + cv=5, + scoring="neg_mean_squared_error", + 
refit=True, + verbose=0, + n_jobs=-1, + ) + # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") + self.logger.info("Training a " + self.sklearn_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) + print("Best value for lambda : ", self.grid_search.best_params_) print("Best score for cost function: ", self.grid_search.best_score_) self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") self.model = self.grid_search.best_estimator_ - # Make predictions predictions = self.model.predict(X_test.values) predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - + pred_metric = r2_score(y_test, predictions) + self.logger.info( + f"Prediction R2 score of fitted model on test data: {pred_metric}" + ) - def predict(self, new_values:list) -> np.ndarray: + def predict(self, new_values: list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + :param new_values: The new values for the features(in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.model_type) + self.logger.info("Performing a prediction for " + self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index b4708f78..9f47efef 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -30,12 +30,20 @@ class RetrieveHass: """ - def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, - time_zone: datetime.timezone, params: str, base_path: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> None: + def __init__( + self, + hass_url: str, + long_lived_token: str, + freq: pd.Timedelta, + time_zone: datetime.timezone, + params: str, + base_path: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, + ) -> None: """ Define constructor for RetrieveHass class. 
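A minimal sketch of constructing the class below by hand; the URL, token, frequency and paths are placeholders (get_data concatenates "api/..." onto hass_url, so a Home Assistant Core URL should end with a slash):

import logging

import pandas as pd
import pytz

from emhass.retrieve_hass import RetrieveHass

logger = logging.getLogger(__name__)
rh = RetrieveHass(
    hass_url="http://localhost:8123/",  # placeholder; trailing slash expected by get_data
    long_lived_token="<long-lived-token>",  # placeholder
    freq=pd.to_timedelta(30, "minutes"),
    time_zone=pytz.timezone("Europe/Brussels"),  # assumed
    params=None,  # assumed acceptable here; only stored on the instance
    base_path="/app/data",  # assumed
    logger=logger,
)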
- + :param hass_url: The URL of the Home Assistant instance :type hass_url: str :param long_lived_token: The long lived token retrieved from the configuration pane @@ -50,7 +58,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, :type base_path: str :param logger: The passed logger object :type logger: logging object - :param get_data_from_file: Select if data should be retrieved from a + :param get_data_from_file: Select if data should be retrieved from a previously saved pickle useful for testing or directly from connection to hass database :type get_data_from_file: bool, optional @@ -65,9 +73,14 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, self.logger = logger self.get_data_from_file = get_data_from_file - def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: Optional[bool] = False, - significant_changes_only: Optional[bool] = False, - test_url: Optional[str] = 'empty') -> None: + def get_data( + self, + days_list: pd.date_range, + var_list: list, + minimal_response: Optional[bool] = False, + significant_changes_only: Optional[bool] = False, + test_url: Optional[str] = "empty", + ) -> None: r""" Retrieve the actual data from hass. @@ -92,20 +105,36 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O """ self.logger.info("Retrieve hass get data method initiated...") self.df_final = pd.DataFrame() - x = 0 #iterate based on days + x = 0 # iterate based on days # Looping on each day from days list for day in days_list: - + for i, var in enumerate(var_list): - - if test_url == 'empty': - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/history/period/"+day.isoformat()+"?filter_entity_id="+var - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/history/period/"+day.isoformat()+"?filter_entity_id="+var - if minimal_response: # A support for minimal response + + if test_url == "empty": + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = ( + self.hass_url + + "/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + else: # Otherwise the Home Assistant Core API it is + url = ( + self.hass_url + + "api/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + if minimal_response: # A support for minimal response url = url + "?minimal_response" - if significant_changes_only: # And for signicant changes only (check the HASS restful API for more info) + if ( + significant_changes_only + ): # And for signicant changes only (check the HASS restful API for more info) url = url + "?significant_changes_only" else: url = test_url @@ -116,59 +145,96 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O try: response = get(url, headers=headers) except Exception: - self.logger.error("Unable to access Home Assistance instance, check URL") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistance instance, check URL" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'" + ) return False else: if response.status_code == 401: - self.logger.error("Unable to access Home Assistance instance, TOKEN/KEY") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistance instance, TOKEN/KEY" + ) + self.logger.error( + "If 
using addon, try setting url and token to 'empty'" + ) return False if response.status_code > 299: return f"Request Get Error: {response.status_code}" - '''import bz2 # Uncomment to save a serialized data for tests + """import bz2 # Uncomment to save a serialized data for tests import _pickle as cPickle with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f: - cPickle.dump(response, f)''' - try: # Sometimes when there are connection problems we need to catch empty retrieved json + cPickle.dump(response, f)""" + try: # Sometimes when there are connection problems we need to catch empty retrieved json data = response.json()[0] except IndexError: if x is 0: - self.logger.error("The retrieved JSON is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + self.logger.error( + "The retrieved JSON is empty, A sensor:" + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("The retrieved JSON is empty for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False df_raw = pd.DataFrame.from_dict(data) if len(df_raw) == 0: if x is 0: - self.logger.error("The retrieved Dataframe is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + self.logger.error( + "The retrieved Dataframe is empty, A sensor:" + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("Retrieved empty Dataframe for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False - if i == 0: # Defining the DataFrame container - from_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").min() - to_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").max() - ts = pd.to_datetime(pd.date_range(start=from_date, end=to_date, freq=self.freq), - format='%Y-%d-%m %H:%M').round(self.freq, ambiguous='infer', nonexistent=self.freq) - df_day = pd.DataFrame(index = ts) + if i == 0: # Defining the DataFrame container + from_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).min() + to_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).max() + ts = pd.to_datetime( + pd.date_range(start=from_date, end=to_date, freq=self.freq), + format="%Y-%d-%m %H:%M", + ).round(self.freq, ambiguous="infer", nonexistent=self.freq) + df_day = pd.DataFrame(index=ts) # Caution with undefined string data: unknown, unavailable, etc. 
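For context on the cleaning step that follows, each element of response.json()[0] is a state dict from the history API; an illustrative payload (values assumed) and the same NaN conversion applied below:

import numpy as np
import pandas as pd

# Illustrative shape of the history payload for one sensor
data = [
    {"state": "1250.3", "last_changed": "2024-01-07T00:00:12+00:00"},
    {"state": "unavailable", "last_changed": "2024-01-07T00:05:41+00:00"},
    {"state": "1301.0", "last_changed": "2024-01-07T00:10:07+00:00"},
]
df_raw = pd.DataFrame.from_dict(data)
# Placeholder states become NaN so they do not poison the resample().mean() that follows
df_tp = df_raw[["state"]].replace(["unknown", "unavailable", ""], np.nan).astype(float)
print(df_tp)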
- df_tp = df_raw.copy()[['state']].replace( - ['unknown', 'unavailable', ''], np.nan).astype(float).rename(columns={'state': var}) + df_tp = ( + df_raw.copy()[["state"]] + .replace(["unknown", "unavailable", ""], np.nan) + .astype(float) + .rename(columns={"state": var}) + ) # Setting index, resampling and concatenation - df_tp.set_index(pd.to_datetime(df_raw['last_changed'], format="ISO8601"), inplace=True) + df_tp.set_index( + pd.to_datetime(df_raw["last_changed"], format="ISO8601"), + inplace=True, + ) df_tp = df_tp.resample(self.freq).mean() df_day = pd.concat([df_day, df_tp], axis=1) - + x += 1 self.df_final = pd.concat([self.df_final, df_day], axis=0) self.df_final = set_df_index_freq(self.df_final) if self.df_final.index.freq != self.freq: - self.logger.error("The inferred freq from data is not equal to the defined freq in passed parameters") + self.logger.error( + "The inferred freq from data is not equal to the defined freq in passed parameters" + ) return False return True - - def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set_zero_min: Optional[bool] = True, - var_replace_zero: Optional[list] = None, var_interp: Optional[list] = None) -> None: + + def prepare_data( + self, + var_load: str, + load_negative: Optional[bool] = False, + set_zero_min: Optional[bool] = True, + var_replace_zero: Optional[list] = None, + var_interp: Optional[list] = None, + ) -> None: r""" Apply some data treatment in preparation for the optimization task. @@ -192,18 +258,24 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set """ try: - if load_negative: # Apply the correct sign to load power - self.df_final[var_load+'_positive'] = -self.df_final[var_load] + if load_negative: # Apply the correct sign to load power + self.df_final[var_load + "_positive"] = -self.df_final[var_load] else: - self.df_final[var_load+'_positive'] = self.df_final[var_load] + self.df_final[var_load + "_positive"] = self.df_final[var_load] self.df_final.drop([var_load], inplace=True, axis=1) except KeyError: - self.logger.error("Variable "+var_load+" was not found. This is typically because no data could be retrieved from Home Assistant") + self.logger.error( + "Variable " + + var_load + + " was not found. 
This is typically because no data could be retrieved from Home Assistant" + ) return False except ValueError: - self.logger.error("sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same") - return False - if set_zero_min: # Apply minimum values + self.logger.error( + "sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same" + ) + return False + if set_zero_min: # Apply minimum values self.df_final.clip(lower=0.0, inplace=True, axis=1) self.df_final.replace(to_replace=0.0, value=np.nan, inplace=True) new_var_replace_zero = [] @@ -211,59 +283,74 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set # Just changing the names of variables to contain the fact that they are considered positive if var_replace_zero is not None: for string in var_replace_zero: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_replace_zero.append(new_string) else: new_var_replace_zero = None if var_interp is not None: for string in var_interp: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_interp.append(new_string) else: new_var_interp = None # Treating NaN replacement: either by zeros or by linear interpolation if new_var_replace_zero is not None: - self.df_final[new_var_replace_zero] = self.df_final[new_var_replace_zero].fillna(0.0) + self.df_final[new_var_replace_zero] = self.df_final[ + new_var_replace_zero + ].fillna(0.0) if new_var_interp is not None: self.df_final[new_var_interp] = self.df_final[new_var_interp].interpolate( - method='linear', axis=0, limit=None) + method="linear", axis=0, limit=None + ) self.df_final[new_var_interp] = self.df_final[new_var_interp].fillna(0.0) # Setting the correct time zone on DF index if self.time_zone is not None: self.df_final.index = self.df_final.index.tz_convert(self.time_zone) # Drop datetimeindex duplicates on final DF - self.df_final = self.df_final[~self.df_final.index.duplicated(keep='first')] + self.df_final = self.df_final[~self.df_final.index.duplicated(keep="first")] return True - + @staticmethod - def get_attr_data_dict(data_df: pd.DataFrame, idx: int, entity_id: str, - unit_of_measurement: str, friendly_name: str, - list_name: str, state: float) -> dict: - list_df = copy.deepcopy(data_df).loc[data_df.index[idx]:].reset_index() - list_df.columns = ['timestamps', entity_id] - ts_list = [str(i) for i in list_df['timestamps'].tolist()] - vals_list = [str(np.round(i,2)) for i in list_df[entity_id].tolist()] + def get_attr_data_dict( + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + list_name: str, + state: float, + ) -> dict: + list_df = copy.deepcopy(data_df).loc[data_df.index[idx] :].reset_index() + list_df.columns = ["timestamps", entity_id] + ts_list = [str(i) for i in list_df["timestamps"].tolist()] + vals_list = [str(np.round(i, 2)) for i in list_df[entity_id].tolist()] forecast_list = [] for i, ts in enumerate(ts_list): datum = {} datum["date"] = ts - datum[entity_id.split('sensor.')[1]] = vals_list[i] + datum[entity_id.split("sensor.")[1]] = vals_list[i] forecast_list.append(datum) data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, "friendly_name": friendly_name, - list_name: forecast_list - } + list_name: forecast_list, + }, } return data - - def post_data(self, data_df: pd.DataFrame, idx: int, 
entity_id: str, - unit_of_measurement: str, friendly_name: str, - type_var: str, - from_mlforecaster: Optional[bool]=False, - publish_prefix: Optional[str]="") -> None: + + def post_data( + self, + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + type_var: str, + from_mlforecaster: Optional[bool] = False, + publish_prefix: Optional[str] = "", + ) -> None: r""" Post passed data to hass. @@ -286,82 +373,139 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, """ # Add a possible prefix to the entity ID - entity_id = entity_id.replace('sensor.', 'sensor.'+publish_prefix) + entity_id = entity_id.replace("sensor.", "sensor." + publish_prefix) # Set the URL - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/states/"+entity_id - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/states/"+entity_id + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = self.hass_url + "/states/" + entity_id + else: # Otherwise the Home Assistant Core API it is + url = self.hass_url + "api/states/" + entity_id headers = { "Authorization": "Bearer " + self.long_lived_token, "content-type": "application/json", } # Preparing the data dict to be published - if type_var == 'cost_fun': - state = np.round(data_df.sum()[0],2) - elif type_var == 'unit_load_cost' or type_var == 'unit_prod_price': - state = np.round(data_df.loc[data_df.index[idx]],4) - elif type_var == 'optim_status': + if type_var == "cost_fun": + state = np.round(data_df.sum()[0], 2) + elif type_var == "unit_load_cost" or type_var == "unit_prod_price": + state = np.round(data_df.loc[data_df.index[idx]], 4) + elif type_var == "optim_status": state = data_df.loc[data_df.index[idx]] - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": state = data_df[idx] else: - state = np.round(data_df.loc[data_df.index[idx]],2) - if type_var == 'power': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "forecasts", state) - elif type_var == 'deferrable': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "deferrables_schedule", state) - elif type_var == 'batt': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_power", state) - elif type_var == 'SOC': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_soc", state) - elif type_var == 'unit_load_cost': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_load_cost_forecasts", state) - elif type_var == 'unit_prod_price': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_prod_price_forecasts", state) - elif type_var == 'mlforecaster': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "scheduled_forecast", state) - elif type_var == 'optim_status': + state = np.round(data_df.loc[data_df.index[idx]], 2) + if type_var == "power": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "forecasts", + state, + ) + elif type_var == "deferrable": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + 
unit_of_measurement, + friendly_name, + "deferrables_schedule", + state, + ) + elif type_var == "batt": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_power", + state, + ) + elif type_var == "SOC": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_soc", + state, + ) + elif type_var == "unit_load_cost": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_load_cost_forecasts", + state, + ) + elif type_var == "unit_prod_price": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_prod_price_forecasts", + state, + ) + elif type_var == "mlforecaster": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "scheduled_forecast", + state, + ) + elif type_var == "optim_status": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } else: data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } # Actually post the data if self.get_data_from_file: - class response: pass + + class response: + pass + response.status_code = 200 response.ok = True else: response = post(url, headers=headers, data=json.dumps(data)) # Treating the response status and posting them on the logger if response.ok: - self.logger.info("Successfully posted to "+entity_id+" = "+str(state)) + self.logger.info("Successfully posted to " + entity_id + " = " + str(state)) else: - self.logger.info("The status code for received curl command response is: "+str(response.status_code)) + self.logger.info( + "The status code for received curl command response is: " + + str(response.status_code) + ) return response, data diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 5f9f249b..02db0e09 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -2,10 +2,19 @@ # -*- coding: utf-8 -*- from typing import Tuple, Optional -import numpy as np, pandas as pd -import yaml, pytz, logging, pathlib, json, copy from datetime import datetime, timedelta, timezone +import logging +import pathlib +import json +import copy +import numpy as np +import pandas as pd +import yaml +import pytz + + import plotly.express as px + pd.options.plotting.backend = "plotly" from emhass.machine_learning_forecaster import MLForecaster @@ -14,13 +23,13 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: """ Get the root absolute path of the working directory. 
- + :param file: The passed file path with __file__ :return: The root path :param num_parent: The number of parents levels up to desired root folder :type num_parent: int, optional :rtype: str - + """ if num_parent == 3: root = pathlib.Path(file).resolve().parent.parent.parent @@ -32,11 +41,16 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: raise ValueError("num_parent value not valid, must be between 1 and 3") return root -def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = True, - logging_level: Optional[str] = "DEBUG") -> Tuple[logging.Logger, logging.StreamHandler]: + +def get_logger( + fun_name: str, + config_path: str, + save_to_file: Optional[bool] = True, + logging_level: Optional[str] = "DEBUG", +) -> Tuple[logging.Logger, logging.StreamHandler]: """ Create a simple logger object. - + :param fun_name: The Python function object name where the logger will be used :type fun_name: str :param config_path: The path to the yaml configuration file @@ -45,14 +59,14 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T :type save_to_file: bool, optional :return: The logger object and the handler :rtype: object - + """ - # create logger object + # create logger object logger = logging.getLogger(fun_name) logger.propagate = True logger.fileSetting = save_to_file if save_to_file: - ch = logging.FileHandler(config_path + '/data/logger_emhass.log') + ch = logging.FileHandler(config_path + "/data/logger_emhass.log") else: ch = logging.StreamHandler() if logging_level == "DEBUG": @@ -70,14 +84,18 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T else: logger.setLevel(logging.DEBUG) ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) return logger, ch -def get_forecast_dates(freq: int, delta_forecast: int, - timedelta_days: Optional[int] = 0) -> pd.core.indexes.datetimes.DatetimeIndex: + +def get_forecast_dates( + freq: int, delta_forecast: int, timedelta_days: Optional[int] = 0 +) -> pd.core.indexes.datetimes.DatetimeIndex: """ Get the date_range list of the needed future dates using the delta_forecast parameter. @@ -89,7 +107,7 @@ def get_forecast_dates(freq: int, delta_forecast: int, :type timedelta_days: Optional[int], optional :return: A list of future forecast dates. :rtype: pd.core.indexes.datetimes.DatetimeIndex - + """ freq = pd.to_timedelta(freq, "minutes") start_forecast = pd.Timestamp(datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) @@ -99,11 +117,19 @@ def get_forecast_dates(freq: int, delta_forecast: int, freq=freq).round(freq, ambiguous='infer', nonexistent=freq) return forecast_dates -def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict, - set_type: str, logger: logging.Logger) -> Tuple[str, dict]: + +def treat_runtimeparams( + runtimeparams: str, + params: str, + retrieve_hass_conf: dict, + optim_conf: dict, + plant_conf: dict, + set_type: str, + logger: logging.Logger, +) -> Tuple[str, dict]: """ - Treat the passed optimization runtime parameters. - + Treat the passed optimization runtime parameters. + :param runtimeparams: Json string containing the runtime parameters dict. 
:type runtimeparams: str :param params: Configuration parameters passed from data/options.json @@ -120,310 +146,479 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic :type logger: logging.Logger :return: Returning the params and optimization parameter container. :rtype: Tuple[str, dict] - + """ - if (params != None) and (params != 'null'): + if (params != None) and (params != "null"): params = json.loads(params) else: params = {} # Some default data needed custom_deferrable_forecast_id = [] - for k in range(optim_conf['num_def_loads']): - custom_deferrable_forecast_id.append({ - "entity_id": "sensor.p_deferrable{}".format(k), - "unit_of_measurement": "W", - "friendly_name": "Deferrable Load {}".format(k) - }) - default_passed_dict = {'custom_pv_forecast_id': {"entity_id": "sensor.p_pv_forecast", "unit_of_measurement": "W", "friendly_name": "PV Power Forecast"}, - 'custom_load_forecast_id': {"entity_id": "sensor.p_load_forecast", "unit_of_measurement": "W", "friendly_name": "Load Power Forecast"}, - 'custom_batt_forecast_id': {"entity_id": "sensor.p_batt_forecast", "unit_of_measurement": "W", "friendly_name": "Battery Power Forecast"}, - 'custom_batt_soc_forecast_id': {"entity_id": "sensor.soc_batt_forecast", "unit_of_measurement": "%", "friendly_name": "Battery SOC Forecast"}, - 'custom_grid_forecast_id': {"entity_id": "sensor.p_grid_forecast", "unit_of_measurement": "W", "friendly_name": "Grid Power Forecast"}, - 'custom_cost_fun_id': {"entity_id": "sensor.total_cost_fun_value", "unit_of_measurement": "", "friendly_name": "Total cost function value"}, - 'custom_optim_status_id': {"entity_id": "sensor.optim_status", "unit_of_measurement": "", "friendly_name": "EMHASS optimization status"}, - 'custom_unit_load_cost_id': {"entity_id": "sensor.unit_load_cost", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Load Cost"}, - 'custom_unit_prod_price_id': {"entity_id": "sensor.unit_prod_price", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Prod Price"}, - 'custom_deferrable_forecast_id': custom_deferrable_forecast_id, - 'publish_prefix': ""} - if 'passed_data' in params.keys(): + for k in range(optim_conf["num_def_loads"]): + custom_deferrable_forecast_id.append( + { + "entity_id": "sensor.p_deferrable{}".format(k), + "unit_of_measurement": "W", + "friendly_name": "Deferrable Load {}".format(k), + } + ) + default_passed_dict = { + "custom_pv_forecast_id": { + "entity_id": "sensor.p_pv_forecast", + "unit_of_measurement": "W", + "friendly_name": "PV Power Forecast", + }, + "custom_load_forecast_id": { + "entity_id": "sensor.p_load_forecast", + "unit_of_measurement": "W", + "friendly_name": "Load Power Forecast", + }, + "custom_batt_forecast_id": { + "entity_id": "sensor.p_batt_forecast", + "unit_of_measurement": "W", + "friendly_name": "Battery Power Forecast", + }, + "custom_batt_soc_forecast_id": { + "entity_id": "sensor.soc_batt_forecast", + "unit_of_measurement": "%", + "friendly_name": "Battery SOC Forecast", + }, + "custom_grid_forecast_id": { + "entity_id": "sensor.p_grid_forecast", + "unit_of_measurement": "W", + "friendly_name": "Grid Power Forecast", + }, + "custom_cost_fun_id": { + "entity_id": "sensor.total_cost_fun_value", + "unit_of_measurement": "", + "friendly_name": "Total cost function value", + }, + "custom_optim_status_id": { + "entity_id": "sensor.optim_status", + "unit_of_measurement": "", + "friendly_name": "EMHASS optimization status", + }, + "custom_unit_load_cost_id": { + "entity_id": "sensor.unit_load_cost", + 
"unit_of_measurement": "€/kWh", + "friendly_name": "Unit Load Cost", + }, + "custom_unit_prod_price_id": { + "entity_id": "sensor.unit_prod_price", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Prod Price", + }, + "custom_deferrable_forecast_id": custom_deferrable_forecast_id, + "publish_prefix": "", + } + if "passed_data" in params.keys(): for key, value in default_passed_dict.items(): - params['passed_data'][key] = value + params["passed_data"][key] = value else: - params['passed_data'] = default_passed_dict + params["passed_data"] = default_passed_dict if runtimeparams is not None: runtimeparams = json.loads(runtimeparams) - freq = int(retrieve_hass_conf['freq'].seconds/60.0) - delta_forecast = int(optim_conf['delta_forecast'].days) + freq = int(retrieve_hass_conf["freq"].seconds / 60.0) + delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) if set_type == "regressor-model-fit": - csv_file = runtimeparams['csv_file'] - independent_variables = runtimeparams['independent_variables'] - dependent_variable = runtimeparams['dependent_variable'] - params['passed_data']['csv_file'] = csv_file - params['passed_data']['independent_variables'] = independent_variables - params['passed_data']['dependent_variable'] = dependent_variable - if 'timestamp' not in runtimeparams.keys(): - params['passed_data']['timestamp'] = None + csv_file = runtimeparams["csv_file"] + features = runtimeparams["features"] + target = runtimeparams["target"] + params["passed_data"]["csv_file"] = csv_file + params["passed_data"]["features"] = features + params["passed_data"]["target"] = target + if "timestamp" not in runtimeparams.keys(): + params["passed_data"]["timestamp"] = None else: - timestamp = runtimeparams['timestamp'] - params['passed_data']['timestamp'] = timestamp - if 'date_features' not in runtimeparams.keys(): - params['passed_data']['date_features'] = [] + timestamp = runtimeparams["timestamp"] + params["passed_data"]["timestamp"] = timestamp + if "date_features" not in runtimeparams.keys(): + params["passed_data"]["date_features"] = [] else: - date_features = runtimeparams['date_features'] - params['passed_data']['date_features'] = date_features - + date_features = runtimeparams["date_features"] + params["passed_data"]["date_features"] = date_features + if set_type == "regressor-model-predict": - new_values = runtimeparams['new_values'] - params['passed_data']['new_values'] = new_values + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values # Treating special data passed for MPC control case - if set_type == 'naive-mpc-optim': - if 'prediction_horizon' not in runtimeparams.keys(): - prediction_horizon = 10 # 10 time steps by default + if set_type == "naive-mpc-optim": + if "prediction_horizon" not in runtimeparams.keys(): + prediction_horizon = 10 # 10 time steps by default else: - prediction_horizon = runtimeparams['prediction_horizon'] - params['passed_data']['prediction_horizon'] = prediction_horizon - if 'soc_init' not in runtimeparams.keys(): - soc_init = plant_conf['SOCtarget'] + prediction_horizon = runtimeparams["prediction_horizon"] + params["passed_data"]["prediction_horizon"] = prediction_horizon + if "soc_init" not in runtimeparams.keys(): + soc_init = plant_conf["SOCtarget"] else: - soc_init = runtimeparams['soc_init'] - params['passed_data']['soc_init'] = soc_init - if 'soc_final' not in runtimeparams.keys(): - soc_final = plant_conf['SOCtarget'] + soc_init = 
runtimeparams["soc_init"] + params["passed_data"]["soc_init"] = soc_init + if "soc_final" not in runtimeparams.keys(): + soc_final = plant_conf["SOCtarget"] else: - soc_final = runtimeparams['soc_final'] - params['passed_data']['soc_final'] = soc_final - if 'def_total_hours' not in runtimeparams.keys(): - def_total_hours = optim_conf['def_total_hours'] + soc_final = runtimeparams["soc_final"] + params["passed_data"]["soc_final"] = soc_final + if "def_total_hours" not in runtimeparams.keys(): + def_total_hours = optim_conf["def_total_hours"] else: - def_total_hours = runtimeparams['def_total_hours'] - params['passed_data']['def_total_hours'] = def_total_hours - if 'def_start_timestep' not in runtimeparams.keys(): - def_start_timestep = optim_conf['def_start_timestep'] + def_total_hours = runtimeparams["def_total_hours"] + params["passed_data"]["def_total_hours"] = def_total_hours + if "def_start_timestep" not in runtimeparams.keys(): + def_start_timestep = optim_conf["def_start_timestep"] else: - def_start_timestep = runtimeparams['def_start_timestep'] - params['passed_data']['def_start_timestep'] = def_start_timestep - if 'def_end_timestep' not in runtimeparams.keys(): - def_end_timestep = optim_conf['def_end_timestep'] + def_start_timestep = runtimeparams["def_start_timestep"] + params["passed_data"]["def_start_timestep"] = def_start_timestep + if "def_end_timestep" not in runtimeparams.keys(): + def_end_timestep = optim_conf["def_end_timestep"] else: - def_end_timestep = runtimeparams['def_end_timestep'] - params['passed_data']['def_end_timestep'] = def_end_timestep - if 'alpha' not in runtimeparams.keys(): + def_end_timestep = runtimeparams["def_end_timestep"] + params["passed_data"]["def_end_timestep"] = def_end_timestep + if "alpha" not in runtimeparams.keys(): alpha = 0.5 else: - alpha = runtimeparams['alpha'] - params['passed_data']['alpha'] = alpha - if 'beta' not in runtimeparams.keys(): + alpha = runtimeparams["alpha"] + params["passed_data"]["alpha"] = alpha + if "beta" not in runtimeparams.keys(): beta = 0.5 else: - beta = runtimeparams['beta'] - params['passed_data']['beta'] = beta + beta = runtimeparams["beta"] + params["passed_data"]["beta"] = beta forecast_dates = copy.deepcopy(forecast_dates)[0:prediction_horizon] else: - params['passed_data']['prediction_horizon'] = None - params['passed_data']['soc_init'] = None - params['passed_data']['soc_final'] = None - params['passed_data']['def_total_hours'] = None - params['passed_data']['def_start_timestep'] = None - params['passed_data']['def_end_timestep'] = None - params['passed_data']['alpha'] = None - params['passed_data']['beta'] = None + params["passed_data"]["prediction_horizon"] = None + params["passed_data"]["soc_init"] = None + params["passed_data"]["soc_final"] = None + params["passed_data"]["def_total_hours"] = None + params["passed_data"]["def_start_timestep"] = None + params["passed_data"]["def_end_timestep"] = None + params["passed_data"]["alpha"] = None + params["passed_data"]["beta"] = None # Treat passed forecast data lists - if 'pv_power_forecast' in runtimeparams.keys(): - if type(runtimeparams['pv_power_forecast']) == list and len(runtimeparams['pv_power_forecast']) >= len(forecast_dates): - params['passed_data']['pv_power_forecast'] = runtimeparams['pv_power_forecast'] - optim_conf['weather_forecast_method'] = 'list' + if "pv_power_forecast" in runtimeparams.keys(): + if type(runtimeparams["pv_power_forecast"]) == list and len( + runtimeparams["pv_power_forecast"] + ) >= len(forecast_dates): + 
params["passed_data"]["pv_power_forecast"] = runtimeparams[ + "pv_power_forecast" + ] + optim_conf["weather_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['pv_power_forecast']))+" and length is "+str(len(runtimeparams['pv_power_forecast']))) - list_non_digits = [x for x in runtimeparams['pv_power_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + str(type(runtimeparams["pv_power_forecast"])) + + " and length is " + + str(len(runtimeparams["pv_power_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["pv_power_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data for pv_power_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data for pv_power_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in pv_power_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in pv_power_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['pv_power_forecast'] = None - if 'load_power_forecast' in runtimeparams.keys(): - if type(runtimeparams['load_power_forecast']) == list and len(runtimeparams['load_power_forecast']) >= len(forecast_dates): - params['passed_data']['load_power_forecast'] = runtimeparams['load_power_forecast'] - optim_conf['load_forecast_method'] = 'list' + params["passed_data"]["pv_power_forecast"] = None + if "load_power_forecast" in runtimeparams.keys(): + if type(runtimeparams["load_power_forecast"]) == list and len( + runtimeparams["load_power_forecast"] + ) >= len(forecast_dates): + params["passed_data"]["load_power_forecast"] = runtimeparams[ + "load_power_forecast" + ] + optim_conf["load_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['load_power_forecast']))+" and length is "+str(len(runtimeparams['load_power_forecast']))) - list_non_digits = [x for x in runtimeparams['load_power_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + str(type(runtimeparams["load_power_forecast"])) + + " and length is " + + str(len(runtimeparams["load_power_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["load_power_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data for load_power_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data for load_power_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in load_power_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in 
load_power_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['load_power_forecast'] = None - if 'load_cost_forecast' in runtimeparams.keys(): - if type(runtimeparams['load_cost_forecast']) == list and len(runtimeparams['load_cost_forecast']) >= len(forecast_dates): - params['passed_data']['load_cost_forecast'] = runtimeparams['load_cost_forecast'] - optim_conf['load_cost_forecast_method'] = 'list' + params["passed_data"]["load_power_forecast"] = None + if "load_cost_forecast" in runtimeparams.keys(): + if type(runtimeparams["load_cost_forecast"]) == list and len( + runtimeparams["load_cost_forecast"] + ) >= len(forecast_dates): + params["passed_data"]["load_cost_forecast"] = runtimeparams[ + "load_cost_forecast" + ] + optim_conf["load_cost_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['load_cost_forecast']))+" and length is "+str(len(runtimeparams['load_cost_forecast']))) - list_non_digits = [x for x in runtimeparams['load_cost_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + str(type(runtimeparams["load_cost_forecast"])) + + " and length is " + + str(len(runtimeparams["load_cost_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["load_cost_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data or load_cost_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data or load_cost_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in load_cost_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in load_cost_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['load_cost_forecast'] = None - if 'prod_price_forecast' in runtimeparams.keys(): - if type(runtimeparams['prod_price_forecast']) == list and len(runtimeparams['prod_price_forecast']) >= len(forecast_dates): - params['passed_data']['prod_price_forecast'] = runtimeparams['prod_price_forecast'] - optim_conf['prod_price_forecast_method'] = 'list' + params["passed_data"]["load_cost_forecast"] = None + if "prod_price_forecast" in runtimeparams.keys(): + if type(runtimeparams["prod_price_forecast"]) == list and len( + runtimeparams["prod_price_forecast"] + ) >= len(forecast_dates): + params["passed_data"]["prod_price_forecast"] = runtimeparams[ + "prod_price_forecast" + ] + optim_conf["prod_price_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['prod_price_forecast']))+" and length is "+str(len(runtimeparams['prod_price_forecast']))) - list_non_digits = [x for x in runtimeparams['prod_price_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + 
str(type(runtimeparams["prod_price_forecast"])) + + " and length is " + + str(len(runtimeparams["prod_price_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["prod_price_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data for prod_price_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data for prod_price_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in prod_price_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in prod_price_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['prod_price_forecast'] = None + params["passed_data"]["prod_price_forecast"] = None # Treat passed data for forecast model fit/predict/tune at runtime - if 'days_to_retrieve' not in runtimeparams.keys(): + if "days_to_retrieve" not in runtimeparams.keys(): days_to_retrieve = 9 else: - days_to_retrieve = runtimeparams['days_to_retrieve'] - params['passed_data']['days_to_retrieve'] = days_to_retrieve - if 'model_type' not in runtimeparams.keys(): + days_to_retrieve = runtimeparams["days_to_retrieve"] + params["passed_data"]["days_to_retrieve"] = days_to_retrieve + if "model_type" not in runtimeparams.keys(): model_type = "load_forecast" else: - model_type = runtimeparams['model_type'] - params['passed_data']['model_type'] = model_type - if 'var_model' not in runtimeparams.keys(): + model_type = runtimeparams["model_type"] + params["passed_data"]["model_type"] = model_type + if "var_model" not in runtimeparams.keys(): var_model = "sensor.power_load_no_var_loads" else: - var_model = runtimeparams['var_model'] - params['passed_data']['var_model'] = var_model - if 'sklearn_model' not in runtimeparams.keys(): + var_model = runtimeparams["var_model"] + params["passed_data"]["var_model"] = var_model + if "sklearn_model" not in runtimeparams.keys(): sklearn_model = "KNeighborsRegressor" else: - sklearn_model = runtimeparams['sklearn_model'] - params['passed_data']['sklearn_model'] = sklearn_model - if 'num_lags' not in runtimeparams.keys(): + sklearn_model = runtimeparams["sklearn_model"] + params["passed_data"]["sklearn_model"] = sklearn_model + if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: - num_lags = runtimeparams['num_lags'] - params['passed_data']['num_lags'] = num_lags - if 'split_date_delta' not in runtimeparams.keys(): - split_date_delta = '48h' + num_lags = runtimeparams["num_lags"] + params["passed_data"]["num_lags"] = num_lags + if "split_date_delta" not in runtimeparams.keys(): + split_date_delta = "48h" else: - split_date_delta = runtimeparams['split_date_delta'] - params['passed_data']['split_date_delta'] = split_date_delta - if 'perform_backtest' not in runtimeparams.keys(): + split_date_delta = runtimeparams["split_date_delta"] + params["passed_data"]["split_date_delta"] = split_date_delta + if "perform_backtest" not in runtimeparams.keys(): perform_backtest = False else: - perform_backtest = eval(str(runtimeparams['perform_backtest']).capitalize()) - params['passed_data']['perform_backtest'] = perform_backtest - if 'model_predict_publish' not in runtimeparams.keys(): + perform_backtest = eval(str(runtimeparams["perform_backtest"]).capitalize()) + params["passed_data"]["perform_backtest"] = perform_backtest + if "model_predict_publish" not in runtimeparams.keys(): 
model_predict_publish = False else: - model_predict_publish = eval(str(runtimeparams['model_predict_publish']).capitalize()) - params['passed_data']['model_predict_publish'] = model_predict_publish - if 'model_predict_entity_id' not in runtimeparams.keys(): + model_predict_publish = eval( + str(runtimeparams["model_predict_publish"]).capitalize() + ) + params["passed_data"]["model_predict_publish"] = model_predict_publish + if "model_predict_entity_id" not in runtimeparams.keys(): model_predict_entity_id = "sensor.p_load_forecast_custom_model" else: - model_predict_entity_id = runtimeparams['model_predict_entity_id'] - params['passed_data']['model_predict_entity_id'] = model_predict_entity_id - if 'model_predict_unit_of_measurement' not in runtimeparams.keys(): + model_predict_entity_id = runtimeparams["model_predict_entity_id"] + params["passed_data"]["model_predict_entity_id"] = model_predict_entity_id + if "model_predict_unit_of_measurement" not in runtimeparams.keys(): model_predict_unit_of_measurement = "W" else: - model_predict_unit_of_measurement = runtimeparams['model_predict_unit_of_measurement'] - params['passed_data']['model_predict_unit_of_measurement'] = model_predict_unit_of_measurement - if 'model_predict_friendly_name' not in runtimeparams.keys(): + model_predict_unit_of_measurement = runtimeparams[ + "model_predict_unit_of_measurement" + ] + params["passed_data"][ + "model_predict_unit_of_measurement" + ] = model_predict_unit_of_measurement + if "model_predict_friendly_name" not in runtimeparams.keys(): model_predict_friendly_name = "Load Power Forecast custom ML model" else: - model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] - params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'mlr_predict_entity_id' not in runtimeparams.keys(): + model_predict_friendly_name = runtimeparams["model_predict_friendly_name"] + params["passed_data"][ + "model_predict_friendly_name" + ] = model_predict_friendly_name + if "mlr_predict_entity_id" not in runtimeparams.keys(): mlr_predict_entity_id = "sensor.mlr_predict" else: - mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] - params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id - if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_entity_id = runtimeparams["mlr_predict_entity_id"] + params["passed_data"]["mlr_predict_entity_id"] = mlr_predict_entity_id + if "mlr_predict_unit_of_measurement" not in runtimeparams.keys(): mlr_predict_unit_of_measurement = None else: - mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] - params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement - if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = runtimeparams[ + "mlr_predict_unit_of_measurement" + ] + params["passed_data"][ + "mlr_predict_unit_of_measurement" + ] = mlr_predict_unit_of_measurement + if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = 
runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant 
configuration parameters passed at runtime - if 'SOCtarget' in runtimeparams.keys(): - plant_conf['SOCtarget'] = runtimeparams['SOCtarget'] + if "SOCtarget" in runtimeparams.keys(): + plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] # Treat custom entities id's and friendly names for variables - if 'custom_pv_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_pv_forecast_id'] = runtimeparams['custom_pv_forecast_id'] - if 'custom_load_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_load_forecast_id'] = runtimeparams['custom_load_forecast_id'] - if 'custom_batt_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_forecast_id'] = runtimeparams['custom_batt_forecast_id'] - if 'custom_batt_soc_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_soc_forecast_id'] = runtimeparams['custom_batt_soc_forecast_id'] - if 'custom_grid_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_grid_forecast_id'] = runtimeparams['custom_grid_forecast_id'] - if 'custom_cost_fun_id' in runtimeparams.keys(): - params['passed_data']['custom_cost_fun_id'] = runtimeparams['custom_cost_fun_id'] - if 'custom_optim_status_id' in runtimeparams.keys(): - params['passed_data']['custom_optim_status_id'] = runtimeparams['custom_optim_status_id'] - if 'custom_unit_load_cost_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_load_cost_id'] = runtimeparams['custom_unit_load_cost_id'] - if 'custom_unit_prod_price_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_prod_price_id'] = runtimeparams['custom_unit_prod_price_id'] - if 'custom_deferrable_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_deferrable_forecast_id'] = runtimeparams['custom_deferrable_forecast_id'] + if "custom_pv_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_pv_forecast_id"] = runtimeparams[ + "custom_pv_forecast_id" + ] + if "custom_load_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_load_forecast_id"] = runtimeparams[ + "custom_load_forecast_id" + ] + if "custom_batt_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_forecast_id"] = runtimeparams[ + "custom_batt_forecast_id" + ] + if "custom_batt_soc_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_soc_forecast_id"] = runtimeparams[ + "custom_batt_soc_forecast_id" + ] + if "custom_grid_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_grid_forecast_id"] = runtimeparams[ + "custom_grid_forecast_id" + ] + if "custom_cost_fun_id" in runtimeparams.keys(): + params["passed_data"]["custom_cost_fun_id"] = runtimeparams[ + "custom_cost_fun_id" + ] + if "custom_optim_status_id" in runtimeparams.keys(): + params["passed_data"]["custom_optim_status_id"] = runtimeparams[ + "custom_optim_status_id" + ] + if "custom_unit_load_cost_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_load_cost_id"] = runtimeparams[ + "custom_unit_load_cost_id" + ] + if "custom_unit_prod_price_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_prod_price_id"] = runtimeparams[ + "custom_unit_prod_price_id" + ] + if "custom_deferrable_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_deferrable_forecast_id"] = runtimeparams[ + "custom_deferrable_forecast_id" + ] # A condition to put a prefix on all published data - if 'publish_prefix' not in runtimeparams.keys(): + if "publish_prefix" not in runtimeparams.keys(): 
publish_prefix = "" else: - publish_prefix = runtimeparams['publish_prefix'] - params['passed_data']['publish_prefix'] = publish_prefix + publish_prefix = runtimeparams["publish_prefix"] + params["passed_data"]["publish_prefix"] = publish_prefix # Serialize the final params params = json.dumps(params) return params, retrieve_hass_conf, optim_conf, plant_conf -def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, - params: Optional[str] = None) -> Tuple[dict, dict, dict]: + +def get_yaml_parse( + config_path: str, use_secrets: Optional[bool] = True, params: Optional[str] = None +) -> Tuple[dict, dict, dict]: """ Perform parsing of the config.yaml file. - + :param config_path: The path to the yaml configuration file :type config_path: str :param use_secrets: Indicate if we should use a secrets file or not. @@ -437,49 +632,54 @@ def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, """ base = config_path.parent if params is None: - with open(config_path, 'r') as file: + with open(config_path, "r") as file: input_conf = yaml.load(file, Loader=yaml.FullLoader) else: input_conf = json.loads(params) if use_secrets: if params is None: - with open(base / 'secrets_emhass.yaml', 'r') as file: + with open(base / "secrets_emhass.yaml", "r") as file: input_secrets = yaml.load(file, Loader=yaml.FullLoader) else: - input_secrets = input_conf.pop('params_secrets', None) - - if (type(input_conf['retrieve_hass_conf']) == list): #if using old config version - retrieve_hass_conf = dict({key:d[key] for d in input_conf['retrieve_hass_conf'] for key in d}) + input_secrets = input_conf.pop("params_secrets", None) + + if type(input_conf["retrieve_hass_conf"]) == list: # if using old config version + retrieve_hass_conf = dict( + {key: d[key] for d in input_conf["retrieve_hass_conf"] for key in d} + ) else: - retrieve_hass_conf = input_conf.get('retrieve_hass_conf', {}) - + retrieve_hass_conf = input_conf.get("retrieve_hass_conf", {}) + if use_secrets: retrieve_hass_conf.update(input_secrets) else: - retrieve_hass_conf['hass_url'] = 'http://supervisor/core/api' - retrieve_hass_conf['long_lived_token'] = '${SUPERVISOR_TOKEN}' - retrieve_hass_conf['time_zone'] = 'Europe/Paris' - retrieve_hass_conf['lat'] = 45.83 - retrieve_hass_conf['lon'] = 6.86 - retrieve_hass_conf['alt'] = 4807.8 - retrieve_hass_conf['freq'] = pd.to_timedelta(retrieve_hass_conf['freq'], "minutes") - retrieve_hass_conf['time_zone'] = pytz.timezone(retrieve_hass_conf['time_zone']) - - if (type(input_conf['optim_conf']) == list): - optim_conf = dict({key:d[key] for d in input_conf['optim_conf'] for key in d}) + retrieve_hass_conf["hass_url"] = "http://supervisor/core/api" + retrieve_hass_conf["long_lived_token"] = "${SUPERVISOR_TOKEN}" + retrieve_hass_conf["time_zone"] = "Europe/Paris" + retrieve_hass_conf["lat"] = 45.83 + retrieve_hass_conf["lon"] = 6.86 + retrieve_hass_conf["alt"] = 4807.8 + retrieve_hass_conf["freq"] = pd.to_timedelta(retrieve_hass_conf["freq"], "minutes") + retrieve_hass_conf["time_zone"] = pytz.timezone(retrieve_hass_conf["time_zone"]) + + if type(input_conf["optim_conf"]) == list: + optim_conf = dict({key: d[key] for d in input_conf["optim_conf"] for key in d}) else: - optim_conf = input_conf.get('optim_conf', {}) + optim_conf = input_conf.get("optim_conf", {}) - optim_conf['list_hp_periods'] = dict((key,d[key]) for d in optim_conf['list_hp_periods'] for key in d) - optim_conf['delta_forecast'] = pd.Timedelta(days=optim_conf['delta_forecast']) - - if (type(input_conf['plant_conf']) == list): 
- plant_conf = dict({key:d[key] for d in input_conf['plant_conf'] for key in d}) + optim_conf["list_hp_periods"] = dict( + (key, d[key]) for d in optim_conf["list_hp_periods"] for key in d + ) + optim_conf["delta_forecast"] = pd.Timedelta(days=optim_conf["delta_forecast"]) + + if type(input_conf["plant_conf"]) == list: + plant_conf = dict({key: d[key] for d in input_conf["plant_conf"] for key in d}) else: - plant_conf = input_conf.get('plant_conf', {}) - + plant_conf = input_conf.get("plant_conf", {}) + return retrieve_hass_conf, optim_conf, plant_conf + def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dict: """ Build a dictionary with graphs and tables for the webui. @@ -490,61 +690,86 @@ def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dic :type plot_size: Optional[int], optional :return: A dictionary containing the graphs and tables in html format :rtype: dict - + """ - cols_p = [i for i in df.columns.to_list() if 'P_' in i] + cols_p = [i for i in df.columns.to_list() if "P_" in i] # Let's round the data in the DF - optim_status = df['optim_status'].unique().item() - df.drop('optim_status', axis=1, inplace=True) - cols_else = [i for i in df.columns.to_list() if 'P_' not in i] + optim_status = df["optim_status"].unique().item() + df.drop("optim_status", axis=1, inplace=True) + cols_else = [i for i in df.columns.to_list() if "P_" not in i] df = df.apply(pd.to_numeric) df[cols_p] = df[cols_p].astype(int) df[cols_else] = df[cols_else].round(3) # Create plots n_colors = len(cols_p) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_0 = px.line(df[cols_p], title='Systems powers schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_0.update_layout(xaxis_title='Timestamp', yaxis_title='System powers (W)') - if 'SOC_opt' in df.columns.to_list(): - fig_1 = px.line(df['SOC_opt'], title='Battery state of charge schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_1.update_layout(xaxis_title='Timestamp', yaxis_title='Battery SOC (%)') - cols_cost = [i for i in df.columns.to_list() if 'cost_' in i or 'unit_' in i] + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_0 = px.line( + df[cols_p], + title="Systems powers schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_0.update_layout(xaxis_title="Timestamp", yaxis_title="System powers (W)") + if "SOC_opt" in df.columns.to_list(): + fig_1 = px.line( + df["SOC_opt"], + title="Battery state of charge schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_1.update_layout(xaxis_title="Timestamp", yaxis_title="Battery SOC (%)") + cols_cost = [i for i in df.columns.to_list() if "cost_" in i or "unit_" in i] n_colors = len(cols_cost) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_2 = px.line(df[cols_cost], title='Systems costs obtained from optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_2.update_layout(xaxis_title='Timestamp', yaxis_title='System costs (currency)') + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_2 = px.line( + df[cols_cost], + title="Systems costs 
obtained from optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_2.update_layout(xaxis_title="Timestamp", yaxis_title="System costs (currency)") # Get full path to image - image_path_0 = fig_0.to_html(full_html=False, default_width='75%') - if 'SOC_opt' in df.columns.to_list(): - image_path_1 = fig_1.to_html(full_html=False, default_width='75%') - image_path_2 = fig_2.to_html(full_html=False, default_width='75%') + image_path_0 = fig_0.to_html(full_html=False, default_width="75%") + if "SOC_opt" in df.columns.to_list(): + image_path_1 = fig_1.to_html(full_html=False, default_width="75%") + image_path_2 = fig_2.to_html(full_html=False, default_width="75%") # The tables - table1 = df.reset_index().to_html(classes='mystyle', index=False) - cost_cols = [i for i in df.columns if 'cost_' in i] + table1 = df.reset_index().to_html(classes="mystyle", index=False) + cost_cols = [i for i in df.columns if "cost_" in i] table2 = df[cost_cols].reset_index().sum(numeric_only=True) - table2['optim_status'] = optim_status - table2 = table2.to_frame(name='Value').reset_index(names='Variable').to_html(classes='mystyle', index=False) + table2["optim_status"] = optim_status + table2 = ( + table2.to_frame(name="Value") + .reset_index(names="Variable") + .to_html(classes="mystyle", index=False) + ) # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>EMHASS optimization results</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting latest optimization results</h4>'
-    injection_dict['figure_0'] = image_path_0
-    if 'SOC_opt' in df.columns.to_list():
-        injection_dict['figure_1'] = image_path_1
-    injection_dict['figure_2'] = image_path_2
-    injection_dict['subsubtitle1'] = '<h4>Last run optimization results table</h4>'
-    injection_dict['table1'] = table1
-    injection_dict['subsubtitle2'] = '<h4>Summary table for latest optimization results</h4>'
-    injection_dict['table2'] = table2
+    injection_dict["title"] = "<h2>EMHASS optimization results</h2>"
+    injection_dict["subsubtitle0"] = "<h4>Plotting latest optimization results</h4>"
+    injection_dict["figure_0"] = image_path_0
+    if "SOC_opt" in df.columns.to_list():
+        injection_dict["figure_1"] = image_path_1
+    injection_dict["figure_2"] = image_path_2
+    injection_dict["subsubtitle1"] = "<h4>Last run optimization results table</h4>"
+    injection_dict["table1"] = table1
+    injection_dict["subsubtitle2"] = (
+        "<h4>Summary table for latest optimization results</h4>
" + ) + injection_dict["table2"] = table2 return injection_dict -def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLForecaster) -> dict: + +def get_injection_dict_forecast_model_fit( + df_fit_pred: pd.DataFrame, mlf: MLForecaster +) -> dict: """ Build a dictionary with graphs and tables for the webui for special MLF fit case. @@ -556,19 +781,26 @@ def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLFore :rtype: dict """ fig = df_fit_pred.plot() - fig.layout.template = 'presentation' - fig.update_yaxes(title_text = mlf.model_type) - fig.update_xaxes(title_text = "Time") - image_path_0 = fig.to_html(full_html=False, default_width='75%') + fig.layout.template = "presentation" + fig.update_yaxes(title_text=mlf.model_type) + fig.update_xaxes(title_text="Time") + image_path_0 = fig.to_html(full_html=False, default_width="75%") # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>Custom machine learning forecast model fit</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting train/test forecast model results for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model fit</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Plotting train/test forecast model results for " + mlf.model_type + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>
" + ) + injection_dict["figure_0"] = image_path_0 return injection_dict -def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLForecaster) -> dict: + +def get_injection_dict_forecast_model_tune( + df_pred_optim: pd.DataFrame, mlf: MLForecaster +) -> dict: """ Build a dictionary with graphs and tables for the webui for special MLF tune case. @@ -580,19 +812,32 @@ def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLF :rtype: dict """ fig = df_pred_optim.plot() - fig.layout.template = 'presentation' - fig.update_yaxes(title_text = mlf.model_type) - fig.update_xaxes(title_text = "Time") - image_path_0 = fig.to_html(full_html=False, default_width='75%') + fig.layout.template = "presentation" + fig.update_yaxes(title_text=mlf.model_type) + fig.update_xaxes(title_text="Time") + image_path_0 = fig.to_html(full_html=False, default_width="75%") # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>Custom machine learning forecast model tune</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Performed a tuning routine using bayesian optimization for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model tune</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Performed a tuning routine using bayesian optimization for "
+        + mlf.model_type
+        + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>
" + ) + injection_dict["figure_0"] = image_path_0 return injection_dict -def build_params(params: dict, params_secrets: dict, options: dict, addon: int, logger: logging.Logger) -> dict: + +def build_params( + params: dict, + params_secrets: dict, + options: dict, + addon: int, + logger: logging.Logger, +) -> dict: """ Build the main params dictionary from the loaded options.json when using the add-on. @@ -611,90 +856,241 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, """ if addon == 1: # Updating variables in retrieve_hass_conf - params['retrieve_hass_conf']['freq'] = options.get('optimization_time_step',params['retrieve_hass_conf']['freq']) - params['retrieve_hass_conf']['days_to_retrieve'] = options.get('historic_days_to_retrieve',params['retrieve_hass_conf']['days_to_retrieve']) - params['retrieve_hass_conf']['var_PV'] = options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']) - params['retrieve_hass_conf']['var_load'] = options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load']) - params['retrieve_hass_conf']['load_negative'] = options.get('load_negative',params['retrieve_hass_conf']['load_negative']) - params['retrieve_hass_conf']['set_zero_min'] = options.get('set_zero_min',params['retrieve_hass_conf']['set_zero_min']) - params['retrieve_hass_conf']['var_replace_zero'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_replace_zero'])] - params['retrieve_hass_conf']['var_interp'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']), options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])] - params['retrieve_hass_conf']['method_ts_round'] = options.get('method_ts_round',params['retrieve_hass_conf']['method_ts_round']) + params["retrieve_hass_conf"]["freq"] = options.get( + "optimization_time_step", params["retrieve_hass_conf"]["freq"] + ) + params["retrieve_hass_conf"]["days_to_retrieve"] = options.get( + "historic_days_to_retrieve", + params["retrieve_hass_conf"]["days_to_retrieve"], + ) + params["retrieve_hass_conf"]["var_PV"] = options.get( + "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"] + ) + params["retrieve_hass_conf"]["var_load"] = options.get( + "sensor_power_load_no_var_loads", params["retrieve_hass_conf"]["var_load"] + ) + params["retrieve_hass_conf"]["load_negative"] = options.get( + "load_negative", params["retrieve_hass_conf"]["load_negative"] + ) + params["retrieve_hass_conf"]["set_zero_min"] = options.get( + "set_zero_min", params["retrieve_hass_conf"]["set_zero_min"] + ) + params["retrieve_hass_conf"]["var_replace_zero"] = [ + options.get( + "sensor_power_photovoltaics", + params["retrieve_hass_conf"]["var_replace_zero"], + ) + ] + params["retrieve_hass_conf"]["var_interp"] = [ + options.get( + "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"] + ), + options.get( + "sensor_power_load_no_var_loads", + params["retrieve_hass_conf"]["var_load"], + ), + ] + params["retrieve_hass_conf"]["method_ts_round"] = options.get( + "method_ts_round", params["retrieve_hass_conf"]["method_ts_round"] + ) # Update params Secrets if specified - params['params_secrets'] = params_secrets - params['params_secrets']['time_zone'] = options.get('time_zone',params_secrets['time_zone']) - params['params_secrets']['lat'] = options.get('Latitude',params_secrets['lat']) - params['params_secrets']['lon'] = options.get('Longitude',params_secrets['lon']) - params['params_secrets']['alt'] = 
options.get('Altitude',params_secrets['alt']) + params["params_secrets"] = params_secrets + params["params_secrets"]["time_zone"] = options.get( + "time_zone", params_secrets["time_zone"] + ) + params["params_secrets"]["lat"] = options.get("Latitude", params_secrets["lat"]) + params["params_secrets"]["lon"] = options.get( + "Longitude", params_secrets["lon"] + ) + params["params_secrets"]["alt"] = options.get("Altitude", params_secrets["alt"]) # Updating variables in optim_conf - params['optim_conf']['set_use_battery'] = options.get('set_use_battery',params['optim_conf']['set_use_battery']) - params['optim_conf']['num_def_loads'] = options.get('number_of_deferrable_loads',params['optim_conf']['num_def_loads']) - if options.get('list_nominal_power_of_deferrable_loads',None) != None: - params['optim_conf']['P_deferrable_nom'] = [i['nominal_power_of_deferrable_loads'] for i in options.get('list_nominal_power_of_deferrable_loads')] - if options.get('list_operating_hours_of_each_deferrable_load',None) != None: - params['optim_conf']['def_total_hours'] = [i['operating_hours_of_each_deferrable_load'] for i in options.get('list_operating_hours_of_each_deferrable_load')] - if options.get('list_treat_deferrable_load_as_semi_cont',None) != None: - params['optim_conf']['treat_def_as_semi_cont'] = [i['treat_deferrable_load_as_semi_cont'] for i in options.get('list_treat_deferrable_load_as_semi_cont')] - params['optim_conf']['weather_forecast_method'] = options.get('weather_forecast_method',params['optim_conf']['weather_forecast_method']) + params["optim_conf"]["set_use_battery"] = options.get( + "set_use_battery", params["optim_conf"]["set_use_battery"] + ) + params["optim_conf"]["num_def_loads"] = options.get( + "number_of_deferrable_loads", params["optim_conf"]["num_def_loads"] + ) + if options.get("list_nominal_power_of_deferrable_loads", None) != None: + params["optim_conf"]["P_deferrable_nom"] = [ + i["nominal_power_of_deferrable_loads"] + for i in options.get("list_nominal_power_of_deferrable_loads") + ] + if options.get("list_operating_hours_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_total_hours"] = [ + i["operating_hours_of_each_deferrable_load"] + for i in options.get("list_operating_hours_of_each_deferrable_load") + ] + if options.get("list_treat_deferrable_load_as_semi_cont", None) != None: + params["optim_conf"]["treat_def_as_semi_cont"] = [ + i["treat_deferrable_load_as_semi_cont"] + for i in options.get("list_treat_deferrable_load_as_semi_cont") + ] + params["optim_conf"]["weather_forecast_method"] = options.get( + "weather_forecast_method", params["optim_conf"]["weather_forecast_method"] + ) # Update optional param secrets - if params['optim_conf']['weather_forecast_method'] == "solcast": - params['params_secrets']['solcast_api_key'] = options.get('optional_solcast_api_key',params_secrets.get('solcast_api_key',"123456")) - params['params_secrets']['solcast_rooftop_id'] = options.get('optional_solcast_rooftop_id',params_secrets.get('solcast_rooftop_id',"123456")) - elif params['optim_conf']['weather_forecast_method'] == "solar.forecast": - params['params_secrets']['solar_forecast_kwp'] = options.get('optional_solar_forecast_kwp',params_secrets.get('solar_forecast_kwp',5)) - params['optim_conf']['load_forecast_method'] = options.get('load_forecast_method',params['optim_conf']['load_forecast_method']) - params['optim_conf']['delta_forecast'] = options.get('delta_forecast_daily',params['optim_conf']['delta_forecast']) - 
params['optim_conf']['load_cost_forecast_method'] = options.get('load_cost_forecast_method',params['optim_conf']['load_cost_forecast_method']) - if options.get('list_set_deferrable_load_single_constant',None) != None: - params['optim_conf']['set_def_constant'] = [i['set_deferrable_load_single_constant'] for i in options.get('list_set_deferrable_load_single_constant')] - if options.get('list_peak_hours_periods_start_hours',None) != None and options.get('list_peak_hours_periods_end_hours',None) != None: - start_hours_list = [i['peak_hours_periods_start_hours'] for i in options['list_peak_hours_periods_start_hours']] - end_hours_list = [i['peak_hours_periods_end_hours'] for i in options['list_peak_hours_periods_end_hours']] + if params["optim_conf"]["weather_forecast_method"] == "solcast": + params["params_secrets"]["solcast_api_key"] = options.get( + "optional_solcast_api_key", + params_secrets.get("solcast_api_key", "123456"), + ) + params["params_secrets"]["solcast_rooftop_id"] = options.get( + "optional_solcast_rooftop_id", + params_secrets.get("solcast_rooftop_id", "123456"), + ) + elif params["optim_conf"]["weather_forecast_method"] == "solar.forecast": + params["params_secrets"]["solar_forecast_kwp"] = options.get( + "optional_solar_forecast_kwp", + params_secrets.get("solar_forecast_kwp", 5), + ) + params["optim_conf"]["load_forecast_method"] = options.get( + "load_forecast_method", params["optim_conf"]["load_forecast_method"] + ) + params["optim_conf"]["delta_forecast"] = options.get( + "delta_forecast_daily", params["optim_conf"]["delta_forecast"] + ) + params["optim_conf"]["load_cost_forecast_method"] = options.get( + "load_cost_forecast_method", + params["optim_conf"]["load_cost_forecast_method"], + ) + if options.get("list_set_deferrable_load_single_constant", None) != None: + params["optim_conf"]["set_def_constant"] = [ + i["set_deferrable_load_single_constant"] + for i in options.get("list_set_deferrable_load_single_constant") + ] + if ( + options.get("list_peak_hours_periods_start_hours", None) != None + and options.get("list_peak_hours_periods_end_hours", None) != None + ): + start_hours_list = [ + i["peak_hours_periods_start_hours"] + for i in options["list_peak_hours_periods_start_hours"] + ] + end_hours_list = [ + i["peak_hours_periods_end_hours"] + for i in options["list_peak_hours_periods_end_hours"] + ] num_peak_hours = len(start_hours_list) - list_hp_periods_list = [{'period_hp_'+str(i+1):[{'start':start_hours_list[i]},{'end':end_hours_list[i]}]} for i in range(num_peak_hours)] - params['optim_conf']['list_hp_periods'] = list_hp_periods_list - params['optim_conf']['load_cost_hp'] = options.get('load_peak_hours_cost',params['optim_conf']['load_cost_hp']) - params['optim_conf']['load_cost_hc'] = options.get('load_offpeak_hours_cost', params['optim_conf']['load_cost_hc']) - params['optim_conf']['prod_price_forecast_method'] = options.get('production_price_forecast_method', params['optim_conf']['prod_price_forecast_method']) - params['optim_conf']['prod_sell_price'] = options.get('photovoltaic_production_sell_price',params['optim_conf']['prod_sell_price']) - params['optim_conf']['set_total_pv_sell'] = options.get('set_total_pv_sell',params['optim_conf']['set_total_pv_sell']) - params['optim_conf']['lp_solver'] = options.get('lp_solver',params['optim_conf']['lp_solver']) - params['optim_conf']['lp_solver_path'] = options.get('lp_solver_path',params['optim_conf']['lp_solver_path']) - params['optim_conf']['set_nocharge_from_grid'] = 
options.get('set_nocharge_from_grid',params['optim_conf']['set_nocharge_from_grid']) - params['optim_conf']['set_nodischarge_to_grid'] = options.get('set_nodischarge_to_grid',params['optim_conf']['set_nodischarge_to_grid']) - params['optim_conf']['set_battery_dynamic'] = options.get('set_battery_dynamic',params['optim_conf']['set_battery_dynamic']) - params['optim_conf']['battery_dynamic_max'] = options.get('battery_dynamic_max',params['optim_conf']['battery_dynamic_max']) - params['optim_conf']['battery_dynamic_min'] = options.get('battery_dynamic_min',params['optim_conf']['battery_dynamic_min']) - params['optim_conf']['weight_battery_discharge'] = options.get('weight_battery_discharge',params['optim_conf']['weight_battery_discharge']) - params['optim_conf']['weight_battery_charge'] = options.get('weight_battery_charge',params['optim_conf']['weight_battery_charge']) - if options.get('list_start_timesteps_of_each_deferrable_load',None) != None: - params['optim_conf']['def_start_timestep'] = [i['start_timesteps_of_each_deferrable_load'] for i in options.get('list_start_timesteps_of_each_deferrable_load')] - if options.get('list_end_timesteps_of_each_deferrable_load',None) != None: - params['optim_conf']['def_end_timestep'] = [i['end_timesteps_of_each_deferrable_load'] for i in options.get('list_end_timesteps_of_each_deferrable_load')] - # Updating variables in plant_con - params['plant_conf']['P_grid_max'] = options.get('maximum_power_from_grid',params['plant_conf']['P_grid_max']) - if options.get('list_pv_module_model',None) != None: - params['plant_conf']['module_model'] = [i['pv_module_model'] for i in options.get('list_pv_module_model')] - if options.get('list_pv_inverter_model',None) != None: - params['plant_conf']['inverter_model'] = [i['pv_inverter_model'] for i in options.get('list_pv_inverter_model')] - if options.get('list_surface_tilt',None) != None: - params['plant_conf']['surface_tilt'] = [i['surface_tilt'] for i in options.get('list_surface_tilt')] - if options.get('list_surface_azimuth',None) != None: - params['plant_conf']['surface_azimuth'] = [i['surface_azimuth'] for i in options.get('list_surface_azimuth')] - if options.get('list_modules_per_string',None) != None: - params['plant_conf']['modules_per_string'] = [i['modules_per_string'] for i in options.get('list_modules_per_string')] - if options.get('list_strings_per_inverter',None) != None: - params['plant_conf']['strings_per_inverter'] = [i['strings_per_inverter'] for i in options.get('list_strings_per_inverter')] - params['plant_conf']['Pd_max'] = options.get('battery_discharge_power_max',params['plant_conf']['Pd_max']) - params['plant_conf']['Pc_max'] = options.get('battery_charge_power_max',params['plant_conf']['Pc_max']) - params['plant_conf']['eta_disch'] = options.get('battery_discharge_efficiency',params['plant_conf']['eta_disch']) - params['plant_conf']['eta_ch'] = options.get('battery_charge_efficiency',params['plant_conf']['eta_ch']) - params['plant_conf']['Enom'] = options.get('battery_nominal_energy_capacity',params['plant_conf']['Enom']) - params['plant_conf']['SOCmin'] = options.get('battery_minimum_state_of_charge',params['plant_conf']['SOCmin']) - params['plant_conf']['SOCmax'] = options.get('battery_maximum_state_of_charge',params['plant_conf']['SOCmax']) - params['plant_conf']['SOCtarget'] = options.get('battery_target_state_of_charge',params['plant_conf']['SOCtarget']) - - # Check parameter lists have the same amounts as deferrable loads + list_hp_periods_list = [ + { + "period_hp_" + + str(i + 1): [ 
+ {"start": start_hours_list[i]}, + {"end": end_hours_list[i]}, + ] + } + for i in range(num_peak_hours) + ] + params["optim_conf"]["list_hp_periods"] = list_hp_periods_list + params["optim_conf"]["load_cost_hp"] = options.get( + "load_peak_hours_cost", params["optim_conf"]["load_cost_hp"] + ) + params["optim_conf"]["load_cost_hc"] = options.get( + "load_offpeak_hours_cost", params["optim_conf"]["load_cost_hc"] + ) + params["optim_conf"]["prod_price_forecast_method"] = options.get( + "production_price_forecast_method", + params["optim_conf"]["prod_price_forecast_method"], + ) + params["optim_conf"]["prod_sell_price"] = options.get( + "photovoltaic_production_sell_price", + params["optim_conf"]["prod_sell_price"], + ) + params["optim_conf"]["set_total_pv_sell"] = options.get( + "set_total_pv_sell", params["optim_conf"]["set_total_pv_sell"] + ) + params["optim_conf"]["lp_solver"] = options.get( + "lp_solver", params["optim_conf"]["lp_solver"] + ) + params["optim_conf"]["lp_solver_path"] = options.get( + "lp_solver_path", params["optim_conf"]["lp_solver_path"] + ) + params["optim_conf"]["set_nocharge_from_grid"] = options.get( + "set_nocharge_from_grid", params["optim_conf"]["set_nocharge_from_grid"] + ) + params["optim_conf"]["set_nodischarge_to_grid"] = options.get( + "set_nodischarge_to_grid", params["optim_conf"]["set_nodischarge_to_grid"] + ) + params["optim_conf"]["set_battery_dynamic"] = options.get( + "set_battery_dynamic", params["optim_conf"]["set_battery_dynamic"] + ) + params["optim_conf"]["battery_dynamic_max"] = options.get( + "battery_dynamic_max", params["optim_conf"]["battery_dynamic_max"] + ) + params["optim_conf"]["battery_dynamic_min"] = options.get( + "battery_dynamic_min", params["optim_conf"]["battery_dynamic_min"] + ) + params["optim_conf"]["weight_battery_discharge"] = options.get( + "weight_battery_discharge", params["optim_conf"]["weight_battery_discharge"] + ) + params["optim_conf"]["weight_battery_charge"] = options.get( + "weight_battery_charge", params["optim_conf"]["weight_battery_charge"] + ) + if options.get("list_start_timesteps_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_start_timestep"] = [ + i["start_timesteps_of_each_deferrable_load"] + for i in options.get("list_start_timesteps_of_each_deferrable_load") + ] + if options.get("list_end_timesteps_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_end_timestep"] = [ + i["end_timesteps_of_each_deferrable_load"] + for i in options.get("list_end_timesteps_of_each_deferrable_load") + ] + # Updating variables in plant_con + params["plant_conf"]["P_grid_max"] = options.get( + "maximum_power_from_grid", params["plant_conf"]["P_grid_max"] + ) + if options.get("list_pv_module_model", None) != None: + params["plant_conf"]["module_model"] = [ + i["pv_module_model"] for i in options.get("list_pv_module_model") + ] + if options.get("list_pv_inverter_model", None) != None: + params["plant_conf"]["inverter_model"] = [ + i["pv_inverter_model"] for i in options.get("list_pv_inverter_model") + ] + if options.get("list_surface_tilt", None) != None: + params["plant_conf"]["surface_tilt"] = [ + i["surface_tilt"] for i in options.get("list_surface_tilt") + ] + if options.get("list_surface_azimuth", None) != None: + params["plant_conf"]["surface_azimuth"] = [ + i["surface_azimuth"] for i in options.get("list_surface_azimuth") + ] + if options.get("list_modules_per_string", None) != None: + params["plant_conf"]["modules_per_string"] = [ + i["modules_per_string"] for i in 
options.get("list_modules_per_string") + ] + if options.get("list_strings_per_inverter", None) != None: + params["plant_conf"]["strings_per_inverter"] = [ + i["strings_per_inverter"] + for i in options.get("list_strings_per_inverter") + ] + params["plant_conf"]["Pd_max"] = options.get( + "battery_discharge_power_max", params["plant_conf"]["Pd_max"] + ) + params["plant_conf"]["Pc_max"] = options.get( + "battery_charge_power_max", params["plant_conf"]["Pc_max"] + ) + params["plant_conf"]["eta_disch"] = options.get( + "battery_discharge_efficiency", params["plant_conf"]["eta_disch"] + ) + params["plant_conf"]["eta_ch"] = options.get( + "battery_charge_efficiency", params["plant_conf"]["eta_ch"] + ) + params["plant_conf"]["Enom"] = options.get( + "battery_nominal_energy_capacity", params["plant_conf"]["Enom"] + ) + params["plant_conf"]["SOCmin"] = options.get( + "battery_minimum_state_of_charge", params["plant_conf"]["SOCmin"] + ) + params["plant_conf"]["SOCmax"] = options.get( + "battery_maximum_state_of_charge", params["plant_conf"]["SOCmax"] + ) + params["plant_conf"]["SOCtarget"] = options.get( + "battery_target_state_of_charge", params["plant_conf"]["SOCtarget"] + ) + + # Check parameter lists have the same amounts as deferrable loads # If not, set defaults it fill in gaps if params['optim_conf']['num_def_loads'] is not len(params['optim_conf']['def_start_timestep']): logger.warning("def_start_timestep / list_start_timesteps_of_each_deferrable_load does not match number in num_def_loads, adding default values to parameter") @@ -721,20 +1117,35 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, for x in range(len(params['optim_conf']['P_deferrable_nom']), params['optim_conf']['num_def_loads']): params['optim_conf']['P_deferrable_nom'].append(0) # days_to_retrieve should be no less then 2 - if params['retrieve_hass_conf']['days_to_retrieve'] < 2: - params['retrieve_hass_conf']['days_to_retrieve'] = 2 - logger.warning("days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history") + if params["retrieve_hass_conf"]["days_to_retrieve"] < 2: + params["retrieve_hass_conf"]["days_to_retrieve"] = 2 + logger.warning( + "days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history" + ) else: - params['params_secrets'] = params_secrets + params["params_secrets"] = params_secrets # The params dict - params['passed_data'] = {'pv_power_forecast':None,'load_power_forecast':None,'load_cost_forecast':None,'prod_price_forecast':None, - 'prediction_horizon':None,'soc_init':None,'soc_final':None,'def_total_hours':None,'def_start_timestep':None,'def_end_timestep':None,'alpha':None,'beta':None} + params["passed_data"] = { + "pv_power_forecast": None, + "load_power_forecast": None, + "load_cost_forecast": None, + "prod_price_forecast": None, + "prediction_horizon": None, + "soc_init": None, + "soc_final": None, + "def_total_hours": None, + "def_start_timestep": None, + "def_end_timestep": None, + "alpha": None, + "beta": None, + } return params + def get_days_list(days_to_retrieve: int) -> pd.date_range: """ Get list of past days from today to days_to_retrieve. 
- + :param days_to_retrieve: Total number of days to retrieve from the past :type days_to_retrieve: int :return: The list of days @@ -743,19 +1154,20 @@ def get_days_list(days_to_retrieve: int) -> pd.date_range: """ today = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0) d = (today - timedelta(days=days_to_retrieve)).isoformat() - days_list = pd.date_range(start=d, end=today.isoformat(), freq='D') - + days_list = pd.date_range(start=d, end=today.isoformat(), freq="D") + return days_list + def set_df_index_freq(df: pd.DataFrame) -> pd.DataFrame: """ Set the freq of a DataFrame DateTimeIndex. - + :param df: Input DataFrame :type df: pd.DataFrame :return: Input DataFrame with freq defined :rtype: pd.DataFrame - + """ idx_diff = np.diff(df.index) sampling = pd.to_timedelta(np.median(idx_diff)) From 4ced7571faab7a11030270e7e5a97e2f89ad2788 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 14:55:30 +0100 Subject: [PATCH 017/111] sklearn-model -> regression-model --- src/emhass/command_line.py | 4 ++-- src/emhass/utils.py | 5 +++++ src/emhass/web_server.py | 8 ++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index b4a9050c..1706d34c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -657,7 +657,7 @@ def regressor_model_fit( """ data = copy.deepcopy(input_data_dict["df_input_data"]) model_type = input_data_dict["params"]["passed_data"]["model_type"] - sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] features = input_data_dict["params"]["passed_data"]["features"] target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] @@ -665,7 +665,7 @@ def regressor_model_fit( root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( - data, model_type, sklearn_model, features, target, timestamp, logger + data, model_type, regression_model, features, target, timestamp, logger ) # Fit the ML model mlr.fit(date_features=date_features) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 02db0e09..3886686f 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -457,6 +457,11 @@ def treat_runtimeparams( else: sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model + if "regression_model" not in runtimeparams.keys(): + regression_model = "LinearRegression" + else: + regression_model = runtimeparams["regression_model"] + params["passed_data"]["regression_model"] = regression_model if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index cdb98b00..64c690a9 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -191,14 +191,14 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a regressor fit...") + app.logger.info(" >> Performing a machine learning regressor fit...") regressor_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-fit executed... \n' + msg = f'EMHASS >> Action regressor-model-fit executed... 
\n' return make_response(msg, 201) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a regressor predict...") + app.logger.info(" >> Performing a machine learning regressor predict...") regressor_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-predict executed... \n' + msg = f'EMHASS >> Action regressor-model-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 1cb2ed5682ee9831a9c62da069c9d71f4efe83c7 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 15:18:57 +0100 Subject: [PATCH 018/111] REGRESSION_METHODS const --- src/emhass/machine_learning_regressor.py | 127 ++++++++++++----------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 80ddd74f..9e7795d0 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -24,6 +24,41 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) +REGRESSION_METHODS = { + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + } class MLRegressor: r""" @@ -43,7 +78,7 @@ def __init__( self, data, model_type: str, - sklearn_model: str, + regression_model: str, features: list, target: str, timestamp: str, @@ -56,11 +91,15 @@ def __init__( :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str + :param regression_model: The model that will be used. For now only \ + this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ + `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + :type regression_model: str :param features: A list of features. \ - Example: [`solar`, `degree_days`]. + Example: [`solar_production`, `degree_days`]. :type features: list :param target: The target(to be predicted). \ - Example: `hours`. + Example: `heating_hours`. :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. 
:type timestamp: str @@ -72,7 +111,7 @@ def __init__( self.target = target self.timestamp = timestamp self.model_type = model_type - self.sklearn_model = sklearn_model + self.regression_model = regression_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep="first")] @@ -111,7 +150,7 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = []) -> None: + def fit(self, date_features: Optional[list] = None) -> None: """ Fit the model using the provided data. @@ -129,7 +168,7 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: + if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( self.data_exo, date_features, self.timestamp @@ -150,63 +189,27 @@ def fit(self, date_features: Optional[list] = []) -> None: ) self.steps = len(X_test) - regression_methods = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } - - if self.sklearn_model == "LinearRegression": - base_model = regression_methods["LinearRegression"]["model"] - param_grid = regression_methods["LinearRegression"]["param_grid"] - elif self.sklearn_model == "RidgeRegression": - base_model = regression_methods["RidgeRegression"]["model"] - param_grid = regression_methods["RidgeRegression"]["param_grid"] - elif self.sklearn_model == "LassoRegression": - base_model = regression_methods["LassoRegression"]["model"] - param_grid = regression_methods["LassoRegression"]["param_grid"] - elif self.sklearn_model == "RandomForestRegression": - base_model = regression_methods["RandomForestRegression"]["model"] - param_grid = regression_methods["RandomForestRegression"]["param_grid"] - elif self.sklearn_model == "GradientBoostingRegression": - base_model = regression_methods["GradientBoostingRegression"]["model"] - param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] - elif self.sklearn_model == "AdaBoostRegression": - base_model = regression_methods["AdaBoostRegression"]["model"] - param_grid = regression_methods["AdaBoostRegression"]["param_grid"] + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + 
elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model " + self.sklearn_model + " is not valid" + "Passed sklearn model " + self.regression_model + " is not valid" ) self.model = make_pipeline(StandardScaler(), base_model) @@ -223,7 +226,7 @@ def fit(self, date_features: Optional[list] = []) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.sklearn_model + " model") + self.logger.info("Training a " + self.regression_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) print("Best value for lambda : ", self.grid_search.best_params_) From 40adc0fff5baa1d8b865dd3378ca0a867eeb6c45 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:12:28 +0100 Subject: [PATCH 019/111] Some cleanup --- src/emhass/machine_learning_regressor.py | 220 +++++++++++++---------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 9e7795d0..95f624b3 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -1,70 +1,72 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +"""Machine learning regressor module.""" + +from __future__ import annotations import copy -import logging import time -from typing import Optional import warnings +from typing import TYPE_CHECKING -import pandas as pd import numpy as np +import pandas as pd from sklearn.ensemble import ( AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, ) -from sklearn.metrics import r2_score - from sklearn.linear_model import Lasso, LinearRegression, Ridge +from sklearn.metrics import r2_score from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +if TYPE_CHECKING: + import logging warnings.filterwarnings("ignore", category=DeprecationWarning) REGRESSION_METHODS = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - 
"model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, +} + class MLRegressor: - r""" - A forecaster class using machine learning models. + r"""A forecaster class using machine learning models. - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. + This class uses the `sklearn` module and the machine learning models are \ + from `scikit-learn`. It exposes two main methods: @@ -74,9 +76,9 @@ class MLRegressor: """ - def __init__( - self, - data, + def __init__( # noqa: PLR0913 + self: MLRegressor, + data: pd.DataFrame, model_type: str, regression_model: str, features: list, @@ -92,8 +94,9 @@ def __init__( for what it will be used for. :type model_type: str :param regression_model: The model that will be used. For now only \ - this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ - `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + this options are possible: `LinearRegression`, `RidgeRegression`, \ + `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ Example: [`solar_production`, `degree_days`]. @@ -113,7 +116,7 @@ def __init__( self.model_type = model_type self.regression_model = regression_model self.logger = logger - self.data.sort_index(inplace=True) + self.data = self.data.sort_index() self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None @@ -122,9 +125,11 @@ def __init__( @staticmethod def add_date_features( - data: pd.DataFrame, date_features: list, timestamp: str + data: pd.DataFrame, + date_features: list, + timestamp: str, ) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp + """Add date features from the input DataFrame timestamp. 
:param data: The input DataFrame :type data: pd.DataFrame @@ -133,7 +138,7 @@ def add_date_features( :return: The DataFrame with the added features :rtype: pd.DataFrame """ - df = copy.deepcopy(data) + df = copy.deepcopy(data) # noqa: PD901 df[timestamp] = pd.to_datetime(df["timestamp"]) if "year" in date_features: df["year"] = [i.year for i in df["timestamp"]] @@ -150,14 +155,54 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = None) -> None: + def get_regression_model(self: MLRegressor) -> tuple[str, str]: + """Get the base model and parameter grid for the specified regression model. + + Returns a tuple containing the base model and parameter grid corresponding to \ + the specified regression model. + + Args: + ---- + self: The instance of the MLRegressor class. + + Returns: + ------- + A tuple containing the base model and parameter grid. + """ - Fit the model using the provided data. + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] + else: + self.logger.error( + "Passed sklearn model %s is not valid", + self.regression_model, + ) + return base_model, param_grid - :param date_features: A list of 'date_features' to take into account when fitting the model. + def fit(self: MLRegressor, date_features: list | None = None) -> None: + """Fit the model using the provided data. + + :param date_features: A list of 'date_features' to take into account when \ + fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for " + self.model_type) + self.logger.info("Performing a MLRegressor fit for %s", self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.features] = self.data[self.features] self.data_exo[self.target] = self.data[self.target] @@ -167,50 +212,36 @@ def fit(self, date_features: Optional[list] = None) -> None: keep_columns.append(self.timestamp) keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) + self.data_exo = self.data_exo.reset_index(drop=True) if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( - self.data_exo, date_features, self.timestamp + self.data_exo, + date_features, + self.timestamp, ) else: self.logger.error( - "If no timestamp provided, you can't use date_features, going further without date_features." 
+ "If no timestamp provided, you can't use date_features, going \ + further without date_features.", ) y = self.data_exo[self.target] self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: self.data_exo = self.data_exo.drop(self.timestamp, axis=1) - X = self.data_exo + X = self.data_exo # noqa: N806 - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 + X_train, X_test, y_train, y_test = train_test_split( # noqa: N806 + X, + y, + test_size=0.2, + random_state=42, ) + self.steps = len(X_test) - if self.regression_model == "LinearRegression": - base_model = REGRESSION_METHODS["LinearRegression"]["model"] - param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] - elif self.regression_model == "RidgeRegression": - base_model = REGRESSION_METHODS["RidgeRegression"]["model"] - param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] - elif self.regression_model == "LassoRegression": - base_model = REGRESSION_METHODS["LassoRegression"]["model"] - param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] - elif self.regression_model == "RandomForestRegression": - base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] - param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] - elif self.regression_model == "GradientBoostingRegression": - base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] - param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] - elif self.regression_model == "AdaBoostRegression": - base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] - param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] - else: - self.logger.error( - "Passed sklearn model " + self.regression_model + " is not valid" - ) + base_model, param_grid = self.get_regression_model() self.model = make_pipeline(StandardScaler(), base_model) @@ -226,12 +257,10 @@ def fit(self, date_features: Optional[list] = None) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.regression_model + " model") + self.logger.info("Training a %s model", self.regression_model) start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ", self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + self.logger.info("Elapsed time for model fit: %s", time.time() - start_time) self.model = self.grid_search.best_estimator_ @@ -240,20 +269,21 @@ def fit(self, date_features: Optional[list] = None) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test, predictions) self.logger.info( - f"Prediction R2 score of fitted model on test data: {pred_metric}" + "Prediction R2 score of fitted model on test data: %s", + pred_metric, ) - def predict(self, new_values: list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - + def predict(self: MLRegressor, new_values: list) -> np.ndarray: + """Predict a new value. - :param new_values: The new values for the features(in the same order as the features list). \ + :param new_values: The new values for the features \ + (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for " + self.model_type) + self.logger.info("Performing a prediction for %s", self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) From bbfbc3ab2922f4f1d4958a86daafc43ea45b1651 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 020/111] Add csv-prediction --- src/emhass/command_line.py | 1 - src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 1706d34c..e6940518 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -16,7 +16,6 @@ from distutils.util import strtobool - from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From b5c2b95e6d1a47b6fc5440db787f62a90f6cb7a0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 021/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From 9eaf4883ee77fa28ae149de5793eaa6a2914b60f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 022/111] gitignore fun --- .vscode/launch.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 10313c97..b953c7d3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,10 +10,11 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "purpose":["debug-in-terminal"], "justMyCode": true, "env": { @@ -21,15 +22,15 @@ "OPTIONS_PATH": "/workspaces/emhass/options.json", "SECRETS_PATH": "/workspaces/emhass/secrets_emhass.yaml", "DATA_PATH": "/workspaces/emhass/data/", - "LOGGING_LEVEL": "DEBUG" } }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "args": ["--addon", "true", "--no_response", "true"], "purpose":["debug-in-terminal"], "justMyCode": true, @@ -44,7 +45,6 @@ "LAT": "45.83", //optional change "LON": "6.86", //optional change "ALT": "4807.8", //optional change - "LOGGING_LEVEL": "DEBUG" //optional change }, } From 8f0cab3914dfb9f2c1117216ce454d276ea26f24 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 023/111] python -> debugpy --- .vscode/launch.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b953c7d3..ec6c6987 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,12 +10,14 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "purpose":["debug-in-terminal"], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -26,13 +28,20 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": 
"debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "args": ["--addon", "true", "--no_response", "true"], - "purpose":["debug-in-terminal"], + "args": [ + "--addon", + "true", + "--no_response", + "true" + ], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -46,7 +55,6 @@ "LON": "6.86", //optional change "ALT": "4807.8", //optional change }, - - } + } ] } \ No newline at end of file From c27ea5cae6d1e9dbe74047dd1add150188c13529 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 024/111] launch.json --- .vscode/launch.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ec6c6987..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,9 +12,8 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "purpose": [ "debug-in-terminal" ], @@ -30,9 +29,8 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "args": [ "--addon", "true", From e1543803baff1a57e3ae3ab32bb3c4c9b6cf1a2e Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 025/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. 
- - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. 
- - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From e946d18667abf39accc9394bbe44768fd9746e1b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 18 Mar 2024 09:33:20 +0100 Subject: [PATCH 026/111] remove KNeighborsRegressor --- src/emhass/machine_learning_regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 95f624b3..732b4266 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -95,7 +95,7 @@ def __init__( # noqa: PLR0913 :type model_type: str :param regression_model: The model that will be used. For now only \ this options are possible: `LinearRegression`, `RidgeRegression`, \ - `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `LassoRegression`, `RandomForestRegression`, \ `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ From 06920aad01e43ddd5c0329b43758074ae709a562 Mon Sep 17 00:00:00 2001 From: gieljnssns Date: Tue, 19 Mar 2024 04:40:02 +0100 Subject: [PATCH 027/111] first documentation for mlregressor --- docs/index.md | 4 +- docs/mlregressor.md | 91 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 docs/mlregressor.md diff --git a/docs/index.md b/docs/index.md index cf015a3f..cc9f33a8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,6 +6,7 @@ # EMHASS: Energy Management for Home Assistant ```{image} images/emhass_logo.png + ``` Welcome to the documentation of EMHASS. With this package written in Python you will be able to implement a real Energy Management System for your household. 
This software was designed to be easy configurable and with a fast integration with Home Assistant:
 
@@ -21,6 +22,7 @@ differences.md
 lpems.md
 forecasts.md
 mlforecaster.md
+mlregressor.md
 study_case.md
 config.md
 emhass.md
@@ -32,5 +34,3 @@ develop.md
 
 - {ref}`genindex`
 - {ref}`modindex`
 - {ref}`search`
-
-
diff --git a/docs/mlregressor.md b/docs/mlregressor.md
new file mode 100644
index 00000000..7206af99
--- /dev/null
+++ b/docs/mlregressor.md
@@ -0,0 +1,91 @@
+# The machine learning regressor
+
+Starting with v0.9.0, a new framework is proposed within EMHASS. It provides a machine learning module to predict values from a csv file using different regression models.
+
+This API provides two main methods:
+
+- fit: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point.
+
+- predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point.
+
+## A basic model fit
+
+To train a model, use the `regressor-model-fit` end point.
+
+Some parameters can optionally be defined at runtime:
+
+- `csv_file`: The name of the csv file containing your data.
+
+- `features`: A list of features; you can provide new values for this at runtime.
+
+- `target`: The target, i.e. the value that has to be predicted.
+
+- `model_type`: Define the name of this regressor model, useful to identify what it will be used for. For example: `heating_hours_degreeday`. This should be a unique name if you are using multiple custom regressor models.
+
+- `regression_model`: The regression model that will be used. For now, only these options are possible: `LinearRegression`, `RidgeRegression`, `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`.
+
+- `timestamp`: If defined, the column key that has to be used as the timestamp.
+
+- `date_features`: A list of 'date_features' to take into account when fitting the model. Possibilities are `year`, `month`, `day_of_week` (monday=0, sunday=6), `day_of_year`, `day` (day of month) and `hour`.
+
+```
+runtimeparams = {
+    "csv_file": "heating_prediction.csv",
+    "features": ["degreeday", "solar"],
+    "target": "heating_hours",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"]
+    }
+```
+
+A correct `curl` call to launch a model fit can look like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit
+```
+
+After applying the `curl` command to fit the model, the following information is logged by EMHASS:
+
+    2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model
+    2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176
+    2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957
+
+## The predict method
+
+To obtain a prediction using a previously trained model, use the `regressor-model-predict` end point.
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-predict
+```
+
+If needed, pass the correct `model_type` like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict
+```
+
+It is possible to publish the predict method results to a Home Assistant sensor.
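+
+For example, a single call that runs a prediction and publishes the result could look like this. This is a hypothetical call: it assumes the `heating_hours_degreeday` model from above has already been fitted, it reuses the publish parameters detailed below, and the `h` unit is only an assumption for an hours-based target.
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday", "new_values": [8.2, 7.23, 2, 6], "mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor"}' http://localhost:5000/action/regressor-model-predict
+```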
+ +The list of parameters needed to set the data publish task is: + +- `mlr_predict_entity_id`: The unique `entity_id` to be used. + +- `mlr_predict_unit_of_measurement`: The `unit_of_measurement` to be used. + +- `mlr_predict_friendly_name`: The `friendly_name` to be used. + +- `new_values`: The new values for the features (in the same order as the features list). Also when using date_features, add these to the new values. + +- `model_type`: The model type that has to be predicted + +``` +runtimeparams = { + "mlr_predict_entity_id": "sensor.mlr_predict", + "mlr_predict_unit_of_measurement": None, + "mlr_predict_friendly_name": "mlr predictor", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" +} +``` From a5be2b8df7a0336849f8e6a85619d61d55fdc5d5 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:13:47 +0100 Subject: [PATCH 028/111] add /app to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5dc21af8..581080c8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ secrets_emhass.yaml *.html *.pkl data/actionLogs.txt +**/app # Byte-compiled / optimized / DLL files From 7c712038c13d06a83e97a0dffab0ed81913ea747 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 029/111] Add csv-prediction --- src/emhass/command_line.py | 46 ++++++++++++ src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ src/emhass/retrieve_hass.py | 10 +++ src/emhass/utils.py | 25 +++++++ src/emhass/web_server.py | 6 ++ 5 files changed, 226 insertions(+) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 605c19e3..0eb69e4a 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,6 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization +from emhass.csv_predictor import CsvPredictor from emhass import utils @@ -153,6 +154,12 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() + elif set_type == "csv-predict": + df_input_data, df_input_data_dayahead = None, None + P_PV_forecast, P_load_forecast = None, None + days_list = None + params = json.loads(params) + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -435,6 +442,45 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf +def csv_predict(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + model_type = input_data_dict['params']['passed_data']['model_type'] + csv_file = input_data_dict['params']['passed_data']['csv_file'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + new_values = input_data_dict['params']['passed_data']['new_values'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # Fit the ML model + prediction = csv.predict(perform_backtest=perform_backtest) + + csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] + csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] + csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + # Publish Load forecast + idx = 0 + input_data_dict['rh'].post_data(prediction, idx, + csv_predict_entity_id, + csv_predict_unit_of_measurement, + csv_predict_friendly_name, + type_var = 'csv_predictor') + return prediction + def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. 
+ + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. + :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. 
+ + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index f3f0649a..ca20ce40 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -303,6 +303,8 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, state = np.round(data_df.loc[data_df.index[idx]],4) elif type_var == 'optim_status': state = data_df.loc[data_df.index[idx]] + elif type_var == 'csv_predictor': + state = data_df[idx] else: state = np.round(data_df.loc[data_df.index[idx]],2) if type_var == 'power': @@ -334,6 +336,14 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, "friendly_name": friendly_name } } + elif type_var == 'csv_predictor': + data = { + "state": state, + "attributes": { + "unit_of_measurement": unit_of_measurement, + "friendly_name": friendly_name + } + } else: data = { "state": "{:.2f}".format(state), diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 61acab3b..a5d3002c 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,6 +155,16 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) + if set_type == "csv-predict": + csv_file = runtimeparams['csv_file'] + independent_variables = runtimeparams['independent_variables'] + dependent_variable = runtimeparams['dependent_variable'] + new_values = runtimeparams['new_values'] + params['passed_data']['csv_file'] = csv_file + params['passed_data']['independent_variables'] = independent_variables + params['passed_data']['dependent_variable'] = dependent_variable + params['passed_data']['new_values'] = new_values + # Treating special data passed for MPC control case if set_type == 'naive-mpc-optim': if 'prediction_horizon' not in runtimeparams.keys(): @@ -281,6 +291,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = 
runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name + if 'csv_predict_entity_id' not in runtimeparams.keys(): + csv_predict_entity_id = "sensor.csv_predictor" + else: + csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] + params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id + if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): + csv_predict_unit_of_measurement = None + else: + csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] + params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement + if 'csv_predict_friendly_name' not in runtimeparams.keys(): + csv_predict_friendly_name = "Csv predictor" + else: + csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] + params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 989298d4..886f9304 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,6 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import csv_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -193,6 +194,11 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) + elif action_name == 'csv-predict': + app.logger.info(" >> Performing a csv predict...") + csv_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-predict executed... \n' + return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' From 9b7472a7bb3033f6404ad4623dcd28becc62bae1 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 030/111] cleanup --- src/emhass/command_line.py | 12 ++++++---- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 0eb69e4a..528efcfb 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -455,19 +455,21 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] + # data = copy.deepcopy(input_data_dict['df_input_data']) + # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) # Fit the ML model - prediction = csv.predict(perform_backtest=perform_backtest) + prediction = csv.predict() + # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. 
:type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From b975b74b9b51ab20daf92054ca3ab5efe367f721 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 031/111] more cleanup --- src/emhass/command_line.py | 17 +++---- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 2 files changed, 40 insertions(+), 69 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 528efcfb..ea249f8d 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -443,8 +443,8 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: - """Perform a forecast model fit from training data retrieved from Home Assistant. + debug: Optional[bool] = False) -> np.ndarray: + """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict @@ -452,29 +452,24 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ - # data = copy.deepcopy(input_data_dict['df_input_data']) - # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) - # Fit the ML model + # Predict from csv file prediction = csv.predict() - # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] - # Publish Load forecast + # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, csv_predict_entity_id, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 36ba25f1a5138f8ddc13917926c2e3e35738aa22 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 032/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From 259ced37b9e6aa0725f57ff590e18d29e427b272 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 033/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray: data = self.load_data() if data is not None: X, y = self.prepare_data(data) - + if self.sklearn_model == 'LinearRegression': base_model = LinearRegression() elif self.sklearn_model == 'ElasticNet': @@ -127,9 +126,5 @@ def predict(self) -> np.ndarray: self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) - + return prediction - - - - \ No newline at end of file From e200dc841452697b48d1e006395c64ddd2ed2913 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 034/111] Use gridsearchcv and split up fit and predict --- src/emhass/command_line.py | 87 +++++++++++++++--- src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++------------- src/emhass/utils.py | 16 +++- src/emhass/web_server.py | 11 ++- 4 files changed, 210 insertions(+), 77 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index ea249f8d..4dd795ae 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -154,7 +154,36 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - elif set_type == "csv-predict": + + elif set_type == "csv-model-fit": + + df_input_data_dayahead = None + P_PV_forecast, P_load_forecast = None, None + params = json.loads(params) + days_list = None + csv_file = params['passed_data']['csv_file'] + independent_variables = params['passed_data']['independent_variables'] + dependent_variable = params['passed_data']['dependent_variable'] + timestamp = params['passed_data']['timestamp'] + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) + + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(independent_variables) + required_columns.append(dependent_variable) + if timestamp is not None: + required_columns.append(timestamp) + + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + elif set_type == "csv-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -442,7 +471,41 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_predict(input_data_dict: dict, logger: logging.Logger, +def csv_model_fit(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + # csv_file = input_data_dict['params']['passed_data']['csv_file'] + model_type = input_data_dict['params']['passed_data']['model_type'] + # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + timestamp = input_data_dict['params']['passed_data']['timestamp'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + date_features = input_data_dict['params']['passed_data']['date_features'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + # Fit the ML model + df_pred = csv.fit(date_features=date_features) + # Save model + if not debug: + filename = model_type+'_csv.pkl' + with open(pathlib.Path(root) / filename, 'wb') as outp: + pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + # return df_pred, csv + +def csv_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> np.ndarray: """Perform a prediction from csv file. @@ -455,16 +518,20 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - csv_file = input_data_dict['params']['passed_data']['csv_file'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - new_values = input_data_dict['params']['passed_data']['new_values'] + model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - # The ML forecaster object - csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + filename = model_type+'_csv.pkl' + filename_path = pathlib.Path(root) / filename + if not debug: + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + csv = pickle.load(inp) + else: + logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + return + new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict() + prediction = csv.predict(new_values) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - self.csv_file = csv_file + self.data = data self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp - def load_data(self) -> pd.DataFrame: - """Load the data.""" - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(inp) - else: - self.logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + self.csv_file + " was not found.") - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: + :param data: The input DataFrame + :type data: pd.DataFrame + :return: The DataFrame with the added features + :rtype: pd.DataFrame + """ + df = copy.deepcopy(data) + df['timestamp']= pd.to_datetime(df['timestamp']) + if 'year' in date_features: + df['year'] = [i.month for i in df['timestamp']] + if 'month' in date_features: + df['month'] = [i.month for i in df['timestamp']] + if 'day_of_week' in date_features: + df['day_of_week'] = [i.dayofweek for i in df['timestamp']] + if 'day_of_year' in date_features: + df['day_of_year'] = [i.dayofyear for i in df['timestamp']] + if 'day' in date_features: + df['day'] = [i.day for i in df['timestamp']] + if 'hour' in date_features: + df['hour'] = [i.day for i in df['timestamp']] + + return df + + def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Prepare the data. + Fit the model using the provided data. :param data: Input Data :type data: pd.DataFrame - :return: A tuple containing the train data. 
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index a5d3002c..b57528b2 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,14 +155,26 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-predict": + if set_type == "csv-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] - new_values = runtimeparams['new_values'] params['passed_data']['csv_file'] = csv_file params['passed_data']['independent_variables'] = independent_variables params['passed_data']['dependent_variable'] = dependent_variable + if 'timestamp' not in runtimeparams.keys(): + params['passed_data']['timestamp'] = None + else: + timestamp = runtimeparams['timestamp'] + params['passed_data']['timestamp'] = timestamp + if 'date_features' not in runtimeparams.keys(): + params['passed_data']['date_features'] = [] + else: + date_features = runtimeparams['date_features'] + params['passed_data']['date_features'] = date_features + + if set_type == "csv-model-predict": + new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values # Treating special data passed for MPC control case diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 886f9304..db8d0b13 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_predict +from emhass.command_line import csv_model_fit, csv_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,9 +194,14 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-predict': + elif action_name == 'csv-model-fit': + app.logger.info(" >> Performing a csv fit...") + 
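+        # fit the regressor on the csv data passed at runtime; the trained model is pickled for later predict calls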
csv_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-fit executed... \n' + return make_response(msg, 201) + elif action_name == 'csv-model-predict': app.logger.info(" >> Performing a csv predict...") - csv_predict(input_data_dict, app.logger) + csv_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action csv-predict executed... \n' return make_response(msg, 201) else: From d09cbe30c0f08ba8d6f2f3cfd4b38d1d153030cc Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 035/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. From cb2050a245ccd89ad000ee2c9a66b0c36c1d756c Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 036/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From 264a8ca312b0ef6192625e36f2357f887611d594 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 037/111] cleanup + docstrings --- src/emhass/command_line.py | 17 ++++---------- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 34 
deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 4dd795ae..f1135527 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -472,7 +472,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -481,32 +481,26 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ data = copy.deepcopy(input_data_dict['df_input_data']) - # csv_file = input_data_dict['params']['passed_data']['csv_file'] model_type = input_data_dict['params']['passed_data']['model_type'] - # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The ML forecaster object + # The CSV forecaster object csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - df_pred = csv.fit(date_features=date_features) + csv.fit(date_features=date_features) # Save model if not debug: filename = model_type+'_csv.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) - # return df_pred, csv def csv_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> np.ndarray: + debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -515,8 +509,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The np.ndarray containing the predicted value. - :rtype: np.ndarray """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] @@ -543,7 +535,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, csv_predict_unit_of_measurement, csv_predict_friendly_name, type_var = 'csv_predictor') - return prediction def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1e46927d..57d61791 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -25,32 +25,30 @@ class CsvPredictor: This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. 
-    It exposes one main method:
+    It exposes two main methods:
 
-    - `predict`: to obtain a forecast from a csv file.
+    - `fit`: to train a model with the passed data.
+
+    - `predict`: to obtain a forecast from a pre-trained model.
 
     """
 
     def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str,
                  logger: logging.Logger) -> None:
         r"""Define constructor for the forecast class.
 
-        :param csv_file: The name of the csv file to retrieve data from. \
-            Example: `input_train_data.csv`.
-        :type csv_file: str
+        :param data: The data that will be used for train/test.
+        :type data: pd.DataFrame
+        :param model_type: A unique name defining this model, useful to identify \
+            what it will be used for.
+        :type model_type: str
         :param independent_variables: A list of independent variables. \
             Example: [`solar`, `degree_days`].
         :type independent_variables: list
         :param dependent_variable: The dependent variable (to be predicted). \
             Example: `hours`.
         :type dependent_variable: str
-        :param sklearn_model: The `scikit-learn` model that will be used. For now only \
-            this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`.
-        :type sklearn_model: str
-        :param new_values: The new values for the independent variables(in the same order as the independent variables list). \
-            Example: [2.24, 5.68].
-        :type new_values: list
-        :param root: The parent folder of the path where the config.yaml file is located
-        :type root: str
+        :param timestamp: If defined, the name of the column to use as the timestamp.
+        :type timestamp: str
         :param logger: The passed logger object
         :type logger: logging.Logger
         """
@@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent
         self.dependent_variable = dependent_variable
         self.timestamp = timestamp
         self.model_type = model_type
         self.logger = logger
-        self.is_tuned = False
         self.data.sort_index(inplace=True)
         self.data = self.data[~self.data.index.duplicated(keep='first')]
 
     @staticmethod
-    def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame:
+    def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame:
         """Add date features from the input DataFrame timestamp
 
         :param data: The input DataFrame
         :type data: pd.DataFrame
+        :param timestamp: The column containing the timestamp
+        :type timestamp: str
         :return: The DataFrame with the added features
         :rtype: pd.DataFrame
         """
         df = copy.deepcopy(data)
-        df['timestamp']= pd.to_datetime(df['timestamp'])
+        df[timestamp] = pd.to_datetime(df[timestamp])
         if 'year' in date_features:
-            df['year'] = [i.month for i in df['timestamp']]
+            df['year'] = [i.year for i in df[timestamp]]
         if 'month' in date_features:
             df['month'] = [i.month for i in df[timestamp]]
         if 'day_of_week' in date_features:
@@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None:
         """
         Fit the model using the provided data.
 
-        :param data: Input Data
-        :type data: pd.DataFrame
+        :param date_features: A list of 'date_features' to take into account when fitting the model.
+ :type data: list """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for "+self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.independent_variables] = self.data[self.independent_variables] self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] @@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") @@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None: def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. + + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ From c375042c2acfdd652e0a4d118a496a0770fdb178 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 11 Mar 2024 09:59:27 +0100 Subject: [PATCH 038/111] add other regression methods --- src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 57d61791..2b6fb86a 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -9,9 +9,10 @@ import pandas as pd import numpy as np +from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor from sklearn.metrics import r2_score -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - # Define the model - self.model = Pipeline([ - ('scaler', StandardScaler()), - ('regressor', LinearRegression()) - ]) - # Define the parameters to tune - param_grid = { - 'regressor__fit_intercept': [True, False], - 'regressor__positive': [True, False], - } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # Fit the grid search object to the data - self.logger.info("Fitting the model...") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + regression_methods = [ + ('Linear Regression', LinearRegression(), {}), + ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 
1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid = { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 79752dfae93396dd20e520c6cc8ab7ec138fc6c7 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:10:15 +0100 Subject: [PATCH 039/111] add --editable --- .vscode/tasks.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 11a92388..0b25f4f1 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,7 +9,11 @@ "isDefault": true }, "args": [ - "install", "--no-deps", "--force-reinstall", "." + "install", + "--no-deps", + "--force-reinstall", + "--editable", + "." 
], "presentation": { "echo": true, From 79c478e4ce4ad080bce1ea755db293f332577da1 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:17 +0100 Subject: [PATCH 040/111] Add sklearn model --- src/emhass/command_line.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index f1135527..1845c857 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -484,13 +484,14 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, """ data = copy.deepcopy(input_data_dict['df_input_data']) model_type = input_data_dict['params']['passed_data']['model_type'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] # The CSV forecaster object - csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model csv.fit(date_features=date_features) # Save model From d4c36f09c2ede5c6a4c4a4518d013c8312686418 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 041/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}") + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From bf64255238dfe8a8bc955eb3c27fd8fe603a9fe0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:42:27 +0100 Subject: [PATCH 042/111] change to MLRegressor --- src/emhass/command_line.py | 40 +++++++++---------- ...ictor.py => machine_learning_regressor.py} | 4 +- src/emhass/utils.py | 28 ++++++------- src/emhass/web_server.py | 18 ++++----- 4 files changed, 45 insertions(+), 45 deletions(-) rename src/emhass/{csv_predictor.py => machine_learning_regressor.py} (98%) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 1845c857..5c5b4483 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,7 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization -from emhass.csv_predictor import CsvPredictor +from emhass.machine_learning_regressor import MLRegressor from emhass import utils @@ -155,7 +155,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, return False df_input_data = rh.df_final.copy() - elif set_type == "csv-model-fit": + elif set_type == "regressor-model-fit": df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None @@ -183,7 +183,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - elif set_type == "csv-model-predict": + elif set_type == "regressor-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -471,7 +471,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_model_fit(input_data_dict: dict, logger: logging.Logger, +def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. @@ -490,17 +490,17 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The CSV forecaster object - csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + # The MLRegressor object + mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - csv.fit(date_features=date_features) + mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: - pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def csv_model_predict(input_data_dict: dict, logger: logging.Logger, +def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. 
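Stepping out of the diff for a moment: the fit path that patches 038 and 041 converge on is a StandardScaler feeding one grid-searched regressor. A minimal, self-contained sketch of that pattern on synthetic data (the Ridge model and grid values are illustrative stand-ins, not EMHASS defaults):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(42)
X = rng.random((200, 2))  # two features, e.g. degree days and solar yield
y = 3.0 * X[:, 0] - 1.5 * X[:, 1] + rng.normal(0.0, 0.1, 200)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# make_pipeline names each step after its lowercased class, hence the 'ridge__' prefix
model = make_pipeline(StandardScaler(), Ridge())
grid_search = GridSearchCV(model, {"ridge__alpha": [0.1, 1.0, 10.0]}, cv=5,
                           scoring="neg_mean_squared_error", refit=True, n_jobs=-1)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)                            # e.g. {'ridge__alpha': 0.1}
print(grid_search.best_estimator_.score(X_test, y_test))   # R2 on held-out data

refit=True is what makes best_estimator_ a pipeline already retrained on the full training split, which is why the fit method above can simply assign it to self.model.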
@@ -513,29 +513,29 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): with open(filename_path, 'rb') as inp: - csv = pickle.load(inp) + mlr = pickle.load(inp) else: logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") return new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict(new_values) + prediction = mlr.predict(new_values) - csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] - csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] - csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] + mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] + mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, - csv_predict_entity_id, - csv_predict_unit_of_measurement, - csv_predict_friendly_name, - type_var = 'csv_predictor') + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var = 'mlregressor') def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/machine_learning_regressor.py similarity index 98% rename from src/emhass/csv_predictor.py rename to src/emhass/machine_learning_regressor.py index 3ffeba27..d70df3ec 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/machine_learning_regressor.py @@ -20,7 +20,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) -class CsvPredictor: +class MLRegressor: r""" A forecaster class using machine learning models. 
@@ -115,7 +115,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") diff --git a/src/emhass/utils.py b/src/emhass/utils.py index b57528b2..4bbac11c 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,7 +155,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-model-fit": + if set_type == "regressor-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] @@ -173,7 +173,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic date_features = runtimeparams['date_features'] params['passed_data']['date_features'] = date_features - if set_type == "csv-model-predict": + if set_type == "regressor-model-predict": new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values @@ -303,21 +303,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'csv_predict_entity_id' not in runtimeparams.keys(): - csv_predict_entity_id = "sensor.csv_predictor" + if 'mlr_predict_entity_id' not in runtimeparams.keys(): + mlr_predict_entity_id = "sensor.mlr_predict" else: - csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] - params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id - if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): - csv_predict_unit_of_measurement = None + mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] + params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id + if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = None else: - csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] - params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement - if 'csv_predict_friendly_name' not in runtimeparams.keys(): - csv_predict_friendly_name = "Csv predictor" + mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] + params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement + if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_friendly_name = "mlr predictor" else: - csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] - params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff 
--git a/src/emhass/web_server.py b/src/emhass/web_server.py index db8d0b13..e72022fe 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_model_fit, csv_model_predict +from emhass.command_line import regressor_model_fit, regressor_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,15 +194,15 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-model-fit': - app.logger.info(" >> Performing a csv fit...") - csv_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-fit executed... \n' + elif action_name == 'regressor-model-fit': + app.logger.info(" >> Performing a regressor fit...") + regressor_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-fit executed... \n' return make_response(msg, 201) - elif action_name == 'csv-model-predict': - app.logger.info(" >> Performing a csv predict...") - csv_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-predict executed... \n' + elif action_name == 'regressor-model-predict': + app.logger.info(" >> Performing a regressor predict...") + regressor_model_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From d5adde325856134dc70e68ae46075dc2df182179 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 043/111] change naming and some formatting --- src/emhass/command_line.py | 1021 ++++++++++++++-------- src/emhass/machine_learning_regressor.py | 285 +++--- src/emhass/retrieve_hass.py | 376 +++++--- src/emhass/utils.py | 910 ++++++++++++------- 4 files changed, 1640 insertions(+), 952 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 5c5b4483..b4a9050c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -8,14 +8,15 @@ import json import copy import pickle -import time -import numpy as np -import pandas as pd from datetime import datetime, timezone from typing import Optional, Tuple +from importlib.metadata import version +import numpy as np +import pandas as pd + from distutils.util import strtobool -from importlib.metadata import version + from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster @@ -24,12 +25,19 @@ from emhass import utils -def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, - params: str, runtimeparams: str, set_type: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> dict: +def set_input_data_dict( + config_path: pathlib.Path, + base_path: str, + costfun: str, + params: str, + runtimeparams: str, + set_type: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, +) -> dict: """ Set up some of the data needed for the different actions. 
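With the regressor routes registered in web_server.py just above, the whole loop can be exercised over HTTP. A hypothetical client-side call, assuming a local EMHASS instance on port 5000 and the payload keys as they stand after this series' rename to features/target (file name, feature names and values are placeholders):

import requests

base_url = "http://localhost:5000/action"  # assumed host and port

fit_payload = {
    "csv_file": "prediction.csv",          # placeholder training file
    "features": ["degreeday", "solar"],
    "target": "hours",
    "sklearn_model": "LinearRegression",
    "model_type": "heating_hours",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
}
requests.post(f"{base_url}/regressor-model-fit", json=fit_payload)

predict_payload = {
    "model_type": "heating_hours",
    "new_values": [12.79, 4.24],           # same order as the features list
    "mlr_predict_entity_id": "sensor.mlr_predict",
    "mlr_predict_unit_of_measurement": "h",
    "mlr_predict_friendly_name": "Predicted hours",
}
requests.post(f"{base_url}/regressor-model-predict", json=predict_payload)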
- + :param config_path: The complete absolute path where the config.yaml file is located :type config_path: pathlib.Path :param base_path: The parent folder of the config_path @@ -53,118 +61,196 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.info("Setting up needed data") # Parsing yaml retrieve_hass_conf, optim_conf, plant_conf = utils.get_yaml_parse( - config_path, use_secrets=not(get_data_from_file), params=params) + config_path, use_secrets=not (get_data_from_file), params=params + ) # Treat runtimeparams params, retrieve_hass_conf, optim_conf, plant_conf = utils.treat_runtimeparams( - runtimeparams, params, retrieve_hass_conf, - optim_conf, plant_conf, set_type, logger) + runtimeparams, + params, + retrieve_hass_conf, + optim_conf, + plant_conf, + set_type, + logger, + ) # Define main objects - rh = RetrieveHass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'], - retrieve_hass_conf['freq'], retrieve_hass_conf['time_zone'], - params, base_path, logger, get_data_from_file=get_data_from_file) - fcst = Forecast(retrieve_hass_conf, optim_conf, plant_conf, - params, base_path, logger, get_data_from_file=get_data_from_file) - opt = Optimization(retrieve_hass_conf, optim_conf, plant_conf, - fcst.var_load_cost, fcst.var_prod_price, - costfun, base_path, logger) + rh = RetrieveHass( + retrieve_hass_conf["hass_url"], + retrieve_hass_conf["long_lived_token"], + retrieve_hass_conf["freq"], + retrieve_hass_conf["time_zone"], + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + fcst = Forecast( + retrieve_hass_conf, + optim_conf, + plant_conf, + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + opt = Optimization( + retrieve_hass_conf, + optim_conf, + plant_conf, + fcst.var_load_cost, + fcst.var_prod_price, + costfun, + base_path, + logger, + ) # Perform setup based on type of action if set_type == "perfect-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: - days_list = utils.get_days_list(retrieve_hass_conf['days_to_retrieve']) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): - return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + days_list = utils.get_days_list(retrieve_hass_conf["days_to_retrieve"]) + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): + return False + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # What we don't need for this type of action P_PV_forecast, P_load_forecast, df_input_data_dayahead = None, None, None elif set_type == "dayahead-optim": # Get PV and load 
forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) P_PV_forecast = fcst.get_power_from_weather(df_weather) P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method']) if isinstance(P_load_forecast,bool) and not P_load_forecast: logger.error("Unable to get sensor power photovoltaics, or sensor power load no var loads. Check HA sensors and their daily data") return False - df_input_data_dayahead = pd.DataFrame(np.transpose(np.vstack([P_PV_forecast.values,P_load_forecast.values])), - index=P_PV_forecast.index, - columns=['P_PV_forecast', 'P_load_forecast']) + df_input_data_dayahead = pd.DataFrame( + np.transpose(np.vstack([P_PV_forecast.values, P_load_forecast.values])), + index=P_PV_forecast.index, + columns=["P_PV_forecast", "P_load_forecast"], + ) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] # What we don't need for this type of action df_input_data, days_list = None, None elif set_type == "naive-mpc-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: days_list = utils.get_days_list(1) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # Get PV and load forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) - P_PV_forecast = fcst.get_power_from_weather(df_weather, set_mix_forecast=True, df_now=df_input_data) - P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method'], set_mix_forecast=True, df_now=df_input_data) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) + 
P_PV_forecast = fcst.get_power_from_weather( + df_weather, set_mix_forecast=True, df_now=df_input_data + ) + P_load_forecast = fcst.get_load_forecast( + method=optim_conf["load_forecast_method"], + set_mix_forecast=True, + df_now=df_input_data, + ) df_input_data_dayahead = pd.concat([P_PV_forecast, P_load_forecast], axis=1) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) - df_input_data_dayahead.columns = ['P_PV_forecast', 'P_load_forecast'] + df_input_data_dayahead.columns = ["P_PV_forecast", "P_load_forecast"] params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] - elif set_type == "forecast-model-fit" or set_type == "forecast-model-predict" or set_type == "forecast-model-tune": + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] + elif ( + set_type == "forecast-model-fit" + or set_type == "forecast-model-predict" + or set_type == "forecast-model-tune" + ): df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) # Retrieve data from hass - days_to_retrieve = params['passed_data']['days_to_retrieve'] - model_type = params['passed_data']['model_type'] - var_model = params['passed_data']['var_model'] + days_to_retrieve = params["passed_data"]["days_to_retrieve"] + model_type = params["passed_data"]["model_type"] + var_model = params["passed_data"]["var_model"] if get_data_from_file: days_list = None - filename = 'data_train_'+model_type+'.pkl' - data_path = pathlib.Path(base_path) / 'data' / filename - with open(data_path, 'rb') as inp: + filename = "data_train_" + model_type + ".pkl" + data_path = pathlib.Path(base_path) / "data" / filename + with open(data_path, "rb") as inp: df_input_data, _ = pickle.load(inp) - df_input_data = df_input_data[df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve):] + df_input_data = df_input_data[ + df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve) : + ] else: days_list = utils.get_days_list(days_to_retrieve) var_list = [var_model] if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - + elif set_type == "regressor-model-fit": - + df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params['passed_data']['csv_file'] - independent_variables = params['passed_data']['independent_variables'] - dependent_variable = params['passed_data']['dependent_variable'] - timestamp = params['passed_data']['timestamp'] + csv_file = params["passed_data"]["csv_file"] + features = params["passed_data"]["features"] + target = params["passed_data"]["target"] + timestamp = params["passed_data"]["timestamp"] filename_path = pathlib.Path(base_path) / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -173,8 +259,8 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.error("The cvs file was not found.") 
raise ValueError("The CSV file " + csv_file + " was not found.") required_columns = [] - required_columns.extend(independent_variables) - required_columns.append(dependent_variable) + required_columns.extend(features) + required_columns.append(target) if timestamp is not None: required_columns.append(timestamp) @@ -188,39 +274,46 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, P_PV_forecast, P_load_forecast = None, None days_list = None params = json.loads(params) - + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None else: - logger.error("The passed action argument and hence the set_type parameter for setup is not valid") + logger.error( + "The passed action argument and hence the set_type parameter for setup is not valid" + ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None # The input data dictionnary to return input_data_dict = { - 'root': base_path, - 'retrieve_hass_conf': retrieve_hass_conf, - 'rh': rh, - 'opt': opt, - 'fcst': fcst, - 'df_input_data': df_input_data, - 'df_input_data_dayahead': df_input_data_dayahead, - 'P_PV_forecast': P_PV_forecast, - 'P_load_forecast': P_load_forecast, - 'costfun': costfun, - 'params': params, - 'days_list': days_list + "root": base_path, + "retrieve_hass_conf": retrieve_hass_conf, + "rh": rh, + "opt": opt, + "fcst": fcst, + "df_input_data": df_input_data, + "df_input_data_dayahead": df_input_data_dayahead, + "P_PV_forecast": P_PV_forecast, + "P_load_forecast": P_load_forecast, + "costfun": costfun, + "params": params, + "days_list": days_list, } return input_data_dict - -def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = True, debug: Optional[bool] = False) -> pd.DataFrame: + + +def perfect_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = True, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the perfect forecast optimization routine. 
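An aside before the optimization wrappers: the column contract that set_input_data_dict enforces for regressor-model-fit above is easy to reproduce in isolation. A minimal sketch (file and column names are placeholders):

import pandas as pd

features, target, timestamp = ["degreeday", "solar"], "hours", "timestamp"
df_input_data = pd.read_csv("prediction.csv", parse_dates=True)

required_columns = [*features, target]
if timestamp is not None:
    required_columns.append(timestamp)
if not set(required_columns).issubset(df_input_data.columns):
    raise ValueError(
        f"CSV file should contain the following columns: {', '.join(required_columns)}"
    )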
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -235,28 +328,38 @@ def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing perfect forecast optimization") # Load cost and prod price forecast - df_input_data = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method'], - list_and_perfect=True) - df_input_data = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method'], - list_and_perfect=True) - opt_res = input_data_dict['opt'].perform_perfect_forecast_optim(df_input_data, input_data_dict['days_list']) + df_input_data = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res = input_data_dict["opt"].perform_perfect_forecast_optim( + df_input_data, input_data_dict["days_list"] + ) # Save CSV file for analysis if save_data_to_file: - filename = 'opt_res_perfect_optim_'+input_data_dict['costfun']+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res - -def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + + +def dayahead_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the day-ahead optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -271,29 +374,43 @@ def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing day-ahead forecast optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, - method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) - opt_res_dayahead = input_data_dict['opt'].perform_dayahead_forecast_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res_dayahead = input_data_dict["opt"].perform_dayahead_forecast_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_dayahead.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_dayahead.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_dayahead -def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + +def naive_mpc_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the naive Model Predictive Controller optimization routine. 
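For reference while reading the reformatted body below: naive_mpc_optim consumes these passed_data keys at runtime. A hypothetical payload, with illustrative values and two deferrable loads assumed:

runtimeparams = {
    "prediction_horizon": 10,     # number of timesteps to optimize over
    "soc_init": 0.5,              # initial battery state of charge
    "soc_final": 0.6,             # target state of charge at the horizon end
    "def_total_hours": [1, 3],    # operating hours per deferrable load
    "def_start_timestep": [0, 0],
    "def_end_timestep": [0, 0],
}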
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -308,33 +425,50 @@ def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing naive MPC optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) # The specifics params for the MPC at runtime - prediction_horizon = input_data_dict['params']['passed_data']['prediction_horizon'] - soc_init = input_data_dict['params']['passed_data']['soc_init'] - soc_final = input_data_dict['params']['passed_data']['soc_final'] - def_total_hours = input_data_dict['params']['passed_data']['def_total_hours'] - def_start_timestep = input_data_dict['params']['passed_data']['def_start_timestep'] - def_end_timestep = input_data_dict['params']['passed_data']['def_end_timestep'] - opt_res_naive_mpc = input_data_dict['opt'].perform_naive_mpc_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast'], - prediction_horizon, soc_init, soc_final, def_total_hours, def_start_timestep, def_end_timestep) + prediction_horizon = input_data_dict["params"]["passed_data"]["prediction_horizon"] + soc_init = input_data_dict["params"]["passed_data"]["soc_init"] + soc_final = input_data_dict["params"]["passed_data"]["soc_final"] + def_total_hours = input_data_dict["params"]["passed_data"]["def_total_hours"] + def_start_timestep = input_data_dict["params"]["passed_data"]["def_start_timestep"] + def_end_timestep = input_data_dict["params"]["passed_data"]["def_end_timestep"] + opt_res_naive_mpc = input_data_dict["opt"].perform_naive_mpc_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + prediction_horizon, + soc_init, + soc_final, + def_total_hours, + def_start_timestep, + def_end_timestep, + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_naive_mpc_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_naive_mpc.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_naive_mpc.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_naive_mpc -def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, 
MLForecaster]: + +def forecast_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -346,29 +480,37 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - var_model = input_data_dict['params']['passed_data']['var_model'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - num_lags = input_data_dict['params']['passed_data']['num_lags'] - split_date_delta = input_data_dict['params']['passed_data']['split_date_delta'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + var_model = input_data_dict["params"]["passed_data"]["var_model"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + num_lags = input_data_dict["params"]["passed_data"]["num_lags"] + split_date_delta = input_data_dict["params"]["passed_data"]["split_date_delta"] + perform_backtest = input_data_dict["params"]["passed_data"]["perform_backtest"] + root = input_data_dict["root"] # The ML forecaster object - mlf = MLForecaster(data, model_type, var_model, sklearn_model, num_lags, root, logger) + mlf = MLForecaster( + data, model_type, var_model, sklearn_model, num_lags, root, logger + ) # Fit the ML model - df_pred, df_pred_backtest = mlf.fit(split_date_delta=split_date_delta, - perform_backtest=perform_backtest) + df_pred, df_pred_backtest = mlf.fit( + split_date_delta=split_date_delta, perform_backtest=perform_backtest + ) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred, df_pred_backtest, mlf -def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, - use_last_window: Optional[bool] = True, debug: Optional[bool] = False, - mlf: Optional[MLForecaster] = None) -> pd.DataFrame: + +def forecast_model_predict( + input_data_dict: dict, + logger: logging.Logger, + use_last_window: Optional[bool] = True, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> pd.DataFrame: r"""Perform a forecast model predict using a previously trained skforecast model. 
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -390,52 +532,79 @@ def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, :rtype: pd.DataFrame """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return # Make predictions if use_last_window: - data_last_window = copy.deepcopy(input_data_dict['df_input_data']) + data_last_window = copy.deepcopy(input_data_dict["df_input_data"]) else: data_last_window = None predictions = mlf.predict(data_last_window) # Publish data to a Home Assistant sensor - model_predict_publish = input_data_dict['params']['passed_data']['model_predict_publish'] - model_predict_entity_id = input_data_dict['params']['passed_data']['model_predict_entity_id'] - model_predict_unit_of_measurement = input_data_dict['params']['passed_data']['model_predict_unit_of_measurement'] - model_predict_friendly_name = input_data_dict['params']['passed_data']['model_predict_friendly_name'] - publish_prefix = input_data_dict['params']['passed_data']['publish_prefix'] + model_predict_publish = input_data_dict["params"]["passed_data"][ + "model_predict_publish" + ] + model_predict_entity_id = input_data_dict["params"]["passed_data"][ + "model_predict_entity_id" + ] + model_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "model_predict_unit_of_measurement" + ] + model_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "model_predict_friendly_name" + ] + publish_prefix = input_data_dict["params"]["passed_data"]["publish_prefix"] if model_predict_publish is True: # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = predictions.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = predictions.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = predictions.index.get_indexer([now_precise], method="ffill")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = predictions.index.get_indexer([now_precise], method="bfill")[ + 0 + ] if idx_closest == -1: - idx_closest = 
-def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False, mlf: Optional[MLForecaster] = None - ) -> Tuple[pd.DataFrame, MLForecaster]: + +def forecast_model_tune( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> Tuple[pd.DataFrame, MLForecaster]: """Tune a forecast model's hyperparameters using Bayesian optimization. :param input_data_dict: A dictionary with multiple data used by the action functions @@ -451,28 +620,32 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, :rtype: Tuple[pd.DataFrame, MLForecaster] """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this tune method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this tune method" + ) return None, None # Tune the model df_pred_optim = mlf.tune(debug=debug) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf
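The regressor functions that follow read all of their inputs from `params['passed_data']`. A hypothetical runtime-parameters payload covering both actions (values are illustrative only; the keys mirror the lookups in the code below):

```python
# Illustrative only: the values are made up, the keys mirror the code below
passed_data = {
    "model_type": "heating_hours_degreeday",
    "sklearn_model": "LinearRegression",
    "features": ["degreeday", "solar"],
    "target": "heating_hours",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
    # one value per feature plus one per date feature, in fit order
    "new_values": [8.2, 7.23, 2, 6],
    "mlr_predict_entity_id": "sensor.mlr_predict",
    "mlr_predict_unit_of_measurement": "h",
    "mlr_predict_friendly_name": "mlr predictor",
}
```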
-def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a regressor model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionary with multiple data used by the action functions @@ -482,26 +655,30 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - timestamp = input_data_dict['params']['passed_data']['timestamp'] - date_features = input_data_dict['params']['passed_data']['date_features'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + features = input_data_dict["params"]["passed_data"]["features"] + target = input_data_dict["params"]["passed_data"]["target"] + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + date_features = input_data_dict["params"]["passed_data"]["date_features"] + root = input_data_dict["root"] # The MLRegressor object - mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + mlr = MLRegressor( + data, model_type, sklearn_model, features, target, timestamp, logger + ) # Fit the ML model mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_mlr.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlr.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL)
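To exercise `MLRegressor` in isolation, a minimal sketch on synthetic data (feature and target names are hypothetical; no timestamp column, so no date features):

```python
import logging

import numpy as np
import pandas as pd

from emhass.machine_learning_regressor import MLRegressor

logger = logging.getLogger(__name__)
rng = np.random.default_rng(42)
# Synthetic data: the target is a noisy linear mix of the two features
solar = rng.uniform(0, 5, 200)
degreeday = rng.uniform(0, 15, 200)
data = pd.DataFrame(
    {
        "solar": solar,
        "degreeday": degreeday,
        "heating_hours": 0.5 * solar + 0.2 * degreeday + rng.normal(0, 0.1, 200),
    }
)
mlr = MLRegressor(
    data,
    "heating_hours_degreeday",  # hypothetical model_type name
    "LinearRegression",
    ["solar", "degreeday"],
    "heating_hours",
    None,  # no timestamp column in this sketch
    logger,
)
mlr.fit()  # train/test split plus grid search, as in the class code
print(mlr.predict([2.24, 5.68]))  # one prediction for new feature values
```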
-def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_predict( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a prediction from a csv file. :param input_data_dict: A dictionary with multiple data used by the action functions @@ -511,38 +688,53 @@ def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlr.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlr.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlr = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML regressor file was not found, please run a model fit method before this predict method" + ) return - new_values = input_data_dict['params']['passed_data']['new_values'] + new_values = input_data_dict["params"]["passed_data"]["new_values"] # Predict from csv file prediction = mlr.predict(new_values) - mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] - mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] - mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ + "mlr_predict_entity_id" + ] + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "mlr_predict_unit_of_measurement" + ] + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "mlr_predict_friendly_name" + ] # Publish prediction idx = 0 - input_data_dict['rh'].post_data(prediction, idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var = 'mlregressor') + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + )
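`publish_data`, which follows, republishes results that a previous optimization task wrote to CSV. Its reload pattern boils down to this sketch (assumes `opt_res_latest.csv` exists and a 30-minute optimization step):

```python
import pandas as pd

# 'opt_res_latest.csv' is written by the optimization actions; the assigned
# frequency must match retrieve_hass_conf["freq"] (30 min is assumed here)
opt_res_latest = pd.read_csv("opt_res_latest.csv", index_col="timestamp")
opt_res_latest.index = pd.to_datetime(opt_res_latest.index)
opt_res_latest.index.freq = pd.Timedelta("30min")
print(opt_res_latest[["P_PV", "P_Load", "P_grid"]].head())
```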
-def publish_data(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, - opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: + +def publish_data( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + opt_res_latest: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: """ Publish the data obtained from the optimization results. - + :param input_data_dict: A dictionary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -556,166 +748,245 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, logger.info("Publishing data to HASS instance") # Check if a day ahead optimization has been performed (read CSV file) if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: - filename = 'opt_res_latest.csv' + filename = "opt_res_latest.csv" if opt_res_latest is None: - if not os.path.isfile(pathlib.Path(input_data_dict['root']) / filename): + if not os.path.isfile(pathlib.Path(input_data_dict["root"]) / filename): logger.error("File not found error, run an optimization task first.") return else: - opt_res_latest = pd.read_csv(pathlib.Path(input_data_dict['root']) / filename, index_col='timestamp') + opt_res_latest = pd.read_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_col="timestamp" + ) opt_res_latest.index = pd.to_datetime(opt_res_latest.index) - opt_res_latest.index.freq = input_data_dict['retrieve_hass_conf']['freq'] + opt_res_latest.index.freq = input_data_dict["retrieve_hass_conf"]["freq"] # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="ffill")[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="bfill")[0] if idx_closest == -1: - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] # Publish the data - params = json.loads(input_data_dict['params']) - publish_prefix = params['passed_data']['publish_prefix'] + params = json.loads(input_data_dict["params"]) + publish_prefix = params["passed_data"]["publish_prefix"] # Publish PV forecast - custom_pv_forecast_id = params['passed_data']['custom_pv_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_PV'], idx_closest, - custom_pv_forecast_id["entity_id"], - custom_pv_forecast_id["unit_of_measurement"], - custom_pv_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) + custom_pv_forecast_id = params["passed_data"]["custom_pv_forecast_id"] +
input_data_dict["rh"].post_data( + opt_res_latest["P_PV"], + idx_closest, + custom_pv_forecast_id["entity_id"], + custom_pv_forecast_id["unit_of_measurement"], + custom_pv_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) # Publish Load forecast - custom_load_forecast_id = params['passed_data']['custom_load_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_Load'], idx_closest, - custom_load_forecast_id["entity_id"], - custom_load_forecast_id["unit_of_measurement"], - custom_load_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = ['P_PV', 'P_Load'] + custom_load_forecast_id = params["passed_data"]["custom_load_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_Load"], + idx_closest, + custom_load_forecast_id["entity_id"], + custom_load_forecast_id["unit_of_measurement"], + custom_load_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = ["P_PV", "P_Load"] # Publish deferrable loads - custom_deferrable_forecast_id = params['passed_data']['custom_deferrable_forecast_id'] - for k in range(input_data_dict['opt'].optim_conf['num_def_loads']): + custom_deferrable_forecast_id = params["passed_data"][ + "custom_deferrable_forecast_id" + ] + for k in range(input_data_dict["opt"].optim_conf["num_def_loads"]): if "P_deferrable{}".format(k) not in opt_res_latest.columns: - logger.error("P_deferrable{}".format(k)+" was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + logger.error( + "P_deferrable{}".format(k) + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + ) else: - input_data_dict['rh'].post_data(opt_res_latest["P_deferrable{}".format(k)], idx_closest, - custom_deferrable_forecast_id[k]["entity_id"], - custom_deferrable_forecast_id[k]["unit_of_measurement"], - custom_deferrable_forecast_id[k]["friendly_name"], - type_var = 'deferrable', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_deferrable{}".format(k)] + input_data_dict["rh"].post_data( + opt_res_latest["P_deferrable{}".format(k)], + idx_closest, + custom_deferrable_forecast_id[k]["entity_id"], + custom_deferrable_forecast_id[k]["unit_of_measurement"], + custom_deferrable_forecast_id[k]["friendly_name"], + type_var="deferrable", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_deferrable{}".format(k)] # Publish battery power - if input_data_dict['opt'].optim_conf['set_use_battery']: - if 'P_batt' not in opt_res_latest.columns: - logger.error("P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + if input_data_dict["opt"].optim_conf["set_use_battery"]: + if "P_batt" not in opt_res_latest.columns: + logger.error( + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." 
+ ) else: - custom_batt_forecast_id = params['passed_data']['custom_batt_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_batt'], idx_closest, - custom_batt_forecast_id["entity_id"], - custom_batt_forecast_id["unit_of_measurement"], - custom_batt_forecast_id["friendly_name"], - type_var = 'batt', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_batt"] - custom_batt_soc_forecast_id = params['passed_data']['custom_batt_soc_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['SOC_opt']*100, idx_closest, - custom_batt_soc_forecast_id["entity_id"], - custom_batt_soc_forecast_id["unit_of_measurement"], - custom_batt_soc_forecast_id["friendly_name"], - type_var = 'SOC', - publish_prefix = publish_prefix) - cols_published = cols_published+["SOC_opt"] + custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_batt"], + idx_closest, + custom_batt_forecast_id["entity_id"], + custom_batt_forecast_id["unit_of_measurement"], + custom_batt_forecast_id["friendly_name"], + type_var="batt", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_batt"] + custom_batt_soc_forecast_id = params["passed_data"][ + "custom_batt_soc_forecast_id" + ] + input_data_dict["rh"].post_data( + opt_res_latest["SOC_opt"] * 100, + idx_closest, + custom_batt_soc_forecast_id["entity_id"], + custom_batt_soc_forecast_id["unit_of_measurement"], + custom_batt_soc_forecast_id["friendly_name"], + type_var="SOC", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["SOC_opt"] # Publish grid power - custom_grid_forecast_id = params['passed_data']['custom_grid_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_grid'], idx_closest, - custom_grid_forecast_id["entity_id"], - custom_grid_forecast_id["unit_of_measurement"], - custom_grid_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_grid"] + custom_grid_forecast_id = params["passed_data"]["custom_grid_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_grid"], + idx_closest, + custom_grid_forecast_id["entity_id"], + custom_grid_forecast_id["unit_of_measurement"], + custom_grid_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_grid"] # Publish total value of cost function - custom_cost_fun_id = params['passed_data']['custom_cost_fun_id'] - col_cost_fun = [i for i in opt_res_latest.columns if 'cost_fun_' in i] - input_data_dict['rh'].post_data(opt_res_latest[col_cost_fun], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'cost_fun', - publish_prefix = publish_prefix) + custom_cost_fun_id = params["passed_data"]["custom_cost_fun_id"] + col_cost_fun = [i for i in opt_res_latest.columns if "cost_fun_" in i] + input_data_dict["rh"].post_data( + opt_res_latest[col_cost_fun], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="cost_fun", + publish_prefix=publish_prefix, + ) # Publish the optimization status - custom_cost_fun_id = params['passed_data']['custom_optim_status_id'] + custom_cost_fun_id = params["passed_data"]["custom_optim_status_id"] if "optim_status" not in opt_res_latest: - opt_res_latest["optim_status"] = 'Optimal' - logger.warning("no optim_status 
in opt_res_latest, run an optimization task first") - input_data_dict['rh'].post_data(opt_res_latest['optim_status'], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'optim_status', - publish_prefix = publish_prefix) - cols_published = cols_published+["optim_status"] + opt_res_latest["optim_status"] = "Optimal" + logger.warning( + "no optim_status in opt_res_latest, run an optimization task first" + ) + input_data_dict["rh"].post_data( + opt_res_latest["optim_status"], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="optim_status", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["optim_status"] # Publish unit_load_cost - custom_unit_load_cost_id = params['passed_data']['custom_unit_load_cost_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_load_cost'], idx_closest, - custom_unit_load_cost_id["entity_id"], - custom_unit_load_cost_id["unit_of_measurement"], - custom_unit_load_cost_id["friendly_name"], - type_var = 'unit_load_cost', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_load_cost"] + custom_unit_load_cost_id = params["passed_data"]["custom_unit_load_cost_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_load_cost"], + idx_closest, + custom_unit_load_cost_id["entity_id"], + custom_unit_load_cost_id["unit_of_measurement"], + custom_unit_load_cost_id["friendly_name"], + type_var="unit_load_cost", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_load_cost"] # Publish unit_prod_price - custom_unit_prod_price_id = params['passed_data']['custom_unit_prod_price_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_prod_price'], idx_closest, - custom_unit_prod_price_id["entity_id"], - custom_unit_prod_price_id["unit_of_measurement"], - custom_unit_prod_price_id["friendly_name"], - type_var = 'unit_prod_price', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_prod_price"] + custom_unit_prod_price_id = params["passed_data"]["custom_unit_prod_price_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_prod_price"], + idx_closest, + custom_unit_prod_price_id["entity_id"], + custom_unit_prod_price_id["unit_of_measurement"], + custom_unit_prod_price_id["friendly_name"], + type_var="unit_prod_price", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_prod_price"] # Create a DF summarizing what has been published opt_res = opt_res_latest[cols_published].loc[[opt_res_latest.index[idx_closest]]] return opt_res
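Everything that `main()` below wires together can also be driven directly from Python. A minimal sketch (hypothetical paths; a valid configuration file is assumed):

```python
import pathlib

from emhass import utils
from emhass.command_line import (
    dayahead_forecast_optim,
    publish_data,
    set_input_data_dict,
)

config_path = pathlib.Path("/app/config_emhass.yaml")  # hypothetical path
base_path = str(config_path.parent)
logger, ch = utils.get_logger(__name__, base_path, save_to_file=False)
# Same argument order as the main() wiring below
input_data_dict = set_input_data_dict(
    config_path, base_path, "profit", None, None, "dayahead-optim", logger, False
)
opt_res = dayahead_forecast_optim(input_data_dict, logger)
opt_res_published = publish_data(input_data_dict, logger)
```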
- - + + def main(): r"""Define the main command line entry function. This function may take several arguments as inputs. You can type `emhass --help` to see the list of options: - + - action: Set the desired action, options are: perfect-optim, dayahead-optim, naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune - + - config: Define path to the config.yaml file - + - costfun: Define the type of cost function, options are: profit, cost, self-consumption - + - log2file: Define if we should log to a file or not - + - params: Configuration parameters passed from data/options.json if using the add-on - + - runtimeparams: Pass runtime optimization parameters as dictionary - + - debug: Use True for testing purposes - + """ # Parsing arguments parser = argparse.ArgumentParser() - parser.add_argument('--action', type=str, help='Set the desired action, options are: perfect-optim, dayahead-optim,\ naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune') - parser.add_argument('--config', type=str, help='Define path to the config.yaml file') - parser.add_argument('--costfun', type=str, default='profit', help='Define the type of cost function, options are: profit, cost, self-consumption') - parser.add_argument('--log2file', type=strtobool, default='False', help='Define if we should log to a file or not') - parser.add_argument('--params', type=str, default=None, help='Configuration parameters passed from data/options.json') - parser.add_argument('--runtimeparams', type=str, default=None, help='Pass runtime optimization parameters as dictionnary') - parser.add_argument('--debug', type=strtobool, default='False', help='Use True for testing purposes') + parser.add_argument( + "--action", + type=str, + help="Set the desired action, options are: perfect-optim, dayahead-optim,\ naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune", + ) + parser.add_argument( + "--config", type=str, help="Define path to the config.yaml file" + ) + parser.add_argument( + "--costfun", + type=str, + default="profit", + help="Define the type of cost function, options are: profit, cost, self-consumption", + ) + parser.add_argument( + "--log2file", + type=strtobool, + default="False", + help="Define if we should log to a file or not", + ) + parser.add_argument( + "--params", + type=str, + default=None, + help="Configuration parameters passed from data/options.json", + ) + parser.add_argument( + "--runtimeparams", + type=str, + default=None, + help="Pass runtime optimization parameters as dictionary", + ) + parser.add_argument( + "--debug", type=strtobool, default="False", help="Use True for testing purposes" + ) args = parser.parse_args() # The path to the configuration files config_path = pathlib.Path(args.config) @@ -724,39 +995,56 @@ def main(): logger, ch = utils.get_logger(__name__, base_path, save_to_file=bool(args.log2file)) # Additional argument try: - parser.add_argument('--version', action='version', version='%(prog)s '+version('emhass')) + parser.add_argument( + "--version", action="version", version="%(prog)s " + version("emhass") + ) args = parser.parse_args() except Exception: - logger.info("Version not found for emhass package. Or importlib exited with PackageNotFoundError.") + logger.info( + "Version not found for emhass package. Or importlib exited with PackageNotFoundError."
+ ) # Setup parameters - input_data_dict = set_input_data_dict(config_path, base_path, - args.costfun, args.params, args.runtimeparams, args.action, - logger, args.debug) + input_data_dict = set_input_data_dict( + config_path, + base_path, + args.costfun, + args.params, + args.runtimeparams, + args.action, + logger, + args.debug, + ) # Perform selected action - if args.action == 'perfect-optim': + if args.action == "perfect-optim": opt_res = perfect_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'dayahead-optim': + elif args.action == "dayahead-optim": opt_res = dayahead_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'naive-mpc-optim': + elif args.action == "naive-mpc-optim": opt_res = naive_mpc_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'forecast-model-fit': - df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) + elif args.action == "forecast-model-fit": + df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit( + input_data_dict, logger, debug=args.debug + ) opt_res = None - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred = forecast_model_predict(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred = forecast_model_predict( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'publish-data': + elif args.action == "publish-data": opt_res = publish_data(input_data_dict, logger) else: logger.error("The passed action argument is not valid") @@ -765,15 +1053,20 @@ def main(): # Flush the logger ch.close() logger.removeHandler(ch) - if args.action == 'perfect-optim' or args.action == 'dayahead-optim' or \ - args.action == 'naive-mpc-optim' or args.action == 'publish-data': + if ( + args.action == "perfect-optim" + or args.action == "dayahead-optim" + or args.action == "naive-mpc-optim" + or args.action == "publish-data" + ): return opt_res - elif args.action == 'forecast-model-fit': + elif args.action == "forecast-model-fit": return df_fit_pred, df_fit_pred_backtest, mlf - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": return df_pred - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": return df_pred_optim, mlf -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index d70df3ec..80ddd74f 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -9,8 +9,12 @@ import pandas as pd import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score +from sklearn.ensemble import ( + AdaBoostRegressor, + GradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.metrics import r2_score from sklearn.linear_model 
import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split @@ -20,21 +24,31 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) + class MLRegressor: r""" A regressor class using machine learning models. - + This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - + It exposes two main methods: - + - `fit`: to train a model with the passed data. - + - `predict`: to obtain a forecast from a pre-trained model. - + """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: + + def __init__( + self, + data, + model_type: str, + sklearn_model: str, + features: list, + target: str, + timestamp: str, + logger: logging.Logger, + ) -> None: r"""Define constructor for the regressor class. :param data: The data that will be used for train/test @@ -42,33 +56,35 @@ def __init__(self, data, model_type: str, sklearn_model: str, independent_variab :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param independent_variables: A list of independent variables. \ + :param features: A list of features. \ Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ + :type features: list + :param target: The target (to be predicted). \ Example: `hours`. - :type dependent_variable: str + :type target: str :param timestamp: If defined, the column key that has to be used as timestamp. :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable + self.features = features + self.target = target self.timestamp = timestamp self.model_type = model_type self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None self.model = None - self.grid_search =None - + self.grid_search = None + @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: + def add_date_features( + data: pd.DataFrame, date_features: list, timestamp: str + ) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame @@ -79,179 +95,162 @@ def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) - :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] + df[timestamp] = pd.to_datetime(df["timestamp"]) + if "year" in date_features: + df["year"] = [i.year for i in df["timestamp"]] + if "month" in date_features: + df["month"] = [i.month for i in df["timestamp"]] + if "day_of_week" in date_features: + df["day_of_week"] = [i.dayofweek for i in df["timestamp"]] + if "day_of_year" in date_features: + df["day_of_year"] = [i.dayofyear for i in df["timestamp"]] + if "day" in date_features: + df["day"] = [i.day for i in df["timestamp"]] + if "hour" in date_features: + df["hour"] = [i.hour for i in df["timestamp"]] return df def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - + :param date_features: A list of 'date_features' to take into account when fitting the model. :type date_features: list """ - self.logger.info("Performing a csv model fit for "+self.model_type) + self.logger.info("Performing a MLRegressor fit for " + self.model_type) self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + self.data_exo[self.features] = self.data[self.features] + self.data_exo[self.target] = self.data[self.target] keep_columns = [] - keep_columns.extend(self.independent_variables) + keep_columns.extend(self.features) if self.timestamp is not None: keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) + keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features( + self.data_exo, date_features, self.timestamp + ) else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") + self.logger.error( + "If no timestamp is provided, date_features cannot be used; continuing without date_features."
+ ) - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + y = self.data_exo[self.target] + self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + self.data_exo = self.data_exo.drop(self.timestamp, axis=1) X = self.data_exo - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 + ) self.steps = len(X_test) regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = 
regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] + + if self.sklearn_model == "LinearRegression": + base_model = regression_methods["LinearRegression"]["model"] + param_grid = regression_methods["LinearRegression"]["param_grid"] + elif self.sklearn_model == "RidgeRegression": + base_model = regression_methods["RidgeRegression"]["model"] + param_grid = regression_methods["RidgeRegression"]["param_grid"] + elif self.sklearn_model == "LassoRegression": + base_model = regression_methods["LassoRegression"]["model"] + param_grid = regression_methods["LassoRegression"]["param_grid"] + elif self.sklearn_model == "RandomForestRegression": + base_model = regression_methods["RandomForestRegression"]["model"] + param_grid = regression_methods["RandomForestRegression"]["param_grid"] + elif self.sklearn_model == "GradientBoostingRegression": + base_model = regression_methods["GradientBoostingRegression"]["model"] + param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] + elif self.sklearn_model == "AdaBoostRegression": + base_model = regression_methods["AdaBoostRegression"]["model"] + param_grid = regression_methods["AdaBoostRegression"]["param_grid"] else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } + self.logger.error( + "Passed sklearn model " + self.sklearn_model + " is not valid" + ) + + self.model = make_pipeline(StandardScaler(), base_model) # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) - + self.grid_search = GridSearchCV( + self.model, + param_grid, + cv=5, + scoring="neg_mean_squared_error", + 
refit=True, + verbose=0, + n_jobs=-1, + ) + # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") + self.logger.info("Training a " + self.sklearn_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) + print("Best hyperparameters: ", self.grid_search.best_params_) print("Best score for cost function: ", self.grid_search.best_score_) self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") self.model = self.grid_search.best_estimator_ - # Make predictions predictions = self.model.predict(X_test.values) predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - + pred_metric = r2_score(y_test, predictions) + self.logger.info( + f"Prediction R2 score of fitted model on test data: {pred_metric}" + ) - def predict(self, new_values:list) -> np.ndarray: + def predict(self, new_values: list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + :param new_values: The new values for the features (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.model_type) + self.logger.info("Performing a prediction for " + self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values)
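One detail worth noting in the grid search above: with `make_pipeline`, parameter-grid keys must carry the lowercased step name as a prefix, which is why the grids use keys like `linearregression__fit_intercept`. A standalone sketch of the same scaler-plus-model tuning pattern on synthetic data:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = rng.random((100, 2))
y = X @ np.array([0.5, 0.2]) + 0.1

# make_pipeline names the step 'linearregression', hence the grid-key prefix
model = make_pipeline(StandardScaler(), LinearRegression())
grid_search = GridSearchCV(
    model,
    {"linearregression__fit_intercept": [True, False]},
    cv=5,
    scoring="neg_mean_squared_error",
    refit=True,
    n_jobs=-1,
)
grid_search.fit(X, y)
print(grid_search.best_params_)
print(grid_search.best_estimator_.predict(np.array([[2.24, 5.68]])))
```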
diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index ca20ce40..9f47efef 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -30,12 +30,20 @@ class RetrieveHass: """ - def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, - time_zone: datetime.timezone, params: str, base_path: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> None: + def __init__( + self, + hass_url: str, + long_lived_token: str, + freq: pd.Timedelta, + time_zone: datetime.timezone, + params: str, + base_path: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, + ) -> None: """ Define constructor for RetrieveHass class. - + :param hass_url: The URL of the Home Assistant instance :type hass_url: str :param long_lived_token: The long lived token retrieved from the configuration pane @@ -50,7 +58,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, :type base_path: str :param logger: The passed logger object :type logger: logging object - :param get_data_from_file: Select if data should be retrieved from a + :param get_data_from_file: Select if data should be retrieved from a previously saved pickle useful for testing or directly from connection to hass database :type get_data_from_file: bool, optional @@ -65,9 +73,14 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, self.logger = logger self.get_data_from_file = get_data_from_file - def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: Optional[bool] = False, - significant_changes_only: Optional[bool] = False, - test_url: Optional[str] = 'empty') -> None: + def get_data( + self, + days_list: pd.date_range, + var_list: list, + minimal_response: Optional[bool] = False, + significant_changes_only: Optional[bool] = False, + test_url: Optional[str] = "empty", + ) -> None: r""" Retrieve the actual data from hass. @@ -92,20 +105,36 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O """ self.logger.info("Retrieve hass get data method initiated...") self.df_final = pd.DataFrame() - x = 0 #iterate based on days + x = 0 # iterate based on days # Looping on each day from days list for day in days_list: - + for i, var in enumerate(var_list): - + - if test_url == 'empty': - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/history/period/"+day.isoformat()+"?filter_entity_id="+var - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/history/period/"+day.isoformat()+"?filter_entity_id="+var - if minimal_response: # A support for minimal response + + if test_url == "empty": + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = ( + self.hass_url + + "/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + else: # Otherwise the Home Assistant Core API it is + url = ( + self.hass_url + + "api/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + if minimal_response: # A support for minimal response url = url + "?minimal_response" - if significant_changes_only: # And for signicant changes only (check the HASS restful API for more info) + if ( + significant_changes_only + ): # And for significant changes only (check the HASS restful API for more info) url = url + "?significant_changes_only" else: url = test_url @@ -116,59 +145,96 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O try: response = get(url, headers=headers) except Exception: - self.logger.error("Unable to access Home Assistance instance, check URL") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistant instance, check URL" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'" + ) return False else: if response.status_code == 401: - self.logger.error("Unable to access Home Assistance instance, TOKEN/KEY") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistant instance, TOKEN/KEY" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'"
+ ) return False if response.status_code > 299: return f"Request Get Error: {response.status_code}" - '''import bz2 # Uncomment to save a serialized data for tests import _pickle as cPickle with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f: - cPickle.dump(response, f)''' + """import bz2 # Uncomment to save a serialized data for tests import _pickle as cPickle with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f: + cPickle.dump(response, f)""" try: # Sometimes when there are connection problems we need to catch empty retrieved json data = response.json()[0] except IndexError: - if x == 0: - self.logger.error("The retrieved JSON is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + if x == 0: + self.logger.error( + "The retrieved JSON is empty, a sensor: " + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("The retrieved JSON is empty for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False df_raw = pd.DataFrame.from_dict(data) if len(df_raw) == 0: - if x == 0: - self.logger.error("The retrieved Dataframe is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + if x == 0: + self.logger.error( + "The retrieved Dataframe is empty, a sensor: " + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("Retrieved empty Dataframe for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False - if i == 0: # Defining the DataFrame container - from_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").min() - to_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").max() - ts = pd.to_datetime(pd.date_range(start=from_date, end=to_date, freq=self.freq), - format='%Y-%d-%m %H:%M').round(self.freq, ambiguous='infer', nonexistent='shift_forward') - df_day = pd.DataFrame(index = ts) + if i == 0: # Defining the DataFrame container + from_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).min() + to_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).max() + ts = pd.to_datetime( + pd.date_range(start=from_date, end=to_date, freq=self.freq), + format="%Y-%d-%m %H:%M", + ).round(self.freq, ambiguous="infer", nonexistent="shift_forward") + df_day = pd.DataFrame(index=ts) # Caution with undefined string data: unknown, unavailable, etc.
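That caution is handled by the replace/astype chain just below; as a standalone sketch with hypothetical raw states:

```python
import numpy as np
import pandas as pd

# Hypothetical raw history states as returned by the Home Assistant API
df_raw = pd.DataFrame({"state": ["1.2", "unknown", "3.4", "unavailable", ""]})
df_tp = (
    df_raw.copy()[["state"]]
    .replace(["unknown", "unavailable", ""], np.nan)
    .astype(float)
    .rename(columns={"state": "sensor.power_load_no_var_loads"})
)
print(df_tp)  # non-numeric states become NaN before resampling
```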
- df_tp = df_raw.copy()[['state']].replace( - ['unknown', 'unavailable', ''], np.nan).astype(float).rename(columns={'state': var}) + df_tp = ( + df_raw.copy()[["state"]] + .replace(["unknown", "unavailable", ""], np.nan) + .astype(float) + .rename(columns={"state": var}) + ) # Setting index, resampling and concatenation - df_tp.set_index(pd.to_datetime(df_raw['last_changed'], format="ISO8601"), inplace=True) + df_tp.set_index( + pd.to_datetime(df_raw["last_changed"], format="ISO8601"), + inplace=True, + ) df_tp = df_tp.resample(self.freq).mean() df_day = pd.concat([df_day, df_tp], axis=1) - + x += 1 self.df_final = pd.concat([self.df_final, df_day], axis=0) self.df_final = set_df_index_freq(self.df_final) if self.df_final.index.freq != self.freq: - self.logger.error("The inferred freq from data is not equal to the defined freq in passed parameters") + self.logger.error( + "The inferred freq from data is not equal to the defined freq in passed parameters" + ) return False return True - - def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set_zero_min: Optional[bool] = True, - var_replace_zero: Optional[list] = None, var_interp: Optional[list] = None) -> None: + + def prepare_data( + self, + var_load: str, + load_negative: Optional[bool] = False, + set_zero_min: Optional[bool] = True, + var_replace_zero: Optional[list] = None, + var_interp: Optional[list] = None, + ) -> None: r""" Apply some data treatment in preparation for the optimization task. @@ -192,18 +258,24 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set """ try: - if load_negative: # Apply the correct sign to load power - self.df_final[var_load+'_positive'] = -self.df_final[var_load] + if load_negative: # Apply the correct sign to load power + self.df_final[var_load + "_positive"] = -self.df_final[var_load] else: - self.df_final[var_load+'_positive'] = self.df_final[var_load] + self.df_final[var_load + "_positive"] = self.df_final[var_load] self.df_final.drop([var_load], inplace=True, axis=1) except KeyError: - self.logger.error("Variable "+var_load+" was not found. This is typically because no data could be retrieved from Home Assistant") + self.logger.error( + "Variable " + + var_load + + " was not found. 
This is typically because no data could be retrieved from Home Assistant" + ) return False except ValueError: - self.logger.error("sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same") - return False - if set_zero_min: # Apply minimum values + self.logger.error( + "sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same" + ) + return False + if set_zero_min: # Apply minimum values self.df_final.clip(lower=0.0, inplace=True, axis=1) self.df_final.replace(to_replace=0.0, value=np.nan, inplace=True) new_var_replace_zero = [] @@ -211,59 +283,74 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set # Just changing the names of variables to contain the fact that they are considered positive if var_replace_zero is not None: for string in var_replace_zero: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_replace_zero.append(new_string) else: new_var_replace_zero = None if var_interp is not None: for string in var_interp: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_interp.append(new_string) else: new_var_interp = None # Treating NaN replacement: either by zeros or by linear interpolation if new_var_replace_zero is not None: - self.df_final[new_var_replace_zero] = self.df_final[new_var_replace_zero].fillna(0.0) + self.df_final[new_var_replace_zero] = self.df_final[ + new_var_replace_zero + ].fillna(0.0) if new_var_interp is not None: self.df_final[new_var_interp] = self.df_final[new_var_interp].interpolate( - method='linear', axis=0, limit=None) + method="linear", axis=0, limit=None + ) self.df_final[new_var_interp] = self.df_final[new_var_interp].fillna(0.0) # Setting the correct time zone on DF index if self.time_zone is not None: self.df_final.index = self.df_final.index.tz_convert(self.time_zone) # Drop datetimeindex duplicates on final DF - self.df_final = self.df_final[~self.df_final.index.duplicated(keep='first')] + self.df_final = self.df_final[~self.df_final.index.duplicated(keep="first")] return True - + @staticmethod - def get_attr_data_dict(data_df: pd.DataFrame, idx: int, entity_id: str, - unit_of_measurement: str, friendly_name: str, - list_name: str, state: float) -> dict: - list_df = copy.deepcopy(data_df).loc[data_df.index[idx]:].reset_index() - list_df.columns = ['timestamps', entity_id] - ts_list = [str(i) for i in list_df['timestamps'].tolist()] - vals_list = [str(np.round(i,2)) for i in list_df[entity_id].tolist()] + def get_attr_data_dict( + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + list_name: str, + state: float, + ) -> dict: + list_df = copy.deepcopy(data_df).loc[data_df.index[idx] :].reset_index() + list_df.columns = ["timestamps", entity_id] + ts_list = [str(i) for i in list_df["timestamps"].tolist()] + vals_list = [str(np.round(i, 2)) for i in list_df[entity_id].tolist()] forecast_list = [] for i, ts in enumerate(ts_list): datum = {} datum["date"] = ts - datum[entity_id.split('sensor.')[1]] = vals_list[i] + datum[entity_id.split("sensor.")[1]] = vals_list[i] forecast_list.append(datum) data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, "friendly_name": friendly_name, - list_name: forecast_list - } + list_name: forecast_list, + }, } return data - - def post_data(self, data_df: pd.DataFrame, idx: int, 
entity_id: str, - unit_of_measurement: str, friendly_name: str, - type_var: str, - from_mlforecaster: Optional[bool]=False, - publish_prefix: Optional[str]="") -> None: + + def post_data( + self, + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + type_var: str, + from_mlforecaster: Optional[bool] = False, + publish_prefix: Optional[str] = "", + ) -> None: r""" Post passed data to hass. @@ -286,82 +373,139 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, """ # Add a possible prefix to the entity ID - entity_id = entity_id.replace('sensor.', 'sensor.'+publish_prefix) + entity_id = entity_id.replace("sensor.", "sensor." + publish_prefix) # Set the URL - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/states/"+entity_id - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/states/"+entity_id + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = self.hass_url + "/states/" + entity_id + else: # Otherwise the Home Assistant Core API it is + url = self.hass_url + "api/states/" + entity_id headers = { "Authorization": "Bearer " + self.long_lived_token, "content-type": "application/json", } # Preparing the data dict to be published - if type_var == 'cost_fun': - state = np.round(data_df.sum()[0],2) - elif type_var == 'unit_load_cost' or type_var == 'unit_prod_price': - state = np.round(data_df.loc[data_df.index[idx]],4) - elif type_var == 'optim_status': + if type_var == "cost_fun": + state = np.round(data_df.sum()[0], 2) + elif type_var == "unit_load_cost" or type_var == "unit_prod_price": + state = np.round(data_df.loc[data_df.index[idx]], 4) + elif type_var == "optim_status": state = data_df.loc[data_df.index[idx]] - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": state = data_df[idx] else: - state = np.round(data_df.loc[data_df.index[idx]],2) - if type_var == 'power': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "forecasts", state) - elif type_var == 'deferrable': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "deferrables_schedule", state) - elif type_var == 'batt': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_power", state) - elif type_var == 'SOC': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_soc", state) - elif type_var == 'unit_load_cost': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_load_cost_forecasts", state) - elif type_var == 'unit_prod_price': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_prod_price_forecasts", state) - elif type_var == 'mlforecaster': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "scheduled_forecast", state) - elif type_var == 'optim_status': + state = np.round(data_df.loc[data_df.index[idx]], 2) + if type_var == "power": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "forecasts", + state, + ) + elif type_var == "deferrable": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + 
unit_of_measurement, + friendly_name, + "deferrables_schedule", + state, + ) + elif type_var == "batt": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_power", + state, + ) + elif type_var == "SOC": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_soc", + state, + ) + elif type_var == "unit_load_cost": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_load_cost_forecasts", + state, + ) + elif type_var == "unit_prod_price": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_prod_price_forecasts", + state, + ) + elif type_var == "mlforecaster": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "scheduled_forecast", + state, + ) + elif type_var == "optim_status": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } else: data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } # Actually post the data if self.get_data_from_file: - class response: pass + + class response: + pass + response.status_code = 200 response.ok = True else: response = post(url, headers=headers, data=json.dumps(data)) # Treating the response status and posting them on the logger if response.ok: - self.logger.info("Successfully posted to "+entity_id+" = "+str(state)) + self.logger.info("Successfully posted to " + entity_id + " = " + str(state)) else: - self.logger.info("The status code for received curl command response is: "+str(response.status_code)) + self.logger.info( + "The status code for received curl command response is: " + + str(response.status_code) + ) return response, data diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 4bbac11c..3a2cadd3 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -2,10 +2,19 @@ # -*- coding: utf-8 -*- from typing import Tuple, Optional -import numpy as np, pandas as pd -import yaml, pytz, logging, pathlib, json, copy from datetime import datetime, timedelta, timezone +import logging +import pathlib +import json +import copy +import numpy as np +import pandas as pd +import yaml +import pytz + + import plotly.express as px + pd.options.plotting.backend = "plotly" from emhass.machine_learning_forecaster import MLForecaster @@ -14,13 +23,13 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: """ Get the root absolute path of the working directory. 
- + :param file: The passed file path with __file__ :return: The root path :param num_parent: The number of parents levels up to desired root folder :type num_parent: int, optional :rtype: str - + """ if num_parent == 3: root = pathlib.Path(file).resolve().parent.parent.parent @@ -32,11 +41,16 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: raise ValueError("num_parent value not valid, must be between 1 and 3") return root -def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = True, - logging_level: Optional[str] = "DEBUG") -> Tuple[logging.Logger, logging.StreamHandler]: + +def get_logger( + fun_name: str, + config_path: str, + save_to_file: Optional[bool] = True, + logging_level: Optional[str] = "DEBUG", +) -> Tuple[logging.Logger, logging.StreamHandler]: """ Create a simple logger object. - + :param fun_name: The Python function object name where the logger will be used :type fun_name: str :param config_path: The path to the yaml configuration file @@ -45,14 +59,14 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T :type save_to_file: bool, optional :return: The logger object and the handler :rtype: object - + """ - # create logger object + # create logger object logger = logging.getLogger(fun_name) logger.propagate = True logger.fileSetting = save_to_file if save_to_file: - ch = logging.FileHandler(config_path + '/data/logger_emhass.log') + ch = logging.FileHandler(config_path + "/data/logger_emhass.log") else: ch = logging.StreamHandler() if logging_level == "DEBUG": @@ -70,14 +84,18 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T else: logger.setLevel(logging.DEBUG) ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) return logger, ch -def get_forecast_dates(freq: int, delta_forecast: int, - timedelta_days: Optional[int] = 0) -> pd.core.indexes.datetimes.DatetimeIndex: + +def get_forecast_dates( + freq: int, delta_forecast: int, timedelta_days: Optional[int] = 0 +) -> pd.core.indexes.datetimes.DatetimeIndex: """ Get the date_range list of the needed future dates using the delta_forecast parameter. @@ -89,7 +107,7 @@ def get_forecast_dates(freq: int, delta_forecast: int, :type timedelta_days: Optional[int], optional :return: A list of future forecast dates. :rtype: pd.core.indexes.datetimes.DatetimeIndex - + """ freq = pd.to_timedelta(freq, "minutes") start_forecast = pd.Timestamp(datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) @@ -99,11 +117,19 @@ def get_forecast_dates(freq: int, delta_forecast: int, freq=freq).round(freq, ambiguous='infer', nonexistent='shift_forward') return forecast_dates -def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict, - set_type: str, logger: logging.Logger) -> Tuple[str, dict]: + +def treat_runtimeparams( + runtimeparams: str, + params: str, + retrieve_hass_conf: dict, + optim_conf: dict, + plant_conf: dict, + set_type: str, + logger: logging.Logger, +) -> Tuple[str, dict]: """ - Treat the passed optimization runtime parameters. - + Treat the passed optimization runtime parameters. + :param runtimeparams: Json string containing the runtime parameters dict. 
:type runtimeparams: str :param params: Configuration parameters passed from data/options.json @@ -120,115 +146,155 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic :type logger: logging.Logger :return: Returning the params and optimization parameter container. :rtype: Tuple[str, dict] - + """ - if (params != None) and (params != 'null'): + if (params != None) and (params != "null"): params = json.loads(params) else: params = {} # Some default data needed custom_deferrable_forecast_id = [] - for k in range(optim_conf['num_def_loads']): - custom_deferrable_forecast_id.append({ - "entity_id": "sensor.p_deferrable{}".format(k), - "unit_of_measurement": "W", - "friendly_name": "Deferrable Load {}".format(k) - }) - default_passed_dict = {'custom_pv_forecast_id': {"entity_id": "sensor.p_pv_forecast", "unit_of_measurement": "W", "friendly_name": "PV Power Forecast"}, - 'custom_load_forecast_id': {"entity_id": "sensor.p_load_forecast", "unit_of_measurement": "W", "friendly_name": "Load Power Forecast"}, - 'custom_batt_forecast_id': {"entity_id": "sensor.p_batt_forecast", "unit_of_measurement": "W", "friendly_name": "Battery Power Forecast"}, - 'custom_batt_soc_forecast_id': {"entity_id": "sensor.soc_batt_forecast", "unit_of_measurement": "%", "friendly_name": "Battery SOC Forecast"}, - 'custom_grid_forecast_id': {"entity_id": "sensor.p_grid_forecast", "unit_of_measurement": "W", "friendly_name": "Grid Power Forecast"}, - 'custom_cost_fun_id': {"entity_id": "sensor.total_cost_fun_value", "unit_of_measurement": "", "friendly_name": "Total cost function value"}, - 'custom_optim_status_id': {"entity_id": "sensor.optim_status", "unit_of_measurement": "", "friendly_name": "EMHASS optimization status"}, - 'custom_unit_load_cost_id': {"entity_id": "sensor.unit_load_cost", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Load Cost"}, - 'custom_unit_prod_price_id': {"entity_id": "sensor.unit_prod_price", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Prod Price"}, - 'custom_deferrable_forecast_id': custom_deferrable_forecast_id, - 'publish_prefix': ""} - if 'passed_data' in params.keys(): + for k in range(optim_conf["num_def_loads"]): + custom_deferrable_forecast_id.append( + { + "entity_id": "sensor.p_deferrable{}".format(k), + "unit_of_measurement": "W", + "friendly_name": "Deferrable Load {}".format(k), + } + ) + default_passed_dict = { + "custom_pv_forecast_id": { + "entity_id": "sensor.p_pv_forecast", + "unit_of_measurement": "W", + "friendly_name": "PV Power Forecast", + }, + "custom_load_forecast_id": { + "entity_id": "sensor.p_load_forecast", + "unit_of_measurement": "W", + "friendly_name": "Load Power Forecast", + }, + "custom_batt_forecast_id": { + "entity_id": "sensor.p_batt_forecast", + "unit_of_measurement": "W", + "friendly_name": "Battery Power Forecast", + }, + "custom_batt_soc_forecast_id": { + "entity_id": "sensor.soc_batt_forecast", + "unit_of_measurement": "%", + "friendly_name": "Battery SOC Forecast", + }, + "custom_grid_forecast_id": { + "entity_id": "sensor.p_grid_forecast", + "unit_of_measurement": "W", + "friendly_name": "Grid Power Forecast", + }, + "custom_cost_fun_id": { + "entity_id": "sensor.total_cost_fun_value", + "unit_of_measurement": "", + "friendly_name": "Total cost function value", + }, + "custom_optim_status_id": { + "entity_id": "sensor.optim_status", + "unit_of_measurement": "", + "friendly_name": "EMHASS optimization status", + }, + "custom_unit_load_cost_id": { + "entity_id": "sensor.unit_load_cost", + 
"unit_of_measurement": "€/kWh", + "friendly_name": "Unit Load Cost", + }, + "custom_unit_prod_price_id": { + "entity_id": "sensor.unit_prod_price", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Prod Price", + }, + "custom_deferrable_forecast_id": custom_deferrable_forecast_id, + "publish_prefix": "", + } + if "passed_data" in params.keys(): for key, value in default_passed_dict.items(): - params['passed_data'][key] = value + params["passed_data"][key] = value else: - params['passed_data'] = default_passed_dict + params["passed_data"] = default_passed_dict if runtimeparams is not None: runtimeparams = json.loads(runtimeparams) - freq = int(retrieve_hass_conf['freq'].seconds/60.0) - delta_forecast = int(optim_conf['delta_forecast'].days) + freq = int(retrieve_hass_conf["freq"].seconds / 60.0) + delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) if set_type == "regressor-model-fit": - csv_file = runtimeparams['csv_file'] - independent_variables = runtimeparams['independent_variables'] - dependent_variable = runtimeparams['dependent_variable'] - params['passed_data']['csv_file'] = csv_file - params['passed_data']['independent_variables'] = independent_variables - params['passed_data']['dependent_variable'] = dependent_variable - if 'timestamp' not in runtimeparams.keys(): - params['passed_data']['timestamp'] = None + csv_file = runtimeparams["csv_file"] + features = runtimeparams["features"] + target = runtimeparams["target"] + params["passed_data"]["csv_file"] = csv_file + params["passed_data"]["features"] = features + params["passed_data"]["target"] = target + if "timestamp" not in runtimeparams.keys(): + params["passed_data"]["timestamp"] = None else: - timestamp = runtimeparams['timestamp'] - params['passed_data']['timestamp'] = timestamp - if 'date_features' not in runtimeparams.keys(): - params['passed_data']['date_features'] = [] + timestamp = runtimeparams["timestamp"] + params["passed_data"]["timestamp"] = timestamp + if "date_features" not in runtimeparams.keys(): + params["passed_data"]["date_features"] = [] else: - date_features = runtimeparams['date_features'] - params['passed_data']['date_features'] = date_features - + date_features = runtimeparams["date_features"] + params["passed_data"]["date_features"] = date_features + if set_type == "regressor-model-predict": - new_values = runtimeparams['new_values'] - params['passed_data']['new_values'] = new_values + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values # Treating special data passed for MPC control case - if set_type == 'naive-mpc-optim': - if 'prediction_horizon' not in runtimeparams.keys(): - prediction_horizon = 10 # 10 time steps by default + if set_type == "naive-mpc-optim": + if "prediction_horizon" not in runtimeparams.keys(): + prediction_horizon = 10 # 10 time steps by default else: - prediction_horizon = runtimeparams['prediction_horizon'] - params['passed_data']['prediction_horizon'] = prediction_horizon - if 'soc_init' not in runtimeparams.keys(): - soc_init = plant_conf['SOCtarget'] + prediction_horizon = runtimeparams["prediction_horizon"] + params["passed_data"]["prediction_horizon"] = prediction_horizon + if "soc_init" not in runtimeparams.keys(): + soc_init = plant_conf["SOCtarget"] else: - soc_init = runtimeparams['soc_init'] - params['passed_data']['soc_init'] = soc_init - if 'soc_final' not in runtimeparams.keys(): - soc_final = plant_conf['SOCtarget'] + soc_init = 
runtimeparams["soc_init"] + params["passed_data"]["soc_init"] = soc_init + if "soc_final" not in runtimeparams.keys(): + soc_final = plant_conf["SOCtarget"] else: - soc_final = runtimeparams['soc_final'] - params['passed_data']['soc_final'] = soc_final - if 'def_total_hours' not in runtimeparams.keys(): - def_total_hours = optim_conf['def_total_hours'] + soc_final = runtimeparams["soc_final"] + params["passed_data"]["soc_final"] = soc_final + if "def_total_hours" not in runtimeparams.keys(): + def_total_hours = optim_conf["def_total_hours"] else: - def_total_hours = runtimeparams['def_total_hours'] - params['passed_data']['def_total_hours'] = def_total_hours - if 'def_start_timestep' not in runtimeparams.keys(): - def_start_timestep = optim_conf['def_start_timestep'] + def_total_hours = runtimeparams["def_total_hours"] + params["passed_data"]["def_total_hours"] = def_total_hours + if "def_start_timestep" not in runtimeparams.keys(): + def_start_timestep = optim_conf["def_start_timestep"] else: - def_start_timestep = runtimeparams['def_start_timestep'] - params['passed_data']['def_start_timestep'] = def_start_timestep - if 'def_end_timestep' not in runtimeparams.keys(): - def_end_timestep = optim_conf['def_end_timestep'] + def_start_timestep = runtimeparams["def_start_timestep"] + params["passed_data"]["def_start_timestep"] = def_start_timestep + if "def_end_timestep" not in runtimeparams.keys(): + def_end_timestep = optim_conf["def_end_timestep"] else: - def_end_timestep = runtimeparams['def_end_timestep'] - params['passed_data']['def_end_timestep'] = def_end_timestep - if 'alpha' not in runtimeparams.keys(): + def_end_timestep = runtimeparams["def_end_timestep"] + params["passed_data"]["def_end_timestep"] = def_end_timestep + if "alpha" not in runtimeparams.keys(): alpha = 0.5 else: - alpha = runtimeparams['alpha'] - params['passed_data']['alpha'] = alpha - if 'beta' not in runtimeparams.keys(): + alpha = runtimeparams["alpha"] + params["passed_data"]["alpha"] = alpha + if "beta" not in runtimeparams.keys(): beta = 0.5 else: - beta = runtimeparams['beta'] - params['passed_data']['beta'] = beta + beta = runtimeparams["beta"] + params["passed_data"]["beta"] = beta forecast_dates = copy.deepcopy(forecast_dates)[0:prediction_horizon] else: - params['passed_data']['prediction_horizon'] = None - params['passed_data']['soc_init'] = None - params['passed_data']['soc_final'] = None - params['passed_data']['def_total_hours'] = None - params['passed_data']['def_start_timestep'] = None - params['passed_data']['def_end_timestep'] = None - params['passed_data']['alpha'] = None - params['passed_data']['beta'] = None + params["passed_data"]["prediction_horizon"] = None + params["passed_data"]["soc_init"] = None + params["passed_data"]["soc_final"] = None + params["passed_data"]["def_total_hours"] = None + params["passed_data"]["def_start_timestep"] = None + params["passed_data"]["def_end_timestep"] = None + params["passed_data"]["alpha"] = None + params["passed_data"]["beta"] = None # Treat passed forecast data lists list_forecast_key = ['pv_power_forecast', 'load_power_forecast', 'load_cost_forecast', 'prod_price_forecast'] forecast_methods = ['weather_forecast_method', 'load_forecast_method', 'load_cost_forecast_method', 'prod_price_forecast_method'] @@ -248,143 +314,188 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: params['passed_data'][forecast_key] = None # Treat passed data for forecast model fit/predict/tune at runtime - if 'days_to_retrieve' not in 
runtimeparams.keys(): + if "days_to_retrieve" not in runtimeparams.keys(): days_to_retrieve = 9 else: - days_to_retrieve = runtimeparams['days_to_retrieve'] - params['passed_data']['days_to_retrieve'] = days_to_retrieve - if 'model_type' not in runtimeparams.keys(): + days_to_retrieve = runtimeparams["days_to_retrieve"] + params["passed_data"]["days_to_retrieve"] = days_to_retrieve + if "model_type" not in runtimeparams.keys(): model_type = "load_forecast" else: - model_type = runtimeparams['model_type'] - params['passed_data']['model_type'] = model_type - if 'var_model' not in runtimeparams.keys(): + model_type = runtimeparams["model_type"] + params["passed_data"]["model_type"] = model_type + if "var_model" not in runtimeparams.keys(): var_model = "sensor.power_load_no_var_loads" else: - var_model = runtimeparams['var_model'] - params['passed_data']['var_model'] = var_model - if 'sklearn_model' not in runtimeparams.keys(): + var_model = runtimeparams["var_model"] + params["passed_data"]["var_model"] = var_model + if "sklearn_model" not in runtimeparams.keys(): sklearn_model = "KNeighborsRegressor" else: - sklearn_model = runtimeparams['sklearn_model'] - params['passed_data']['sklearn_model'] = sklearn_model - if 'num_lags' not in runtimeparams.keys(): + sklearn_model = runtimeparams["sklearn_model"] + params["passed_data"]["sklearn_model"] = sklearn_model + if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: - num_lags = runtimeparams['num_lags'] - params['passed_data']['num_lags'] = num_lags - if 'split_date_delta' not in runtimeparams.keys(): - split_date_delta = '48h' + num_lags = runtimeparams["num_lags"] + params["passed_data"]["num_lags"] = num_lags + if "split_date_delta" not in runtimeparams.keys(): + split_date_delta = "48h" else: - split_date_delta = runtimeparams['split_date_delta'] - params['passed_data']['split_date_delta'] = split_date_delta - if 'perform_backtest' not in runtimeparams.keys(): + split_date_delta = runtimeparams["split_date_delta"] + params["passed_data"]["split_date_delta"] = split_date_delta + if "perform_backtest" not in runtimeparams.keys(): perform_backtest = False else: - perform_backtest = eval(str(runtimeparams['perform_backtest']).capitalize()) - params['passed_data']['perform_backtest'] = perform_backtest - if 'model_predict_publish' not in runtimeparams.keys(): + perform_backtest = eval(str(runtimeparams["perform_backtest"]).capitalize()) + params["passed_data"]["perform_backtest"] = perform_backtest + if "model_predict_publish" not in runtimeparams.keys(): model_predict_publish = False else: - model_predict_publish = eval(str(runtimeparams['model_predict_publish']).capitalize()) - params['passed_data']['model_predict_publish'] = model_predict_publish - if 'model_predict_entity_id' not in runtimeparams.keys(): + model_predict_publish = eval( + str(runtimeparams["model_predict_publish"]).capitalize() + ) + params["passed_data"]["model_predict_publish"] = model_predict_publish + if "model_predict_entity_id" not in runtimeparams.keys(): model_predict_entity_id = "sensor.p_load_forecast_custom_model" else: - model_predict_entity_id = runtimeparams['model_predict_entity_id'] - params['passed_data']['model_predict_entity_id'] = model_predict_entity_id - if 'model_predict_unit_of_measurement' not in runtimeparams.keys(): + model_predict_entity_id = runtimeparams["model_predict_entity_id"] + params["passed_data"]["model_predict_entity_id"] = model_predict_entity_id + if "model_predict_unit_of_measurement" not in runtimeparams.keys(): 
model_predict_unit_of_measurement = "W" else: - model_predict_unit_of_measurement = runtimeparams['model_predict_unit_of_measurement'] - params['passed_data']['model_predict_unit_of_measurement'] = model_predict_unit_of_measurement - if 'model_predict_friendly_name' not in runtimeparams.keys(): + model_predict_unit_of_measurement = runtimeparams[ + "model_predict_unit_of_measurement" + ] + params["passed_data"][ + "model_predict_unit_of_measurement" + ] = model_predict_unit_of_measurement + if "model_predict_friendly_name" not in runtimeparams.keys(): model_predict_friendly_name = "Load Power Forecast custom ML model" else: - model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] - params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'mlr_predict_entity_id' not in runtimeparams.keys(): + model_predict_friendly_name = runtimeparams["model_predict_friendly_name"] + params["passed_data"][ + "model_predict_friendly_name" + ] = model_predict_friendly_name + if "mlr_predict_entity_id" not in runtimeparams.keys(): mlr_predict_entity_id = "sensor.mlr_predict" else: - mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] - params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id - if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_entity_id = runtimeparams["mlr_predict_entity_id"] + params["passed_data"]["mlr_predict_entity_id"] = mlr_predict_entity_id + if "mlr_predict_unit_of_measurement" not in runtimeparams.keys(): mlr_predict_unit_of_measurement = None else: - mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] - params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement - if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = runtimeparams[ + "mlr_predict_unit_of_measurement" + ] + params["passed_data"][ + "mlr_predict_unit_of_measurement" + ] = mlr_predict_unit_of_measurement + if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = 
runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant configuration parameters passed at runtime - if 'SOCtarget' in runtimeparams.keys(): - plant_conf['SOCtarget'] = runtimeparams['SOCtarget'] + if "SOCtarget" in runtimeparams.keys(): + plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] # Treat custom entities id's and friendly names for variables - if 'custom_pv_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_pv_forecast_id'] = runtimeparams['custom_pv_forecast_id'] - if 'custom_load_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_load_forecast_id'] = runtimeparams['custom_load_forecast_id'] - if 'custom_batt_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_forecast_id'] = runtimeparams['custom_batt_forecast_id'] - if 'custom_batt_soc_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_soc_forecast_id'] = runtimeparams['custom_batt_soc_forecast_id'] - if 'custom_grid_forecast_id' in runtimeparams.keys(): - 
params['passed_data']['custom_grid_forecast_id'] = runtimeparams['custom_grid_forecast_id'] - if 'custom_cost_fun_id' in runtimeparams.keys(): - params['passed_data']['custom_cost_fun_id'] = runtimeparams['custom_cost_fun_id'] - if 'custom_optim_status_id' in runtimeparams.keys(): - params['passed_data']['custom_optim_status_id'] = runtimeparams['custom_optim_status_id'] - if 'custom_unit_load_cost_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_load_cost_id'] = runtimeparams['custom_unit_load_cost_id'] - if 'custom_unit_prod_price_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_prod_price_id'] = runtimeparams['custom_unit_prod_price_id'] - if 'custom_deferrable_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_deferrable_forecast_id'] = runtimeparams['custom_deferrable_forecast_id'] + if "custom_pv_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_pv_forecast_id"] = runtimeparams[ + "custom_pv_forecast_id" + ] + if "custom_load_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_load_forecast_id"] = runtimeparams[ + "custom_load_forecast_id" + ] + if "custom_batt_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_forecast_id"] = runtimeparams[ + "custom_batt_forecast_id" + ] + if "custom_batt_soc_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_soc_forecast_id"] = runtimeparams[ + "custom_batt_soc_forecast_id" + ] + if "custom_grid_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_grid_forecast_id"] = runtimeparams[ + "custom_grid_forecast_id" + ] + if "custom_cost_fun_id" in runtimeparams.keys(): + params["passed_data"]["custom_cost_fun_id"] = runtimeparams[ + "custom_cost_fun_id" + ] + if "custom_optim_status_id" in runtimeparams.keys(): + params["passed_data"]["custom_optim_status_id"] = runtimeparams[ + "custom_optim_status_id" + ] + if "custom_unit_load_cost_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_load_cost_id"] = runtimeparams[ + "custom_unit_load_cost_id" + ] + if "custom_unit_prod_price_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_prod_price_id"] = runtimeparams[ + "custom_unit_prod_price_id" + ] + if "custom_deferrable_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_deferrable_forecast_id"] = runtimeparams[ + "custom_deferrable_forecast_id" + ] # A condition to put a prefix on all published data - if 'publish_prefix' not in runtimeparams.keys(): + if "publish_prefix" not in runtimeparams.keys(): publish_prefix = "" else: - publish_prefix = runtimeparams['publish_prefix'] - params['passed_data']['publish_prefix'] = publish_prefix + publish_prefix = runtimeparams["publish_prefix"] + params["passed_data"]["publish_prefix"] = publish_prefix # Serialize the final params params = json.dumps(params) return params, retrieve_hass_conf, optim_conf, plant_conf -def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, - params: Optional[str] = None) -> Tuple[dict, dict, dict]: + +def get_yaml_parse( + config_path: str, use_secrets: Optional[bool] = True, params: Optional[str] = None +) -> Tuple[dict, dict, dict]: """ Perform parsing of the config.yaml file. - + :param config_path: The path to the yaml configuration file :type config_path: str :param use_secrets: Indicate if we should use a secrets file or not. 
@@ -398,49 +509,54 @@ def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, """ base = config_path.parent if params is None: - with open(config_path, 'r') as file: + with open(config_path, "r") as file: input_conf = yaml.load(file, Loader=yaml.FullLoader) else: input_conf = json.loads(params) if use_secrets: if params is None: - with open(base / 'secrets_emhass.yaml', 'r') as file: + with open(base / "secrets_emhass.yaml", "r") as file: input_secrets = yaml.load(file, Loader=yaml.FullLoader) else: - input_secrets = input_conf.pop('params_secrets', None) - - if (type(input_conf['retrieve_hass_conf']) == list): #if using old config version - retrieve_hass_conf = dict({key:d[key] for d in input_conf['retrieve_hass_conf'] for key in d}) + input_secrets = input_conf.pop("params_secrets", None) + + if type(input_conf["retrieve_hass_conf"]) == list: # if using old config version + retrieve_hass_conf = dict( + {key: d[key] for d in input_conf["retrieve_hass_conf"] for key in d} + ) else: - retrieve_hass_conf = input_conf.get('retrieve_hass_conf', {}) - + retrieve_hass_conf = input_conf.get("retrieve_hass_conf", {}) + if use_secrets: retrieve_hass_conf.update(input_secrets) else: - retrieve_hass_conf['hass_url'] = 'http://supervisor/core/api' - retrieve_hass_conf['long_lived_token'] = '${SUPERVISOR_TOKEN}' - retrieve_hass_conf['time_zone'] = 'Europe/Paris' - retrieve_hass_conf['lat'] = 45.83 - retrieve_hass_conf['lon'] = 6.86 - retrieve_hass_conf['alt'] = 4807.8 - retrieve_hass_conf['freq'] = pd.to_timedelta(retrieve_hass_conf['freq'], "minutes") - retrieve_hass_conf['time_zone'] = pytz.timezone(retrieve_hass_conf['time_zone']) - - if (type(input_conf['optim_conf']) == list): - optim_conf = dict({key:d[key] for d in input_conf['optim_conf'] for key in d}) + retrieve_hass_conf["hass_url"] = "http://supervisor/core/api" + retrieve_hass_conf["long_lived_token"] = "${SUPERVISOR_TOKEN}" + retrieve_hass_conf["time_zone"] = "Europe/Paris" + retrieve_hass_conf["lat"] = 45.83 + retrieve_hass_conf["lon"] = 6.86 + retrieve_hass_conf["alt"] = 4807.8 + retrieve_hass_conf["freq"] = pd.to_timedelta(retrieve_hass_conf["freq"], "minutes") + retrieve_hass_conf["time_zone"] = pytz.timezone(retrieve_hass_conf["time_zone"]) + + if type(input_conf["optim_conf"]) == list: + optim_conf = dict({key: d[key] for d in input_conf["optim_conf"] for key in d}) else: - optim_conf = input_conf.get('optim_conf', {}) + optim_conf = input_conf.get("optim_conf", {}) + + optim_conf["list_hp_periods"] = dict( + (key, d[key]) for d in optim_conf["list_hp_periods"] for key in d + ) + optim_conf["delta_forecast"] = pd.Timedelta(days=optim_conf["delta_forecast"]) - optim_conf['list_hp_periods'] = dict((key,d[key]) for d in optim_conf['list_hp_periods'] for key in d) - optim_conf['delta_forecast'] = pd.Timedelta(days=optim_conf['delta_forecast']) - - if (type(input_conf['plant_conf']) == list): - plant_conf = dict({key:d[key] for d in input_conf['plant_conf'] for key in d}) + if type(input_conf["plant_conf"]) == list: + plant_conf = dict({key: d[key] for d in input_conf["plant_conf"] for key in d}) else: - plant_conf = input_conf.get('plant_conf', {}) - + plant_conf = input_conf.get("plant_conf", {}) + return retrieve_hass_conf, optim_conf, plant_conf + def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dict: """ Build a dictionary with graphs and tables for the webui. 
@@ -451,61 +567,86 @@ def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dic :type plot_size: Optional[int], optional :return: A dictionary containing the graphs and tables in html format :rtype: dict - + """ - cols_p = [i for i in df.columns.to_list() if 'P_' in i] + cols_p = [i for i in df.columns.to_list() if "P_" in i] # Let's round the data in the DF - optim_status = df['optim_status'].unique().item() - df.drop('optim_status', axis=1, inplace=True) - cols_else = [i for i in df.columns.to_list() if 'P_' not in i] + optim_status = df["optim_status"].unique().item() + df.drop("optim_status", axis=1, inplace=True) + cols_else = [i for i in df.columns.to_list() if "P_" not in i] df = df.apply(pd.to_numeric) df[cols_p] = df[cols_p].astype(int) df[cols_else] = df[cols_else].round(3) # Create plots n_colors = len(cols_p) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_0 = px.line(df[cols_p], title='Systems powers schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_0.update_layout(xaxis_title='Timestamp', yaxis_title='System powers (W)') - if 'SOC_opt' in df.columns.to_list(): - fig_1 = px.line(df['SOC_opt'], title='Battery state of charge schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_1.update_layout(xaxis_title='Timestamp', yaxis_title='Battery SOC (%)') - cols_cost = [i for i in df.columns.to_list() if 'cost_' in i or 'unit_' in i] + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_0 = px.line( + df[cols_p], + title="Systems powers schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_0.update_layout(xaxis_title="Timestamp", yaxis_title="System powers (W)") + if "SOC_opt" in df.columns.to_list(): + fig_1 = px.line( + df["SOC_opt"], + title="Battery state of charge schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_1.update_layout(xaxis_title="Timestamp", yaxis_title="Battery SOC (%)") + cols_cost = [i for i in df.columns.to_list() if "cost_" in i or "unit_" in i] n_colors = len(cols_cost) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_2 = px.line(df[cols_cost], title='Systems costs obtained from optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_2.update_layout(xaxis_title='Timestamp', yaxis_title='System costs (currency)') + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_2 = px.line( + df[cols_cost], + title="Systems costs obtained from optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_2.update_layout(xaxis_title="Timestamp", yaxis_title="System costs (currency)") # Get full path to image - image_path_0 = fig_0.to_html(full_html=False, default_width='75%') - if 'SOC_opt' in df.columns.to_list(): - image_path_1 = fig_1.to_html(full_html=False, default_width='75%') - image_path_2 = fig_2.to_html(full_html=False, default_width='75%') + image_path_0 = fig_0.to_html(full_html=False, default_width="75%") + if "SOC_opt" in df.columns.to_list(): + image_path_1 = fig_1.to_html(full_html=False, default_width="75%") + image_path_2 = fig_2.to_html(full_html=False, 
default_width="75%") # The tables - table1 = df.reset_index().to_html(classes='mystyle', index=False) - cost_cols = [i for i in df.columns if 'cost_' in i] + table1 = df.reset_index().to_html(classes="mystyle", index=False) + cost_cols = [i for i in df.columns if "cost_" in i] table2 = df[cost_cols].reset_index().sum(numeric_only=True) - table2['optim_status'] = optim_status - table2 = table2.to_frame(name='Value').reset_index(names='Variable').to_html(classes='mystyle', index=False) + table2["optim_status"] = optim_status + table2 = ( + table2.to_frame(name="Value") + .reset_index(names="Variable") + .to_html(classes="mystyle", index=False) + ) # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>EMHASS optimization results</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting latest optimization results</h4>'
-    injection_dict['figure_0'] = image_path_0
-    if 'SOC_opt' in df.columns.to_list():
-        injection_dict['figure_1'] = image_path_1
-    injection_dict['figure_2'] = image_path_2
-    injection_dict['subsubtitle1'] = '<h4>Last run optimization results table</h4>'
-    injection_dict['table1'] = table1
-    injection_dict['subsubtitle2'] = '<h4>Summary table for latest optimization results</h4>'
-    injection_dict['table2'] = table2
+    injection_dict["title"] = "<h2>EMHASS optimization results</h2>"
+    injection_dict["subsubtitle0"] = "<h4>Plotting latest optimization results</h4>"
+    injection_dict["figure_0"] = image_path_0
+    if "SOC_opt" in df.columns.to_list():
+        injection_dict["figure_1"] = image_path_1
+    injection_dict["figure_2"] = image_path_2
+    injection_dict["subsubtitle1"] = "<h4>Last run optimization results table</h4>"
+    injection_dict["table1"] = table1
+    injection_dict["subsubtitle2"] = (
+        "<h4>Summary table for latest optimization results</h4>"
+    )
+    injection_dict["table2"] = table2
     return injection_dict

-def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_fit(
+    df_fit_pred: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF fit case.

@@ -517,19 +658,26 @@ def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLFore
     :rtype: dict
     """
     fig = df_fit_pred.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model fit</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting train/test forecast model results for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model fit</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Plotting train/test forecast model results for " + mlf.model_type + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict

-def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_tune(
+    df_pred_optim: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF tune case.

@@ -541,19 +689,32 @@ def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLF
     :rtype: dict
     """
     fig = df_pred_optim.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model tune</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Performed a tuning routine using bayesian optimization for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model tune</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Performed a tuning routine using bayesian optimization for "
+        + mlf.model_type
+        + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict

-def build_params(params: dict, params_secrets: dict, options: dict, addon: int, logger: logging.Logger) -> dict:
+
+def build_params(
+    params: dict,
+    params_secrets: dict,
+    options: dict,
+    addon: int,
+    logger: logging.Logger,
+) -> dict:
     """
     Build the main params dictionary from the loaded options.json when using the add-on.

@@ -572,45 +733,120 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int,
     """
     if addon == 1:
         # Updating variables in retrieve_hass_conf
-        params['retrieve_hass_conf']['freq'] = options.get('optimization_time_step',params['retrieve_hass_conf']['freq'])
-        params['retrieve_hass_conf']['days_to_retrieve'] = options.get('historic_days_to_retrieve',params['retrieve_hass_conf']['days_to_retrieve'])
-        params['retrieve_hass_conf']['var_PV'] = options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV'])
-        params['retrieve_hass_conf']['var_load'] = options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])
-        params['retrieve_hass_conf']['load_negative'] = options.get('load_negative',params['retrieve_hass_conf']['load_negative'])
-        params['retrieve_hass_conf']['set_zero_min'] = options.get('set_zero_min',params['retrieve_hass_conf']['set_zero_min'])
-        params['retrieve_hass_conf']['var_replace_zero'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_replace_zero'])]
-        params['retrieve_hass_conf']['var_interp'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']), options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])]
-        params['retrieve_hass_conf']['method_ts_round'] = options.get('method_ts_round',params['retrieve_hass_conf']['method_ts_round'])
+        params["retrieve_hass_conf"]["freq"] = options.get(
+            "optimization_time_step", params["retrieve_hass_conf"]["freq"]
+        )
+        params["retrieve_hass_conf"]["days_to_retrieve"] = options.get(
+            "historic_days_to_retrieve",
+            params["retrieve_hass_conf"]["days_to_retrieve"],
+        )
+        params["retrieve_hass_conf"]["var_PV"] = options.get(
+            "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+        )
+        params["retrieve_hass_conf"]["var_load"] = options.get(
+            "sensor_power_load_no_var_loads", params["retrieve_hass_conf"]["var_load"]
+        )
+        params["retrieve_hass_conf"]["load_negative"] = options.get(
+            "load_negative", params["retrieve_hass_conf"]["load_negative"]
+        )
+        params["retrieve_hass_conf"]["set_zero_min"] = options.get(
+            "set_zero_min", params["retrieve_hass_conf"]["set_zero_min"]
+        )
+        params["retrieve_hass_conf"]["var_replace_zero"] = [
+            options.get(
+                "sensor_power_photovoltaics",
+                params["retrieve_hass_conf"]["var_replace_zero"],
+            )
+        ]
+        params["retrieve_hass_conf"]["var_interp"] = [
+            options.get(
+                "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+            ),
+            options.get(
+                "sensor_power_load_no_var_loads",
+                params["retrieve_hass_conf"]["var_load"],
+            ),
+        ]
+        params["retrieve_hass_conf"]["method_ts_round"] = options.get(
+            "method_ts_round", params["retrieve_hass_conf"]["method_ts_round"]
+        )
        # Update params Secrets if specified
-        params['params_secrets'] = params_secrets
-        params['params_secrets']['time_zone'] = options.get('time_zone',params_secrets['time_zone'])
-        params['params_secrets']['lat'] = options.get('Latitude',params_secrets['lat'])
-        params['params_secrets']['lon'] = options.get('Longitude',params_secrets['lon'])
-        params['params_secrets']['alt'] = 
options.get('Altitude',params_secrets['alt']) + params["params_secrets"] = params_secrets + params["params_secrets"]["time_zone"] = options.get( + "time_zone", params_secrets["time_zone"] + ) + params["params_secrets"]["lat"] = options.get("Latitude", params_secrets["lat"]) + params["params_secrets"]["lon"] = options.get( + "Longitude", params_secrets["lon"] + ) + params["params_secrets"]["alt"] = options.get("Altitude", params_secrets["alt"]) # Updating variables in optim_conf - params['optim_conf']['set_use_battery'] = options.get('set_use_battery',params['optim_conf']['set_use_battery']) - params['optim_conf']['num_def_loads'] = options.get('number_of_deferrable_loads',params['optim_conf']['num_def_loads']) - if options.get('list_nominal_power_of_deferrable_loads',None) != None: - params['optim_conf']['P_deferrable_nom'] = [i['nominal_power_of_deferrable_loads'] for i in options.get('list_nominal_power_of_deferrable_loads')] - if options.get('list_operating_hours_of_each_deferrable_load',None) != None: - params['optim_conf']['def_total_hours'] = [i['operating_hours_of_each_deferrable_load'] for i in options.get('list_operating_hours_of_each_deferrable_load')] - if options.get('list_treat_deferrable_load_as_semi_cont',None) != None: - params['optim_conf']['treat_def_as_semi_cont'] = [i['treat_deferrable_load_as_semi_cont'] for i in options.get('list_treat_deferrable_load_as_semi_cont')] - params['optim_conf']['weather_forecast_method'] = options.get('weather_forecast_method',params['optim_conf']['weather_forecast_method']) + params["optim_conf"]["set_use_battery"] = options.get( + "set_use_battery", params["optim_conf"]["set_use_battery"] + ) + params["optim_conf"]["num_def_loads"] = options.get( + "number_of_deferrable_loads", params["optim_conf"]["num_def_loads"] + ) + if options.get("list_nominal_power_of_deferrable_loads", None) != None: + params["optim_conf"]["P_deferrable_nom"] = [ + i["nominal_power_of_deferrable_loads"] + for i in options.get("list_nominal_power_of_deferrable_loads") + ] + if options.get("list_operating_hours_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_total_hours"] = [ + i["operating_hours_of_each_deferrable_load"] + for i in options.get("list_operating_hours_of_each_deferrable_load") + ] + if options.get("list_treat_deferrable_load_as_semi_cont", None) != None: + params["optim_conf"]["treat_def_as_semi_cont"] = [ + i["treat_deferrable_load_as_semi_cont"] + for i in options.get("list_treat_deferrable_load_as_semi_cont") + ] + params["optim_conf"]["weather_forecast_method"] = options.get( + "weather_forecast_method", params["optim_conf"]["weather_forecast_method"] + ) # Update optional param secrets - if params['optim_conf']['weather_forecast_method'] == "solcast": - params['params_secrets']['solcast_api_key'] = options.get('optional_solcast_api_key',params_secrets.get('solcast_api_key',"123456")) - params['params_secrets']['solcast_rooftop_id'] = options.get('optional_solcast_rooftop_id',params_secrets.get('solcast_rooftop_id',"123456")) - elif params['optim_conf']['weather_forecast_method'] == "solar.forecast": - params['params_secrets']['solar_forecast_kwp'] = options.get('optional_solar_forecast_kwp',params_secrets.get('solar_forecast_kwp',5)) - params['optim_conf']['load_forecast_method'] = options.get('load_forecast_method',params['optim_conf']['load_forecast_method']) - params['optim_conf']['delta_forecast'] = options.get('delta_forecast_daily',params['optim_conf']['delta_forecast']) - 
params['optim_conf']['load_cost_forecast_method'] = options.get('load_cost_forecast_method',params['optim_conf']['load_cost_forecast_method']) - if options.get('list_set_deferrable_load_single_constant',None) != None: - params['optim_conf']['set_def_constant'] = [i['set_deferrable_load_single_constant'] for i in options.get('list_set_deferrable_load_single_constant')] - if options.get('list_peak_hours_periods_start_hours',None) != None and options.get('list_peak_hours_periods_end_hours',None) != None: - start_hours_list = [i['peak_hours_periods_start_hours'] for i in options['list_peak_hours_periods_start_hours']] - end_hours_list = [i['peak_hours_periods_end_hours'] for i in options['list_peak_hours_periods_end_hours']] + if params["optim_conf"]["weather_forecast_method"] == "solcast": + params["params_secrets"]["solcast_api_key"] = options.get( + "optional_solcast_api_key", + params_secrets.get("solcast_api_key", "123456"), + ) + params["params_secrets"]["solcast_rooftop_id"] = options.get( + "optional_solcast_rooftop_id", + params_secrets.get("solcast_rooftop_id", "123456"), + ) + elif params["optim_conf"]["weather_forecast_method"] == "solar.forecast": + params["params_secrets"]["solar_forecast_kwp"] = options.get( + "optional_solar_forecast_kwp", + params_secrets.get("solar_forecast_kwp", 5), + ) + params["optim_conf"]["load_forecast_method"] = options.get( + "load_forecast_method", params["optim_conf"]["load_forecast_method"] + ) + params["optim_conf"]["delta_forecast"] = options.get( + "delta_forecast_daily", params["optim_conf"]["delta_forecast"] + ) + params["optim_conf"]["load_cost_forecast_method"] = options.get( + "load_cost_forecast_method", + params["optim_conf"]["load_cost_forecast_method"], + ) + if options.get("list_set_deferrable_load_single_constant", None) != None: + params["optim_conf"]["set_def_constant"] = [ + i["set_deferrable_load_single_constant"] + for i in options.get("list_set_deferrable_load_single_constant") + ] + if ( + options.get("list_peak_hours_periods_start_hours", None) != None + and options.get("list_peak_hours_periods_end_hours", None) != None + ): + start_hours_list = [ + i["peak_hours_periods_start_hours"] + for i in options["list_peak_hours_periods_start_hours"] + ] + end_hours_list = [ + i["peak_hours_periods_end_hours"] + for i in options["list_peak_hours_periods_end_hours"] + ] num_peak_hours = len(start_hours_list) list_hp_periods_list = [{'period_hp_'+str(i+1):[{'start':start_hours_list[i]},{'end':end_hours_list[i]}]} for i in range(num_peak_hours)] params['optim_conf']['list_hp_periods'] = list_hp_periods_list @@ -682,20 +918,35 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, for x in range(len(params['optim_conf']['P_deferrable_nom']), params['optim_conf']['num_def_loads']): params['optim_conf']['P_deferrable_nom'].append(0) # days_to_retrieve should be no less then 2 - if params['retrieve_hass_conf']['days_to_retrieve'] < 2: - params['retrieve_hass_conf']['days_to_retrieve'] = 2 - logger.warning("days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history") + if params["retrieve_hass_conf"]["days_to_retrieve"] < 2: + params["retrieve_hass_conf"]["days_to_retrieve"] = 2 + logger.warning( + "days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. 
Make sure your sensors also have at least 2 days of history" + ) else: - params['params_secrets'] = params_secrets + params["params_secrets"] = params_secrets # The params dict - params['passed_data'] = {'pv_power_forecast':None,'load_power_forecast':None,'load_cost_forecast':None,'prod_price_forecast':None, - 'prediction_horizon':None,'soc_init':None,'soc_final':None,'def_total_hours':None,'def_start_timestep':None,'def_end_timestep':None,'alpha':None,'beta':None} + params["passed_data"] = { + "pv_power_forecast": None, + "load_power_forecast": None, + "load_cost_forecast": None, + "prod_price_forecast": None, + "prediction_horizon": None, + "soc_init": None, + "soc_final": None, + "def_total_hours": None, + "def_start_timestep": None, + "def_end_timestep": None, + "alpha": None, + "beta": None, + } return params + def get_days_list(days_to_retrieve: int) -> pd.date_range: """ Get list of past days from today to days_to_retrieve. - + :param days_to_retrieve: Total number of days to retrieve from the past :type days_to_retrieve: int :return: The list of days @@ -704,19 +955,20 @@ def get_days_list(days_to_retrieve: int) -> pd.date_range: """ today = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0) d = (today - timedelta(days=days_to_retrieve)).isoformat() - days_list = pd.date_range(start=d, end=today.isoformat(), freq='D') - + days_list = pd.date_range(start=d, end=today.isoformat(), freq="D") + return days_list + def set_df_index_freq(df: pd.DataFrame) -> pd.DataFrame: """ Set the freq of a DataFrame DateTimeIndex. - + :param df: Input DataFrame :type df: pd.DataFrame :return: Input DataFrame with freq defined :rtype: pd.DataFrame - + """ idx_diff = np.diff(df.index) sampling = pd.to_timedelta(np.median(idx_diff)) From b15914dfb0893f0a9c71d386474475b43d8bcf20 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 14:55:30 +0100 Subject: [PATCH 044/111] sklearn-model -> regression-model --- src/emhass/command_line.py | 4 ++-- src/emhass/utils.py | 5 +++++ src/emhass/web_server.py | 8 ++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index b4a9050c..1706d34c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -657,7 +657,7 @@ def regressor_model_fit( """ data = copy.deepcopy(input_data_dict["df_input_data"]) model_type = input_data_dict["params"]["passed_data"]["model_type"] - sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] features = input_data_dict["params"]["passed_data"]["features"] target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] @@ -665,7 +665,7 @@ def regressor_model_fit( root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( - data, model_type, sklearn_model, features, target, timestamp, logger + data, model_type, regression_model, features, target, timestamp, logger ) # Fit the ML model mlr.fit(date_features=date_features) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 3a2cadd3..836f1085 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -334,6 +334,11 @@ def treat_runtimeparams( else: sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model + if "regression_model" not in runtimeparams.keys(): + regression_model = "LinearRegression" + else: + regression_model = 
runtimeparams["regression_model"] + params["passed_data"]["regression_model"] = regression_model if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index e72022fe..39afe939 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -195,14 +195,14 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a regressor fit...") + app.logger.info(" >> Performing a machine learning regressor fit...") regressor_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-fit executed... \n' + msg = f'EMHASS >> Action regressor-model-fit executed... \n' return make_response(msg, 201) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a regressor predict...") + app.logger.info(" >> Performing a machine learning regressor predict...") regressor_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-predict executed... \n' + msg = f'EMHASS >> Action regressor-model-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 9807ed6db2be141a99bbe93e7877bd8a66019a31 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 15:18:57 +0100 Subject: [PATCH 045/111] REGRESSION_METHODS const --- src/emhass/machine_learning_regressor.py | 127 ++++++++++++----------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 80ddd74f..9e7795d0 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -24,6 +24,41 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) +REGRESSION_METHODS = { + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + } class MLRegressor: r""" @@ -43,7 +78,7 @@ def __init__( self, data, model_type: str, - sklearn_model: str, + regression_model: str, features: list, target: str, timestamp: str, @@ -56,11 +91,15 @@ def __init__( :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str + :param regression_model: The model that will be used. For now only \ + this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ + `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + :type regression_model: str :param features: A list of features. 
\ - Example: [`solar`, `degree_days`]. + Example: [`solar_production`, `degree_days`]. :type features: list :param target: The target(to be predicted). \ - Example: `hours`. + Example: `heating_hours`. :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. :type timestamp: str @@ -72,7 +111,7 @@ def __init__( self.target = target self.timestamp = timestamp self.model_type = model_type - self.sklearn_model = sklearn_model + self.regression_model = regression_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep="first")] @@ -111,7 +150,7 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = []) -> None: + def fit(self, date_features: Optional[list] = None) -> None: """ Fit the model using the provided data. @@ -129,7 +168,7 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: + if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( self.data_exo, date_features, self.timestamp @@ -150,63 +189,27 @@ def fit(self, date_features: Optional[list] = []) -> None: ) self.steps = len(X_test) - regression_methods = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } - - if self.sklearn_model == "LinearRegression": - base_model = regression_methods["LinearRegression"]["model"] - param_grid = regression_methods["LinearRegression"]["param_grid"] - elif self.sklearn_model == "RidgeRegression": - base_model = regression_methods["RidgeRegression"]["model"] - param_grid = regression_methods["RidgeRegression"]["param_grid"] - elif self.sklearn_model == "LassoRegression": - base_model = regression_methods["LassoRegression"]["model"] - param_grid = regression_methods["LassoRegression"]["param_grid"] - elif self.sklearn_model == "RandomForestRegression": - base_model = regression_methods["RandomForestRegression"]["model"] - param_grid = regression_methods["RandomForestRegression"]["param_grid"] - elif self.sklearn_model == "GradientBoostingRegression": - base_model = regression_methods["GradientBoostingRegression"]["model"] - param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] - elif self.sklearn_model == "AdaBoostRegression": - base_model = regression_methods["AdaBoostRegression"]["model"] - param_grid = regression_methods["AdaBoostRegression"]["param_grid"] + if self.regression_model == "LinearRegression": + base_model 
= REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model " + self.sklearn_model + " is not valid" + "Passed sklearn model " + self.regression_model + " is not valid" ) self.model = make_pipeline(StandardScaler(), base_model) @@ -223,7 +226,7 @@ def fit(self, date_features: Optional[list] = []) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.sklearn_model + " model") + self.logger.info("Training a " + self.regression_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) print("Best value for lambda : ", self.grid_search.best_params_) From 063ccf2b140dd2e0ba8a6093084838e775a9fd2a Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:12:28 +0100 Subject: [PATCH 046/111] Some cleanup --- src/emhass/machine_learning_regressor.py | 220 +++++++++++++---------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 9e7795d0..95f624b3 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -1,70 +1,72 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +"""Machine learning regressor module.""" + +from __future__ import annotations import copy -import logging import time -from typing import Optional import warnings +from typing import TYPE_CHECKING -import pandas as pd import numpy as np +import pandas as pd from sklearn.ensemble import ( AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, ) -from sklearn.metrics import r2_score - from sklearn.linear_model import Lasso, LinearRegression, Ridge +from sklearn.metrics import r2_score from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +if TYPE_CHECKING: + import logging warnings.filterwarnings("ignore", category=DeprecationWarning) REGRESSION_METHODS = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": 
{"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, +} + class MLRegressor: - r""" - A forecaster class using machine learning models. + r"""A forecaster class using machine learning models. - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. + This class uses the `sklearn` module and the machine learning models are \ + from `scikit-learn`. It exposes two main methods: @@ -74,9 +76,9 @@ class MLRegressor: """ - def __init__( - self, - data, + def __init__( # noqa: PLR0913 + self: MLRegressor, + data: pd.DataFrame, model_type: str, regression_model: str, features: list, @@ -92,8 +94,9 @@ def __init__( for what it will be used for. :type model_type: str :param regression_model: The model that will be used. For now only \ - this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ - `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + this options are possible: `LinearRegression`, `RidgeRegression`, \ + `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ Example: [`solar_production`, `degree_days`]. @@ -113,7 +116,7 @@ def __init__( self.model_type = model_type self.regression_model = regression_model self.logger = logger - self.data.sort_index(inplace=True) + self.data = self.data.sort_index() self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None @@ -122,9 +125,11 @@ def __init__( @staticmethod def add_date_features( - data: pd.DataFrame, date_features: list, timestamp: str + data: pd.DataFrame, + date_features: list, + timestamp: str, ) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp + """Add date features from the input DataFrame timestamp. 
:param data: The input DataFrame :type data: pd.DataFrame @@ -133,7 +138,7 @@ def add_date_features( :return: The DataFrame with the added features :rtype: pd.DataFrame """ - df = copy.deepcopy(data) + df = copy.deepcopy(data) # noqa: PD901 df[timestamp] = pd.to_datetime(df["timestamp"]) if "year" in date_features: df["year"] = [i.year for i in df["timestamp"]] @@ -150,14 +155,54 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = None) -> None: + def get_regression_model(self: MLRegressor) -> tuple[str, str]: + """Get the base model and parameter grid for the specified regression model. + + Returns a tuple containing the base model and parameter grid corresponding to \ + the specified regression model. + + Args: + ---- + self: The instance of the MLRegressor class. + + Returns: + ------- + A tuple containing the base model and parameter grid. + """ - Fit the model using the provided data. + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] + else: + self.logger.error( + "Passed sklearn model %s is not valid", + self.regression_model, + ) + return base_model, param_grid - :param date_features: A list of 'date_features' to take into account when fitting the model. + def fit(self: MLRegressor, date_features: list | None = None) -> None: + """Fit the model using the provided data. + + :param date_features: A list of 'date_features' to take into account when \ + fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for " + self.model_type) + self.logger.info("Performing a MLRegressor fit for %s", self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.features] = self.data[self.features] self.data_exo[self.target] = self.data[self.target] @@ -167,50 +212,36 @@ def fit(self, date_features: Optional[list] = None) -> None: keep_columns.append(self.timestamp) keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) + self.data_exo = self.data_exo.reset_index(drop=True) if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( - self.data_exo, date_features, self.timestamp + self.data_exo, + date_features, + self.timestamp, ) else: self.logger.error( - "If no timestamp provided, you can't use date_features, going further without date_features." 
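# add_date_features() above extracts calendar features with per-element list
# comprehensions; an equivalent, vectorized sketch using the pandas .dt
# accessor (same result, assuming the column already holds datetimes):
import pandas as pd

df = pd.DataFrame({"timestamp": pd.date_range("2024-01-01", periods=3, freq="D")})
df["year"] = df["timestamp"].dt.year
df["month"] = df["timestamp"].dt.month
df["day_of_week"] = df["timestamp"].dt.dayofweek
df["day"] = df["timestamp"].dt.day
print(df)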
+ "If no timestamp provided, you can't use date_features, going \ + further without date_features.", ) y = self.data_exo[self.target] self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: self.data_exo = self.data_exo.drop(self.timestamp, axis=1) - X = self.data_exo + X = self.data_exo # noqa: N806 - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 + X_train, X_test, y_train, y_test = train_test_split( # noqa: N806 + X, + y, + test_size=0.2, + random_state=42, ) + self.steps = len(X_test) - if self.regression_model == "LinearRegression": - base_model = REGRESSION_METHODS["LinearRegression"]["model"] - param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] - elif self.regression_model == "RidgeRegression": - base_model = REGRESSION_METHODS["RidgeRegression"]["model"] - param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] - elif self.regression_model == "LassoRegression": - base_model = REGRESSION_METHODS["LassoRegression"]["model"] - param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] - elif self.regression_model == "RandomForestRegression": - base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] - param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] - elif self.regression_model == "GradientBoostingRegression": - base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] - param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] - elif self.regression_model == "AdaBoostRegression": - base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] - param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] - else: - self.logger.error( - "Passed sklearn model " + self.regression_model + " is not valid" - ) + base_model, param_grid = self.get_regression_model() self.model = make_pipeline(StandardScaler(), base_model) @@ -226,12 +257,10 @@ def fit(self, date_features: Optional[list] = None) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.regression_model + " model") + self.logger.info("Training a %s model", self.regression_model) start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ", self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + self.logger.info("Elapsed time for model fit: %s", time.time() - start_time) self.model = self.grid_search.best_estimator_ @@ -240,20 +269,21 @@ def fit(self, date_features: Optional[list] = None) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test, predictions) self.logger.info( - f"Prediction R2 score of fitted model on test data: {pred_metric}" + "Prediction R2 score of fitted model on test data: %s", + pred_metric, ) - def predict(self, new_values: list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - + def predict(self: MLRegressor, new_values: list) -> np.ndarray: + """Predict a new value. - :param new_values: The new values for the features(in the same order as the features list). \ + :param new_values: The new values for the features \ + (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. 
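# A short sketch of the input-shape contract this predict() method assumes:
# scikit-learn estimators expect a 2-D array of shape (n_samples, n_features),
# so a single observation must be wrapped before calling model.predict(). The
# values below mirror the [12.79, 4.766, 1, 2] payload used in the tests later
# in this series (degreeday, solar, month, day_of_week).
import numpy as np

new_values = [12.79, 4.766, 1, 2]
X_new = np.array([new_values])  # shape (1, 4): one row, four features
assert X_new.shape == (1, 4)
# prediction = mlr.model.predict(X_new)  # would return an ndarray of length 1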
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for " + self.model_type) + self.logger.info("Performing a prediction for %s", self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) From 6fae7a4280043240c07bbdb941a0c03e20ef0d8e Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 047/111] Add csv-prediction --- src/emhass/command_line.py | 1 - src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 1706d34c..e6940518 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -16,7 +16,6 @@ from distutils.util import strtobool - from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
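# The num_lags rule of thumb in the docstring above is simple arithmetic: one
# day of lags equals 24 hours divided by the time step. A tiny hypothetical
# helper (not in the patch) makes the two quoted cases explicit:
def num_lags_for_one_day(timestep_minutes: int) -> int:
    # 24 h * 60 min divided by the optimization time step in minutes
    return int(24 * 60 / timestep_minutes)

assert num_lags_for_one_day(30) == 48  # 30-minute time step
assert num_lags_for_one_day(60) == 24  # 1-hour time step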
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From b4293c009588a47c2bed748b6dd4c6ba15472020 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 048/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
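# A hypothetical, minimal sketch of the "split up fit and predict" idea this
# commit moves toward (names here are illustrative, not from the patch): train
# once in fit(), keep the fitted estimator on the instance, and let predict()
# reuse it instead of retraining on every call.
import numpy as np
from sklearn.linear_model import LinearRegression

class TinyPredictor:
    def __init__(self) -> None:
        self.model = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        # Train once and keep the fitted estimator around
        self.model = LinearRegression().fit(X, y)

    def predict(self, new_values: list) -> np.ndarray:
        if self.model is None:
            msg = "call fit() before predict()"
            raise RuntimeError(msg)
        return self.model.predict(np.array([new_values]))

p = TinyPredictor()
p.fit(np.array([[1.0], [2.0], [3.0]]), np.array([2.0, 4.0, 6.0]))
print(p.predict([4.0]))  # ~[8.0]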
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From fa8f6c0cb8e6dc430eacdd54cb37fb87daea3afc Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 049/111] gitignore fun --- .vscode/launch.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 10313c97..b953c7d3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,10 +10,11 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "purpose":["debug-in-terminal"], "justMyCode": true, "env": { @@ -21,15 +22,15 @@ "OPTIONS_PATH": "/workspaces/emhass/options.json", "SECRETS_PATH": "/workspaces/emhass/secrets_emhass.yaml", "DATA_PATH": "/workspaces/emhass/data/", - "LOGGING_LEVEL": "DEBUG" } }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "args": ["--addon", "true", "--no_response", "true"], "purpose":["debug-in-terminal"], "justMyCode": true, @@ -44,7 +45,6 @@ "LAT": "45.83", //optional change "LON": "6.86", //optional change "ALT": "4807.8", //optional change - "LOGGING_LEVEL": "DEBUG" //optional change }, } From 54966d45987ac21ba69ef6f0633b346e37154b5b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 050/111] python -> debugpy --- .vscode/launch.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b953c7d3..ec6c6987 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,12 +10,14 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "purpose":["debug-in-terminal"], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -26,13 +28,20 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": 
"debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "args": ["--addon", "true", "--no_response", "true"], - "purpose":["debug-in-terminal"], + "args": [ + "--addon", + "true", + "--no_response", + "true" + ], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -46,7 +55,6 @@ "LON": "6.86", //optional change "ALT": "4807.8", //optional change }, - - } + } ] } \ No newline at end of file From ef4e17703503a1e23910faa459a98b4e03b3ad67 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 051/111] launch.json --- .vscode/launch.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ec6c6987..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,9 +12,8 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "purpose": [ "debug-in-terminal" ], @@ -30,9 +29,8 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "args": [ "--addon", "true", From 1a502e1d3ef6c86fee14574a50070beca9005f78 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 052/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. 
- - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. 
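# With csv_predictor.py deleted here, its job is handled by MLRegressor. A
# rough usage sketch under the fixtures used throughout this series (a
# heating_prediction.csv with degreeday/solar/hour/timestamp columns):
import logging

import pandas as pd

from emhass.machine_learning_regressor import MLRegressor

logger = logging.getLogger(__name__)
data = pd.read_csv("heating_prediction.csv", parse_dates=True)
mlr = MLRegressor(
    data,
    "heating_hours_degreeday",  # model_type
    "AdaBoostRegression",       # regression_model
    ["degreeday", "solar"],     # features
    "hour",                     # target
    "timestamp",                # timestamp column
    logger,
)
mlr.fit(date_features=["month", "day_of_week"])
print(mlr.predict([12.79, 4.766, 1, 2]))  # one value per feature + date feature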
- - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From 6095a2c77d361c129db35bdac958b68a8a0ad655 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 18 Mar 2024 09:33:20 +0100 Subject: [PATCH 053/111] remove KNeighborsRegressor --- src/emhass/machine_learning_regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 95f624b3..732b4266 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -95,7 +95,7 @@ def __init__( # noqa: PLR0913 :type model_type: str :param regression_model: The model that will be used. For now only \ this options are possible: `LinearRegression`, `RidgeRegression`, \ - `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `LassoRegression`, `RandomForestRegression`, \ `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. 
\ From 8fd7fbf0d912fe2986034218d0ab4e7fbfb22433 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 14:07:21 +0200 Subject: [PATCH 054/111] add tests --- src/emhass/command_line.py | 109 ++++--- tests/test_command_line_utils.py | 344 ++++++++++++++++++++--- tests/test_machine_learning_regressor.py | 113 ++++++++ 3 files changed, 488 insertions(+), 78 deletions(-) create mode 100644 tests/test_machine_learning_regressor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index e6940518..bafb84f2 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -240,9 +240,9 @@ def set_input_data_dict( return False df_input_data = rh.df_final.copy() - elif set_type == "regressor-model-fit": + elif set_type == "regressor-model-fit" or set_type == "regressor-model-predict": - df_input_data_dayahead = None + df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None @@ -250,7 +250,13 @@ def set_input_data_dict( features = params["passed_data"]["features"] target = params["passed_data"]["target"] timestamp = params["passed_data"]["timestamp"] - filename_path = pathlib.Path(base_path) / csv_file + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file + + else: + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -266,13 +272,8 @@ def set_input_data_dict( if not set(required_columns).issubset(df_input_data.columns): logger.error("The cvs file does not contain the required columns.") raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" + f"CSV file should contain the following columns: {', '.join(required_columns)}", ) - elif set_type == "regressor-model-predict": - df_input_data, df_input_data_dayahead = None, None - P_PV_forecast, P_load_forecast = None, None - days_list = None - params = json.loads(params) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None @@ -280,7 +281,7 @@ def set_input_data_dict( days_list = None else: logger.error( - "The passed action argument and hence the set_type parameter for setup is not valid" + "The passed action argument and hence the set_type parameter for setup is not valid", ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -541,7 +542,7 @@ def forecast_model_predict( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return # Make predictions @@ -629,7 +630,7 @@ def forecast_model_tune( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this tune method" + "The ML forecaster file was not found, please run a model fit method before this tune method", ) return None, None # Tune the model @@ -643,7 +644,9 @@ def forecast_model_tune( def regressor_model_fit( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, ) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. 
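# The issubset() guard added above in set_input_data_dict() fails fast with a
# readable message when a required column is missing from the CSV. A
# self-contained illustration with a toy DataFrame:
import pandas as pd

df_input_data = pd.DataFrame({"degreeday": [10.2], "solar": [3.1]})
required_columns = ["degreeday", "solar", "hour"]
if not set(required_columns).issubset(df_input_data.columns):
    raise ValueError(
        f"CSV file should contain the following columns: {', '.join(required_columns)}",
    )
# -> ValueError: CSV file should contain the following columns: degreeday, solar, hour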
@@ -662,9 +665,16 @@ def regressor_model_fit( timestamp = input_data_dict["params"]["passed_data"]["timestamp"] date_features = input_data_dict["params"]["passed_data"]["date_features"] root = input_data_dict["root"] + # The MLRegressor object mlr = MLRegressor( - data, model_type, regression_model, features, target, timestamp, logger + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, ) # Fit the ML model mlr.fit(date_features=date_features) @@ -673,10 +683,14 @@ def regressor_model_fit( filename = model_type + "_mlr.pkl" with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) + return mlr def regressor_model_predict( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlr: Optional[MLRegressor] = None, ) -> None: """Perform a prediction from csv file. @@ -697,7 +711,7 @@ def regressor_model_predict( mlr = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return new_values = input_data_dict["params"]["passed_data"]["new_values"] @@ -715,14 +729,16 @@ def regressor_model_predict( ] # Publish prediction idx = 0 - input_data_dict["rh"].post_data( - prediction, - idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var="mlregressor", - ) + if not debug: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + return prediction def publish_data( @@ -813,7 +829,7 @@ def publish_data( if "P_deferrable{}".format(k) not in opt_res_latest.columns: logger.error( "P_deferrable{}".format(k) - + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.", ) else: input_data_dict["rh"].post_data( @@ -830,7 +846,7 @@ def publish_data( if input_data_dict["opt"].optim_conf["set_use_battery"]: if "P_batt" not in opt_res_latest.columns: logger.error( - "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + "P_batt was not found in results DataFrame. 
Optimization task may need to be relaunched or it did not converge to a solution.", ) else: custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] @@ -886,7 +902,7 @@ def publish_data( if "optim_status" not in opt_res_latest: opt_res_latest["optim_status"] = "Optimal" logger.warning( - "no optim_status in opt_res_latest, run an optimization task first" + "no optim_status in opt_res_latest, run an optimization task first", ) input_data_dict["rh"].post_data( opt_res_latest["optim_status"], @@ -957,7 +973,9 @@ def main(): naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune", ) parser.add_argument( - "--config", type=str, help="Define path to the config.yaml file" + "--config", + type=str, + help="Define path to the config.yaml file", ) parser.add_argument( "--costfun", @@ -984,7 +1002,10 @@ def main(): help="Pass runtime optimization parameters as dictionnary", ) parser.add_argument( - "--debug", type=strtobool, default="False", help="Use True for testing purposes" + "--debug", + type=strtobool, + default="False", + help="Use True for testing purposes", ) args = parser.parse_args() # The path to the configuration files @@ -995,12 +1016,14 @@ def main(): # Additionnal argument try: parser.add_argument( - "--version", action="version", version="%(prog)s " + version("emhass") + "--version", + action="version", + version="%(prog)s " + version("emhass"), ) args = parser.parse_args() except Exception: logger.info( - "Version not found for emhass package. Or importlib exited with PackageNotFoundError." + "Version not found for emhass package. Or importlib exited with PackageNotFoundError.", ) # Setup parameters input_data_dict = set_input_data_dict( @@ -1040,7 +1063,25 @@ def main(): else: mlf = None df_pred_optim, mlf = forecast_model_tune( - input_data_dict, logger, debug=args.debug, mlf=mlf + input_data_dict, + logger, + debug=args.debug, + mlf=mlf, + ) + opt_res = None + elif args.action == "regressor-model-fit": + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + opt_res = None + elif args.action == "regressor-model-predict": + if args.debug: + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + else: + mlr = None + prediction = regressor_model_predict( + input_data_dict, + logger, + debug=args.debug, + mlr=mlr, ) opt_res = None elif args.action == "publish-data": @@ -1063,6 +1104,10 @@ def main(): return df_fit_pred, df_fit_pred_backtest, mlf elif args.action == "forecast-model-predict": return df_pred + elif args.action == "regressor-model-fit": + return mlr + elif args.action == "regressor-model-predict": + return prediction elif args.action == "forecast-model-tune": return df_pred_optim, mlf diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index d23aeb06..597b20e7 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -5,10 +5,21 @@ from unittest.mock import patch import pandas as pd import pathlib, json, yaml, copy +import numpy as np from emhass.command_line import set_input_data_dict -from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim -from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import ( + perfect_forecast_optim, + dayahead_forecast_optim, + naive_mpc_optim, +) +from emhass.command_line import ( + forecast_model_fit, + forecast_model_predict, + forecast_model_tune, + regressor_model_fit, + 
regressor_model_predict, +) from emhass.command_line import publish_data from emhass.command_line import main from emhass import utils @@ -316,46 +327,183 @@ def test_forecast_model_fit_predict_tune(self): self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) # Test the tune method - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=True, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=True, mlf=mlf + ) self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) - # Test ijection_dict for tune method on webui + # Test injection_dict for tune method on webui injection_dict = utils.get_injection_dict_forecast_model_tune(df_fit_pred, mlf) self.assertIsInstance(injection_dict, dict) - self.assertIsInstance(injection_dict['figure_0'], str) - - @patch('sys.argv', ['main', '--action', 'test', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--debug', 'True']) + self.assertIsInstance(injection_dict["figure_0"], str) + + def test_regressor_model_fit_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params_json = json.dumps(params) + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["regression_model"] + == "AdaBoostRegression", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + ) + mlr = regressor_model_fit(input_data_dict, logger, debug=True) + + # def test_regressor_model_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-predict" # predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + 
input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] + == "Predicted hours", + ) + + regressor_model_predict(input_data_dict, logger, debug=True, mlr=mlr) + + @patch( + "sys.argv", + [ + "main", + "--action", + "test", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--debug", + "True", + ], + ) def test_main_wrong_action(self): opt_res = main() self.assertEqual(opt_res, None) - - @patch('sys.argv', ['main', '--action', 'perfect-optim', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--debug', 'True']) + + @patch( + "sys.argv", + [ + "main", + "--action", + "perfect-optim", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--debug", + "True", + ], + ) def test_main_perfect_forecast_optim(self): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) + self.assertTrue(opt_res.isnull().sum().sum() == 0) self.assertIsInstance(opt_res.index, pd.core.indexes.datetimes.DatetimeIndex) - self.assertIsInstance(opt_res.index.dtype, pd.core.dtypes.dtypes.DatetimeTZDtype) - + self.assertIsInstance( + opt_res.index.dtype, + pd.core.dtypes.dtypes.DatetimeTZDtype, + ) + def test_main_dayahead_forecast_optim(self): - with patch('sys.argv', ['main', '--action', 'dayahead-optim', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, - '--debug', 'True']): + with patch( + "sys.argv", + [ + "main", + "--action", + "dayahead-optim", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + self.params_json, + "--runtimeparams", + self.runtimeparams_json, + "--debug", + "True", + ], + ): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + def test_main_naive_mpc_optim(self): - with patch('sys.argv', ['main', '--action', 'naive-mpc-optim', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, - '--debug', 'True']): + with patch( + "sys.argv", + [ + "main", + "--action", + "naive-mpc-optim", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + self.params_json, + "--runtimeparams", + self.runtimeparams_json, + "--debug", + "True", + ], + ): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - self.assertTrue(len(opt_res)==10) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + self.assertTrue(len(opt_res) == 10) + def test_main_forecast_model_fit(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -386,20 +534,33 @@ def test_main_forecast_model_predict(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) - with patch('sys.argv', ['main', '--action', 'forecast-model-predict', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', params_json, '--runtimeparams', runtimeparams_json, - '--debug', 'True']): + 
with patch( + "sys.argv", + [ + "main", + "--action", + "forecast-model-predict", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): df_pred = main() self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) - + def test_main_forecast_model_tune(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -408,27 +569,118 @@ def test_main_forecast_model_tune(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) - with patch('sys.argv', ['main', '--action', 'forecast-model-tune', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', params_json, '--runtimeparams', runtimeparams_json, - '--debug', 'True']): + with patch( + "sys.argv", + [ + "main", + "--action", + "forecast-model-tune", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): df_pred_optim, mlf = main() self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) - - @patch('sys.argv', ['main', '--action', 'publish-data', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--debug', 'True']) + + def test_main_regressor_model_fit(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-fit", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + mlr = main() + + def test_main_regressor_model_predict(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-predict", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + prediction = main() + self.assertIsInstance(prediction, np.ndarray) + + @patch( + "sys.argv", + [ + "main", + "--action", + "publish-data", + "--config", + str(pathlib.Path(root + 
"/config_emhass.yaml")), + "--debug", + "True", + ], + ) def test_main_publish_data(self): opt_res = main() - self.assertTrue(opt_res==None) - -if __name__ == '__main__': + self.assertTrue(opt_res == None) + + +if __name__ == "__main__": unittest.main() ch.close() logger.removeHandler(ch) diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py new file mode 100644 index 00000000..88137b0d --- /dev/null +++ b/tests/test_machine_learning_regressor.py @@ -0,0 +1,113 @@ +"""Machine learning regressor test module.""" + +import copy +import json +import pathlib +import unittest + +import numpy as np +import pandas as pd +from sklearn.pipeline import Pipeline +import yaml +from emhass import utils +from emhass.command_line import set_input_data_dict +from emhass.machine_learning_regressor import MLRegressor +from sklearn.ensemble import ( + AdaBoostRegressor, +) + +# the root folder +root = str(utils.get_root(__file__, num_parent=2)) +# create logger +logger, ch = utils.get_logger(__name__, root, save_to_file=False) + + +class TestMLRegressor(unittest.TestCase): + @staticmethod + def get_test_params(): + with open(root + "/config_emhass.yaml", "r") as file: + params = yaml.load(file, Loader=yaml.FullLoader) + params.update( + { + "params_secrets": { + "hass_url": "http://supervisor/core/api", + "long_lived_token": "${SUPERVISOR_TOKEN}", + "time_zone": "Europe/Paris", + "lat": 45.83, + "lon": 6.86, + "alt": 8000.0, + }, + }, + ) + return params + + def setUp(self): + params = TestMLRegressor.get_test_params() + params_json = json.dumps(params) + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = copy.deepcopy(json.loads(params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + self.input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + data = copy.deepcopy(self.input_data_dict["df_input_data"]) + self.assertIsInstance(data, pd.DataFrame) + self.csv_file = self.input_data_dict["params"]["passed_data"]["csv_file"] + features = self.input_data_dict["params"]["passed_data"]["features"] + target = self.input_data_dict["params"]["passed_data"]["target"] + regression_model = self.input_data_dict["params"]["passed_data"][ + "regression_model" + ] + model_type = self.input_data_dict["params"]["passed_data"]["model_type"] + timestamp = self.input_data_dict["params"]["passed_data"]["timestamp"] + self.date_features = self.input_data_dict["params"]["passed_data"][ + "date_features" + ] + self.new_values = self.input_data_dict["params"]["passed_data"]["new_values"] + self.mlr = MLRegressor( + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, + ) + + def test_fit(self): + self.mlr.fit(self.date_features) + self.assertIsInstance(self.mlr.model, Pipeline) + + def test_predict(self): + self.mlr.fit(self.date_features) + predictions = 
self.mlr.predict(self.new_values) + self.assertIsInstance(predictions, np.ndarray) + + +if __name__ == "__main__": + unittest.main() + ch.close() + logger.removeHandler(ch) From c51d54049f1569a0b8547f8e04af3329f82a18d7 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 15:45:58 +0200 Subject: [PATCH 055/111] Rename paragrams --- tests/test_command_line_utils.py | 33 +++++++++++++----------- tests/test_machine_learning_regressor.py | 11 +++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index 597b20e7..eaad3adb 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -344,11 +344,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-fit" # fit and predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -369,14 +369,16 @@ def test_regressor_model_fit_predict(self): get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["regression_model"] == "AdaBoostRegression", ) self.assertTrue( - input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + input_data_dict["params"]["passed_data"]["csv_file"] + == "heating_prediction.csv", ) mlr = regressor_model_fit(input_data_dict, logger, debug=True) @@ -387,11 +389,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-predict" # predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -414,7 +416,8 @@ def test_regressor_model_fit_predict(self): get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] @@ -599,11 +602,11 @@ def test_main_forecast_model_tune(self): def test_main_regressor_model_fit(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], } @@ -631,11 +634,11 @@ def test_main_regressor_model_fit(self): def test_main_regressor_model_predict(self): params = copy.deepcopy(json.loads(self.params_json)) 
runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 88137b0d..74702b6f 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -7,14 +7,11 @@ import numpy as np import pandas as pd -from sklearn.pipeline import Pipeline import yaml from emhass import utils from emhass.command_line import set_input_data_dict from emhass.machine_learning_regressor import MLRegressor -from sklearn.ensemble import ( - AdaBoostRegressor, -) +from sklearn.pipeline import Pipeline # the root folder root = str(utils.get_root(__file__, num_parent=2)) @@ -50,11 +47,11 @@ def setUp(self): action = "regressor-model-fit" # fit and predict methods params = copy.deepcopy(json.loads(params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], From 1590404ecab146805a9d2103b5d4cf32cbec2783 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 11:40:54 +0200 Subject: [PATCH 056/111] Ready for review (I think) --- docs/mlregressor.md | 106 +++++++++++++++++++---- src/emhass/command_line.py | 53 +++++++----- src/emhass/machine_learning_regressor.py | 3 +- src/emhass/utils.py | 15 +++- 4 files changed, 133 insertions(+), 44 deletions(-) diff --git a/docs/mlregressor.md b/docs/mlregressor.md index 7206af99..dee5fccd 100644 --- a/docs/mlregressor.md +++ b/docs/mlregressor.md @@ -8,6 +8,7 @@ This API provides two main methods: - predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. + ## A basic model fit To train a model use the `regressor-model-fit` end point. @@ -45,28 +46,38 @@ A correct `curl` call to launch a model fit can look like this: ``` curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit ``` - -After applying the `curl` command to fit the model the following information is logged by EMHASS: - - 2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model - 2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176 - 2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957 - -## The predict method - -To obtain a prediction using a previously trained model use the `regressor-model-predict` end point. 
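
The same fit can also be launched from Python rather than curl. This is a sketch only, mirroring the call above with the runtime keys and test values used elsewhere in this patch; it assumes the `requests` package and an EMHASS server on localhost:5000:

```
# Sketch: launching regressor-model-fit from Python (illustrative, not part of EMHASS).
import requests

payload = {
    "csv_file": "heating_prediction.csv",
    "features": ["degreeday", "solar"],
    "target": "hour",
    "regression_model": "AdaBoostRegression",
    "model_type": "heating_hours_degreeday",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
}
response = requests.post("http://localhost:5000/action/regressor-model-fit", json=payload)
print(response.status_code, response.reason)
```
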
+A Home Assistant `rest_command` can look like this: ``` -curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-predict +fit_heating_hours: + url: http://127.0.0.1:5000/action/regressor-model-fit + method: POST + content_type: "application/json" + payload: >- + { + "csv_file": "heating_prediction.csv", + "features":["degreeday", "solar"], + "target": "hours", + "regression_model": "RandomForestRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"] + } ``` +After fitting the model the following information is logged by EMHASS: -If needed pass the correct `model_type` like this: + 2024-04-17 12:41:50,019 - web_server - INFO - Passed runtime parameters: {'csv_file': 'heating_prediction.csv', 'features': ['degreeday', 'solar'], 'target': 'heating_hours', 'regression_model': 'RandomForestRegression', 'model_type': 'heating_hours_degreeday', 'timestamp': 'timestamp', 'date_features': ['month', 'day_of_week']} + 2024-04-17 12:41:50,020 - web_server - INFO - >> Setting input data dict + 2024-04-17 12:41:50,021 - web_server - INFO - Setting up needed data + 2024-04-17 12:41:50,048 - web_server - INFO - >> Performing a machine learning regressor fit... + 2024-04-17 12:41:50,049 - web_server - INFO - Performing a MLRegressor fit for heating_hours_degreeday + 2024-04-17 12:41:50,064 - web_server - INFO - Training a RandomForestRegression model + 2024-04-17 12:41:57,852 - web_server - INFO - Elapsed time for model fit: 7.78800106048584 + 2024-04-17 12:41:57,862 - web_server - INFO - Prediction R2 score of fitted model on test data: -0.5667567505914477 -``` -curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "load_forecast"}' http://localhost:5000/action/regressor-model-predict -``` +## The predict method -It is possible to publish the predict method results to a Home Assistant sensor. +To obtain a prediction using a previously trained model use the `regressor-model-predict` end point. The list of parameters needed to set the data publish task is: @@ -89,3 +100,66 @@ runtimeparams = { "model_type": "heating_hours_degreeday" } ``` + +Pass the correct `model_type` like this: + +``` +curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict +``` + +A Home Assistant `rest_command` can look like this: + +``` +predict_heating_hours: + url: http://localhost:5001/action/regressor-model-predict + method: POST + content_type: "application/json" + payload: >- + { + "mlr_predict_entity_id": "sensor.predicted_hours", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" + } +``` +After predicting the model the following information is logged by EMHASS: + +``` +2024-04-17 14:25:40,695 - web_server - INFO - Passed runtime parameters: {'mlr_predict_entity_id': 'sensor.predicted_hours', 'mlr_predict_unit_of_measurement': 'h', 'mlr_predict_friendly_name': 'Predicted hours', 'new_values': [8.2, 7.23, 2, 6], 'model_type': 'heating_hours_degreeday'} +2024-04-17 14:25:40,696 - web_server - INFO - >> Setting input data dict +2024-04-17 14:25:40,696 - web_server - INFO - Setting up needed data +2024-04-17 14:25:40,700 - web_server - INFO - >> Performing a machine learning regressor predict... 
+2024-04-17 14:25:40,715 - web_server - INFO - Performing a prediction for heating_hours_degreeday +2024-04-17 14:25:40,750 - web_server - INFO - Successfully posted to sensor.predicted_hours = 3.716600000000001 +``` +The predict method will publish the result to a Home Assistant sensor. + + +## How to store data in a csv file from Home Assistant +Notify to a file +``` +notify: + - platform: file + name: heating_hours_prediction + timestamp: false + filename: /share/heating_prediction.csv +``` +Then you need an automation to notify to this file +``` +alias: "Heating csv" +id: 157b1d57-73d9-4f39-82c6-13ce0cf42 +trigger: + - platform: time + at: "23:59:32" +action: + - service: notify.heating_hours_prediction + data: + message: > + {% set degreeday = states('sensor.degree_day_daily') |float %} + {% set heating_hours = states('sensor.heating_hours_today') |float | round(2) %} + {% set solar = states('sensor.solar_daily') |float | round(3) %} + {% set time = now() %} + + {{time}},{{degreeday}},{{solar}},{{heating_hours}} +``` \ No newline at end of file diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 313cf885..feee327f 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -246,34 +246,39 @@ def set_input_data_dict( P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params["passed_data"]["csv_file"] - features = params["passed_data"]["features"] - target = params["passed_data"]["target"] - timestamp = params["passed_data"]["timestamp"] - if get_data_from_file: - base_path = base_path + "/data" - filename_path = pathlib.Path(base_path) / csv_file + csv_file = params["passed_data"].get("csv_file", None) + if "features" in params["passed_data"]: + features = params["passed_data"]["features"] + if "target" in params["passed_data"]: + target = params["passed_data"]["target"] + if "timestamp" in params["passed_data"]: + timestamp = params["passed_data"]["timestamp"] + if csv_file: + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file - else: - filename_path = pathlib.Path(base_path) / csv_file + else: + filename_path = pathlib.Path(base_path) / csv_file - if filename_path.is_file(): - df_input_data = pd.read_csv(filename_path, parse_dates=True) + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) - else: - logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + csv_file + " was not found.") - required_columns = [] - required_columns.extend(features) - required_columns.append(target) - if timestamp is not None: - required_columns.append(timestamp) + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(features) + required_columns.append(target) + if timestamp is not None: + required_columns.append(timestamp) - if not set(required_columns).issubset(df_input_data.columns): - logger.error("The cvs file does not contain the required columns.") - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}", - ) + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + msg = f"CSV file should contain the following columns: {', '.join(required_columns)}" + raise ValueError( + msg, + ) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = 
None, None diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 732b4266..f0d3c532 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -190,9 +190,10 @@ def get_regression_model(self: MLRegressor) -> tuple[str, str]: param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model %s is not valid", + "Passed model %s is not valid", self.regression_model, ) + return None return base_model, param_grid def fit(self: MLRegressor, date_features: list | None = None) -> None: diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 836f1085..2517f4b2 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -228,12 +228,12 @@ def treat_runtimeparams( params["passed_data"]["csv_file"] = csv_file params["passed_data"]["features"] = features params["passed_data"]["target"] = target - if "timestamp" not in runtimeparams.keys(): + if "timestamp" not in runtimeparams: params["passed_data"]["timestamp"] = None else: timestamp = runtimeparams["timestamp"] params["passed_data"]["timestamp"] = timestamp - if "date_features" not in runtimeparams.keys(): + if "date_features" not in runtimeparams: params["passed_data"]["date_features"] = [] else: date_features = runtimeparams["date_features"] @@ -242,6 +242,15 @@ def treat_runtimeparams( if set_type == "regressor-model-predict": new_values = runtimeparams["new_values"] params["passed_data"]["new_values"] = new_values + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target # Treating special data passed for MPC control case if set_type == "naive-mpc-optim": @@ -335,7 +344,7 @@ def treat_runtimeparams( sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model if "regression_model" not in runtimeparams.keys(): - regression_model = "LinearRegression" + regression_model = "AdaBoostRegression" else: regression_model = runtimeparams["regression_model"] params["passed_data"]["regression_model"] = regression_model From f8b43aaf685fe6bc6f4d518d9f94416d22d96a25 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 14:11:23 +0200 Subject: [PATCH 057/111] remove *.csv from .gitignore to upload heating_prediction.csv --- .gitignore | 2 +- data/heating_prediction.csv | 130 ++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 data/heating_prediction.csv diff --git a/.gitignore b/.gitignore index 581080c8..604a82a1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ secrets_emhass.yaml .vscode/launch.json .vscode/settings.json .vscode/tasks.json -*.csv +# *.csv *.html *.pkl data/actionLogs.txt diff --git a/data/heating_prediction.csv b/data/heating_prediction.csv new file mode 100644 index 00000000..f50a8a49 --- /dev/null +++ b/data/heating_prediction.csv @@ -0,0 +1,130 @@ +timestamp,degreeday,solar,hour +2023-11-10 23:59:32.458039+01:00,12.23,3.982,2.87 +2023-11-11 23:59:32.459778+01:00,12.94,13.723,3.14 +2023-11-12 23:59:32.462220+01:00,14.45,4.925,3.5 +2023-11-13 23:59:32.462167+01:00,8.49,2.138,3.19 +2023-11-14 23:59:32.338942+01:00,8.61,2.444,2.91 +2023-11-15 23:59:32.195198+01:00,9.83,10.685,2.88 +2023-11-16 
23:59:32.501044+01:00,12.8,1.955,1.28 +2023-11-17 23:59:32.316366+01:00,13.35,8.742,2.97 +2023-11-18 23:59:32.082785+01:00,11.84,0.849,3.42 +2023-11-19 23:59:32.077198+01:00,7.3,10.85,1.9 +2023-11-20 23:59:32.431964+01:00,9.91,6.395,2.48 +2023-11-21 23:59:32.295705+01:00,11.44,2.678,2.91 +2023-11-22 23:59:32.377740+01:00,16.14,2.994,2.96 +2023-11-23 23:59:32.385890+01:00,9.31,5.346,2.91 +2023-11-24 23:59:32.376194+01:00,12.96,8.61,2.9 +2023-11-25 23:59:32.373666+01:00,14.91,12.31,3.47 +2023-11-26 23:59:32.373647+01:00,14.79,2.589,3.69 +2023-11-27 23:59:32.379920+01:00,14.92,0.322,6.05 +2023-11-28 23:59:32.213947+01:00,18.59,20.342,2.94 +2023-11-29 23:59:32.217384+01:00,19.05,5.393,5.41 +2023-11-30 23:59:32.222641+01:00,21.27,1.899,6.77 +2023-12-01 23:59:32.224533+01:00,21.3,1.233,5.75 +2023-12-02 23:59:32.107119+01:00,21.97,14.653,2.96 +2023-12-03 23:59:32.107436+01:00,20.61,4.766,8.89 +2023-12-04 23:59:32.116642+01:00,18.36,1.349,6.73 +2023-12-05 23:59:32.191254+01:00,16.93,0.869,6.17 +2023-12-06 23:59:32.176803+01:00,16.8,5.413,5.38 +2023-12-07 23:59:32.251031+01:00,17.67,8.089,5.98 +2023-12-08 23:59:32.255888+01:00,14.37,1.203,5.63 +2023-12-09 23:59:32.109040+01:00,11.94,0.814,5.08 +2023-12-10 23:59:32.103738+01:00,9.72,6.051,3.42 +2023-12-11 23:59:32.497717+01:00,9.83,1.459,3.87 +2023-12-12 23:59:32.502503+01:00,11.18,4.176,3.31 +2023-12-13 23:59:32.504794+01:00,11.09,2.91,3.1 +2023-12-14 23:59:32.177489+01:00,13.88,7.53,2.89 +2023-12-15 23:59:32.186292+01:00,12.18,2.129,5.68 +2023-12-16 23:59:32.176812+01:00,11.75,1.641,3.46 +2023-12-17 23:59:32.119874+01:00,12.18,14.868,3.46 +2023-12-18 23:59:32.120168+01:00,14.75,1.283,3.12 +2023-12-19 23:59:32.120101+01:00,12.82,0.09,5.07 +2023-12-20 23:59:32.249731+01:00,12.8,3.803,3.6 +2023-12-21 23:59:32.249135+01:00,8.73,2.096,3.55 +2023-12-22 23:59:32.385164+01:00,9.12,1.278,0.85 +2023-12-23 23:59:32.382910+01:00,8.99,1.848,0.0 +2023-12-24 23:59:32.382457+01:00,8.04,0.165,7.42 +2023-12-25 23:59:32.303520+01:00,7.56,1.028,2.93 +2023-12-26 23:59:32.105788+01:00,10.55,9.274,2.92 +2023-12-27 23:59:32.183107+01:00,11.78,2.026,3.39 +2023-12-28 23:59:32.183405+01:00,8.91,3.68,3.19 +2023-12-29 23:59:32.399740+01:00,9.35,2.464,2.95 +2023-12-30 23:59:32.091110+01:00,11.07,7.948,3.44 +2023-12-31 23:59:32.257530+01:00,10.51,3.5,3.48 +2024-01-01 23:59:32.106161+01:00,12.75,4.046,3.08 +2024-01-02 23:59:32.103187+01:00,8.81,0.562,4.46 +2024-01-03 23:59:32.429947+01:00,10.03,2.184,3.26 +2024-01-04 23:59:32.436773+01:00,11.22,5.662,2.97 +2024-01-05 23:59:32.165969+01:00,12.42,1.199,3.6 +2024-01-06 23:59:32.110208+01:00,15.35,0.295,4.32 +2024-01-07 23:59:32.147775+01:00,19.88,0.896,6.19 +2024-01-08 23:59:32.242815+01:00,22.74,6.468,5.82 +2024-01-09 23:59:32.201342+01:00,24.38,21.307,6.92 +2024-01-10 23:59:32.411136+01:00,24.84,18.89,1.53 +2024-01-11 23:59:32.399433+01:00,23.57,19.27,3.05 +2024-01-12 23:59:32.467622+01:00,18.22,1.977,13.98 +2024-01-13 23:59:32.077428+01:00,17.9,0.472,6.93 +2024-01-14 23:59:32.127844+01:00,19.65,1.346,6.95 +2024-01-15 23:59:32.125062+01:00,19.49,4.35,7.82 +2024-01-16 23:59:32.280474+01:00,21.21,9.238,5.7 +2024-01-17 23:59:32.283951+01:00,23.17,1.193,7.37 +2024-01-18 23:59:32.361241+01:00,21.61,17.307,6.67 +2024-01-19 23:59:32.341654+01:00,22.06,21.004,6.24 +2024-01-20 23:59:32.359151+01:00,21.95,12.912,6.43 +2024-01-21 23:59:32.126221+01:00,17.38,3.28,7.45 +2024-01-22 23:59:32.126346+01:00,9.47,7.645,6.1 +2024-01-23 23:59:32.417727+01:00,11.87,7.689,4.76 +2024-01-24 23:59:32.420933+01:00,8.15,10.052,3.62 +2024-01-25 
23:59:32.419138+01:00,12.38,3.785,3.98 +2024-01-26 23:59:32.422066+01:00,11.4,11.94,3.1 +2024-01-27 23:59:32.176538+01:00,17.96,19.741,3.45 +2024-01-28 23:59:32.168328+01:00,16.72,20.366,4.85 +2024-01-29 23:59:32.173916+01:00,13.11,16.972,4.51 +2024-01-30 23:59:32.503034+01:00,11.21,4.013,3.99 +2024-01-31 23:59:32.179265+01:00,12.79,4.766,3.73 +2024-02-01 23:59:32.487147+01:00,12.74,23.924,2.98 +2024-02-02 23:59:32.570084+01:00,13.0,2.98,5.04 +2024-02-03 23:59:32.484878+01:00,9.26,1.413,3.48 +2024-02-04 23:59:32.472168+01:00,8.35,4.306,3.47 +2024-02-05 23:59:32.409856+01:00,9.78,5.704,0.0 +2024-02-06 23:59:32.439147+01:00,9.15,2.431,6.56 +2024-02-07 23:59:32.235231+01:00,14.42,3.839,3.07 +2024-02-08 23:59:32.441543+01:00,13.9,1.412,5.94 +2024-02-09 23:59:32.443230+01:00,8.2,7.246,2.96 +2024-02-10 23:59:32.504326+01:00,8.37,8.567,3.48 +2024-02-11 23:59:32.452959+01:00,10.44,5.304,0.0 +2024-02-12 23:59:32.450999+01:00,12.65,16.004,3.42 +2024-02-13 23:59:32.343162+01:00,13.84,19.809,3.16 +2024-02-14 23:59:32.339408+01:00,8.48,1.98,4.52 +2024-02-15 23:59:32.339971+01:00,6.13,9.952,2.98 +2024-02-16 23:59:32.455273+01:00,7.66,3.675,3.06 +2024-02-17 23:59:32.097937+01:00,8.56,12.269,3.48 +2024-02-18 23:59:32.126377+01:00,9.59,2.205,3.04 +2024-02-19 23:59:32.421243+01:00,10.22,3.731,2.97 +2024-02-20 23:59:32.421985+01:00,11.61,13.775,0.0 +2024-02-21 23:59:32.371300+01:00,10.52,4.856,3.02 +2024-02-22 23:59:32.373153+01:00,9.53,4.256,3.48 +2024-02-23 23:59:32.372545+01:00,13.66,8.743,4.09 +2024-02-24 23:59:32.197044+01:00,14.44,7.842,4.3 +2024-02-25 23:59:32.196386+01:00,12.41,16.235,3.48 +2024-02-26 23:59:32.409648+01:00,14.63,2.096,5.05 +2024-02-27 23:59:32.373347+01:00,14.5,29.437,3.21 +2024-02-28 23:59:32.407538+01:00,15.38,6.475,4.88 +2024-02-29 23:59:32.194724+01:00,11.83,3.238,4.68 +2024-03-01 23:59:32.084520+01:00,10.56,14.352,3.8 +2024-03-02 23:59:32.066434+01:00,9.94,25.356,3.49 +2024-03-03 23:59:32.270878+01:00,8.9,10.577,3.19 +2024-03-04 23:59:32.274918+01:00,10.67,28.096,2.08 +2024-03-05 23:59:32.315023+01:00,12.19,10.553,2.95 +2024-03-06 23:59:32.441001+01:00,11.38,32.597,2.91 +2024-03-07 23:59:32.440044+01:00,12.39,28.856,2.96 +2024-03-08 23:59:32.228265+01:00,12.01,37.395,2.96 +2024-03-09 23:59:32.081874+01:00,8.72,17.66,3.5 +2024-03-10 23:59:32.335321+01:00,8.0,12.207,3.47 +2024-03-11 23:59:32.139531+01:00,10.39,2.526,2.96 +2024-03-12 23:59:32.136709+01:00,10.24,8.211,2.98 +2024-03-13 23:59:32.407174+01:00,7.19,6.425,2.95 +2024-03-14 23:59:32.342436+01:00,6.06,33.389,1.64 +2024-03-15 23:59:32.266278+01:00,5.63,12.628,2.96 +2024-03-16 23:59:32.155245+01:00,9.57,12.103,3.0 +2024-03-17 23:59:32.366155+01:00,8.43,14.302,0.25 From a7f301cfd6d5a51f5587c1de644f4379a9ca74ff Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:13:47 +0100 Subject: [PATCH 058/111] add /app to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5dc21af8..581080c8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ secrets_emhass.yaml *.html *.pkl data/actionLogs.txt +**/app # Byte-compiled / optimized / DLL files From d40da8622332712d691cf066822c3ca7990a6a3c Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 059/111] Add csv-prediction --- src/emhass/command_line.py | 46 ++++++++++++ src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ src/emhass/retrieve_hass.py | 10 +++ src/emhass/utils.py | 25 +++++++ src/emhass/web_server.py | 6 ++ 5 files changed, 226 insertions(+) 
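
In outline, the csv-prediction module reintroduced below reduces to a short scikit-learn recipe. The following is a simplified sketch of that pattern, using the column names from data/heating_prediction.csv added above and the same train/test split as the patch; it is not the module code itself:

```
# Simplified sketch of the csv-prediction flow (pattern only, not the module).
# Assumes data/heating_prediction.csv from this series is on the current path.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

data = pd.read_csv("data/heating_prediction.csv")  # columns: timestamp, degreeday, solar, hour
X = data[["degreeday", "solar"]].values            # independent variables
y = data["hour"].values                            # dependent variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression().fit(X_train, y_train)
print(model.predict([[12.79, 4.766]]))             # new values for degreeday and solar
```
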
create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 538f4ae6..24ab6132 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,6 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization +from emhass.csv_predictor import CsvPredictor from emhass import utils @@ -154,6 +155,12 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() + elif set_type == "csv-predict": + df_input_data, df_input_data_dayahead = None, None + P_PV_forecast, P_load_forecast = None, None + days_list = None + params = json.loads(params) + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -447,6 +454,45 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf +def csv_predict(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. + + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + model_type = input_data_dict['params']['passed_data']['model_type'] + csv_file = input_data_dict['params']['passed_data']['csv_file'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + new_values = input_data_dict['params']['passed_data']['new_values'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # Fit the ML model + prediction = csv.predict(perform_backtest=perform_backtest) + + csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] + csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] + csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + # Publish Load forecast + idx = 0 + input_data_dict['rh'].post_data(prediction, idx, + csv_predict_entity_id, + csv_predict_unit_of_measurement, + csv_predict_friendly_name, + type_var = 'csv_predictor') + return prediction + def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ 
b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. + :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. 
+ + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index f5fa82de..52397c87 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -307,6 +307,8 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, state = np.round(data_df.loc[data_df.index[idx]],4) elif type_var == 'optim_status': state = data_df.loc[data_df.index[idx]] + elif type_var == 'csv_predictor': + state = data_df[idx] else: state = np.round(data_df.loc[data_df.index[idx]],2) if type_var == 'power': @@ -338,6 +340,14 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, "friendly_name": friendly_name } } + elif type_var == 'csv_predictor': + data = { + "state": state, + "attributes": { + "unit_of_measurement": unit_of_measurement, + "friendly_name": friendly_name + } + } else: data = { "state": "{:.2f}".format(state), diff --git a/src/emhass/utils.py b/src/emhass/utils.py index a2b0df8e..44152dd4 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,6 +155,16 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) + if set_type == "csv-predict": + csv_file = runtimeparams['csv_file'] + independent_variables = runtimeparams['independent_variables'] + dependent_variable = runtimeparams['dependent_variable'] + new_values = runtimeparams['new_values'] + params['passed_data']['csv_file'] = csv_file + params['passed_data']['independent_variables'] = independent_variables + params['passed_data']['dependent_variable'] = dependent_variable + params['passed_data']['new_values'] = new_values + # Treating special data passed for MPC control case if set_type == 'naive-mpc-optim': if 'prediction_horizon' not in runtimeparams.keys(): @@ -281,6 +291,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = 
runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name + if 'csv_predict_entity_id' not in runtimeparams.keys(): + csv_predict_entity_id = "sensor.csv_predictor" + else: + csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] + params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id + if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): + csv_predict_unit_of_measurement = None + else: + csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] + params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement + if 'csv_predict_friendly_name' not in runtimeparams.keys(): + csv_predict_friendly_name = "Csv predictor" + else: + csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] + params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 2fdd2861..6a4549b8 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,6 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import csv_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -193,6 +194,11 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) + elif action_name == 'csv-predict': + app.logger.info(" >> Performing a csv predict...") + csv_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-predict executed... \n' + return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' From 86b9fec42e9ca8a2219a2cc4cc78d7f4e06b4996 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 060/111] cleanup --- src/emhass/command_line.py | 12 ++++++---- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 24ab6132..0cec14fa 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -467,19 +467,21 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] + # data = copy.deepcopy(input_data_dict['df_input_data']) + # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) # Fit the ML model - prediction = csv.predict(perform_backtest=perform_backtest) + prediction = csv.predict() + # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. 
:type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From 21e486cd45cc92f805697d689308cc939be16f85 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 061/111] more cleanup --- src/emhass/command_line.py | 17 +++---- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 2 files changed, 40 insertions(+), 69 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 0cec14fa..b82f96b5 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -455,8 +455,8 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: - """Perform a forecast model fit from training data retrieved from Home Assistant. + debug: Optional[bool] = False) -> np.ndarray: + """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict @@ -464,29 +464,24 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ - # data = copy.deepcopy(input_data_dict['df_input_data']) - # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) - # Fit the ML model + # Predict from csv file prediction = csv.predict() - # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] - # Publish Load forecast + # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, csv_predict_entity_id, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 483898f815b1b9f8449fa64009363b4afadc7089 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 062/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From 3559c2842feed5291b05bde3fe6d4d96e758a45e Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 063/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
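At this stage of the series the predictor is still single-shot: the constructor takes the CSV location plus the regression settings, and `predict` loads the file, fits, and predicts in one call. A minimal sketch of that flow, reusing the example values from the docstrings above (the `root` path is hypothetical):

```python
import logging

from emhass.csv_predictor import CsvPredictor

logger = logging.getLogger(__name__)
predictor = CsvPredictor(
    csv_file="input_train_data.csv",        # resolved relative to `root`
    independent_variables=["solar", "degree_days"],
    dependent_variable="hours",
    sklearn_model="KNeighborsRegressor",
    new_values=[2.24, 5.68],                # same order as independent_variables
    root="/share",                          # hypothetical config folder
    logger=logger,
)
prediction = predictor.predict()            # np.ndarray with one predicted value
```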
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray:
         data = self.load_data()
         if data is not None:
             X, y = self.prepare_data(data)
-        
+
             if self.sklearn_model == 'LinearRegression':
                 base_model = LinearRegression()
             elif self.sklearn_model == 'ElasticNet':
@@ -127,9 +126,5 @@ def predict(self) -> np.ndarray:
             self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}")
             new_values = np.array([self.new_values])
             prediction = self.forecaster.predict(new_values)
-        
+
             return prediction
-    
-    
-    
-    
\ No newline at end of file

From c928f2b7338474a7595d32116b799d1db776a8f5 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Tue, 13 Feb 2024 11:45:02 +0100
Subject: [PATCH 064/111] Use gridsearchcv and split up fit and predict

---
 src/emhass/command_line.py  |  87 +++++++++++++++---
 src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++-------------
 src/emhass/utils.py         |  16 +++-
 src/emhass/web_server.py    |  11 ++-
 4 files changed, 210 insertions(+), 77 deletions(-)

diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index b82f96b5..ae690ae9 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -155,7 +155,36 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
         if not rh.get_data(days_list, var_list):
             return False
         df_input_data = rh.df_final.copy()
-    elif set_type == "csv-predict":
+
+    elif set_type == "csv-model-fit":
+
+        df_input_data_dayahead = None
+        P_PV_forecast, P_load_forecast = None, None
+        params = json.loads(params)
+        days_list = None
+        csv_file = params['passed_data']['csv_file']
+        independent_variables = params['passed_data']['independent_variables']
+        dependent_variable = params['passed_data']['dependent_variable']
+        timestamp = params['passed_data']['timestamp']
+        filename_path = pathlib.Path(base_path) / csv_file
+        if filename_path.is_file():
+            df_input_data = pd.read_csv(filename_path, parse_dates=True)
+
+        else:
+            logger.error("The CSV file was not found.")
+            raise ValueError("The CSV file " + csv_file + " was not found.")
+        required_columns = []
+        required_columns.extend(independent_variables)
+        required_columns.append(dependent_variable)
+        if timestamp is not None:
+            required_columns.append(timestamp)
+
+        if not set(required_columns).issubset(df_input_data.columns):
+            logger.error("The CSV file does not contain the required columns.")
+            raise ValueError(
+                f"CSV file should contain the following columns: {', '.join(required_columns)}"
+            )
+    elif set_type == "csv-model-predict":
         df_input_data, df_input_data_dayahead = None, None
         P_PV_forecast, P_load_forecast = None, None
         days_list = None
         params = json.loads(params)
 
@@ -454,7 +483,41 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger,
         pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL)
     return df_pred_optim, mlf
 
+def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
+                debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]:
+    """Perform a forecast model fit from training data retrieved from a CSV file.
+
+    :param input_data_dict: A dictionary with multiple data used by the action functions
+    :type input_data_dict: dict
+    :param logger: The passed logger object
+    :type logger: logging.Logger
+    :param debug: True to debug, useful for unit testing, defaults to False
+    :type debug: Optional[bool], optional
+    :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object
+    :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster]
+    """
+    data = copy.deepcopy(input_data_dict['df_input_data'])
+    # csv_file = input_data_dict['params']['passed_data']['csv_file']
+    model_type = input_data_dict['params']['passed_data']['model_type']
+    # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model']
+    independent_variables = input_data_dict['params']['passed_data']['independent_variables']
+    dependent_variable = input_data_dict['params']['passed_data']['dependent_variable']
+    timestamp = input_data_dict['params']['passed_data']['timestamp']
+    # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest']
+    date_features = input_data_dict['params']['passed_data']['date_features']
+    root = input_data_dict['root']
+    # The ML forecaster object
+    csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger)
+    # Fit the ML model
+    df_pred = csv.fit(date_features=date_features)
+    # Save model
+    if not debug:
+        filename = model_type+'_csv.pkl'
+        with open(pathlib.Path(root) / filename, 'wb') as outp:
+            pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL)
+    # return df_pred, csv
 
-def csv_predict(input_data_dict: dict, logger: logging.Logger,
+def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
                   debug: Optional[bool] = False) -> np.ndarray:
     """Perform a prediction from a CSV file.
 
     :param input_data_dict: A dictionary with multiple data used by the action functions
@@ -467,16 +530,20 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger,
    :return: The np.ndarray containing the predicted value.
:rtype: np.ndarray """ - csv_file = input_data_dict['params']['passed_data']['csv_file'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - new_values = input_data_dict['params']['passed_data']['new_values'] + model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - # The ML forecaster object - csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + filename = model_type+'_csv.pkl' + filename_path = pathlib.Path(root) / filename + if not debug: + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + csv = pickle.load(inp) + else: + logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + return + new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict() + prediction = csv.predict(new_values) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
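Patch 064 turns the one-shot predictor into a fit/predict pair that communicates through a pickle on disk, so training and prediction can be triggered independently. A hedged sketch of the intended two-step call sequence against the web server (the `/action/<name>` endpoint shape and port follow the existing EMHASS actions; file name and values are hypothetical):

```python
import requests

base = "http://localhost:5000/action"
# 1) Fit from a CSV file; persists <model_type>_csv.pkl under the config folder:
fit_params = {
    "csv_file": "heating_hours.csv",             # hypothetical training data
    "model_type": "heating_hours",
    "independent_variables": ["degree_days", "solar"],
    "dependent_variable": "hours",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
}
requests.post(f"{base}/csv-model-fit", json=fit_params, timeout=60)
# 2) Predict later from the persisted model; order matches independent_variables:
predict_params = {"model_type": "heating_hours", "new_values": [12.8, 4.8]}
requests.post(f"{base}/csv-model-predict", json=predict_params, timeout=60)
```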
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl
         :param logger: The passed logger object
         :type logger: logging.Logger
         """
-        self.csv_file = csv_file
+        self.data = data
         self.independent_variables = independent_variables
         self.dependent_variable = dependent_variable
-        self.sklearn_model = sklearn_model
-        self.new_values = new_values
-        self.root = root
+        self.timestamp = timestamp
+        self.model_type = model_type
         self.logger = logger
         self.is_tuned = False
+        self.data.sort_index(inplace=True)
+        self.data = self.data[~self.data.index.duplicated(keep='first')]
+
+    @staticmethod
+    def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame:
+        """Add date features from the input DataFrame timestamp
 
-    def load_data(self) -> pd.DataFrame:
-        """Load the data."""
-        filename_path = pathlib.Path(self.root) / self.csv_file
-        if filename_path.is_file():
-            with open(filename_path, 'rb') as inp:
-                data = pd.read_csv(inp)
-        else:
-            self.logger.error("The cvs file was not found.")
-            raise ValueError("The CSV file " + self.csv_file + " was not found.")
-
-        required_columns = self.independent_variables
-
-        if not set(required_columns).issubset(data.columns):
-            raise ValueError(
-                f"CSV file should contain the following columns: {', '.join(required_columns)}"
-            )
-        return data
-
-    def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]:
+        :param data: The input DataFrame
+        :type data: pd.DataFrame
+        :return: The DataFrame with the added features
+        :rtype: pd.DataFrame
+        """
+        df = copy.deepcopy(data)
+        df['timestamp']= pd.to_datetime(df['timestamp'])
+        if 'year' in date_features:
+            df['year'] = [i.month for i in df['timestamp']]
+        if 'month' in date_features:
+            df['month'] = [i.month for i in df['timestamp']]
+        if 'day_of_week' in date_features:
+            df['day_of_week'] = [i.dayofweek for i in df['timestamp']]
+        if 'day_of_year' in date_features:
+            df['day_of_year'] = [i.dayofyear for i in df['timestamp']]
+        if 'day' in date_features:
+            df['day'] = [i.day for i in df['timestamp']]
+        if 'hour' in date_features:
+            df['hour'] = [i.hour for i in df['timestamp']]
+
+        return df
+
+    def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]:
         """
-        Prepare the data.
+        Fit the model using the provided data.
 
         :param data: Input Data
         :type data: pd.DataFrame
-        :return: A tuple containing the train data.
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 44152dd4..4931d8c2 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,14 +155,26 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-predict": + if set_type == "csv-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] - new_values = runtimeparams['new_values'] params['passed_data']['csv_file'] = csv_file params['passed_data']['independent_variables'] = independent_variables params['passed_data']['dependent_variable'] = dependent_variable + if 'timestamp' not in runtimeparams.keys(): + params['passed_data']['timestamp'] = None + else: + timestamp = runtimeparams['timestamp'] + params['passed_data']['timestamp'] = timestamp + if 'date_features' not in runtimeparams.keys(): + params['passed_data']['date_features'] = [] + else: + date_features = runtimeparams['date_features'] + params['passed_data']['date_features'] = date_features + + if set_type == "csv-model-predict": + new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values # Treating special data passed for MPC control case diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 6a4549b8..ad71bb1e 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_predict +from emhass.command_line import csv_model_fit, csv_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,9 +194,14 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-predict': + elif action_name == 'csv-model-fit': + app.logger.info(" >> Performing a csv fit...") + 
csv_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-fit executed... \n' + return make_response(msg, 201) + elif action_name == 'csv-model-predict': app.logger.info(" >> Performing a csv predict...") - csv_predict(input_data_dict, app.logger) + csv_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action csv-predict executed... \n' return make_response(msg, 201) else: From e744c5e9af9b97cb534824227c3fbfc1457c68ed Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 065/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. From cf5657f85107865978596853a2c3a7578efe943b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 066/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From b2d1eb2177391cc56ef0e2c56e72bfb1a4c3d79b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 067/111] cleanup + docstrings --- src/emhass/command_line.py | 17 ++++---------- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 34 
deletions(-)

diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index ae690ae9..55d8f74e 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -484,7 +484,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger,
     return df_pred_optim, mlf
 
 def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
-                debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]:
+                debug: Optional[bool] = False) -> None:
     """Perform a forecast model fit from training data retrieved from a CSV file.
 
     :param input_data_dict: A dictionary with multiple data used by the action functions
@@ -493,32 +493,26 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
     :type logger: logging.Logger
     :param debug: True to debug, useful for unit testing, defaults to False
     :type debug: Optional[bool], optional
-    :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object
-    :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster]
     """
     data = copy.deepcopy(input_data_dict['df_input_data'])
-    # csv_file = input_data_dict['params']['passed_data']['csv_file']
     model_type = input_data_dict['params']['passed_data']['model_type']
-    # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model']
     independent_variables = input_data_dict['params']['passed_data']['independent_variables']
     dependent_variable = input_data_dict['params']['passed_data']['dependent_variable']
     timestamp = input_data_dict['params']['passed_data']['timestamp']
-    # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest']
     date_features = input_data_dict['params']['passed_data']['date_features']
     root = input_data_dict['root']
-    # The ML forecaster object
+    # The CSV forecaster object
     csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger)
     # Fit the ML model
-    df_pred = csv.fit(date_features=date_features)
+    csv.fit(date_features=date_features)
     # Save model
     if not debug:
         filename = model_type+'_csv.pkl'
         with open(pathlib.Path(root) / filename, 'wb') as outp:
             pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL)
 
 def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
-                  debug: Optional[bool] = False) -> np.ndarray:
+                  debug: Optional[bool] = False) -> None:
     """Perform a prediction from a CSV file.
 
     :param input_data_dict: A dictionary with multiple data used by the action functions
@@ -527,8 +521,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
     :type logger: logging.Logger
     :param debug: True to debug, useful for unit testing, defaults to False
     :type debug: Optional[bool], optional
-    :return: The np.ndarray containing the predicted value.
-    :rtype: np.ndarray
     """
     model_type = input_data_dict['params']['passed_data']['model_type']
     root = input_data_dict['root']
@@ -555,7 +547,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
                                     csv_predict_unit_of_measurement,
                                     csv_predict_friendly_name,
                                     type_var = 'csv_predictor')
-    return prediction
 
 def publish_data(input_data_dict: dict, logger: logging.Logger,
     save_data_to_file: Optional[bool] = False,
diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py
index 1e46927d..57d61791 100644
--- a/src/emhass/csv_predictor.py
+++ b/src/emhass/csv_predictor.py
@@ -25,32 +25,30 @@ class CsvPredictor:
 
     This class uses the `sklearn` module and the machine learning models are from `scikit-learn`.
- It exposes one main method: + It exposes two main methods: - - `predict`: to obtain a forecast from a csv file. + - `fit`: to train a model with the passed data. + + - `predict`: to obtain a forecast from a pre-trained model. """ def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param csv_file: The name of the csv file to retrieve data from. \ - Example: `input_train_data.csv`. - :type csv_file: str + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. :type independent_variables: list :param dependent_variable: The dependent variable(to be predicted). \ Example: `hours`. :type dependent_variable: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ - Example: [2.24, 5.68]. - :type new_values: list - :param root: The parent folder of the path where the config.yaml file is located - :type root: str + :param timestamp: If defined, the column key that has to be used of timestamp. + :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ @@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.timestamp = timestamp self.model_type = model_type self.logger = logger - self.is_tuned = False self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame :type data: pd.DataFrame + :param timestamp: The column containing the timestamp + :type timestamp: str :return: The DataFrame with the added features :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df['timestamp']= pd.to_datetime(df['timestamp']) + df[timestamp]= pd.to_datetime(df['timestamp']) if 'year' in date_features: - df['year'] = [i.month for i in df['timestamp']] + df['year'] = [i.year for i in df['timestamp']] if 'month' in date_features: df['month'] = [i.month for i in df['timestamp']] if 'day_of_week' in date_features: @@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - :param data: Input Data - :type data: pd.DataFrame + :param date_features: A list of 'date_features' to take into account when fitting the model. 
+ :type data: list """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for "+self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.independent_variables] = self.data[self.independent_variables] self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] @@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") @@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None: def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. + + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ From 714f66bbd09f26d7f7ae7277e712c65ead0c51df Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 11 Mar 2024 09:59:27 +0100 Subject: [PATCH 068/111] add other regression methods --- src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 57d61791..2b6fb86a 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -9,9 +9,10 @@ import pandas as pd import numpy as np +from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor from sklearn.metrics import r2_score -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - # Define the model - self.model = Pipeline([ - ('scaler', StandardScaler()), - ('regressor', LinearRegression()) - ]) - # Define the parameters to tune - param_grid = { - 'regressor__fit_intercept': [True, False], - 'regressor__positive': [True, False], - } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # Fit the grid search object to the data - self.logger.info("Fitting the model...") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + regression_methods = [ + ('Linear Regression', LinearRegression(), {}), + ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 
1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid = { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 667611c4d779afe0f30d5b4bcd1a1821300d1c07 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:10:15 +0100 Subject: [PATCH 069/111] add --editable --- .vscode/tasks.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index ffe440eb..ee23d121 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,7 +9,11 @@ "isDefault": true }, "args": [ - "install", "--no-deps", "--force-reinstall", "." + "install", + "--no-deps", + "--force-reinstall", + "--editable", + "." 
], "presentation": { "echo": true, From a63546cba66cf7b2a546715d763881a2f25d347f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:17 +0100 Subject: [PATCH 070/111] Add sklearn model --- src/emhass/command_line.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 55d8f74e..f08f108c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -496,13 +496,14 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, """ data = copy.deepcopy(input_data_dict['df_input_data']) model_type = input_data_dict['params']['passed_data']['model_type'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] # The CSV forecaster object - csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model csv.fit(date_features=date_features) # Save model From 6f720eb92effbf3a9c630a3edfa030c431a957fe Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 071/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
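Patch 071 below wires every supported regressor into a scaled pipeline that is tuned per model with `GridSearchCV`. Reduced to a self-contained sketch on synthetic data (the parameter grid is the one the diff defines for `LinearRegression`; `make_pipeline` names each step after its lower-cased class, hence the `linearregression__` prefix):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(42)
X = rng.random((200, 2))                         # stand-in for the CSV features
y = 3 * X[:, 0] - 2 * X[:, 1] + 0.1 * rng.random(200)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = make_pipeline(StandardScaler(), LinearRegression())
param_grid = {
    "linearregression__fit_intercept": [True, False],
    "linearregression__positive": [True, False],
}
search = GridSearchCV(model, param_grid, cv=5, scoring="neg_mean_squared_error",
                      refit=True, n_jobs=-1)
search.fit(X_train, y_train)
best_model = search.best_estimator_              # kept as self.model in the class
print(best_model.predict(X_test[:3]))
```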
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}")
 
 
     def predict(self, new_values:list) -> np.ndarray:

From be01f8fa75d5359e5397d476d29be9570b1426b2 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Fri, 15 Mar 2024 12:42:27 +0100
Subject: [PATCH 072/111] change to MLRegressor

---
 src/emhass/command_line.py                    |  40 +++++++++----------
 ...ictor.py => machine_learning_regressor.py} |   4 +-
 src/emhass/utils.py                           |  28 ++++++-------
 src/emhass/web_server.py                      |  18 ++++-----
 4 files changed, 45 insertions(+), 45 deletions(-)
 rename src/emhass/{csv_predictor.py => machine_learning_regressor.py} (98%)

diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index f08f108c..d0a8f71a 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -20,7 +20,7 @@
 from emhass.forecast import Forecast
 from emhass.machine_learning_forecaster import MLForecaster
 from emhass.optimization import Optimization
-from emhass.csv_predictor import CsvPredictor
+from emhass.machine_learning_regressor import MLRegressor
 from emhass import utils
 
 
@@ -156,7 +156,7 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
             return False
         df_input_data = rh.df_final.copy()
 
-    elif set_type == "csv-model-fit":
+    elif set_type == "regressor-model-fit":
 
         df_input_data_dayahead = None
         P_PV_forecast, P_load_forecast = None, None
@@ -184,7 +184,7 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
             raise ValueError(
                 f"CSV file should contain the following columns: {', '.join(required_columns)}"
            )
-    elif set_type == "csv-model-predict":
+    elif set_type == "regressor-model-predict":
         df_input_data, df_input_data_dayahead = None, None
         P_PV_forecast, P_load_forecast = None, None
         days_list = None
@@ -483,7 +483,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger,
         pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL)
     return df_pred_optim, mlf
 
-def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
+def regressor_model_fit(input_data_dict: dict, logger: logging.Logger,
                 debug: Optional[bool] = False) -> None:
     """Perform a forecast model fit from training data retrieved from a CSV file.
 
@@ -502,17 +502,17 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger,
     timestamp = input_data_dict['params']['passed_data']['timestamp']
     date_features = input_data_dict['params']['passed_data']['date_features']
     root = input_data_dict['root']
-    # The CSV forecaster object
-    csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger)
+    # The MLRegressor object
+    mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger)
     # Fit the ML model
-    csv.fit(date_features=date_features)
+    mlr.fit(date_features=date_features)
     # Save model
     if not debug:
-        filename = model_type+'_csv.pkl'
+        filename = model_type+'_mlr.pkl'
         with open(pathlib.Path(root) / filename, 'wb') as outp:
-            pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL)
+            pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL)
 
-def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
+def regressor_model_predict(input_data_dict: dict, logger: logging.Logger,
                   debug: Optional[bool] = False) -> None:
     """Perform a prediction from a CSV file.
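Fit and predict now share state only through the pickled regressor on disk; a minimal sketch of that round-trip outside the web server (the data folder is hypothetical, the `<model_type>_mlr.pkl` naming comes from the hunk above):

```python
import pathlib
import pickle

model_path = pathlib.Path("/share") / "heating_hours_mlr.pkl"  # <model_type>_mlr.pkl
with open(model_path, "rb") as inp:        # written earlier by regressor_model_fit
    mlr = pickle.load(inp)
print(mlr.predict([12.8, 4.8]))            # values in independent_variables order
```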
@@ -525,29 +525,29 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): with open(filename_path, 'rb') as inp: - csv = pickle.load(inp) + mlr = pickle.load(inp) else: logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") return new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict(new_values) + prediction = mlr.predict(new_values) - csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] - csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] - csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] + mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] + mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, - csv_predict_entity_id, - csv_predict_unit_of_measurement, - csv_predict_friendly_name, - type_var = 'csv_predictor') + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var = 'mlregressor') def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/machine_learning_regressor.py similarity index 98% rename from src/emhass/csv_predictor.py rename to src/emhass/machine_learning_regressor.py index 3ffeba27..d70df3ec 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/machine_learning_regressor.py @@ -20,7 +20,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) -class CsvPredictor: +class MLRegressor: r""" A forecaster class using machine learning models. 
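The renamed predict action also publishes to a configurable sensor; an example payload exercising the `mlr_predict_*` runtime parameters, whose defaults are installed by the `utils.py` hunk further below (host, model name, and values are hypothetical):

```python
import requests

payload = {
    "model_type": "heating_hours",
    "new_values": [12.8, 4.8],
    "mlr_predict_entity_id": "sensor.mlr_predict",   # also the default if omitted
    "mlr_predict_unit_of_measurement": "h",
    "mlr_predict_friendly_name": "Heating hours prediction",
}
requests.post("http://localhost:5000/action/regressor-model-predict",
              json=payload, timeout=60)
```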
@@ -115,7 +115,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 4931d8c2..38a4e424 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,7 +155,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-model-fit": + if set_type == "regressor-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] @@ -173,7 +173,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic date_features = runtimeparams['date_features'] params['passed_data']['date_features'] = date_features - if set_type == "csv-model-predict": + if set_type == "regressor-model-predict": new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values @@ -303,21 +303,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'csv_predict_entity_id' not in runtimeparams.keys(): - csv_predict_entity_id = "sensor.csv_predictor" + if 'mlr_predict_entity_id' not in runtimeparams.keys(): + mlr_predict_entity_id = "sensor.mlr_predict" else: - csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] - params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id - if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): - csv_predict_unit_of_measurement = None + mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] + params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id + if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = None else: - csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] - params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement - if 'csv_predict_friendly_name' not in runtimeparams.keys(): - csv_predict_friendly_name = "Csv predictor" + mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] + params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement + if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_friendly_name = "mlr predictor" else: - csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] - params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff 
--git a/src/emhass/web_server.py b/src/emhass/web_server.py index ad71bb1e..50241590 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_model_fit, csv_model_predict +from emhass.command_line import regressor_model_fit, regressor_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,15 +194,15 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-model-fit': - app.logger.info(" >> Performing a csv fit...") - csv_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-fit executed... \n' + elif action_name == 'regressor-model-fit': + app.logger.info(" >> Performing a regressor fit...") + regressor_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-fit executed... \n' return make_response(msg, 201) - elif action_name == 'csv-model-predict': - app.logger.info(" >> Performing a csv predict...") - csv_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-predict executed... \n' + elif action_name == 'regressor-model-predict': + app.logger.info(" >> Performing a regressor predict...") + regressor_model_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 7430bf0dc06f36e526909314af4847e3779e6380 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 073/111] change naming and some formatting --- src/emhass/command_line.py | 746 ++++++++++++------- src/emhass/machine_learning_regressor.py | 285 ++++---- src/emhass/retrieve_hass.py | 318 +++++--- src/emhass/utils.py | 887 +++++++++++++++-------- 4 files changed, 1397 insertions(+), 839 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index d0a8f71a..0c094c96 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -8,14 +8,15 @@ import json import copy import pickle -import time -import numpy as np -import pandas as pd from datetime import datetime, timezone from typing import Optional, Tuple +from importlib.metadata import version +import numpy as np +import pandas as pd + from distutils.util import strtobool -from importlib.metadata import version + from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster @@ -54,8 +55,14 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, emhass_conf, use_secrets=not(get_data_from_file), params=params) # Treat runtimeparams params, retrieve_hass_conf, optim_conf, plant_conf = utils.treat_runtimeparams( - runtimeparams, params, retrieve_hass_conf, - optim_conf, plant_conf, set_type, logger) + runtimeparams, + params, + retrieve_hass_conf, + optim_conf, + plant_conf, + set_type, + logger, + ) # Define main objects rh = RetrieveHass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'], retrieve_hass_conf['freq'], 
retrieve_hass_conf['time_zone'], @@ -72,35 +79,53 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, with open(emhass_conf['data_path'] / 'test_df_final.pkl', 'rb') as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: - days_list = utils.get_days_list(retrieve_hass_conf['days_to_retrieve']) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): - return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + days_list = utils.get_days_list(retrieve_hass_conf["days_to_retrieve"]) + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): + return False + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # What we don't need for this type of action P_PV_forecast, P_load_forecast, df_input_data_dayahead = None, None, None elif set_type == "dayahead-optim": # Get PV and load forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) P_PV_forecast = fcst.get_power_from_weather(df_weather) P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method']) if isinstance(P_load_forecast,bool) and not P_load_forecast: logger.error("Unable to get sensor power photovoltaics, or sensor power load no var loads. 
Check HA sensors and their daily data") return False - df_input_data_dayahead = pd.DataFrame(np.transpose(np.vstack([P_PV_forecast.values,P_load_forecast.values])), - index=P_PV_forecast.index, - columns=['P_PV_forecast', 'P_load_forecast']) + df_input_data_dayahead = pd.DataFrame( + np.transpose(np.vstack([P_PV_forecast.values, P_load_forecast.values])), + index=P_PV_forecast.index, + columns=["P_PV_forecast", "P_load_forecast"], + ) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] # What we don't need for this type of action df_input_data, days_list = None, None elif set_type == "naive-mpc-optim": @@ -110,14 +135,21 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, rh.df_final, days_list, var_list = pickle.load(inp) else: days_list = utils.get_days_list(1) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # Get PV and load forecasts @@ -129,43 +161,56 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, return False df_input_data_dayahead = pd.concat([P_PV_forecast, P_load_forecast], axis=1) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) - df_input_data_dayahead.columns = ['P_PV_forecast', 'P_load_forecast'] + df_input_data_dayahead.columns = ["P_PV_forecast", "P_load_forecast"] params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] - elif set_type == "forecast-model-fit" or set_type == "forecast-model-predict" or set_type == "forecast-model-tune": + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = 
params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] + elif ( + set_type == "forecast-model-fit" + or set_type == "forecast-model-predict" + or set_type == "forecast-model-tune" + ): df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) # Retrieve data from hass - days_to_retrieve = params['passed_data']['days_to_retrieve'] - model_type = params['passed_data']['model_type'] - var_model = params['passed_data']['var_model'] + days_to_retrieve = params["passed_data"]["days_to_retrieve"] + model_type = params["passed_data"]["model_type"] + var_model = params["passed_data"]["var_model"] if get_data_from_file: days_list = None filename = 'data_train_'+model_type+'.pkl' filename_path = emhass_conf['data_path'] / filename with open(filename_path, 'rb') as inp: df_input_data, _ = pickle.load(inp) - df_input_data = df_input_data[df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve):] + df_input_data = df_input_data[ + df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve) : + ] else: days_list = utils.get_days_list(days_to_retrieve) var_list = [var_model] if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - + elif set_type == "regressor-model-fit": - + df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params['passed_data']['csv_file'] - independent_variables = params['passed_data']['independent_variables'] - dependent_variable = params['passed_data']['dependent_variable'] - timestamp = params['passed_data']['timestamp'] + csv_file = params["passed_data"]["csv_file"] + features = params["passed_data"]["features"] + target = params["passed_data"]["target"] + timestamp = params["passed_data"]["timestamp"] filename_path = pathlib.Path(base_path) / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -174,8 +219,8 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, logger.error("The cvs file was not found.") raise ValueError("The CSV file " + csv_file + " was not found.") required_columns = [] - required_columns.extend(independent_variables) - required_columns.append(dependent_variable) + required_columns.extend(features) + required_columns.append(target) if timestamp is not None: required_columns.append(timestamp) @@ -189,13 +234,15 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, P_PV_forecast, P_load_forecast = None, None days_list = None params = json.loads(params) - + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None else: - logger.error("The passed action argument and hence the set_type parameter for setup is not valid") + logger.error( + "The passed action argument and hence the set_type parameter for setup is not valid" + ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -216,12 +263,17 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, 'days_list': days_list } return input_data_dict - -def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = True, debug: Optional[bool] = False) -> pd.DataFrame: + + +def perfect_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + 
save_data_to_file: Optional[bool] = True, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the perfect forecast optimization routine. - + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -250,18 +302,23 @@ def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, opt_res = input_data_dict['opt'].perform_perfect_forecast_optim(df_input_data, input_data_dict['days_list']) # Save CSV file for analysis if save_data_to_file: - filename = 'opt_res_perfect_optim_'+input_data_dict['costfun']+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res - -def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + + +def dayahead_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the day-ahead optimization routine. - + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -290,19 +347,26 @@ def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast']) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res_dayahead.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res_dayahead -def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + +def naive_mpc_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the naive Model Predictive Controller optimization routine. 
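As a hedged illustration only, the MPC-specific runtime parameters read further below from `input_data_dict['params']['passed_data']` could be passed like this (all values are examples, not defaults):

    # hypothetical runtime payload for a naive-mpc-optim call; values are illustrative
    runtimeparams = {
        "prediction_horizon": 10,      # number of timesteps in the receding horizon
        "soc_init": 0.4,               # battery state of charge at the first timestep
        "soc_final": 0.6,              # state of charge targeted at the end of the horizon
        "def_total_hours": [2, 1],     # operating hours per deferrable load
        "def_start_timestep": [0, 0],  # earliest start per deferrable load (assumed 0 = unconstrained)
        "def_end_timestep": [0, 0],    # latest end per deferrable load (assumed 0 = unconstrained)
    }
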
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -327,27 +391,39 @@ def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, if isinstance(df_input_data_dayahead,bool) and not df_input_data_dayahead: return False # The specifics params for the MPC at runtime - prediction_horizon = input_data_dict['params']['passed_data']['prediction_horizon'] - soc_init = input_data_dict['params']['passed_data']['soc_init'] - soc_final = input_data_dict['params']['passed_data']['soc_final'] - def_total_hours = input_data_dict['params']['passed_data']['def_total_hours'] - def_start_timestep = input_data_dict['params']['passed_data']['def_start_timestep'] - def_end_timestep = input_data_dict['params']['passed_data']['def_end_timestep'] - opt_res_naive_mpc = input_data_dict['opt'].perform_naive_mpc_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast'], - prediction_horizon, soc_init, soc_final, def_total_hours, def_start_timestep, def_end_timestep) + prediction_horizon = input_data_dict["params"]["passed_data"]["prediction_horizon"] + soc_init = input_data_dict["params"]["passed_data"]["soc_init"] + soc_final = input_data_dict["params"]["passed_data"]["soc_final"] + def_total_hours = input_data_dict["params"]["passed_data"]["def_total_hours"] + def_start_timestep = input_data_dict["params"]["passed_data"]["def_start_timestep"] + def_end_timestep = input_data_dict["params"]["passed_data"]["def_end_timestep"] + opt_res_naive_mpc = input_data_dict["opt"].perform_naive_mpc_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + prediction_horizon, + soc_init, + soc_final, + def_total_hours, + def_start_timestep, + def_end_timestep, + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_naive_mpc_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res_naive_mpc.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res_naive_mpc -def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: + +def forecast_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: """Perform a forecast model fit from training data retrieved from Home Assistant. 
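A hypothetical `passed_data` payload for this action, shown only as a sketch (the sensor name and every value are assumptions, not defaults):

    passed_data = {
        "days_to_retrieve": 9,                          # history window pulled from Home Assistant
        "model_type": "load_forecast",                  # tag used to name the saved <model_type>_mlf.pkl
        "var_model": "sensor.power_load_no_var_loads",  # the sensor to learn from
        "sklearn_model": "KNeighborsRegressor",         # scikit-learn regressor used by MLForecaster
        "num_lags": 48,                                 # autoregressive lags
        "split_date_delta": "48h",                      # train/test split offset
        "perform_backtest": False,
    }
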
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -369,8 +445,9 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, # The ML forecaster object mlf = MLForecaster(data, model_type, var_model, sklearn_model, num_lags, input_data_dict['emhass_conf'], logger) # Fit the ML model - df_pred, df_pred_backtest = mlf.fit(split_date_delta=split_date_delta, - perform_backtest=perform_backtest) + df_pred, df_pred_backtest = mlf.fit( + split_date_delta=split_date_delta, perform_backtest=perform_backtest + ) # Save model if not debug: filename = model_type+'_mlf.pkl' @@ -379,9 +456,14 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred, df_pred_backtest, mlf -def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, - use_last_window: Optional[bool] = True, debug: Optional[bool] = False, - mlf: Optional[MLForecaster] = None) -> pd.DataFrame: + +def forecast_model_predict( + input_data_dict: dict, + logger: logging.Logger, + use_last_window: Optional[bool] = True, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> pd.DataFrame: r"""Perform a forecast model predict using a previously trained skforecast model. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -408,46 +490,73 @@ def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, filename_path = input_data_dict['emhass_conf']['data_path'] / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return # Make predictions if use_last_window: - data_last_window = copy.deepcopy(input_data_dict['df_input_data']) + data_last_window = copy.deepcopy(input_data_dict["df_input_data"]) else: data_last_window = None predictions = mlf.predict(data_last_window) # Publish data to a Home Assistant sensor - model_predict_publish = input_data_dict['params']['passed_data']['model_predict_publish'] - model_predict_entity_id = input_data_dict['params']['passed_data']['model_predict_entity_id'] - model_predict_unit_of_measurement = input_data_dict['params']['passed_data']['model_predict_unit_of_measurement'] - model_predict_friendly_name = input_data_dict['params']['passed_data']['model_predict_friendly_name'] - publish_prefix = input_data_dict['params']['passed_data']['publish_prefix'] + model_predict_publish = input_data_dict["params"]["passed_data"][ + "model_predict_publish" + ] + model_predict_entity_id = input_data_dict["params"]["passed_data"][ + "model_predict_entity_id" + ] + model_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "model_predict_unit_of_measurement" + ] + model_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "model_predict_friendly_name" + ] + publish_prefix = input_data_dict["params"]["passed_data"]["publish_prefix"] if model_predict_publish is True: # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = predictions.index.get_indexer([now_precise], 
method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = predictions.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = predictions.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = predictions.index.get_indexer([now_precise], method="ffill")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = predictions.index.get_indexer([now_precise], method="bfill")[ + 0 + ] if idx_closest == -1: - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] # Publish Load forecast - input_data_dict['rh'].post_data(predictions, idx_closest, - model_predict_entity_id, - model_predict_unit_of_measurement, - model_predict_friendly_name, - type_var = 'mlforecaster', - publish_prefix=publish_prefix) + input_data_dict["rh"].post_data( + predictions, + idx_closest, + model_predict_entity_id, + model_predict_unit_of_measurement, + model_predict_friendly_name, + type_var="mlforecaster", + publish_prefix=publish_prefix, + ) return predictions -def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False, mlf: Optional[MLForecaster] = None - ) -> Tuple[pd.DataFrame, MLForecaster]: + +def forecast_model_tune( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> Tuple[pd.DataFrame, MLForecaster]: """Tune a forecast model hyperparameters using bayesian optimization. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -468,10 +577,12 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, filename_path = input_data_dict['emhass_conf']['data_path'] / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this tune method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this tune method" + ) return None, None # Tune the model df_pred_optim = mlf.tune(debug=debug) @@ -483,8 +594,10 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. 
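In practice the training data for this action is the CSV loaded earlier by `set_input_data_dict`, not Home Assistant sensors. A hypothetical `passed_data` payload (the file name and column names are assumptions used only for illustration):

    passed_data = {
        "csv_file": "heating_prediction.csv",       # training data, read with pd.read_csv
        "features": ["degreeday", "solar"],         # feature columns of the CSV
        "target": "hours",                          # column to be predicted
        "timestamp": "timestamp",                   # optional, required to use date_features
        "date_features": ["month", "day_of_week"],
        "model_type": "heating_hours_degreeday",    # tag used to name the saved <model_type>_mlr.pkl
        "sklearn_model": "LinearRegression",        # one of the regression_methods known to MLRegressor
    }
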
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -494,26 +607,30 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - timestamp = input_data_dict['params']['passed_data']['timestamp'] - date_features = input_data_dict['params']['passed_data']['date_features'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + features = input_data_dict["params"]["passed_data"]["features"] + target = input_data_dict["params"]["passed_data"]["target"] + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + date_features = input_data_dict["params"]["passed_data"]["date_features"] + root = input_data_dict["root"] # The MLRegressor object - mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + mlr = MLRegressor( + data, model_type, sklearn_model, features, target, timestamp, logger + ) # Fit the ML model mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_mlr.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlr.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_predict( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a prediction from csv file. 
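A hypothetical `passed_data` payload for this action; the entity id and friendly name shown are the defaults set in `utils.treat_runtimeparams`, the remaining values are illustrative:

    passed_data = {
        "model_type": "heating_hours_degreeday",   # selects the saved <model_type>_mlr.pkl
        "new_values": [8.2, 7.23],                 # one value per feature, same order as at fit time
        "mlr_predict_entity_id": "sensor.mlr_predict",
        "mlr_predict_unit_of_measurement": "h",    # an assumption for this example
        "mlr_predict_friendly_name": "mlr predictor",
    }
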
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -523,38 +640,53 @@ def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlr.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlr.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlr = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return - new_values = input_data_dict['params']['passed_data']['new_values'] + new_values = input_data_dict["params"]["passed_data"]["new_values"] # Predict from csv file prediction = mlr.predict(new_values) - mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] - mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] - mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ + "mlr_predict_entity_id" + ] + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "mlr_predict_unit_of_measurement" + ] + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "mlr_predict_friendly_name" + ] # Publish prediction idx = 0 - input_data_dict['rh'].post_data(prediction, idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var = 'mlregressor') - -def publish_data(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, - opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + + +def publish_data( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + opt_res_latest: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: """ Publish the data obtained from the optimization results. 
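Every `custom_*_id` entry read from `passed_data` below shares the same three-key shape; a sketch with illustrative values:

    custom_pv_forecast_id = {
        "entity_id": "sensor.p_pv_forecast",   # example entity id
        "unit_of_measurement": "W",
        "friendly_name": "PV Power Forecast",
    }
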
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -568,10 +700,12 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, logger.info("Publishing data to HASS instance") # Check if a day ahead optimization has been performed (read CSV file) if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: - filename = 'opt_res_latest.csv' + filename = "opt_res_latest.csv" if opt_res_latest is None: if not os.path.isfile(input_data_dict['emhass_conf']['data_path'] / filename): logger.error("File not found error, run an optimization task first.") @@ -579,144 +713,191 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, else: opt_res_latest = pd.read_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_col='timestamp') opt_res_latest.index = pd.to_datetime(opt_res_latest.index) - opt_res_latest.index.freq = input_data_dict['retrieve_hass_conf']['freq'] + opt_res_latest.index.freq = input_data_dict["retrieve_hass_conf"]["freq"] # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="ffill")[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="bfill")[0] if idx_closest == -1: - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] # Publish the data - params = json.loads(input_data_dict['params']) - publish_prefix = params['passed_data']['publish_prefix'] + params = json.loads(input_data_dict["params"]) + publish_prefix = params["passed_data"]["publish_prefix"] # Publish PV forecast - custom_pv_forecast_id = params['passed_data']['custom_pv_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_PV'], idx_closest, - custom_pv_forecast_id["entity_id"], - custom_pv_forecast_id["unit_of_measurement"], - custom_pv_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) + custom_pv_forecast_id = params["passed_data"]["custom_pv_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_PV"], + idx_closest, + 
custom_pv_forecast_id["entity_id"], + custom_pv_forecast_id["unit_of_measurement"], + custom_pv_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) # Publish Load forecast - custom_load_forecast_id = params['passed_data']['custom_load_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_Load'], idx_closest, - custom_load_forecast_id["entity_id"], - custom_load_forecast_id["unit_of_measurement"], - custom_load_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = ['P_PV', 'P_Load'] + custom_load_forecast_id = params["passed_data"]["custom_load_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_Load"], + idx_closest, + custom_load_forecast_id["entity_id"], + custom_load_forecast_id["unit_of_measurement"], + custom_load_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = ["P_PV", "P_Load"] # Publish deferrable loads - custom_deferrable_forecast_id = params['passed_data']['custom_deferrable_forecast_id'] - for k in range(input_data_dict['opt'].optim_conf['num_def_loads']): + custom_deferrable_forecast_id = params["passed_data"][ + "custom_deferrable_forecast_id" + ] + for k in range(input_data_dict["opt"].optim_conf["num_def_loads"]): if "P_deferrable{}".format(k) not in opt_res_latest.columns: - logger.error("P_deferrable{}".format(k)+" was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + logger.error( + "P_deferrable{}".format(k) + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + ) else: - input_data_dict['rh'].post_data(opt_res_latest["P_deferrable{}".format(k)], idx_closest, - custom_deferrable_forecast_id[k]["entity_id"], - custom_deferrable_forecast_id[k]["unit_of_measurement"], - custom_deferrable_forecast_id[k]["friendly_name"], - type_var = 'deferrable', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_deferrable{}".format(k)] + input_data_dict["rh"].post_data( + opt_res_latest["P_deferrable{}".format(k)], + idx_closest, + custom_deferrable_forecast_id[k]["entity_id"], + custom_deferrable_forecast_id[k]["unit_of_measurement"], + custom_deferrable_forecast_id[k]["friendly_name"], + type_var="deferrable", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_deferrable{}".format(k)] # Publish battery power - if input_data_dict['opt'].optim_conf['set_use_battery']: - if 'P_batt' not in opt_res_latest.columns: - logger.error("P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + if input_data_dict["opt"].optim_conf["set_use_battery"]: + if "P_batt" not in opt_res_latest.columns: + logger.error( + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." 
+ ) else: - custom_batt_forecast_id = params['passed_data']['custom_batt_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_batt'], idx_closest, - custom_batt_forecast_id["entity_id"], - custom_batt_forecast_id["unit_of_measurement"], - custom_batt_forecast_id["friendly_name"], - type_var = 'batt', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_batt"] - custom_batt_soc_forecast_id = params['passed_data']['custom_batt_soc_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['SOC_opt']*100, idx_closest, - custom_batt_soc_forecast_id["entity_id"], - custom_batt_soc_forecast_id["unit_of_measurement"], - custom_batt_soc_forecast_id["friendly_name"], - type_var = 'SOC', - publish_prefix = publish_prefix) - cols_published = cols_published+["SOC_opt"] + custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_batt"], + idx_closest, + custom_batt_forecast_id["entity_id"], + custom_batt_forecast_id["unit_of_measurement"], + custom_batt_forecast_id["friendly_name"], + type_var="batt", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_batt"] + custom_batt_soc_forecast_id = params["passed_data"][ + "custom_batt_soc_forecast_id" + ] + input_data_dict["rh"].post_data( + opt_res_latest["SOC_opt"] * 100, + idx_closest, + custom_batt_soc_forecast_id["entity_id"], + custom_batt_soc_forecast_id["unit_of_measurement"], + custom_batt_soc_forecast_id["friendly_name"], + type_var="SOC", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["SOC_opt"] # Publish grid power - custom_grid_forecast_id = params['passed_data']['custom_grid_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_grid'], idx_closest, - custom_grid_forecast_id["entity_id"], - custom_grid_forecast_id["unit_of_measurement"], - custom_grid_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_grid"] + custom_grid_forecast_id = params["passed_data"]["custom_grid_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_grid"], + idx_closest, + custom_grid_forecast_id["entity_id"], + custom_grid_forecast_id["unit_of_measurement"], + custom_grid_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_grid"] # Publish total value of cost function - custom_cost_fun_id = params['passed_data']['custom_cost_fun_id'] - col_cost_fun = [i for i in opt_res_latest.columns if 'cost_fun_' in i] - input_data_dict['rh'].post_data(opt_res_latest[col_cost_fun], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'cost_fun', - publish_prefix = publish_prefix) + custom_cost_fun_id = params["passed_data"]["custom_cost_fun_id"] + col_cost_fun = [i for i in opt_res_latest.columns if "cost_fun_" in i] + input_data_dict["rh"].post_data( + opt_res_latest[col_cost_fun], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="cost_fun", + publish_prefix=publish_prefix, + ) # Publish the optimization status - custom_cost_fun_id = params['passed_data']['custom_optim_status_id'] + custom_cost_fun_id = params["passed_data"]["custom_optim_status_id"] if "optim_status" not in opt_res_latest: - opt_res_latest["optim_status"] = 'Optimal' - logger.warning("no optim_status 
in opt_res_latest, run an optimization task first") - input_data_dict['rh'].post_data(opt_res_latest['optim_status'], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'optim_status', - publish_prefix = publish_prefix) - cols_published = cols_published+["optim_status"] + opt_res_latest["optim_status"] = "Optimal" + logger.warning( + "no optim_status in opt_res_latest, run an optimization task first" + ) + input_data_dict["rh"].post_data( + opt_res_latest["optim_status"], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="optim_status", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["optim_status"] # Publish unit_load_cost - custom_unit_load_cost_id = params['passed_data']['custom_unit_load_cost_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_load_cost'], idx_closest, - custom_unit_load_cost_id["entity_id"], - custom_unit_load_cost_id["unit_of_measurement"], - custom_unit_load_cost_id["friendly_name"], - type_var = 'unit_load_cost', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_load_cost"] + custom_unit_load_cost_id = params["passed_data"]["custom_unit_load_cost_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_load_cost"], + idx_closest, + custom_unit_load_cost_id["entity_id"], + custom_unit_load_cost_id["unit_of_measurement"], + custom_unit_load_cost_id["friendly_name"], + type_var="unit_load_cost", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_load_cost"] # Publish unit_prod_price - custom_unit_prod_price_id = params['passed_data']['custom_unit_prod_price_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_prod_price'], idx_closest, - custom_unit_prod_price_id["entity_id"], - custom_unit_prod_price_id["unit_of_measurement"], - custom_unit_prod_price_id["friendly_name"], - type_var = 'unit_prod_price', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_prod_price"] + custom_unit_prod_price_id = params["passed_data"]["custom_unit_prod_price_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_prod_price"], + idx_closest, + custom_unit_prod_price_id["entity_id"], + custom_unit_prod_price_id["unit_of_measurement"], + custom_unit_prod_price_id["friendly_name"], + type_var="unit_prod_price", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_prod_price"] # Create a DF resuming what has been published opt_res = opt_res_latest[cols_published].loc[[opt_res_latest.index[idx_closest]]] return opt_res - - + + def main(): r"""Define the main command line entry function. This function may take several arguments as inputs. 
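For instance, a hypothetical day-ahead run could look like `emhass --action dayahead-optim --config /app/config_emhass.yaml --costfun profit` (the config path here is only an example).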
You can type `emhass --help` to see the list of options: - + - action: Set the desired action, options are: perfect-optim, dayahead-optim, naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune - + - config: Define path to the config.yaml file - + - costfun: Define the type of cost function, options are: profit, cost, self-consumption - + - log2file: Define if we should log to a file or not - + - params: Configuration parameters passed from data/options.json if using the add-on - + - runtimeparams: Pass runtime optimization parameters as dictionnary - + - debug: Use True for testing purposes - + """ # Parsing arguments parser = argparse.ArgumentParser() @@ -777,39 +958,49 @@ def main(): # Additionnal argument try: - parser.add_argument('--version', action='version', version='%(prog)s '+version('emhass')) + parser.add_argument( + "--version", action="version", version="%(prog)s " + version("emhass") + ) args = parser.parse_args() except Exception: - logger.info("Version not found for emhass package. Or importlib exited with PackageNotFoundError.") + logger.info( + "Version not found for emhass package. Or importlib exited with PackageNotFoundError." + ) # Setup parameters input_data_dict = set_input_data_dict(emhass_conf, args.costfun, args.params, args.runtimeparams, args.action, logger, args.debug) # Perform selected action - if args.action == 'perfect-optim': + if args.action == "perfect-optim": opt_res = perfect_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'dayahead-optim': + elif args.action == "dayahead-optim": opt_res = dayahead_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'naive-mpc-optim': + elif args.action == "naive-mpc-optim": opt_res = naive_mpc_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'forecast-model-fit': - df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) + elif args.action == "forecast-model-fit": + df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit( + input_data_dict, logger, debug=args.debug + ) opt_res = None - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred = forecast_model_predict(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred = forecast_model_predict( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'publish-data': + elif args.action == "publish-data": opt_res = publish_data(input_data_dict, logger) else: logger.error("The passed action argument is not valid") @@ -819,17 +1010,22 @@ def main(): # Flush the logger ch.close() logger.removeHandler(ch) - if args.action == 'perfect-optim' or args.action == 'dayahead-optim' or \ - args.action == 'naive-mpc-optim' or args.action == 'publish-data': + if ( + args.action == "perfect-optim" + or args.action == "dayahead-optim" + or args.action == "naive-mpc-optim" + or args.action == "publish-data" + ): return 
opt_res - elif args.action == 'forecast-model-fit': + elif args.action == "forecast-model-fit": return df_fit_pred, df_fit_pred_backtest, mlf - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": return df_pred - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": return df_pred_optim, mlf else: return opt_res -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index d70df3ec..80ddd74f 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -9,8 +9,12 @@ import pandas as pd import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score +from sklearn.ensemble import ( + AdaBoostRegressor, + GradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.metrics import r2_score from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split @@ -20,21 +24,31 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) + class MLRegressor: r""" A forecaster class using machine learning models. - + This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - + It exposes two main methods: - + - `fit`: to train a model with the passed data. - + - `predict`: to obtain a forecast from a pre-trained model. - + """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: + + def __init__( + self, + data, + model_type: str, + sklearn_model: str, + features: list, + target: str, + timestamp: str, + logger: logging.Logger, + ) -> None: r"""Define constructor for the forecast class. :param data: The data that will be used for train/test @@ -42,33 +56,35 @@ def __init__(self, data, model_type: str, sklearn_model: str, independent_variab :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param independent_variables: A list of independent variables. \ + :param features: A list of features. \ Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ + :type features: list + :param target: The target(to be predicted). \ Example: `hours`. - :type dependent_variable: str + :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. 
:type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable + self.features = features + self.target = target self.timestamp = timestamp self.model_type = model_type self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None self.model = None - self.grid_search =None - + self.grid_search = None + @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: + def add_date_features( + data: pd.DataFrame, date_features: list, timestamp: str + ) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame @@ -79,179 +95,162 @@ def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) - :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] + df[timestamp] = pd.to_datetime(df[timestamp]) + if "year" in date_features: + df["year"] = [i.year for i in df[timestamp]] + if "month" in date_features: + df["month"] = [i.month for i in df[timestamp]] + if "day_of_week" in date_features: + df["day_of_week"] = [i.dayofweek for i in df[timestamp]] + if "day_of_year" in date_features: + df["day_of_year"] = [i.dayofyear for i in df[timestamp]] + if "day" in date_features: + df["day"] = [i.day for i in df[timestamp]] + if "hour" in date_features: + df["hour"] = [i.hour for i in df[timestamp]] return df def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. :param date_features: A list of 'date_features' to take into account when fitting the model. :type date_features: list """ - self.logger.info("Performing a csv model fit for "+self.model_type) + self.logger.info("Performing a MLRegressor fit for " + self.model_type) self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + self.data_exo[self.features] = self.data[self.features] + self.data_exo[self.target] = self.data[self.target] keep_columns = [] - keep_columns.extend(self.independent_variables) + keep_columns.extend(self.features) if self.timestamp is not None: keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) + keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features( + self.data_exo, date_features, self.timestamp + ) else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") + self.logger.error( + "If no timestamp is provided, you can't use date_features, going further without date_features." + ) - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + y = self.data_exo[self.target] + self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + self.data_exo = self.data_exo.drop(self.timestamp, axis=1) X = self.data_exo - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 + ) self.steps = len(X_test) regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + 
"gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] + + if self.sklearn_model == "LinearRegression": + base_model = regression_methods["LinearRegression"]["model"] + param_grid = regression_methods["LinearRegression"]["param_grid"] + elif self.sklearn_model == "RidgeRegression": + base_model = regression_methods["RidgeRegression"]["model"] + param_grid = regression_methods["RidgeRegression"]["param_grid"] + elif self.sklearn_model == "LassoRegression": + base_model = regression_methods["LassoRegression"]["model"] + param_grid = regression_methods["LassoRegression"]["param_grid"] + elif self.sklearn_model == "RandomForestRegression": + base_model = regression_methods["RandomForestRegression"]["model"] + param_grid = regression_methods["RandomForestRegression"]["param_grid"] + elif self.sklearn_model == "GradientBoostingRegression": + base_model = regression_methods["GradientBoostingRegression"]["model"] + param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] + elif self.sklearn_model == "AdaBoostRegression": + base_model = regression_methods["AdaBoostRegression"]["model"] + param_grid = regression_methods["AdaBoostRegression"]["param_grid"] else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # 
StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } + self.logger.error( + "Passed sklearn model " + self.sklearn_model + " is not valid" + ) + + self.model = make_pipeline(StandardScaler(), base_model) # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) - + self.grid_search = GridSearchCV( + self.model, + param_grid, + cv=5, + scoring="neg_mean_squared_error", + refit=True, + verbose=0, + n_jobs=-1, + ) + # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") + self.logger.info("Training a " + self.sklearn_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) + print("Best value for lambda : ", self.grid_search.best_params_) print("Best score for cost function: ", self.grid_search.best_score_) self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") self.model = self.grid_search.best_estimator_ - # Make predictions predictions = self.model.predict(X_test.values) predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - + pred_metric = r2_score(y_test, predictions) + self.logger.info( + f"Prediction R2 score of fitted model on test data: {pred_metric}" + ) - def predict(self, new_values:list) -> np.ndarray: + def predict(self, new_values: list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + :param new_values: The new values for the features(in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.model_type) + self.logger.info("Performing a prediction for " + self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index 52397c87..4fb909b1 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -35,7 +35,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, get_data_from_file: Optional[bool] = False) -> None: """ Define constructor for RetrieveHass class. 
diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py
index 52397c87..4fb909b1 100644
--- a/src/emhass/retrieve_hass.py
+++ b/src/emhass/retrieve_hass.py
@@ -35,7 +35,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta,
                  get_data_from_file: Optional[bool] = False) -> None:
         """
         Define constructor for RetrieveHass class.
-
+
         :param hass_url: The URL of the Home Assistant instance
         :type hass_url: str
         :param long_lived_token: The long lived token retrieved from the configuration pane
@@ -50,7 +50,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta,
         :type emhass_conf: dict
         :param logger: The passed logger object
         :type logger: logging object
-        :param get_data_from_file: Select if data should be retrieved from a
+        :param get_data_from_file: Select if data should be retrieved from a
             previously saved pickle useful for testing or directly from connection to
             hass database
         :type get_data_from_file: bool, optional
@@ -65,9 +65,14 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta,
         self.logger = logger
         self.get_data_from_file = get_data_from_file

-    def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: Optional[bool] = False,
-                 significant_changes_only: Optional[bool] = False,
-                 test_url: Optional[str] = 'empty') -> None:
+    def get_data(
+        self,
+        days_list: pd.date_range,
+        var_list: list,
+        minimal_response: Optional[bool] = False,
+        significant_changes_only: Optional[bool] = False,
+        test_url: Optional[str] = "empty",
+    ) -> None:
         r"""
         Retrieve the actual data from hass.

@@ -92,20 +97,36 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O
         """
         self.logger.info("Retrieve hass get data method initiated...")
         self.df_final = pd.DataFrame()
-        x = 0 #iterate based on days
+        x = 0  # iterate based on days
         # Looping on each day from days list
         for day in days_list:
-
+
             for i, var in enumerate(var_list):
-
-                if test_url == 'empty':
-                    if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API
-                        url = self.hass_url+"/history/period/"+day.isoformat()+"?filter_entity_id="+var
-                    else: # Otherwise the Home Assistant Core API it is
-                        url = self.hass_url+"api/history/period/"+day.isoformat()+"?filter_entity_id="+var
-                    if minimal_response: # A support for minimal response
+
+                if test_url == "empty":
+                    if (
+                        self.hass_url == "http://supervisor/core/api"
+                    ):  # If we are using the supervisor API
+                        url = (
+                            self.hass_url
+                            + "/history/period/"
+                            + day.isoformat()
+                            + "?filter_entity_id="
+                            + var
+                        )
+                    else:  # Otherwise the Home Assistant Core API it is
+                        url = (
+                            self.hass_url
+                            + "api/history/period/"
+                            + day.isoformat()
+                            + "?filter_entity_id="
+                            + var
+                        )
+                    if minimal_response:  # A support for minimal response
                         url = url + "?minimal_response"
-                    if significant_changes_only: # And for signicant changes only (check the HASS restful API for more info)
+                    if (
+                        significant_changes_only
+                    ):  # And for significant changes only (check the HASS restful API for more info)
                         url = url + "?significant_changes_only"
                 else:
                     url = test_url
             try:
                 response = get(url, headers=headers)
             except Exception:
-                self.logger.error("Unable to access Home Assistance instance, check URL")
-                self.logger.error("If using addon, try setting url and token to 'empty'")
+                self.logger.error(
+                    "Unable to access Home Assistant instance, check URL"
+                )
+                self.logger.error(
+                    "If using addon, try setting url and token to 'empty'"
+                )
                 return False
             else:
                 if response.status_code == 401:
-                    self.logger.error("Unable to access Home Assistance instance, TOKEN/KEY")
-                    self.logger.error("If using addon, try setting url and token to 'empty'")
+                    self.logger.error(
+                        "Unable to access Home Assistant instance, TOKEN/KEY"
+                    )
+                    self.logger.error(
+                        "If using addon, try setting url and token to 'empty'"
+                    )
                     return False
                 if response.status_code > 299:
                     return f"Request Get Error: {response.status_code}"
-            '''import bz2 # Uncomment to save a serialized data for tests
+            """import bz2  # Uncomment to save a serialized data for tests
             import _pickle as cPickle
             with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f:
-                cPickle.dump(response, f)'''
-            try: # Sometimes when there are connection problems we need to catch empty retrieved json
+                cPickle.dump(response, f)"""
+            try:  # Sometimes when there are connection problems we need to catch empty retrieved json
                 data = response.json()[0]
             except IndexError:
                 if x == 0:
             df_raw = pd.DataFrame.from_dict(data)
             # self.logger.info(str(df_raw))
             if len(df_raw) == 0:
-                if x is 0:
-                    self.logger.error("The retrieved Dataframe is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct")
+                if x == 0:
+                    self.logger.error(
+                        "The retrieved Dataframe is empty, a sensor: "
+                        + var
+                        + " may have 0 days of history or the passed sensor may not be correct"
+                    )
                 else:
                     self.logger.error("Retrieved empty Dataframe for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)")
                 return False
                     format='%Y-%d-%m %H:%M').round(self.freq, ambiguous='infer', nonexistent='shift_forward')
                 df_day = pd.DataFrame(index = ts)
                 # Caution with undefined string data: unknown, unavailable, etc.
-                df_tp = df_raw.copy()[['state']].replace(
-                    ['unknown', 'unavailable', ''], np.nan).astype(float).rename(columns={'state': var})
+                df_tp = (
+                    df_raw.copy()[["state"]]
+                    .replace(["unknown", "unavailable", ""], np.nan)
+                    .astype(float)
+                    .rename(columns={"state": var})
+                )
                 # Setting index, resampling and concatenation
-                df_tp.set_index(pd.to_datetime(df_raw['last_changed'], format="ISO8601"), inplace=True)
+                df_tp.set_index(
+                    pd.to_datetime(df_raw["last_changed"], format="ISO8601"),
+                    inplace=True,
+                )
                 df_tp = df_tp.resample(self.freq).mean()
                 df_day = pd.concat([df_day, df_tp], axis=1)
             self.df_final = pd.concat([self.df_final, df_day], axis=0)
@@ -196,18 +236,24 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set
         """
         try:
-            if load_negative: # Apply the correct sign to load power
-                self.df_final[var_load+'_positive'] = -self.df_final[var_load]
+            if load_negative:  # Apply the correct sign to load power
+                self.df_final[var_load + "_positive"] = -self.df_final[var_load]
             else:
-                self.df_final[var_load+'_positive'] = self.df_final[var_load]
+                self.df_final[var_load + "_positive"] = self.df_final[var_load]
             self.df_final.drop([var_load], inplace=True, axis=1)
         except KeyError:
-            self.logger.error("Variable "+var_load+" was not found. This is typically because no data could be retrieved from Home Assistant")
+            self.logger.error(
+                "Variable "
+                + var_load
+                + " was not found. 
This is typically because no data could be retrieved from Home Assistant" + ) return False except ValueError: - self.logger.error("sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same") - return False - if set_zero_min: # Apply minimum values + self.logger.error( + "sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same" + ) + return False + if set_zero_min: # Apply minimum values self.df_final.clip(lower=0.0, inplace=True, axis=1) self.df_final.replace(to_replace=0.0, value=np.nan, inplace=True) new_var_replace_zero = [] @@ -215,59 +261,74 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set # Just changing the names of variables to contain the fact that they are considered positive if var_replace_zero is not None: for string in var_replace_zero: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_replace_zero.append(new_string) else: new_var_replace_zero = None if var_interp is not None: for string in var_interp: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_interp.append(new_string) else: new_var_interp = None # Treating NaN replacement: either by zeros or by linear interpolation if new_var_replace_zero is not None: - self.df_final[new_var_replace_zero] = self.df_final[new_var_replace_zero].fillna(0.0) + self.df_final[new_var_replace_zero] = self.df_final[ + new_var_replace_zero + ].fillna(0.0) if new_var_interp is not None: self.df_final[new_var_interp] = self.df_final[new_var_interp].interpolate( - method='linear', axis=0, limit=None) + method="linear", axis=0, limit=None + ) self.df_final[new_var_interp] = self.df_final[new_var_interp].fillna(0.0) # Setting the correct time zone on DF index if self.time_zone is not None: self.df_final.index = self.df_final.index.tz_convert(self.time_zone) # Drop datetimeindex duplicates on final DF - self.df_final = self.df_final[~self.df_final.index.duplicated(keep='first')] + self.df_final = self.df_final[~self.df_final.index.duplicated(keep="first")] return True - + @staticmethod - def get_attr_data_dict(data_df: pd.DataFrame, idx: int, entity_id: str, - unit_of_measurement: str, friendly_name: str, - list_name: str, state: float) -> dict: - list_df = copy.deepcopy(data_df).loc[data_df.index[idx]:].reset_index() - list_df.columns = ['timestamps', entity_id] - ts_list = [str(i) for i in list_df['timestamps'].tolist()] - vals_list = [str(np.round(i,2)) for i in list_df[entity_id].tolist()] + def get_attr_data_dict( + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + list_name: str, + state: float, + ) -> dict: + list_df = copy.deepcopy(data_df).loc[data_df.index[idx] :].reset_index() + list_df.columns = ["timestamps", entity_id] + ts_list = [str(i) for i in list_df["timestamps"].tolist()] + vals_list = [str(np.round(i, 2)) for i in list_df[entity_id].tolist()] forecast_list = [] for i, ts in enumerate(ts_list): datum = {} datum["date"] = ts - datum[entity_id.split('sensor.')[1]] = vals_list[i] + datum[entity_id.split("sensor.")[1]] = vals_list[i] forecast_list.append(datum) data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, "friendly_name": friendly_name, - list_name: forecast_list - } + list_name: forecast_list, + }, } return data - - def post_data(self, data_df: pd.DataFrame, idx: int, 
entity_id: str, - unit_of_measurement: str, friendly_name: str, - type_var: str, - from_mlforecaster: Optional[bool]=False, - publish_prefix: Optional[str]="") -> None: + + def post_data( + self, + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + type_var: str, + from_mlforecaster: Optional[bool] = False, + publish_prefix: Optional[str] = "", + ) -> None: r""" Post passed data to hass. @@ -290,82 +351,139 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, """ # Add a possible prefix to the entity ID - entity_id = entity_id.replace('sensor.', 'sensor.'+publish_prefix) + entity_id = entity_id.replace("sensor.", "sensor." + publish_prefix) # Set the URL - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/states/"+entity_id - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/states/"+entity_id + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = self.hass_url + "/states/" + entity_id + else: # Otherwise the Home Assistant Core API it is + url = self.hass_url + "api/states/" + entity_id headers = { "Authorization": "Bearer " + self.long_lived_token, "content-type": "application/json", } # Preparing the data dict to be published - if type_var == 'cost_fun': - state = np.round(data_df.sum()[0],2) - elif type_var == 'unit_load_cost' or type_var == 'unit_prod_price': - state = np.round(data_df.loc[data_df.index[idx]],4) - elif type_var == 'optim_status': + if type_var == "cost_fun": + state = np.round(data_df.sum()[0], 2) + elif type_var == "unit_load_cost" or type_var == "unit_prod_price": + state = np.round(data_df.loc[data_df.index[idx]], 4) + elif type_var == "optim_status": state = data_df.loc[data_df.index[idx]] - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": state = data_df[idx] else: - state = np.round(data_df.loc[data_df.index[idx]],2) - if type_var == 'power': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "forecasts", state) - elif type_var == 'deferrable': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "deferrables_schedule", state) - elif type_var == 'batt': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_power", state) - elif type_var == 'SOC': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_soc", state) - elif type_var == 'unit_load_cost': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_load_cost_forecasts", state) - elif type_var == 'unit_prod_price': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_prod_price_forecasts", state) - elif type_var == 'mlforecaster': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "scheduled_forecast", state) - elif type_var == 'optim_status': + state = np.round(data_df.loc[data_df.index[idx]], 2) + if type_var == "power": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "forecasts", + state, + ) + elif type_var == "deferrable": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + 
unit_of_measurement, + friendly_name, + "deferrables_schedule", + state, + ) + elif type_var == "batt": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_power", + state, + ) + elif type_var == "SOC": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_soc", + state, + ) + elif type_var == "unit_load_cost": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_load_cost_forecasts", + state, + ) + elif type_var == "unit_prod_price": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_prod_price_forecasts", + state, + ) + elif type_var == "mlforecaster": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "scheduled_forecast", + state, + ) + elif type_var == "optim_status": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } else: data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } # Actually post the data if self.get_data_from_file: - class response: pass + + class response: + pass + response.status_code = 200 response.ok = True else: response = post(url, headers=headers, data=json.dumps(data)) # Treating the response status and posting them on the logger if response.ok: - self.logger.info("Successfully posted to "+entity_id+" = "+str(state)) + self.logger.info("Successfully posted to " + entity_id + " = " + str(state)) else: - self.logger.info("The status code for received curl command response is: "+str(response.status_code)) + self.logger.info( + "The status code for received curl command response is: " + + str(response.status_code) + ) return response, data diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 38a4e424..6d953ae6 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -2,10 +2,19 @@ # -*- coding: utf-8 -*- from typing import Tuple, Optional -import numpy as np, pandas as pd -import yaml, pytz, logging, pathlib, json, copy from datetime import datetime, timedelta, timezone +import logging +import pathlib +import json +import copy +import numpy as np +import pandas as pd +import yaml +import pytz + + import plotly.express as px + pd.options.plotting.backend = "plotly" from emhass.machine_learning_forecaster import MLForecaster @@ -14,13 +23,13 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: """ Get the root absolute path of the working directory. 
- + :param file: The passed file path with __file__ :return: The root path :param num_parent: The number of parents levels up to desired root folder :type num_parent: int, optional :rtype: str - + """ if num_parent == 3: root = pathlib.Path(file).resolve().parent.parent.parent @@ -36,7 +45,7 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = logging_level: Optional[str] = "DEBUG") -> Tuple[logging.Logger, logging.StreamHandler]: """ Create a simple logger object. - + :param fun_name: The Python function object name where the logger will be used :type fun_name: str :param emhass_conf: Dictionary containing the needed emhass paths @@ -45,9 +54,9 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = :type save_to_file: bool, optional :return: The logger object and the handler :rtype: object - + """ - # create logger object + # create logger object logger = logging.getLogger(fun_name) logger.propagate = True logger.fileSetting = save_to_file @@ -70,14 +79,18 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = else: logger.setLevel(logging.DEBUG) ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) return logger, ch -def get_forecast_dates(freq: int, delta_forecast: int, - timedelta_days: Optional[int] = 0) -> pd.core.indexes.datetimes.DatetimeIndex: + +def get_forecast_dates( + freq: int, delta_forecast: int, timedelta_days: Optional[int] = 0 +) -> pd.core.indexes.datetimes.DatetimeIndex: """ Get the date_range list of the needed future dates using the delta_forecast parameter. @@ -89,7 +102,7 @@ def get_forecast_dates(freq: int, delta_forecast: int, :type timedelta_days: Optional[int], optional :return: A list of future forecast dates. :rtype: pd.core.indexes.datetimes.DatetimeIndex - + """ freq = pd.to_timedelta(freq, "minutes") start_forecast = pd.Timestamp(datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) @@ -99,11 +112,19 @@ def get_forecast_dates(freq: int, delta_forecast: int, freq=freq).round(freq, ambiguous='infer', nonexistent='shift_forward') return forecast_dates -def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict, - set_type: str, logger: logging.Logger) -> Tuple[str, dict]: + +def treat_runtimeparams( + runtimeparams: str, + params: str, + retrieve_hass_conf: dict, + optim_conf: dict, + plant_conf: dict, + set_type: str, + logger: logging.Logger, +) -> Tuple[str, dict]: """ - Treat the passed optimization runtime parameters. - + Treat the passed optimization runtime parameters. + :param runtimeparams: Json string containing the runtime parameters dict. :type runtimeparams: str :param params: Configuration parameters passed from data/options.json @@ -120,115 +141,155 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic :type logger: logging.Logger :return: Returning the params and optimization parameter container. 
:rtype: Tuple[str, dict] - + """ - if (params != None) and (params != 'null'): + if (params != None) and (params != "null"): params = json.loads(params) else: params = {} # Some default data needed custom_deferrable_forecast_id = [] - for k in range(optim_conf['num_def_loads']): - custom_deferrable_forecast_id.append({ - "entity_id": "sensor.p_deferrable{}".format(k), - "unit_of_measurement": "W", - "friendly_name": "Deferrable Load {}".format(k) - }) - default_passed_dict = {'custom_pv_forecast_id': {"entity_id": "sensor.p_pv_forecast", "unit_of_measurement": "W", "friendly_name": "PV Power Forecast"}, - 'custom_load_forecast_id': {"entity_id": "sensor.p_load_forecast", "unit_of_measurement": "W", "friendly_name": "Load Power Forecast"}, - 'custom_batt_forecast_id': {"entity_id": "sensor.p_batt_forecast", "unit_of_measurement": "W", "friendly_name": "Battery Power Forecast"}, - 'custom_batt_soc_forecast_id': {"entity_id": "sensor.soc_batt_forecast", "unit_of_measurement": "%", "friendly_name": "Battery SOC Forecast"}, - 'custom_grid_forecast_id': {"entity_id": "sensor.p_grid_forecast", "unit_of_measurement": "W", "friendly_name": "Grid Power Forecast"}, - 'custom_cost_fun_id': {"entity_id": "sensor.total_cost_fun_value", "unit_of_measurement": "", "friendly_name": "Total cost function value"}, - 'custom_optim_status_id': {"entity_id": "sensor.optim_status", "unit_of_measurement": "", "friendly_name": "EMHASS optimization status"}, - 'custom_unit_load_cost_id': {"entity_id": "sensor.unit_load_cost", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Load Cost"}, - 'custom_unit_prod_price_id': {"entity_id": "sensor.unit_prod_price", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Prod Price"}, - 'custom_deferrable_forecast_id': custom_deferrable_forecast_id, - 'publish_prefix': ""} - if 'passed_data' in params.keys(): + for k in range(optim_conf["num_def_loads"]): + custom_deferrable_forecast_id.append( + { + "entity_id": "sensor.p_deferrable{}".format(k), + "unit_of_measurement": "W", + "friendly_name": "Deferrable Load {}".format(k), + } + ) + default_passed_dict = { + "custom_pv_forecast_id": { + "entity_id": "sensor.p_pv_forecast", + "unit_of_measurement": "W", + "friendly_name": "PV Power Forecast", + }, + "custom_load_forecast_id": { + "entity_id": "sensor.p_load_forecast", + "unit_of_measurement": "W", + "friendly_name": "Load Power Forecast", + }, + "custom_batt_forecast_id": { + "entity_id": "sensor.p_batt_forecast", + "unit_of_measurement": "W", + "friendly_name": "Battery Power Forecast", + }, + "custom_batt_soc_forecast_id": { + "entity_id": "sensor.soc_batt_forecast", + "unit_of_measurement": "%", + "friendly_name": "Battery SOC Forecast", + }, + "custom_grid_forecast_id": { + "entity_id": "sensor.p_grid_forecast", + "unit_of_measurement": "W", + "friendly_name": "Grid Power Forecast", + }, + "custom_cost_fun_id": { + "entity_id": "sensor.total_cost_fun_value", + "unit_of_measurement": "", + "friendly_name": "Total cost function value", + }, + "custom_optim_status_id": { + "entity_id": "sensor.optim_status", + "unit_of_measurement": "", + "friendly_name": "EMHASS optimization status", + }, + "custom_unit_load_cost_id": { + "entity_id": "sensor.unit_load_cost", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Load Cost", + }, + "custom_unit_prod_price_id": { + "entity_id": "sensor.unit_prod_price", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Prod Price", + }, + "custom_deferrable_forecast_id": custom_deferrable_forecast_id, + "publish_prefix": 
"", + } + if "passed_data" in params.keys(): for key, value in default_passed_dict.items(): - params['passed_data'][key] = value + params["passed_data"][key] = value else: - params['passed_data'] = default_passed_dict + params["passed_data"] = default_passed_dict if runtimeparams is not None: runtimeparams = json.loads(runtimeparams) - freq = int(retrieve_hass_conf['freq'].seconds/60.0) - delta_forecast = int(optim_conf['delta_forecast'].days) + freq = int(retrieve_hass_conf["freq"].seconds / 60.0) + delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) if set_type == "regressor-model-fit": - csv_file = runtimeparams['csv_file'] - independent_variables = runtimeparams['independent_variables'] - dependent_variable = runtimeparams['dependent_variable'] - params['passed_data']['csv_file'] = csv_file - params['passed_data']['independent_variables'] = independent_variables - params['passed_data']['dependent_variable'] = dependent_variable - if 'timestamp' not in runtimeparams.keys(): - params['passed_data']['timestamp'] = None + csv_file = runtimeparams["csv_file"] + features = runtimeparams["features"] + target = runtimeparams["target"] + params["passed_data"]["csv_file"] = csv_file + params["passed_data"]["features"] = features + params["passed_data"]["target"] = target + if "timestamp" not in runtimeparams.keys(): + params["passed_data"]["timestamp"] = None else: - timestamp = runtimeparams['timestamp'] - params['passed_data']['timestamp'] = timestamp - if 'date_features' not in runtimeparams.keys(): - params['passed_data']['date_features'] = [] + timestamp = runtimeparams["timestamp"] + params["passed_data"]["timestamp"] = timestamp + if "date_features" not in runtimeparams.keys(): + params["passed_data"]["date_features"] = [] else: - date_features = runtimeparams['date_features'] - params['passed_data']['date_features'] = date_features - + date_features = runtimeparams["date_features"] + params["passed_data"]["date_features"] = date_features + if set_type == "regressor-model-predict": - new_values = runtimeparams['new_values'] - params['passed_data']['new_values'] = new_values + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values # Treating special data passed for MPC control case - if set_type == 'naive-mpc-optim': - if 'prediction_horizon' not in runtimeparams.keys(): - prediction_horizon = 10 # 10 time steps by default + if set_type == "naive-mpc-optim": + if "prediction_horizon" not in runtimeparams.keys(): + prediction_horizon = 10 # 10 time steps by default else: - prediction_horizon = runtimeparams['prediction_horizon'] - params['passed_data']['prediction_horizon'] = prediction_horizon - if 'soc_init' not in runtimeparams.keys(): - soc_init = plant_conf['SOCtarget'] + prediction_horizon = runtimeparams["prediction_horizon"] + params["passed_data"]["prediction_horizon"] = prediction_horizon + if "soc_init" not in runtimeparams.keys(): + soc_init = plant_conf["SOCtarget"] else: - soc_init = runtimeparams['soc_init'] - params['passed_data']['soc_init'] = soc_init - if 'soc_final' not in runtimeparams.keys(): - soc_final = plant_conf['SOCtarget'] + soc_init = runtimeparams["soc_init"] + params["passed_data"]["soc_init"] = soc_init + if "soc_final" not in runtimeparams.keys(): + soc_final = plant_conf["SOCtarget"] else: - soc_final = runtimeparams['soc_final'] - params['passed_data']['soc_final'] = soc_final - if 'def_total_hours' not in runtimeparams.keys(): - def_total_hours = 
optim_conf['def_total_hours'] + soc_final = runtimeparams["soc_final"] + params["passed_data"]["soc_final"] = soc_final + if "def_total_hours" not in runtimeparams.keys(): + def_total_hours = optim_conf["def_total_hours"] else: - def_total_hours = runtimeparams['def_total_hours'] - params['passed_data']['def_total_hours'] = def_total_hours - if 'def_start_timestep' not in runtimeparams.keys(): - def_start_timestep = optim_conf['def_start_timestep'] + def_total_hours = runtimeparams["def_total_hours"] + params["passed_data"]["def_total_hours"] = def_total_hours + if "def_start_timestep" not in runtimeparams.keys(): + def_start_timestep = optim_conf["def_start_timestep"] else: - def_start_timestep = runtimeparams['def_start_timestep'] - params['passed_data']['def_start_timestep'] = def_start_timestep - if 'def_end_timestep' not in runtimeparams.keys(): - def_end_timestep = optim_conf['def_end_timestep'] + def_start_timestep = runtimeparams["def_start_timestep"] + params["passed_data"]["def_start_timestep"] = def_start_timestep + if "def_end_timestep" not in runtimeparams.keys(): + def_end_timestep = optim_conf["def_end_timestep"] else: - def_end_timestep = runtimeparams['def_end_timestep'] - params['passed_data']['def_end_timestep'] = def_end_timestep - if 'alpha' not in runtimeparams.keys(): + def_end_timestep = runtimeparams["def_end_timestep"] + params["passed_data"]["def_end_timestep"] = def_end_timestep + if "alpha" not in runtimeparams.keys(): alpha = 0.5 else: - alpha = runtimeparams['alpha'] - params['passed_data']['alpha'] = alpha - if 'beta' not in runtimeparams.keys(): + alpha = runtimeparams["alpha"] + params["passed_data"]["alpha"] = alpha + if "beta" not in runtimeparams.keys(): beta = 0.5 else: - beta = runtimeparams['beta'] - params['passed_data']['beta'] = beta + beta = runtimeparams["beta"] + params["passed_data"]["beta"] = beta forecast_dates = copy.deepcopy(forecast_dates)[0:prediction_horizon] else: - params['passed_data']['prediction_horizon'] = None - params['passed_data']['soc_init'] = None - params['passed_data']['soc_final'] = None - params['passed_data']['def_total_hours'] = None - params['passed_data']['def_start_timestep'] = None - params['passed_data']['def_end_timestep'] = None - params['passed_data']['alpha'] = None - params['passed_data']['beta'] = None + params["passed_data"]["prediction_horizon"] = None + params["passed_data"]["soc_init"] = None + params["passed_data"]["soc_final"] = None + params["passed_data"]["def_total_hours"] = None + params["passed_data"]["def_start_timestep"] = None + params["passed_data"]["def_end_timestep"] = None + params["passed_data"]["alpha"] = None + params["passed_data"]["beta"] = None # Treat passed forecast data lists list_forecast_key = ['pv_power_forecast', 'load_power_forecast', 'load_cost_forecast', 'prod_price_forecast'] forecast_methods = ['weather_forecast_method', 'load_forecast_method', 'load_cost_forecast_method', 'prod_price_forecast_method'] @@ -248,134 +309,177 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: params['passed_data'][forecast_key] = None # Treat passed data for forecast model fit/predict/tune at runtime - if 'days_to_retrieve' not in runtimeparams.keys(): + if "days_to_retrieve" not in runtimeparams.keys(): days_to_retrieve = 9 else: - days_to_retrieve = runtimeparams['days_to_retrieve'] - params['passed_data']['days_to_retrieve'] = days_to_retrieve - if 'model_type' not in runtimeparams.keys(): + days_to_retrieve = runtimeparams["days_to_retrieve"] + 
params["passed_data"]["days_to_retrieve"] = days_to_retrieve + if "model_type" not in runtimeparams.keys(): model_type = "load_forecast" else: - model_type = runtimeparams['model_type'] - params['passed_data']['model_type'] = model_type - if 'var_model' not in runtimeparams.keys(): + model_type = runtimeparams["model_type"] + params["passed_data"]["model_type"] = model_type + if "var_model" not in runtimeparams.keys(): var_model = "sensor.power_load_no_var_loads" else: - var_model = runtimeparams['var_model'] - params['passed_data']['var_model'] = var_model - if 'sklearn_model' not in runtimeparams.keys(): + var_model = runtimeparams["var_model"] + params["passed_data"]["var_model"] = var_model + if "sklearn_model" not in runtimeparams.keys(): sklearn_model = "KNeighborsRegressor" else: - sklearn_model = runtimeparams['sklearn_model'] - params['passed_data']['sklearn_model'] = sklearn_model - if 'num_lags' not in runtimeparams.keys(): + sklearn_model = runtimeparams["sklearn_model"] + params["passed_data"]["sklearn_model"] = sklearn_model + if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: - num_lags = runtimeparams['num_lags'] - params['passed_data']['num_lags'] = num_lags - if 'split_date_delta' not in runtimeparams.keys(): - split_date_delta = '48h' + num_lags = runtimeparams["num_lags"] + params["passed_data"]["num_lags"] = num_lags + if "split_date_delta" not in runtimeparams.keys(): + split_date_delta = "48h" else: - split_date_delta = runtimeparams['split_date_delta'] - params['passed_data']['split_date_delta'] = split_date_delta - if 'perform_backtest' not in runtimeparams.keys(): + split_date_delta = runtimeparams["split_date_delta"] + params["passed_data"]["split_date_delta"] = split_date_delta + if "perform_backtest" not in runtimeparams.keys(): perform_backtest = False else: - perform_backtest = eval(str(runtimeparams['perform_backtest']).capitalize()) - params['passed_data']['perform_backtest'] = perform_backtest - if 'model_predict_publish' not in runtimeparams.keys(): + perform_backtest = eval(str(runtimeparams["perform_backtest"]).capitalize()) + params["passed_data"]["perform_backtest"] = perform_backtest + if "model_predict_publish" not in runtimeparams.keys(): model_predict_publish = False else: - model_predict_publish = eval(str(runtimeparams['model_predict_publish']).capitalize()) - params['passed_data']['model_predict_publish'] = model_predict_publish - if 'model_predict_entity_id' not in runtimeparams.keys(): + model_predict_publish = eval( + str(runtimeparams["model_predict_publish"]).capitalize() + ) + params["passed_data"]["model_predict_publish"] = model_predict_publish + if "model_predict_entity_id" not in runtimeparams.keys(): model_predict_entity_id = "sensor.p_load_forecast_custom_model" else: - model_predict_entity_id = runtimeparams['model_predict_entity_id'] - params['passed_data']['model_predict_entity_id'] = model_predict_entity_id - if 'model_predict_unit_of_measurement' not in runtimeparams.keys(): + model_predict_entity_id = runtimeparams["model_predict_entity_id"] + params["passed_data"]["model_predict_entity_id"] = model_predict_entity_id + if "model_predict_unit_of_measurement" not in runtimeparams.keys(): model_predict_unit_of_measurement = "W" else: - model_predict_unit_of_measurement = runtimeparams['model_predict_unit_of_measurement'] - params['passed_data']['model_predict_unit_of_measurement'] = model_predict_unit_of_measurement - if 'model_predict_friendly_name' not in runtimeparams.keys(): + model_predict_unit_of_measurement = 
runtimeparams[ + "model_predict_unit_of_measurement" + ] + params["passed_data"][ + "model_predict_unit_of_measurement" + ] = model_predict_unit_of_measurement + if "model_predict_friendly_name" not in runtimeparams.keys(): model_predict_friendly_name = "Load Power Forecast custom ML model" else: - model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] - params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'mlr_predict_entity_id' not in runtimeparams.keys(): + model_predict_friendly_name = runtimeparams["model_predict_friendly_name"] + params["passed_data"][ + "model_predict_friendly_name" + ] = model_predict_friendly_name + if "mlr_predict_entity_id" not in runtimeparams.keys(): mlr_predict_entity_id = "sensor.mlr_predict" else: - mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] - params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id - if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_entity_id = runtimeparams["mlr_predict_entity_id"] + params["passed_data"]["mlr_predict_entity_id"] = mlr_predict_entity_id + if "mlr_predict_unit_of_measurement" not in runtimeparams.keys(): mlr_predict_unit_of_measurement = None else: - mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] - params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement - if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = runtimeparams[ + "mlr_predict_unit_of_measurement" + ] + params["passed_data"][ + "mlr_predict_unit_of_measurement" + ] = mlr_predict_unit_of_measurement + if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - 
optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant configuration parameters passed at runtime - if 'SOCtarget' in runtimeparams.keys(): - plant_conf['SOCtarget'] = runtimeparams['SOCtarget'] + if "SOCtarget" in runtimeparams.keys(): + plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] # Treat custom entities id's and friendly names for variables - if 'custom_pv_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_pv_forecast_id'] = runtimeparams['custom_pv_forecast_id'] - if 'custom_load_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_load_forecast_id'] = runtimeparams['custom_load_forecast_id'] - if 'custom_batt_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_forecast_id'] = runtimeparams['custom_batt_forecast_id'] - if 'custom_batt_soc_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_soc_forecast_id'] = runtimeparams['custom_batt_soc_forecast_id'] - if 'custom_grid_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_grid_forecast_id'] = runtimeparams['custom_grid_forecast_id'] - if 'custom_cost_fun_id' in runtimeparams.keys(): - params['passed_data']['custom_cost_fun_id'] = runtimeparams['custom_cost_fun_id'] - if 'custom_optim_status_id' in runtimeparams.keys(): - params['passed_data']['custom_optim_status_id'] = 
runtimeparams['custom_optim_status_id'] - if 'custom_unit_load_cost_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_load_cost_id'] = runtimeparams['custom_unit_load_cost_id'] - if 'custom_unit_prod_price_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_prod_price_id'] = runtimeparams['custom_unit_prod_price_id'] - if 'custom_deferrable_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_deferrable_forecast_id'] = runtimeparams['custom_deferrable_forecast_id'] + if "custom_pv_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_pv_forecast_id"] = runtimeparams[ + "custom_pv_forecast_id" + ] + if "custom_load_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_load_forecast_id"] = runtimeparams[ + "custom_load_forecast_id" + ] + if "custom_batt_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_forecast_id"] = runtimeparams[ + "custom_batt_forecast_id" + ] + if "custom_batt_soc_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_soc_forecast_id"] = runtimeparams[ + "custom_batt_soc_forecast_id" + ] + if "custom_grid_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_grid_forecast_id"] = runtimeparams[ + "custom_grid_forecast_id" + ] + if "custom_cost_fun_id" in runtimeparams.keys(): + params["passed_data"]["custom_cost_fun_id"] = runtimeparams[ + "custom_cost_fun_id" + ] + if "custom_optim_status_id" in runtimeparams.keys(): + params["passed_data"]["custom_optim_status_id"] = runtimeparams[ + "custom_optim_status_id" + ] + if "custom_unit_load_cost_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_load_cost_id"] = runtimeparams[ + "custom_unit_load_cost_id" + ] + if "custom_unit_prod_price_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_prod_price_id"] = runtimeparams[ + "custom_unit_prod_price_id" + ] + if "custom_deferrable_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_deferrable_forecast_id"] = runtimeparams[ + "custom_deferrable_forecast_id" + ] # A condition to put a prefix on all published data - if 'publish_prefix' not in runtimeparams.keys(): + if "publish_prefix" not in runtimeparams.keys(): publish_prefix = "" else: - publish_prefix = runtimeparams['publish_prefix'] - params['passed_data']['publish_prefix'] = publish_prefix + publish_prefix = runtimeparams["publish_prefix"] + params["passed_data"]["publish_prefix"] = publish_prefix # Serialize the final params params = json.dumps(params) return params, retrieve_hass_conf, optim_conf, plant_conf @@ -406,40 +510,45 @@ def get_yaml_parse(emhass_conf: dict, use_secrets: Optional[bool] = True, with open(emhass_conf["root_path"] / 'secrets_emhass.yaml', 'r') as file: #assume secrets file is in root path input_secrets = yaml.load(file, Loader=yaml.FullLoader) else: - input_secrets = input_conf.pop('params_secrets', None) - - if (type(input_conf['retrieve_hass_conf']) == list): #if using old config version - retrieve_hass_conf = dict({key:d[key] for d in input_conf['retrieve_hass_conf'] for key in d}) + input_secrets = input_conf.pop("params_secrets", None) + + if type(input_conf["retrieve_hass_conf"]) == list: # if using old config version + retrieve_hass_conf = dict( + {key: d[key] for d in input_conf["retrieve_hass_conf"] for key in d} + ) else: - retrieve_hass_conf = input_conf.get('retrieve_hass_conf', {}) - + retrieve_hass_conf = input_conf.get("retrieve_hass_conf", {}) + if use_secrets: 
retrieve_hass_conf.update(input_secrets) else: - retrieve_hass_conf['hass_url'] = 'http://supervisor/core/api' - retrieve_hass_conf['long_lived_token'] = '${SUPERVISOR_TOKEN}' - retrieve_hass_conf['time_zone'] = 'Europe/Paris' - retrieve_hass_conf['lat'] = 45.83 - retrieve_hass_conf['lon'] = 6.86 - retrieve_hass_conf['alt'] = 4807.8 - retrieve_hass_conf['freq'] = pd.to_timedelta(retrieve_hass_conf['freq'], "minutes") - retrieve_hass_conf['time_zone'] = pytz.timezone(retrieve_hass_conf['time_zone']) - - if (type(input_conf['optim_conf']) == list): - optim_conf = dict({key:d[key] for d in input_conf['optim_conf'] for key in d}) + retrieve_hass_conf["hass_url"] = "http://supervisor/core/api" + retrieve_hass_conf["long_lived_token"] = "${SUPERVISOR_TOKEN}" + retrieve_hass_conf["time_zone"] = "Europe/Paris" + retrieve_hass_conf["lat"] = 45.83 + retrieve_hass_conf["lon"] = 6.86 + retrieve_hass_conf["alt"] = 4807.8 + retrieve_hass_conf["freq"] = pd.to_timedelta(retrieve_hass_conf["freq"], "minutes") + retrieve_hass_conf["time_zone"] = pytz.timezone(retrieve_hass_conf["time_zone"]) + + if type(input_conf["optim_conf"]) == list: + optim_conf = dict({key: d[key] for d in input_conf["optim_conf"] for key in d}) else: - optim_conf = input_conf.get('optim_conf', {}) + optim_conf = input_conf.get("optim_conf", {}) - optim_conf['list_hp_periods'] = dict((key,d[key]) for d in optim_conf['list_hp_periods'] for key in d) - optim_conf['delta_forecast'] = pd.Timedelta(days=optim_conf['delta_forecast']) - - if (type(input_conf['plant_conf']) == list): - plant_conf = dict({key:d[key] for d in input_conf['plant_conf'] for key in d}) + optim_conf["list_hp_periods"] = dict( + (key, d[key]) for d in optim_conf["list_hp_periods"] for key in d + ) + optim_conf["delta_forecast"] = pd.Timedelta(days=optim_conf["delta_forecast"]) + + if type(input_conf["plant_conf"]) == list: + plant_conf = dict({key: d[key] for d in input_conf["plant_conf"] for key in d}) else: - plant_conf = input_conf.get('plant_conf', {}) - + plant_conf = input_conf.get("plant_conf", {}) + return retrieve_hass_conf, optim_conf, plant_conf + def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dict: """ Build a dictionary with graphs and tables for the webui. 
@@ -450,61 +559,86 @@ def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dic :type plot_size: Optional[int], optional :return: A dictionary containing the graphs and tables in html format :rtype: dict - + """ - cols_p = [i for i in df.columns.to_list() if 'P_' in i] + cols_p = [i for i in df.columns.to_list() if "P_" in i] # Let's round the data in the DF - optim_status = df['optim_status'].unique().item() - df.drop('optim_status', axis=1, inplace=True) - cols_else = [i for i in df.columns.to_list() if 'P_' not in i] + optim_status = df["optim_status"].unique().item() + df.drop("optim_status", axis=1, inplace=True) + cols_else = [i for i in df.columns.to_list() if "P_" not in i] df = df.apply(pd.to_numeric) df[cols_p] = df[cols_p].astype(int) df[cols_else] = df[cols_else].round(3) # Create plots n_colors = len(cols_p) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_0 = px.line(df[cols_p], title='Systems powers schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_0.update_layout(xaxis_title='Timestamp', yaxis_title='System powers (W)') - if 'SOC_opt' in df.columns.to_list(): - fig_1 = px.line(df['SOC_opt'], title='Battery state of charge schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_1.update_layout(xaxis_title='Timestamp', yaxis_title='Battery SOC (%)') - cols_cost = [i for i in df.columns.to_list() if 'cost_' in i or 'unit_' in i] + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_0 = px.line( + df[cols_p], + title="Systems powers schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_0.update_layout(xaxis_title="Timestamp", yaxis_title="System powers (W)") + if "SOC_opt" in df.columns.to_list(): + fig_1 = px.line( + df["SOC_opt"], + title="Battery state of charge schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_1.update_layout(xaxis_title="Timestamp", yaxis_title="Battery SOC (%)") + cols_cost = [i for i in df.columns.to_list() if "cost_" in i or "unit_" in i] n_colors = len(cols_cost) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_2 = px.line(df[cols_cost], title='Systems costs obtained from optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_2.update_layout(xaxis_title='Timestamp', yaxis_title='System costs (currency)') + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_2 = px.line( + df[cols_cost], + title="Systems costs obtained from optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_2.update_layout(xaxis_title="Timestamp", yaxis_title="System costs (currency)") # Get full path to image - image_path_0 = fig_0.to_html(full_html=False, default_width='75%') - if 'SOC_opt' in df.columns.to_list(): - image_path_1 = fig_1.to_html(full_html=False, default_width='75%') - image_path_2 = fig_2.to_html(full_html=False, default_width='75%') + image_path_0 = fig_0.to_html(full_html=False, default_width="75%") + if "SOC_opt" in df.columns.to_list(): + image_path_1 = fig_1.to_html(full_html=False, default_width="75%") + image_path_2 = fig_2.to_html(full_html=False, 
default_width="75%")
     # The tables
-    table1 = df.reset_index().to_html(classes='mystyle', index=False)
-    cost_cols = [i for i in df.columns if 'cost_' in i]
+    table1 = df.reset_index().to_html(classes="mystyle", index=False)
+    cost_cols = [i for i in df.columns if "cost_" in i]
     table2 = df[cost_cols].reset_index().sum(numeric_only=True)
-    table2['optim_status'] = optim_status
-    table2 = table2.to_frame(name='Value').reset_index(names='Variable').to_html(classes='mystyle', index=False)
+    table2["optim_status"] = optim_status
+    table2 = (
+        table2.to_frame(name="Value")
+        .reset_index(names="Variable")
+        .to_html(classes="mystyle", index=False)
+    )
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>EMHASS optimization results</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting latest optimization results</h4>'
-    injection_dict['figure_0'] = image_path_0
-    if 'SOC_opt' in df.columns.to_list():
-        injection_dict['figure_1'] = image_path_1
-    injection_dict['figure_2'] = image_path_2
-    injection_dict['subsubtitle1'] = '<h4>Last run optimization results table</h4>'
-    injection_dict['table1'] = table1
-    injection_dict['subsubtitle2'] = '<h4>Summary table for latest optimization results</h4>'
-    injection_dict['table2'] = table2
+    injection_dict["title"] = "<h2>EMHASS optimization results</h2>"
+    injection_dict["subsubtitle0"] = "<h4>Plotting latest optimization results</h4>"
+    injection_dict["figure_0"] = image_path_0
+    if "SOC_opt" in df.columns.to_list():
+        injection_dict["figure_1"] = image_path_1
+    injection_dict["figure_2"] = image_path_2
+    injection_dict["subsubtitle1"] = "<h4>Last run optimization results table</h4>"
+    injection_dict["table1"] = table1
+    injection_dict["subsubtitle2"] = (
+        "<h4>Summary table for latest optimization results</h4>"
+    )
+    injection_dict["table2"] = table2
     return injection_dict
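For reference, the dictionary built by get_injection_dict is nothing more than plotly HTML fragments plus table HTML keyed for the web UI template. A minimal standalone sketch of that flow, using toy data rather than a real optimization result (the column names here are made up):

import pandas as pd
import plotly.express as px

# Toy stand-in for the optimization results DataFrame
df = pd.DataFrame(
    {"P_PV": [0, 1200, 800], "P_Load": [500, 700, 650]},
    index=pd.date_range("2024-01-07", periods=3, freq="30min"),
)
fig = px.line(df, template="presentation", line_shape="hv")
injection_dict = {
    "title": "<h2>EMHASS optimization results</h2>",
    "figure_0": fig.to_html(full_html=False, default_width="75%"),
    "table1": df.reset_index().to_html(classes="mystyle", index=False),
}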

-def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_fit(
+    df_fit_pred: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF fit case.

@@ -516,19 +650,26 @@ def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLFore
     :rtype: dict
     """
     fig = df_fit_pred.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model fit</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting train/test forecast model results for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model fit</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Plotting train/test forecast model results for " + mlf.model_type + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict
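Both injection helpers rely on the plotly plotting backend that utils.py enables at module level, so DataFrame.plot() returns a plotly figure whose HTML fragment can be injected directly into the page. A short illustrative sketch with made-up fit results (the column and axis names are assumptions, not values from this patch):

import pandas as pd

pd.options.plotting.backend = "plotly"  # as set at module level in utils.py

df_fit_pred = pd.DataFrame(
    {"train": [1.0, 1.1, None], "pred": [None, 1.05, 0.98]},
    index=pd.date_range("2024-01-07", periods=3, freq="30min"),
)
fig = df_fit_pred.plot()  # a plotly Figure, not a matplotlib Axes
fig.layout.template = "presentation"
fig.update_yaxes(title_text="load_forecast")
fig.update_xaxes(title_text="Time")
html_fragment = fig.to_html(full_html=False, default_width="75%")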

-def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_tune(
+    df_pred_optim: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF tune case.

@@ -540,19 +681,32 @@ def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLF
     :rtype: dict
     """
     fig = df_pred_optim.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model tune</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Performed a tuning routine using bayesian optimization for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model tune</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Performed a tuning routine using bayesian optimization for "
+        + mlf.model_type
+        + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict

-def build_params(params: dict, params_secrets: dict, options: dict, addon: int, logger: logging.Logger) -> dict:
+
+def build_params(
+    params: dict,
+    params_secrets: dict,
+    options: dict,
+    addon: int,
+    logger: logging.Logger,
+) -> dict:
     """
     Build the main params dictionary from the loaded options.json when using the add-on.

@@ -571,45 +725,120 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int,
     """
     if addon == 1:
         # Updating variables in retrieve_hass_conf
-        params['retrieve_hass_conf']['freq'] = options.get('optimization_time_step',params['retrieve_hass_conf']['freq'])
-        params['retrieve_hass_conf']['days_to_retrieve'] = options.get('historic_days_to_retrieve',params['retrieve_hass_conf']['days_to_retrieve'])
-        params['retrieve_hass_conf']['var_PV'] = options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV'])
-        params['retrieve_hass_conf']['var_load'] = options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])
-        params['retrieve_hass_conf']['load_negative'] = options.get('load_negative',params['retrieve_hass_conf']['load_negative'])
-        params['retrieve_hass_conf']['set_zero_min'] = options.get('set_zero_min',params['retrieve_hass_conf']['set_zero_min'])
-        params['retrieve_hass_conf']['var_replace_zero'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_replace_zero'])]
-        params['retrieve_hass_conf']['var_interp'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']), options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])]
-        params['retrieve_hass_conf']['method_ts_round'] = options.get('method_ts_round',params['retrieve_hass_conf']['method_ts_round'])
+        params["retrieve_hass_conf"]["freq"] = options.get(
+            "optimization_time_step", params["retrieve_hass_conf"]["freq"]
+        )
+        params["retrieve_hass_conf"]["days_to_retrieve"] = options.get(
+            "historic_days_to_retrieve",
+            params["retrieve_hass_conf"]["days_to_retrieve"],
+        )
+        params["retrieve_hass_conf"]["var_PV"] = options.get(
+            "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+        )
+        params["retrieve_hass_conf"]["var_load"] = options.get(
+            "sensor_power_load_no_var_loads", params["retrieve_hass_conf"]["var_load"]
+        )
+        params["retrieve_hass_conf"]["load_negative"] = options.get(
+            "load_negative", params["retrieve_hass_conf"]["load_negative"]
+        )
+        params["retrieve_hass_conf"]["set_zero_min"] = options.get(
+            "set_zero_min", params["retrieve_hass_conf"]["set_zero_min"]
+        )
+        params["retrieve_hass_conf"]["var_replace_zero"] = [
+            options.get(
+                "sensor_power_photovoltaics",
+                params["retrieve_hass_conf"]["var_replace_zero"],
+            )
+        ]
+        params["retrieve_hass_conf"]["var_interp"] = [
+            options.get(
+                "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+            ),
+            options.get(
+                "sensor_power_load_no_var_loads",
+                params["retrieve_hass_conf"]["var_load"],
+            ),
+        ]
+        params["retrieve_hass_conf"]["method_ts_round"] = options.get(
+            "method_ts_round", params["retrieve_hass_conf"]["method_ts_round"]
+        )
         # Update params Secrets if specified
-        params['params_secrets'] = params_secrets
-        params['params_secrets']['time_zone'] = options.get('time_zone',params_secrets['time_zone'])
-        params['params_secrets']['lat'] = options.get('Latitude',params_secrets['lat'])
-        params['params_secrets']['lon'] = options.get('Longitude',params_secrets['lon'])
-        params['params_secrets']['alt'] = 
options.get('Altitude',params_secrets['alt']) + params["params_secrets"] = params_secrets + params["params_secrets"]["time_zone"] = options.get( + "time_zone", params_secrets["time_zone"] + ) + params["params_secrets"]["lat"] = options.get("Latitude", params_secrets["lat"]) + params["params_secrets"]["lon"] = options.get( + "Longitude", params_secrets["lon"] + ) + params["params_secrets"]["alt"] = options.get("Altitude", params_secrets["alt"]) # Updating variables in optim_conf - params['optim_conf']['set_use_battery'] = options.get('set_use_battery',params['optim_conf']['set_use_battery']) - params['optim_conf']['num_def_loads'] = options.get('number_of_deferrable_loads',params['optim_conf']['num_def_loads']) - if options.get('list_nominal_power_of_deferrable_loads',None) != None: - params['optim_conf']['P_deferrable_nom'] = [i['nominal_power_of_deferrable_loads'] for i in options.get('list_nominal_power_of_deferrable_loads')] - if options.get('list_operating_hours_of_each_deferrable_load',None) != None: - params['optim_conf']['def_total_hours'] = [i['operating_hours_of_each_deferrable_load'] for i in options.get('list_operating_hours_of_each_deferrable_load')] - if options.get('list_treat_deferrable_load_as_semi_cont',None) != None: - params['optim_conf']['treat_def_as_semi_cont'] = [i['treat_deferrable_load_as_semi_cont'] for i in options.get('list_treat_deferrable_load_as_semi_cont')] - params['optim_conf']['weather_forecast_method'] = options.get('weather_forecast_method',params['optim_conf']['weather_forecast_method']) + params["optim_conf"]["set_use_battery"] = options.get( + "set_use_battery", params["optim_conf"]["set_use_battery"] + ) + params["optim_conf"]["num_def_loads"] = options.get( + "number_of_deferrable_loads", params["optim_conf"]["num_def_loads"] + ) + if options.get("list_nominal_power_of_deferrable_loads", None) != None: + params["optim_conf"]["P_deferrable_nom"] = [ + i["nominal_power_of_deferrable_loads"] + for i in options.get("list_nominal_power_of_deferrable_loads") + ] + if options.get("list_operating_hours_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_total_hours"] = [ + i["operating_hours_of_each_deferrable_load"] + for i in options.get("list_operating_hours_of_each_deferrable_load") + ] + if options.get("list_treat_deferrable_load_as_semi_cont", None) != None: + params["optim_conf"]["treat_def_as_semi_cont"] = [ + i["treat_deferrable_load_as_semi_cont"] + for i in options.get("list_treat_deferrable_load_as_semi_cont") + ] + params["optim_conf"]["weather_forecast_method"] = options.get( + "weather_forecast_method", params["optim_conf"]["weather_forecast_method"] + ) # Update optional param secrets - if params['optim_conf']['weather_forecast_method'] == "solcast": - params['params_secrets']['solcast_api_key'] = options.get('optional_solcast_api_key',params_secrets.get('solcast_api_key',"123456")) - params['params_secrets']['solcast_rooftop_id'] = options.get('optional_solcast_rooftop_id',params_secrets.get('solcast_rooftop_id',"123456")) - elif params['optim_conf']['weather_forecast_method'] == "solar.forecast": - params['params_secrets']['solar_forecast_kwp'] = options.get('optional_solar_forecast_kwp',params_secrets.get('solar_forecast_kwp',5)) - params['optim_conf']['load_forecast_method'] = options.get('load_forecast_method',params['optim_conf']['load_forecast_method']) - params['optim_conf']['delta_forecast'] = options.get('delta_forecast_daily',params['optim_conf']['delta_forecast']) - 
params['optim_conf']['load_cost_forecast_method'] = options.get('load_cost_forecast_method',params['optim_conf']['load_cost_forecast_method'])
-        if options.get('list_set_deferrable_load_single_constant',None) != None:
-            params['optim_conf']['set_def_constant'] = [i['set_deferrable_load_single_constant'] for i in options.get('list_set_deferrable_load_single_constant')]
-        if options.get('list_peak_hours_periods_start_hours',None) != None and options.get('list_peak_hours_periods_end_hours',None) != None:
-            start_hours_list = [i['peak_hours_periods_start_hours'] for i in options['list_peak_hours_periods_start_hours']]
-            end_hours_list = [i['peak_hours_periods_end_hours'] for i in options['list_peak_hours_periods_end_hours']]
+        if params["optim_conf"]["weather_forecast_method"] == "solcast":
+            params["params_secrets"]["solcast_api_key"] = options.get(
+                "optional_solcast_api_key",
+                params_secrets.get("solcast_api_key", "123456"),
+            )
+            params["params_secrets"]["solcast_rooftop_id"] = options.get(
+                "optional_solcast_rooftop_id",
+                params_secrets.get("solcast_rooftop_id", "123456"),
+            )
+        elif params["optim_conf"]["weather_forecast_method"] == "solar.forecast":
+            params["params_secrets"]["solar_forecast_kwp"] = options.get(
+                "optional_solar_forecast_kwp",
+                params_secrets.get("solar_forecast_kwp", 5),
+            )
+        params["optim_conf"]["load_forecast_method"] = options.get(
+            "load_forecast_method", params["optim_conf"]["load_forecast_method"]
+        )
+        params["optim_conf"]["delta_forecast"] = options.get(
+            "delta_forecast_daily", params["optim_conf"]["delta_forecast"]
+        )
+        params["optim_conf"]["load_cost_forecast_method"] = options.get(
+            "load_cost_forecast_method",
+            params["optim_conf"]["load_cost_forecast_method"],
+        )
+        if options.get("list_set_deferrable_load_single_constant", None) != None:
+            params["optim_conf"]["set_def_constant"] = [
+                i["set_deferrable_load_single_constant"]
+                for i in options.get("list_set_deferrable_load_single_constant")
+            ]
+        if (
+            options.get("list_peak_hours_periods_start_hours", None) != None
+            and options.get("list_peak_hours_periods_end_hours", None) != None
+        ):
+            start_hours_list = [
+                i["peak_hours_periods_start_hours"]
+                for i in options["list_peak_hours_periods_start_hours"]
+            ]
+            end_hours_list = [
+                i["peak_hours_periods_end_hours"]
+                for i in options["list_peak_hours_periods_end_hours"]
+            ]
             num_peak_hours = len(start_hours_list)
             list_hp_periods_list = [{'period_hp_'+str(i+1):[{'start':start_hours_list[i]},{'end':end_hours_list[i]}]} for i in range(num_peak_hours)]
             params['optim_conf']['list_hp_periods'] = list_hp_periods_list
@@ -681,20 +910,35 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int,
         for x in range(len(params['optim_conf']['P_deferrable_nom']), params['optim_conf']['num_def_loads']):
             params['optim_conf']['P_deferrable_nom'].append(0)
     # days_to_retrieve should be no less then 2
-    if params['retrieve_hass_conf']['days_to_retrieve'] < 2:
-        params['retrieve_hass_conf']['days_to_retrieve'] = 2
-        logger.warning("days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history")
+    if params["retrieve_hass_conf"]["days_to_retrieve"] < 2:
+        params["retrieve_hass_conf"]["days_to_retrieve"] = 2
+        logger.warning(
+            "days_to_retrieve should not be lower than 2, setting days_to_retrieve to 2. 
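For readers following the diff, the nested structure that ends up in list_hp_periods is easier to see outside the comprehension. A minimal sketch with hypothetical start and end hours (the time format is assumed from the options file):

    # Illustration only; mirrors the comprehension in build_params above.
    start_hours_list = ["02:54", "17:24"]   # hypothetical option values
    end_hours_list = ["15:24", "20:24"]
    list_hp_periods_list = [
        {"period_hp_" + str(i + 1): [{"start": start_hours_list[i]},
                                     {"end": end_hours_list[i]}]}
        for i in range(len(start_hours_list))
    ]
    # -> [{'period_hp_1': [{'start': '02:54'}, {'end': '15:24'}]},
    #     {'period_hp_2': [{'start': '17:24'}, {'end': '20:24'}]}]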
Make sure your sensors also have at least 2 days of history" + ) else: - params['params_secrets'] = params_secrets + params["params_secrets"] = params_secrets # The params dict - params['passed_data'] = {'pv_power_forecast':None,'load_power_forecast':None,'load_cost_forecast':None,'prod_price_forecast':None, - 'prediction_horizon':None,'soc_init':None,'soc_final':None,'def_total_hours':None,'def_start_timestep':None,'def_end_timestep':None,'alpha':None,'beta':None} + params["passed_data"] = { + "pv_power_forecast": None, + "load_power_forecast": None, + "load_cost_forecast": None, + "prod_price_forecast": None, + "prediction_horizon": None, + "soc_init": None, + "soc_final": None, + "def_total_hours": None, + "def_start_timestep": None, + "def_end_timestep": None, + "alpha": None, + "beta": None, + } return params + def get_days_list(days_to_retrieve: int) -> pd.date_range: """ Get list of past days from today to days_to_retrieve. - + :param days_to_retrieve: Total number of days to retrieve from the past :type days_to_retrieve: int :return: The list of days @@ -703,19 +947,20 @@ def get_days_list(days_to_retrieve: int) -> pd.date_range: """ today = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0) d = (today - timedelta(days=days_to_retrieve)).isoformat() - days_list = pd.date_range(start=d, end=today.isoformat(), freq='D') - + days_list = pd.date_range(start=d, end=today.isoformat(), freq="D") + return days_list + def set_df_index_freq(df: pd.DataFrame) -> pd.DataFrame: """ Set the freq of a DataFrame DateTimeIndex. - + :param df: Input DataFrame :type df: pd.DataFrame :return: Input DataFrame with freq defined :rtype: pd.DataFrame - + """ idx_diff = np.diff(df.index) sampling = pd.to_timedelta(np.median(idx_diff)) From 9ed8798032f6099a74da89e030fd49810eebd0e0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 14:55:30 +0100 Subject: [PATCH 074/111] sklearn-model -> regression-model --- src/emhass/command_line.py | 4 ++-- src/emhass/utils.py | 5 +++++ src/emhass/web_server.py | 8 ++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 0c094c96..9d25d518 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -609,7 +609,7 @@ def regressor_model_fit( """ data = copy.deepcopy(input_data_dict["df_input_data"]) model_type = input_data_dict["params"]["passed_data"]["model_type"] - sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] features = input_data_dict["params"]["passed_data"]["features"] target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] @@ -617,7 +617,7 @@ def regressor_model_fit( root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( - data, model_type, sklearn_model, features, target, timestamp, logger + data, model_type, regression_model, features, target, timestamp, logger ) # Fit the ML model mlr.fit(date_features=date_features) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 6d953ae6..5e40160a 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -329,6 +329,11 @@ def treat_runtimeparams( else: sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model + if "regression_model" not in runtimeparams.keys(): + regression_model = "LinearRegression" + else: + regression_model = 
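The fallback pattern introduced here for regression_model (use the runtime value if present, otherwise a default) is equivalent to a dict.get lookup; a compact sketch of the same semantics, for illustration only:

    runtimeparams = {}  # hypothetical: no regression_model passed at runtime
    params = {"passed_data": {}}
    regression_model = runtimeparams.get("regression_model", "LinearRegression")
    params["passed_data"]["regression_model"] = regression_model  # -> "LinearRegression"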
runtimeparams["regression_model"] + params["passed_data"]["regression_model"] = regression_model if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 50241590..9a100870 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -195,14 +195,14 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a regressor fit...") + app.logger.info(" >> Performing a machine learning regressor fit...") regressor_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-fit executed... \n' + msg = f'EMHASS >> Action regressor-model-fit executed... \n' return make_response(msg, 201) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a regressor predict...") + app.logger.info(" >> Performing a machine learning regressor predict...") regressor_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-predict executed... \n' + msg = f'EMHASS >> Action regressor-model-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 7b5e2ba0675c639f8d9a82f5bd7dad1c6c04e4fd Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 15:18:57 +0100 Subject: [PATCH 075/111] REGRESSION_METHODS const --- src/emhass/machine_learning_regressor.py | 127 ++++++++++++----------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 80ddd74f..9e7795d0 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -24,6 +24,41 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) +REGRESSION_METHODS = { + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + } class MLRegressor: r""" @@ -43,7 +78,7 @@ def __init__( self, data, model_type: str, - sklearn_model: str, + regression_model: str, features: list, target: str, timestamp: str, @@ -56,11 +91,15 @@ def __init__( :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str + :param regression_model: The model that will be used. For now only \ + this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ + `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + :type regression_model: str :param features: A list of features. 
\ - Example: [`solar`, `degree_days`]. + Example: [`solar_production`, `degree_days`]. :type features: list :param target: The target(to be predicted). \ - Example: `hours`. + Example: `heating_hours`. :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. :type timestamp: str @@ -72,7 +111,7 @@ def __init__( self.target = target self.timestamp = timestamp self.model_type = model_type - self.sklearn_model = sklearn_model + self.regression_model = regression_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep="first")] @@ -111,7 +150,7 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = []) -> None: + def fit(self, date_features: Optional[list] = None) -> None: """ Fit the model using the provided data. @@ -129,7 +168,7 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: + if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( self.data_exo, date_features, self.timestamp @@ -150,63 +189,27 @@ def fit(self, date_features: Optional[list] = []) -> None: ) self.steps = len(X_test) - regression_methods = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } - - if self.sklearn_model == "LinearRegression": - base_model = regression_methods["LinearRegression"]["model"] - param_grid = regression_methods["LinearRegression"]["param_grid"] - elif self.sklearn_model == "RidgeRegression": - base_model = regression_methods["RidgeRegression"]["model"] - param_grid = regression_methods["RidgeRegression"]["param_grid"] - elif self.sklearn_model == "LassoRegression": - base_model = regression_methods["LassoRegression"]["model"] - param_grid = regression_methods["LassoRegression"]["param_grid"] - elif self.sklearn_model == "RandomForestRegression": - base_model = regression_methods["RandomForestRegression"]["model"] - param_grid = regression_methods["RandomForestRegression"]["param_grid"] - elif self.sklearn_model == "GradientBoostingRegression": - base_model = regression_methods["GradientBoostingRegression"]["model"] - param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] - elif self.sklearn_model == "AdaBoostRegression": - base_model = regression_methods["AdaBoostRegression"]["model"] - param_grid = regression_methods["AdaBoostRegression"]["param_grid"] + if self.regression_model == "LinearRegression": + base_model 
= REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model " + self.sklearn_model + " is not valid" + "Passed sklearn model " + self.regression_model + " is not valid" ) self.model = make_pipeline(StandardScaler(), base_model) @@ -223,7 +226,7 @@ def fit(self, date_features: Optional[list] = []) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.sklearn_model + " model") + self.logger.info("Training a " + self.regression_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) print("Best value for lambda : ", self.grid_search.best_params_) From 6bf36a358321512258dd0294706383a59ca3652b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:12:28 +0100 Subject: [PATCH 076/111] Some cleanup --- src/emhass/machine_learning_regressor.py | 220 +++++++++++++---------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 9e7795d0..95f624b3 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -1,70 +1,72 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +"""Machine learning regressor module.""" + +from __future__ import annotations import copy -import logging import time -from typing import Optional import warnings +from typing import TYPE_CHECKING -import pandas as pd import numpy as np +import pandas as pd from sklearn.ensemble import ( AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, ) -from sklearn.metrics import r2_score - from sklearn.linear_model import Lasso, LinearRegression, Ridge +from sklearn.metrics import r2_score from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +if TYPE_CHECKING: + import logging warnings.filterwarnings("ignore", category=DeprecationWarning) REGRESSION_METHODS = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": 
{"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, +} + class MLRegressor: - r""" - A forecaster class using machine learning models. + r"""A forecaster class using machine learning models. - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. + This class uses the `sklearn` module and the machine learning models are \ + from `scikit-learn`. It exposes two main methods: @@ -74,9 +76,9 @@ class MLRegressor: """ - def __init__( - self, - data, + def __init__( # noqa: PLR0913 + self: MLRegressor, + data: pd.DataFrame, model_type: str, regression_model: str, features: list, @@ -92,8 +94,9 @@ def __init__( for what it will be used for. :type model_type: str :param regression_model: The model that will be used. For now only \ - this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ - `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + this options are possible: `LinearRegression`, `RidgeRegression`, \ + `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ Example: [`solar_production`, `degree_days`]. @@ -113,7 +116,7 @@ def __init__( self.model_type = model_type self.regression_model = regression_model self.logger = logger - self.data.sort_index(inplace=True) + self.data = self.data.sort_index() self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None @@ -122,9 +125,11 @@ def __init__( @staticmethod def add_date_features( - data: pd.DataFrame, date_features: list, timestamp: str + data: pd.DataFrame, + date_features: list, + timestamp: str, ) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp + """Add date features from the input DataFrame timestamp. 
:param data: The input DataFrame :type data: pd.DataFrame @@ -133,7 +138,7 @@ def add_date_features( :return: The DataFrame with the added features :rtype: pd.DataFrame """ - df = copy.deepcopy(data) + df = copy.deepcopy(data) # noqa: PD901 df[timestamp] = pd.to_datetime(df["timestamp"]) if "year" in date_features: df["year"] = [i.year for i in df["timestamp"]] @@ -150,14 +155,54 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = None) -> None: + def get_regression_model(self: MLRegressor) -> tuple[str, str]: + """Get the base model and parameter grid for the specified regression model. + + Returns a tuple containing the base model and parameter grid corresponding to \ + the specified regression model. + + Args: + ---- + self: The instance of the MLRegressor class. + + Returns: + ------- + A tuple containing the base model and parameter grid. + """ - Fit the model using the provided data. + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] + else: + self.logger.error( + "Passed sklearn model %s is not valid", + self.regression_model, + ) + return base_model, param_grid - :param date_features: A list of 'date_features' to take into account when fitting the model. + def fit(self: MLRegressor, date_features: list | None = None) -> None: + """Fit the model using the provided data. + + :param date_features: A list of 'date_features' to take into account when \ + fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for " + self.model_type) + self.logger.info("Performing a MLRegressor fit for %s", self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.features] = self.data[self.features] self.data_exo[self.target] = self.data[self.target] @@ -167,50 +212,36 @@ def fit(self, date_features: Optional[list] = None) -> None: keep_columns.append(self.timestamp) keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) + self.data_exo = self.data_exo.reset_index(drop=True) if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( - self.data_exo, date_features, self.timestamp + self.data_exo, + date_features, + self.timestamp, ) else: self.logger.error( - "If no timestamp provided, you can't use date_features, going further without date_features." 
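A usage sketch for add_date_features with a hypothetical two-row frame; the method converts the timestamp column with pd.to_datetime before deriving integer features, and the feature names below are the ones the unit tests pass:

    import pandas as pd

    from emhass.machine_learning_regressor import MLRegressor

    df = pd.DataFrame(
        {"timestamp": ["2024-03-01 08:00", "2024-03-02 09:30"],
         "degree_days": [12.7, 9.3]},
    )
    out = MLRegressor.add_date_features(df, ["month", "day_of_week"], "timestamp")
    # out gains integer "month" and "day_of_week" columns derived from "timestamp".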
+ "If no timestamp provided, you can't use date_features, going \ + further without date_features.", ) y = self.data_exo[self.target] self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: self.data_exo = self.data_exo.drop(self.timestamp, axis=1) - X = self.data_exo + X = self.data_exo # noqa: N806 - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 + X_train, X_test, y_train, y_test = train_test_split( # noqa: N806 + X, + y, + test_size=0.2, + random_state=42, ) + self.steps = len(X_test) - if self.regression_model == "LinearRegression": - base_model = REGRESSION_METHODS["LinearRegression"]["model"] - param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] - elif self.regression_model == "RidgeRegression": - base_model = REGRESSION_METHODS["RidgeRegression"]["model"] - param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] - elif self.regression_model == "LassoRegression": - base_model = REGRESSION_METHODS["LassoRegression"]["model"] - param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] - elif self.regression_model == "RandomForestRegression": - base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] - param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] - elif self.regression_model == "GradientBoostingRegression": - base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] - param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] - elif self.regression_model == "AdaBoostRegression": - base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] - param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] - else: - self.logger.error( - "Passed sklearn model " + self.regression_model + " is not valid" - ) + base_model, param_grid = self.get_regression_model() self.model = make_pipeline(StandardScaler(), base_model) @@ -226,12 +257,10 @@ def fit(self, date_features: Optional[list] = None) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.regression_model + " model") + self.logger.info("Training a %s model", self.regression_model) start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ", self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + self.logger.info("Elapsed time for model fit: %s", time.time() - start_time) self.model = self.grid_search.best_estimator_ @@ -240,20 +269,21 @@ def fit(self, date_features: Optional[list] = None) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test, predictions) self.logger.info( - f"Prediction R2 score of fitted model on test data: {pred_metric}" + "Prediction R2 score of fitted model on test data: %s", + pred_metric, ) - def predict(self, new_values: list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - + def predict(self: MLRegressor, new_values: list) -> np.ndarray: + """Predict a new value. - :param new_values: The new values for the features(in the same order as the features list). \ + :param new_values: The new values for the features \ + (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. 
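Condensed, the training path in fit() amounts to split, scale, grid-search, refit and score. A self-contained sketch on synthetic data (feature meanings and sizes are made up):

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    rng = np.random.default_rng(42)
    X = rng.normal(size=(200, 2))  # e.g. [degree_days, solar]
    y = 3.0 * X[:, 0] - 1.5 * X[:, 1] + rng.normal(scale=0.1, size=200)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42,
    )
    pipe = make_pipeline(StandardScaler(), LinearRegression())
    grid = GridSearchCV(pipe, {"linearregression__fit_intercept": [True, False]}, cv=5)
    grid.fit(X_train, y_train)
    print(r2_score(y_test, grid.best_estimator_.predict(X_test)))  # close to 1.0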
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for " + self.model_type) + self.logger.info("Performing a prediction for %s", self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) From 43b492763702cb64a81e77129e0fc34746ac3870 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 077/111] Add csv-prediction --- src/emhass/command_line.py | 1 - src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 9d25d518..35d8b10c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -16,7 +16,6 @@ from distutils.util import strtobool - from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
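One detail worth noting in predict() above: scikit-learn estimators expect a 2-D array of shape (n_samples, n_features), which is why the single observation is wrapped in an extra list before the call. A small sketch with the feature values the tests use:

    import numpy as np

    new_values = [12.79, 4.766]     # hypothetical feature values
    batch = np.array([new_values])  # shape (1, 2): one sample, two features
    # mlr.model.predict(batch) would return a length-1 ndarray for this sample.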
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From 173f05e5f01c99abacbe9248b79c5b94279adc86 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 078/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From 19da6f8aede912179696fc4ba38c1becd2e397fc Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 079/111] gitignore fun --- .vscode/launch.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 10313c97..b953c7d3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,10 +10,11 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "purpose":["debug-in-terminal"], "justMyCode": true, "env": { @@ -21,15 +22,15 @@ "OPTIONS_PATH": "/workspaces/emhass/options.json", "SECRETS_PATH": "/workspaces/emhass/secrets_emhass.yaml", "DATA_PATH": "/workspaces/emhass/data/", - "LOGGING_LEVEL": "DEBUG" } }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "args": ["--addon", "true", "--no_response", "true"], "purpose":["debug-in-terminal"], "justMyCode": true, @@ -44,7 +45,6 @@ "LAT": "45.83", //optional change "LON": "6.86", //optional change "ALT": "4807.8", //optional change - "LOGGING_LEVEL": "DEBUG" //optional change }, } From 1f531ec4e690e4942158d1a0c8ed6042520c69df Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 080/111] python -> debugpy --- .vscode/launch.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b953c7d3..ec6c6987 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,12 +10,14 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "purpose":["debug-in-terminal"], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -26,13 +28,20 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": 
"debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "args": ["--addon", "true", "--no_response", "true"], - "purpose":["debug-in-terminal"], + "args": [ + "--addon", + "true", + "--no_response", + "true" + ], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -46,7 +55,6 @@ "LON": "6.86", //optional change "ALT": "4807.8", //optional change }, - - } + } ] } \ No newline at end of file From b4984d01b96d8b1593b28185f85c4d9e834dbfc4 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 081/111] launch.json --- .vscode/launch.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ec6c6987..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,9 +12,8 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "purpose": [ "debug-in-terminal" ], @@ -30,9 +29,8 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "args": [ "--addon", "true", From c1344b18e02681131174312d268d921b2f216a57 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 082/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. 
- - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. 
- - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From 723ed3f7f10c2dcbc43d1f771c44bec7ed9d87ff Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 18 Mar 2024 09:33:20 +0100 Subject: [PATCH 083/111] remove KNeighborsRegressor --- src/emhass/machine_learning_regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 95f624b3..732b4266 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -95,7 +95,7 @@ def __init__( # noqa: PLR0913 :type model_type: str :param regression_model: The model that will be used. For now only \ this options are possible: `LinearRegression`, `RidgeRegression`, \ - `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `LassoRegression`, `RandomForestRegression`, \ `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. 
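With KNeighborsRegressor gone from the supported list, a caller that still passes it will reach the error branch in get_regression_model. A defensive sketch on the caller side, assuming the REGRESSION_METHODS dict defined earlier in this module:

    requested = "KNeighborsRegressor"          # no longer supported
    if requested not in REGRESSION_METHODS:    # registry from the module above
        requested = "LinearRegression"         # same default treat_runtimeparams uses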
\ From 93380f3e79fd5ea9bd6127de286b518730ca3b98 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 14:07:21 +0200 Subject: [PATCH 084/111] add tests --- src/emhass/command_line.py | 100 +++++++---- tests/test_command_line_utils.py | 215 ++++++++++++++++++++--- tests/test_machine_learning_regressor.py | 113 ++++++++++++ 3 files changed, 376 insertions(+), 52 deletions(-) create mode 100644 tests/test_machine_learning_regressor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 35d8b10c..06280ff4 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -200,9 +200,9 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, return False df_input_data = rh.df_final.copy() - elif set_type == "regressor-model-fit": + elif set_type == "regressor-model-fit" or set_type == "regressor-model-predict": - df_input_data_dayahead = None + df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None @@ -210,7 +210,13 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, features = params["passed_data"]["features"] target = params["passed_data"]["target"] timestamp = params["passed_data"]["timestamp"] - filename_path = pathlib.Path(base_path) / csv_file + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file + + else: + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -226,13 +232,8 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, if not set(required_columns).issubset(df_input_data.columns): logger.error("The cvs file does not contain the required columns.") raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" + f"CSV file should contain the following columns: {', '.join(required_columns)}", ) - elif set_type == "regressor-model-predict": - df_input_data, df_input_data_dayahead = None, None - P_PV_forecast, P_load_forecast = None, None - days_list = None - params = json.loads(params) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None @@ -240,7 +241,7 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, days_list = None else: logger.error( - "The passed action argument and hence the set_type parameter for setup is not valid" + "The passed action argument and hence the set_type parameter for setup is not valid", ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -493,7 +494,7 @@ def forecast_model_predict( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return # Make predictions @@ -580,7 +581,7 @@ def forecast_model_tune( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this tune method" + "The ML forecaster file was not found, please run a model fit method before this tune method", ) return None, None # Tune the model @@ -595,7 +596,9 @@ def forecast_model_tune( def regressor_model_fit( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, ) -> None: """Perform a forecast model fit from training 
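The CSV branch above boils down to reading the file and checking its schema before any fitting happens. A hedged standalone sketch (the file name and column names mirror the unit tests added below):

    import pandas as pd

    df_input_data = pd.read_csv("prediction.csv", parse_dates=True)
    required_columns = ["dd", "solar", "hour"]  # features plus target
    if not set(required_columns).issubset(df_input_data.columns):
        raise ValueError(
            f"CSV file should contain the following columns: {', '.join(required_columns)}",
        )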
data retrieved from Home Assistant. @@ -614,9 +617,16 @@ def regressor_model_fit( timestamp = input_data_dict["params"]["passed_data"]["timestamp"] date_features = input_data_dict["params"]["passed_data"]["date_features"] root = input_data_dict["root"] + # The MLRegressor object mlr = MLRegressor( - data, model_type, regression_model, features, target, timestamp, logger + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, ) # Fit the ML model mlr.fit(date_features=date_features) @@ -625,10 +635,14 @@ def regressor_model_fit( filename = model_type + "_mlr.pkl" with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) + return mlr def regressor_model_predict( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlr: Optional[MLRegressor] = None, ) -> None: """Perform a prediction from csv file. @@ -649,7 +663,7 @@ def regressor_model_predict( mlr = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return new_values = input_data_dict["params"]["passed_data"]["new_values"] @@ -667,14 +681,16 @@ def regressor_model_predict( ] # Publish prediction idx = 0 - input_data_dict["rh"].post_data( - prediction, - idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var="mlregressor", - ) + if not debug: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + return prediction def publish_data( @@ -763,7 +779,7 @@ def publish_data( if "P_deferrable{}".format(k) not in opt_res_latest.columns: logger.error( "P_deferrable{}".format(k) - + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.", ) else: input_data_dict["rh"].post_data( @@ -780,7 +796,7 @@ def publish_data( if input_data_dict["opt"].optim_conf["set_use_battery"]: if "P_batt" not in opt_res_latest.columns: logger.error( - "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.", ) else: custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] @@ -836,7 +852,7 @@ def publish_data( if "optim_status" not in opt_res_latest: opt_res_latest["optim_status"] = "Optimal" logger.warning( - "no optim_status in opt_res_latest, run an optimization task first" + "no optim_status in opt_res_latest, run an optimization task first", ) input_data_dict["rh"].post_data( opt_res_latest["optim_status"], @@ -958,12 +974,14 @@ def main(): # Additionnal argument try: parser.add_argument( - "--version", action="version", version="%(prog)s " + version("emhass") + "--version", + action="version", + version="%(prog)s " + version("emhass"), ) args = parser.parse_args() except Exception: logger.info( - "Version not found for emhass package. Or importlib exited with PackageNotFoundError." + "Version not found for emhass package. 
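Taken together, regressor_model_fit and regressor_model_predict form a pickle round trip keyed on model_type. A hedged sketch of that round trip (the path and values are illustrative; emhass itself drives both halves through input_data_dict):

    import pickle
    from pathlib import Path

    filename = Path("/tmp") / ("heating_dd" + "_mlr.pkl")  # model_type + "_mlr.pkl"
    with open(filename, "wb") as outp:
        pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL)    # mlr: a fitted MLRegressor
    with open(filename, "rb") as inp:
        mlr_loaded = pickle.load(inp)
    prediction = mlr_loaded.predict([12.79, 4.766, 1, 2])  # new_values as in the tests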
Or importlib exited with PackageNotFoundError.", ) # Setup parameters input_data_dict = set_input_data_dict(emhass_conf, @@ -996,7 +1014,25 @@ def main(): else: mlf = None df_pred_optim, mlf = forecast_model_tune( - input_data_dict, logger, debug=args.debug, mlf=mlf + input_data_dict, + logger, + debug=args.debug, + mlf=mlf, + ) + opt_res = None + elif args.action == "regressor-model-fit": + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + opt_res = None + elif args.action == "regressor-model-predict": + if args.debug: + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + else: + mlr = None + prediction = regressor_model_predict( + input_data_dict, + logger, + debug=args.debug, + mlr=mlr, ) opt_res = None elif args.action == "publish-data": @@ -1020,6 +1056,10 @@ def main(): return df_fit_pred, df_fit_pred_backtest, mlf elif args.action == "forecast-model-predict": return df_pred + elif args.action == "regressor-model-fit": + return mlr + elif args.action == "regressor-model-predict": + return prediction elif args.action == "forecast-model-tune": return df_pred_optim, mlf else: diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index 4605db81..293733f4 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -5,10 +5,21 @@ from unittest.mock import patch import pandas as pd import pathlib, json, yaml, copy +import numpy as np from emhass.command_line import set_input_data_dict -from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim -from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import ( + perfect_forecast_optim, + dayahead_forecast_optim, + naive_mpc_optim, +) +from emhass.command_line import ( + forecast_model_fit, + forecast_model_predict, + forecast_model_tune, + regressor_model_fit, + regressor_model_predict, +) from emhass.command_line import publish_data from emhass.command_line import main from emhass import utils @@ -307,13 +318,102 @@ def test_forecast_model_fit_predict_tune(self): self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) # Test the tune method - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=True, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=True, mlf=mlf + ) self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) - # Test ijection_dict for tune method on webui + # Test injection_dict for tune method on webui injection_dict = utils.get_injection_dict_forecast_model_tune(df_fit_pred, mlf) self.assertIsInstance(injection_dict, dict) - self.assertIsInstance(injection_dict['figure_0'], str) + self.assertIsInstance(injection_dict["figure_0"], str) + + def test_regressor_model_fit_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + 
"new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params_json = json.dumps(params) + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["regression_model"] + == "AdaBoostRegression", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + ) + mlr = regressor_model_fit(input_data_dict, logger, debug=True) + + # def test_regressor_model_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-predict" # predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] + == "Predicted hours", + ) + + regressor_model_predict(input_data_dict, logger, debug=True, mlr=mlr) + @patch('sys.argv', ['main', '--action', 'test', '--config', str(emhass_conf['config_path']), '--debug', 'True']) @@ -326,27 +426,30 @@ def test_main_wrong_action(self): def test_main_perfect_forecast_optim(self): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) + self.assertTrue(opt_res.isnull().sum().sum() == 0) self.assertIsInstance(opt_res.index, pd.core.indexes.datetimes.DatetimeIndex) - self.assertIsInstance(opt_res.index.dtype, pd.core.dtypes.dtypes.DatetimeTZDtype) - + self.assertIsInstance( + opt_res.index.dtype, + pd.core.dtypes.dtypes.DatetimeTZDtype, + ) + def test_main_dayahead_forecast_optim(self): with patch('sys.argv', ['main', '--action', 'dayahead-optim', '--config', str(emhass_conf['config_path']), '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, '--debug', 'True']): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + def test_main_naive_mpc_optim(self): with patch('sys.argv', ['main', '--action', 'naive-mpc-optim', '--config', str(emhass_conf['config_path']), '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, '--debug', 'True']): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - self.assertTrue(len(opt_res)==10) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + self.assertTrue(len(opt_res) == 10) + def test_main_forecast_model_fit(self): 
params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -377,12 +480,12 @@ def test_main_forecast_model_predict(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) with patch('sys.argv', ['main', '--action', 'forecast-model-predict', '--config', str(emhass_conf['config_path']), '--params', params_json, '--runtimeparams', runtimeparams_json, @@ -390,7 +493,7 @@ def test_main_forecast_model_predict(self): df_pred = main() self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) - + def test_main_forecast_model_tune(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -399,12 +502,12 @@ def test_main_forecast_model_tune(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) with patch('sys.argv', ['main', '--action', 'forecast-model-tune', '--config', str(emhass_conf['config_path']), '--params', params_json, '--runtimeparams', runtimeparams_json, @@ -412,6 +515,74 @@ def test_main_forecast_model_tune(self): df_pred_optim, mlf = main() self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) + + def test_main_regressor_model_fit(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-fit", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + mlr = main() + + def test_main_regressor_model_predict(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-predict", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + 
"--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + prediction = main() + self.assertIsInstance(prediction, np.ndarray) + @patch('sys.argv', ['main', '--action', 'publish-data', '--config', str(emhass_conf['config_path']), '--debug', 'True']) diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py new file mode 100644 index 00000000..88137b0d --- /dev/null +++ b/tests/test_machine_learning_regressor.py @@ -0,0 +1,113 @@ +"""Machine learning regressor test module.""" + +import copy +import json +import pathlib +import unittest + +import numpy as np +import pandas as pd +from sklearn.pipeline import Pipeline +import yaml +from emhass import utils +from emhass.command_line import set_input_data_dict +from emhass.machine_learning_regressor import MLRegressor +from sklearn.ensemble import ( + AdaBoostRegressor, +) + +# the root folder +root = str(utils.get_root(__file__, num_parent=2)) +# create logger +logger, ch = utils.get_logger(__name__, root, save_to_file=False) + + +class TestMLRegressor(unittest.TestCase): + @staticmethod + def get_test_params(): + with open(root + "/config_emhass.yaml", "r") as file: + params = yaml.load(file, Loader=yaml.FullLoader) + params.update( + { + "params_secrets": { + "hass_url": "http://supervisor/core/api", + "long_lived_token": "${SUPERVISOR_TOKEN}", + "time_zone": "Europe/Paris", + "lat": 45.83, + "lon": 6.86, + "alt": 8000.0, + }, + }, + ) + return params + + def setUp(self): + params = TestMLRegressor.get_test_params() + params_json = json.dumps(params) + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = copy.deepcopy(json.loads(params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + self.input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + data = copy.deepcopy(self.input_data_dict["df_input_data"]) + self.assertIsInstance(data, pd.DataFrame) + self.csv_file = self.input_data_dict["params"]["passed_data"]["csv_file"] + features = self.input_data_dict["params"]["passed_data"]["features"] + target = self.input_data_dict["params"]["passed_data"]["target"] + regression_model = self.input_data_dict["params"]["passed_data"][ + "regression_model" + ] + model_type = self.input_data_dict["params"]["passed_data"]["model_type"] + timestamp = self.input_data_dict["params"]["passed_data"]["timestamp"] + self.date_features = self.input_data_dict["params"]["passed_data"][ + "date_features" + ] + self.new_values = self.input_data_dict["params"]["passed_data"]["new_values"] + self.mlr = MLRegressor( + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, + ) + + def test_fit(self): + self.mlr.fit(self.date_features) + self.assertIsInstance(self.mlr.model, Pipeline) + + def test_predict(self): + self.mlr.fit(self.date_features) + predictions = self.mlr.predict(self.new_values) + 
self.assertIsInstance(predictions, np.ndarray)
+
+
+if __name__ == "__main__":
+    unittest.main()
+    ch.close()
+    logger.removeHandler(ch)

From b614f81fd3a54787f251939a02f5e6611535c1e9 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Sun, 7 Jan 2024 08:13:47 +0100
Subject: [PATCH 085/111] remove *.csv from gitignore

---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 581080c8..fa1f0d74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,7 +7,6 @@ secrets_emhass.yaml
 .vscode/launch.json
 .vscode/settings.json
 .vscode/tasks.json
-*.csv
 *.html
 *.pkl
 data/actionLogs.txt

From 86b160461964c3b5ae5e50934681d9b099771058 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Sun, 7 Jan 2024 08:24:21 +0100
Subject: [PATCH 086/111] Add csv-prediction

---
 src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 src/emhass/csv_predictor.py

diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py
new file mode 100644
index 00000000..a1c5576b
--- /dev/null
+++ b/src/emhass/csv_predictor.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import logging
+import copy
+import pathlib
+import time
+from typing import Optional
+# from typing import Optional, Tuple
+import pandas as pd
+import numpy as np
+
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import ElasticNet
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsRegressor
+# from sklearn.metrics import r2_score
+
+# from skforecast.ForecasterAutoreg import ForecasterAutoreg
+# from skforecast.model_selection import bayesian_search_forecaster
+# from skforecast.model_selection import backtesting_forecaster
+
+import warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+class CsvPredictor:
+    r"""
+    A forecaster class using machine learning models.
+
+    This class uses the `skforecast` module and the machine learning models are from `scikit-learn`.
+
+    It exposes one main method:
+
+    - `predict`: to obtain a forecast from a pre-trained model.
+
+    """
+
+    def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str,
+                 logger: logging.Logger) -> None:
+        r"""Define constructor for the forecast class.
+
+        :param data: The data that will be used for train/test
+        :type data: pd.DataFrame
+        :param model_type: A unique name defining this model and useful to identify \
+            for what it will be used for.
+        :type model_type: str
+        :param var_model: The name of the sensor to retrieve data from Home Assistant. \
+            Example: `sensor.power_load_no_var_loads`.
+        :type var_model: str
+        :param sklearn_model: The `scikit-learn` model that will be used. For now only \
+            this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`.
+        :type sklearn_model: str
+        :param num_lags: The number of auto-regression lags to consider. A good starting point \
+            is to fix this as one day. For example if your time step is 30 minutes, then fix this \
+            to 48, if the time step is 1 hour the fix this to 24 and so on.
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From cd59928ed973afa198b4aebdf92f5357a493f712 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 087/111] cleanup --- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. 
""" - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From d4a3c677b2ad53f6b0c9c2522bee593cfc67a30d Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 088/111] more cleanup --- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 1 file changed, 34 insertions(+), 58 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 87c54d83bc5fe6810b770e299a6399c90f1a25c0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 089/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From a9cd098939cbc0a720ea5885873096bdb323829f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 090/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray: data = self.load_data() if data is not None: X, y = self.prepare_data(data) - + if self.sklearn_model == 'LinearRegression': base_model = LinearRegression() elif self.sklearn_model == 'ElasticNet': @@ -127,9 +126,5 @@ def predict(self) -> np.ndarray: self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) - + return prediction - - - - \ No newline at end of file From 9a86046333ffb7a42386fc8b49422a4a5aeff476 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 091/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++------------- 1 file changed, 111 insertions(+), 62 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - self.csv_file = csv_file + self.data = data self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp - def load_data(self) -> pd.DataFrame: - """Load the data.""" - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(inp) - else: - self.logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + self.csv_file + " was not found.") - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: + :param data: The input DataFrame + :type data: pd.DataFrame + :return: The DataFrame with the added features + :rtype: pd.DataFrame + """ + df = copy.deepcopy(data) + df['timestamp']= pd.to_datetime(df['timestamp']) + if 'year' in date_features: + df['year'] = [i.month for i in df['timestamp']] + if 'month' in date_features: + df['month'] = [i.month for i in df['timestamp']] + if 'day_of_week' in date_features: + df['day_of_week'] = [i.dayofweek for i in df['timestamp']] + if 'day_of_year' in date_features: + df['day_of_year'] = [i.dayofyear for i in df['timestamp']] + if 'day' in date_features: + df['day'] = [i.day for i in df['timestamp']] + if 'hour' in date_features: + df['hour'] = [i.day for i in df['timestamp']] + + return df + + def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Prepare the data. + Fit the model using the provided data. :param data: Input Data :type data: pd.DataFrame - :return: A tuple containing the train data. 
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) From cfb248b13eeb2ed706d6eb61e74c1d1693636239 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 092/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. 
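
As of PATCH 092 the class has converged on its final call sequence: it is constructed around an in-memory DataFrame, `fit()` grid-searches a StandardScaler + LinearRegression pipeline and keeps the best estimator, and `predict()` takes the new regressor values directly. The following is a minimal driver sketch of that sequence; the DataFrame source, column names and logger setup are illustrative assumptions (the values mirror the test suite from PATCH 084), not part of the patches themselves.

import logging

import pandas as pd

from emhass.csv_predictor import CsvPredictor

logger = logging.getLogger(__name__)

# Assumed training data: a timestamp column, the regressors ("dd", "solar")
# and the target column ("hour"), as in the tests' prediction.csv.
df = pd.read_csv("prediction.csv", parse_dates=True)

predictor = CsvPredictor(
    data=df,
    model_type="heating_dd",
    independent_variables=["dd", "solar"],
    dependent_variable="hour",
    timestamp="timestamp",
    logger=logger,
)
# fit() does the train/test split, tunes the scaler + LinearRegression
# pipeline with GridSearchCV and stores the best estimator on self.model.
predictor.fit(date_features=["month", "day_of_week"])
# Pass one value per independent variable, then one per date feature, in the
# same column order used for fitting; the result is a 1-element ndarray.
prediction = predictor.predict([12.79, 4.766, 1, 2])
print(prediction)
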
From 4af34ad04486bda05ac5045233bf6b44389889db Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 093/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From c8e02a70a4c6728813d326d3486860ff3662edc0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 094/111] cleanup + docstrings --- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1e46927d..57d61791 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -25,32 +25,30 @@ class CsvPredictor: This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - It exposes one main method: + It exposes two main methods: - - `predict`: to obtain a forecast from a csv file. + - `fit`: to train a model with the passed data. + + - `predict`: to obtain a forecast from a pre-trained model. """ def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param csv_file: The name of the csv file to retrieve data from. \ - Example: `input_train_data.csv`. - :type csv_file: str + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. 
:type independent_variables: list :param dependent_variable: The dependent variable(to be predicted). \ Example: `hours`. :type dependent_variable: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ - Example: [2.24, 5.68]. - :type new_values: list - :param root: The parent folder of the path where the config.yaml file is located - :type root: str + :param timestamp: If defined, the column key that has to be used of timestamp. + :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ @@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.timestamp = timestamp self.model_type = model_type self.logger = logger - self.is_tuned = False self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame :type data: pd.DataFrame + :param timestamp: The column containing the timestamp + :type timestamp: str :return: The DataFrame with the added features :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df['timestamp']= pd.to_datetime(df['timestamp']) + df[timestamp]= pd.to_datetime(df['timestamp']) if 'year' in date_features: - df['year'] = [i.month for i in df['timestamp']] + df['year'] = [i.year for i in df['timestamp']] if 'month' in date_features: df['month'] = [i.month for i in df['timestamp']] if 'day_of_week' in date_features: @@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - :param data: Input Data - :type data: pd.DataFrame + :param date_features: A list of 'date_features' to take into account when fitting the model. + :type data: list """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for "+self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.independent_variables] = self.data[self.independent_variables] self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] @@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") @@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None: def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. + + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ From 775d61f57a6772d5e59b0511b67f2f9d4ed6496b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 11 Mar 2024 09:59:27 +0100 Subject: [PATCH 095/111] add other regression methods --- src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 57d61791..2b6fb86a 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -9,9 +9,10 @@ import pandas as pd import numpy as np +from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor from sklearn.metrics import r2_score -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - # Define the model - self.model = Pipeline([ - ('scaler', StandardScaler()), - ('regressor', LinearRegression()) - ]) - # Define the parameters to tune - param_grid = { - 'regressor__fit_intercept': [True, False], - 'regressor__positive': [True, False], - } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # Fit the grid search object to the data - self.logger.info("Fitting the model...") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + regression_methods = [ + ('Linear Regression', LinearRegression(), {}), + ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid 
= { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 904a36a69b93fbd215a06a976803aea58e26fd1f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 096/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}") + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From ce407cfa711cb7c335ec6d031784b55a2b5408aa Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:42:27 +0100 Subject: [PATCH 097/111] change to MLRegressor --- src/emhass/csv_predictor.py | 257 ------------------------------------ src/emhass/utils.py | 71 +++++----- 2 files changed, 30 insertions(+), 298 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 3ffeba27..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -import logging -import time -from typing import Optional -import warnings - -import pandas as pd -import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score - -from sklearn.linear_model import Lasso, LinearRegression, Ridge -from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler - - -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - - It exposes two main methods: - - - `fit`: to train a model with the passed data. - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. - - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param independent_variables: A list of independent variables. \ - Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ - Example: `hours`. - :type dependent_variable: str - :param timestamp: If defined, the column key that has to be used of timestamp. 
- :type timestamp: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.sklearn_model = sklearn_model - self.logger = logger - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - self.data_exo = None - self.steps = None - self.model = None - self.grid_search =None - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - :param data: The input DataFrame - :type data: pd.DataFrame - :param timestamp: The column containing the timestamp - :type timestamp: str - :return: The DataFrame with the added features - :rtype: pd.DataFrame - """ - df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] - - return df - - def fit(self, date_features: Optional[list] = []) -> None: - """ - Fit the model using the provided data. - - :param date_features: A list of 'date_features' to take into account when fitting the model. - :type data: list - """ - self.logger.info("Performing a csv model fit for "+self.model_type) - self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] - keep_columns = [] - keep_columns.extend(self.independent_variables) - if self.timestamp is not None: - keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) - self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: - if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) - else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) - if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) - X = self.data_exo - - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - self.steps = len(X_test) - - regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": 
GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} - } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', 
refit=True, verbose=0, n_jobs=-1) - - # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - - def predict(self, new_values:list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - - - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ - Example: [2.24, 5.68]. - :type new_values: list - :return: The np.ndarray containing the predicted value. - :rtype: np.ndarray - """ - self.logger.info("Performing a prediction for "+self.model_type) - new_values = np.array([new_values]) - - return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 5e40160a..1d98ebb1 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -394,47 +394,36 @@ def treat_runtimeparams( if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] - params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if "num_def_loads" in runtimeparams.keys(): - optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] - if "P_deferrable_nom" in runtimeparams.keys(): - optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] - if "def_total_hours" in runtimeparams.keys(): - optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] - if "def_start_timestep" in runtimeparams.keys(): - optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] - if "def_end_timestep" in runtimeparams.keys(): - optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] - if "treat_def_as_semi_cont" in runtimeparams.keys(): - optim_conf["treat_def_as_semi_cont"] = [ - eval(str(k).capitalize()) - for k in runtimeparams["treat_def_as_semi_cont"] - ] - if "set_def_constant" in runtimeparams.keys(): - optim_conf["set_def_constant"] = [ - eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] - ] - if "solcast_api_key" in runtimeparams.keys(): - retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] - optim_conf["weather_forecast_method"] = "solcast" - if "solcast_rooftop_id" in runtimeparams.keys(): - retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ - "solcast_rooftop_id" - ] - optim_conf["weather_forecast_method"] = "solcast" - if "solar_forecast_kwp" in runtimeparams.keys(): - retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ - "solar_forecast_kwp" - ] - optim_conf["weather_forecast_method"] = "solar.forecast" - if "weight_battery_discharge" in runtimeparams.keys(): - optim_conf["weight_battery_discharge"] = runtimeparams[ - "weight_battery_discharge" - ] - if "weight_battery_charge" in runtimeparams.keys(): - optim_conf["weight_battery_charge"] = 
runtimeparams["weight_battery_charge"] + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if 'num_def_loads' in runtimeparams.keys(): + optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] + if 'P_deferrable_nom' in runtimeparams.keys(): + optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] + if 'def_total_hours' in runtimeparams.keys(): + optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] + if 'def_start_timestep' in runtimeparams.keys(): + optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] + if 'def_end_timestep' in runtimeparams.keys(): + optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] + if 'treat_def_as_semi_cont' in runtimeparams.keys(): + optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] + if 'set_def_constant' in runtimeparams.keys(): + optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] + if 'solcast_api_key' in runtimeparams.keys(): + retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] + optim_conf['weather_forecast_method'] = 'solcast' + if 'solcast_rooftop_id' in runtimeparams.keys(): + retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] + optim_conf['weather_forecast_method'] = 'solcast' + if 'solar_forecast_kwp' in runtimeparams.keys(): + retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] + optim_conf['weather_forecast_method'] = 'solar.forecast' + if 'weight_battery_discharge' in runtimeparams.keys(): + optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] + if 'weight_battery_charge' in runtimeparams.keys(): + optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] # Treat plant configuration parameters passed at runtime if "SOCtarget" in runtimeparams.keys(): plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] From 9d45cb125fa61a5cd68d4021124de55eea9c3daa Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 098/111] change naming and some formatting --- src/emhass/command_line.py | 5 +-- src/emhass/utils.py | 71 ++++++++++++++++++++++---------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 06280ff4..ee27be0f 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -1014,10 +1014,7 @@ def main(): else: mlf = None df_pred_optim, mlf = forecast_model_tune( - input_data_dict, - logger, - debug=args.debug, - mlf=mlf, + input_data_dict, logger, debug=args.debug, mlf=mlf ) opt_res = None elif args.action == "regressor-model-fit": diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 1d98ebb1..5e40160a 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -394,36 +394,47 @@ def treat_runtimeparams( if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 
'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + 
optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant configuration parameters passed at runtime if "SOCtarget" in runtimeparams.keys(): plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] From af8b9f6be5aa19dbebcf3b429155a5e3bd8f42cd Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 099/111] Add csv-prediction --- src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From b035ccdbcf61ac718cbc46fc4890c63ec75e3afd Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 100/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From 879186fae968c0dbde8b46cc9d0c3419bab361b8 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 101/111] gitignore fun --- .vscode/launch.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index f0ceae3a..94690663 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,9 +10,9 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", "purpose": [ "debug-in-terminal" @@ -27,9 +27,9 @@ }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", "args": [ "--addon", From 36e17350cf266a8a866c9b5cee31bfa79448537d Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 102/111] python -> debugpy --- .vscode/launch.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 94690663..e8e023c8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,7 +10,7 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", @@ -27,7 +27,7 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", From 3e82c8772345e3015b136f020ce5f3eeedcaabf4 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 103/111] launch.json --- .vscode/launch.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index e8e023c8..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,7 +12,7 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", "purpose": [ "debug-in-terminal" @@ -29,7 +29,7 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": 
"web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", "args": [ "--addon", From 730a2df77993b94df7ae616a6f3a5f280334490a Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 104/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. - - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. 
- :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. - - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From a23b693f3b1ed6babc780c590fcf250844708eae Mon Sep 17 00:00:00 2001 From: gieljnssns Date: Tue, 19 Mar 2024 04:40:02 +0100 Subject: [PATCH 105/111] first documentation for mlregressor --- docs/index.md | 4 +- docs/mlregressor.md | 91 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 docs/mlregressor.md diff --git a/docs/index.md b/docs/index.md index cf015a3f..cc9f33a8 
100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,6 +6,7 @@
 # EMHASS: Energy Management for Home Assistant
 
 ```{image} images/emhass_logo.png
+
 ```
 
 Welcome to the documentation of EMHASS. With this package written in Python you will be able to implement a real Energy Management System for your household. This software was designed to be easy configurable and with a fast integration with Home Assistant:
@@ -21,6 +22,7 @@ differences.md
 lpems.md
 forecasts.md
 mlforecaster.md
+mlregressor.md
 study_case.md
 config.md
 emhass.md
@@ -32,5 +34,3 @@ develop.md
 - {ref}`genindex`
 - {ref}`modindex`
 - {ref}`search`
-
-
diff --git a/docs/mlregressor.md b/docs/mlregressor.md
new file mode 100644
index 00000000..7206af99
--- /dev/null
+++ b/docs/mlregressor.md
@@ -0,0 +1,91 @@
+# The machine learning regressor
+
+Starting with v0.9.0, a new framework is proposed within EMHASS. It provides a machine learning module to predict values from a csv file using different regression models.
+
+This API provides two main methods:
+
+- fit: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point.
+
+- predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point.
+
+## A basic model fit
+
+To train a model use the `regressor-model-fit` end point.
+
+Some parameters can be optionally defined at runtime:
+
+- `csv_file`: The name of the csv file containing your data.
+
+- `features`: A list of the feature column names to use as model inputs. Example: `["degreeday", "solar"]`.
+
+- `target`: The name of the column holding the value to be predicted. Example: `heating_hours`.
+
+- `model_type`: A unique name defining this model and identifying what it will be used for. For example: `heating_hours_degreeday`. This should be a unique name if you are using multiple custom regressor models.
+
+- `regression_model`: The regression model that will be used. For now only these options are possible: `LinearRegression`, `RidgeRegression`, `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`.
+
+- `timestamp`: If defined, the column key to use as the timestamp.
+
+- `date_features`: A list of date features to take into account when fitting the model. Possibilities are `year`, `month`, `day_of_week` (monday=0, sunday=6), `day_of_year`, `day` (day of month) and `hour`.
+
+```
+runtimeparams = {
+    "csv_file": "heating_prediction.csv",
+    "features":["degreeday", "solar"],
+    "target": "heating_hours",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"]
+    }
+```
+
+A correct `curl` call to launch a model fit can look like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit
+```
+
+After applying the `curl` command to fit the model the following information is logged by EMHASS:
+
+    2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model
+    2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176
+    2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957
+
+## The predict method
+
+To obtain a prediction using a previously trained model use the `regressor-model-predict` end point.
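+
+The endpoints can also be called from Python instead of `curl`; a minimal sketch using the third-party `requests` package, assuming EMHASS is reachable on `localhost:5000` and that a model named `heating_hours_degreeday` was fitted beforehand:
+
+```
+import requests
+
+# Ask the pre-trained model for a prediction.
+response = requests.post(
+    "http://localhost:5000/action/regressor-model-predict",
+    json={"model_type": "heating_hours_degreeday"},
+)
+print(response.status_code)
+```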
+ +``` +curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-predict +``` + +If needed pass the correct `model_type` like this: + +``` +curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "load_forecast"}' http://localhost:5000/action/regressor-model-predict +``` + +It is possible to publish the predict method results to a Home Assistant sensor. + +The list of parameters needed to set the data publish task is: + +- `mlr_predict_entity_id`: The unique `entity_id` to be used. + +- `mlr_predict_unit_of_measurement`: The `unit_of_measurement` to be used. + +- `mlr_predict_friendly_name`: The `friendly_name` to be used. + +- `new_values`: The new values for the features (in the same order as the features list). Also when using date_features, add these to the new values. + +- `model_type`: The model type that has to be predicted + +``` +runtimeparams = { + "mlr_predict_entity_id": "sensor.mlr_predict", + "mlr_predict_unit_of_measurement": None, + "mlr_predict_friendly_name": "mlr predictor", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" +} +``` From 058c732983dd7e76ef9d1c4663fd23b626f13867 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 15:45:58 +0200 Subject: [PATCH 106/111] Rename paragrams --- tests/test_command_line_utils.py | 33 +++++++++++++----------- tests/test_machine_learning_regressor.py | 11 +++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index 293733f4..c4482c07 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -335,11 +335,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-fit" # fit and predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -360,14 +360,16 @@ def test_regressor_model_fit_predict(self): get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["regression_model"] == "AdaBoostRegression", ) self.assertTrue( - input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + input_data_dict["params"]["passed_data"]["csv_file"] + == "heating_prediction.csv", ) mlr = regressor_model_fit(input_data_dict, logger, debug=True) @@ -378,11 +380,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-predict" # predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -405,7 +407,8 @@ def test_regressor_model_fit_predict(self): 
get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] @@ -519,11 +522,11 @@ def test_main_forecast_model_tune(self): def test_main_regressor_model_fit(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], } @@ -551,11 +554,11 @@ def test_main_regressor_model_fit(self): def test_main_regressor_model_predict(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 88137b0d..74702b6f 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -7,14 +7,11 @@ import numpy as np import pandas as pd -from sklearn.pipeline import Pipeline import yaml from emhass import utils from emhass.command_line import set_input_data_dict from emhass.machine_learning_regressor import MLRegressor -from sklearn.ensemble import ( - AdaBoostRegressor, -) +from sklearn.pipeline import Pipeline # the root folder root = str(utils.get_root(__file__, num_parent=2)) @@ -50,11 +47,11 @@ def setUp(self): action = "regressor-model-fit" # fit and predict methods params = copy.deepcopy(json.loads(params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], From 6c2162908ef0d87be65fa789ce21fb30843380c5 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 11:40:54 +0200 Subject: [PATCH 107/111] Ready for review (I think) --- docs/mlregressor.md | 106 +++++++++++++++++++---- src/emhass/command_line.py | 61 +++++++------ src/emhass/machine_learning_regressor.py | 3 +- src/emhass/utils.py | 15 +++- 4 files changed, 137 insertions(+), 48 deletions(-) diff --git a/docs/mlregressor.md b/docs/mlregressor.md index 7206af99..dee5fccd 100644 --- a/docs/mlregressor.md +++ b/docs/mlregressor.md @@ -8,6 +8,7 @@ This API provides two main methods: - predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. + ## A basic model fit To train a model use the `regressor-model-fit` end point. 
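+The csv file is expected to have a header row naming the timestamp, feature and target columns; `data/heating_prediction.csv`, added later in this patch series, follows that layout:
+
+```
+timestamp,degreeday,solar,hour
+2023-11-10 23:59:32.458039+01:00,12.23,3.982,2.87
+2023-11-11 23:59:32.459778+01:00,12.94,13.723,3.14
+```
+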
@@ -45,28 +46,38 @@ A correct `curl` call to launch a model fit can look like this:
 ```
 curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit
 ```
-
-After applying the `curl` command to fit the model the following information is logged by EMHASS:
-
-    2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model
-    2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176
-    2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957
-
-## The predict method
-
-To obtain a prediction using a previously trained model use the `regressor-model-predict` end point.
+A Home Assistant `rest_command` can look like this:
+
+```
+fit_heating_hours:
+  url: http://127.0.0.1:5000/action/regressor-model-fit
+  method: POST
+  content_type: "application/json"
+  payload: >-
+    {
+    "csv_file": "heating_prediction.csv",
+    "features":["degreeday", "solar"],
+    "target": "heating_hours",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"]
+    }
+```
+After fitting the model the following information is logged by EMHASS:
+
+    2024-04-17 12:41:50,019 - web_server - INFO - Passed runtime parameters: {'csv_file': 'heating_prediction.csv', 'features': ['degreeday', 'solar'], 'target': 'heating_hours', 'regression_model': 'RandomForestRegression', 'model_type': 'heating_hours_degreeday', 'timestamp': 'timestamp', 'date_features': ['month', 'day_of_week']}
+    2024-04-17 12:41:50,020 - web_server - INFO - >> Setting input data dict
+    2024-04-17 12:41:50,021 - web_server - INFO - Setting up needed data
+    2024-04-17 12:41:50,048 - web_server - INFO - >> Performing a machine learning regressor fit...
+    2024-04-17 12:41:50,049 - web_server - INFO - Performing a MLRegressor fit for heating_hours_degreeday
+    2024-04-17 12:41:50,064 - web_server - INFO - Training a RandomForestRegression model
+    2024-04-17 12:41:57,852 - web_server - INFO - Elapsed time for model fit: 7.78800106048584
+    2024-04-17 12:41:57,862 - web_server - INFO - Prediction R2 score of fitted model on test data: -0.5667567505914477
+
+## The predict method
+
+To obtain a prediction using a previously trained model use the `regressor-model-predict` end point.
 The list of parameters needed to set the data publish task is:
 
 - `mlr_predict_entity_id`: The unique `entity_id` to be used.
 
 - `mlr_predict_unit_of_measurement`: The `unit_of_measurement` to be used.
 
 - `mlr_predict_friendly_name`: The `friendly_name` to be used.
 
 - `new_values`: The new values for the features (in the same order as the features list). Also when using date_features, add these to the new values.
 
 - `model_type`: The model type that has to be predicted
 
@@ -89,3 +100,66 @@ runtimeparams = {
     "model_type": "heating_hours_degreeday"
 }
 ```
+
+Pass the correct `model_type` like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict
+```
+
+A Home Assistant `rest_command` can look like this:
+
+```
+predict_heating_hours:
+  url: http://localhost:5001/action/regressor-model-predict
+  method: POST
+  content_type: "application/json"
+  payload: >-
+    {
+    "mlr_predict_entity_id": "sensor.predicted_hours",
+    "mlr_predict_unit_of_measurement": "h",
+    "mlr_predict_friendly_name": "Predicted hours",
+    "new_values": [8.2, 7.23, 2, 6],
+    "model_type": "heating_hours_degreeday"
+    }
+```
+After the prediction, the following information is logged by EMHASS:
+
+```
+2024-04-17 14:25:40,695 - web_server - INFO - Passed runtime parameters: {'mlr_predict_entity_id': 'sensor.predicted_hours', 'mlr_predict_unit_of_measurement': 'h', 'mlr_predict_friendly_name': 'Predicted hours', 'new_values': [8.2, 7.23, 2, 6], 'model_type': 'heating_hours_degreeday'}
+2024-04-17 14:25:40,696 - web_server - INFO - >> Setting input data dict
+2024-04-17 14:25:40,696 - web_server - INFO - Setting up needed data
+2024-04-17 14:25:40,700 - web_server - INFO - >> Performing a machine learning regressor predict...
+2024-04-17 14:25:40,715 - web_server - INFO - Performing a prediction for heating_hours_degreeday
+2024-04-17 14:25:40,750 - web_server - INFO - Successfully posted to sensor.predicted_hours = 3.716600000000001
+```
+The predict method will publish the result to a Home Assistant sensor.
+
+
+## How to store data in a csv file from Home Assistant
+First configure a file notification service:
+```
+notify:
+  - platform: file
+    name: heating_hours_prediction
+    timestamp: false
+    filename: /share/heating_prediction.csv
+```
+Then you need an automation that appends a new line to this file once a day:
+```
+alias: "Heating csv"
+id: 157b1d57-73d9-4f39-82c6-13ce0cf42
+trigger:
+  - platform: time
+    at: "23:59:32"
+action:
+  - service: notify.heating_hours_prediction
+    data:
+      message: >
+        {% set degreeday = states('sensor.degree_day_daily') |float %}
+        {% set heating_hours = states('sensor.heating_hours_today') |float | round(2) %}
+        {% set solar = states('sensor.solar_daily') |float | round(3) %}
+        {% set time = now() %}
+
+        {{time}},{{degreeday}},{{solar}},{{heating_hours}}
+```
\ No newline at end of file
diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index ee27be0f..2190aadf 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -206,34 +206,39 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
         P_PV_forecast, P_load_forecast = None, None
         params = json.loads(params)
         days_list = None
-        csv_file = params["passed_data"]["csv_file"]
-        features = params["passed_data"]["features"]
-        target = params["passed_data"]["target"]
-        timestamp = params["passed_data"]["timestamp"]
-        if get_data_from_file:
-            base_path = base_path + "/data"
-            filename_path = pathlib.Path(base_path) / csv_file
-
-        else:
-            filename_path = pathlib.Path(base_path) / csv_file
-
-        if filename_path.is_file():
-            df_input_data = pd.read_csv(filename_path, parse_dates=True)
-
-        else:
-            logger.error("The cvs file was not found.")
-            raise ValueError("The CSV file " + csv_file + " was not found.")
-        required_columns = []
-        required_columns.extend(features)
-        required_columns.append(target)
-        if timestamp is not None:
-            required_columns.append(timestamp)
- - if not set(required_columns).issubset(df_input_data.columns): - logger.error("The cvs file does not contain the required columns.") - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}", - ) + csv_file = params["passed_data"].get("csv_file", None) + if "features" in params["passed_data"]: + features = params["passed_data"]["features"] + if "target" in params["passed_data"]: + target = params["passed_data"]["target"] + if "timestamp" in params["passed_data"]: + timestamp = params["passed_data"]["timestamp"] + if csv_file: + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file + + else: + filename_path = pathlib.Path(base_path) / csv_file + + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) + + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(features) + required_columns.append(target) + if timestamp is not None: + required_columns.append(timestamp) + + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + msg = f"CSV file should contain the following columns: {', '.join(required_columns)}" + raise ValueError( + msg, + ) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 732b4266..f0d3c532 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -190,9 +190,10 @@ def get_regression_model(self: MLRegressor) -> tuple[str, str]: param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model %s is not valid", + "Passed model %s is not valid", self.regression_model, ) + return None return base_model, param_grid def fit(self: MLRegressor, date_features: list | None = None) -> None: diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 5e40160a..8bb6f101 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -223,12 +223,12 @@ def treat_runtimeparams( params["passed_data"]["csv_file"] = csv_file params["passed_data"]["features"] = features params["passed_data"]["target"] = target - if "timestamp" not in runtimeparams.keys(): + if "timestamp" not in runtimeparams: params["passed_data"]["timestamp"] = None else: timestamp = runtimeparams["timestamp"] params["passed_data"]["timestamp"] = timestamp - if "date_features" not in runtimeparams.keys(): + if "date_features" not in runtimeparams: params["passed_data"]["date_features"] = [] else: date_features = runtimeparams["date_features"] @@ -237,6 +237,15 @@ def treat_runtimeparams( if set_type == "regressor-model-predict": new_values = runtimeparams["new_values"] params["passed_data"]["new_values"] = new_values + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target # Treating special data passed for MPC control case if set_type == "naive-mpc-optim": @@ -330,7 +339,7 @@ def treat_runtimeparams( sklearn_model = runtimeparams["sklearn_model"] 
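+        # If no "regression_model" is passed at runtime, the default set below is used;
+        # any model name defined in REGRESSION_METHODS can be supplied instead.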
params["passed_data"]["sklearn_model"] = sklearn_model if "regression_model" not in runtimeparams.keys(): - regression_model = "LinearRegression" + regression_model = "AdaBoostRegression" else: regression_model = runtimeparams["regression_model"] params["passed_data"]["regression_model"] = regression_model From fec455841b4f9f34ec816fec9ff245ee9720bc83 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 14:11:23 +0200 Subject: [PATCH 108/111] remove *.csv from .gitignore to upload heating_prediction.csv --- data/heating_prediction.csv | 130 ++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 data/heating_prediction.csv diff --git a/data/heating_prediction.csv b/data/heating_prediction.csv new file mode 100644 index 00000000..f50a8a49 --- /dev/null +++ b/data/heating_prediction.csv @@ -0,0 +1,130 @@ +timestamp,degreeday,solar,hour +2023-11-10 23:59:32.458039+01:00,12.23,3.982,2.87 +2023-11-11 23:59:32.459778+01:00,12.94,13.723,3.14 +2023-11-12 23:59:32.462220+01:00,14.45,4.925,3.5 +2023-11-13 23:59:32.462167+01:00,8.49,2.138,3.19 +2023-11-14 23:59:32.338942+01:00,8.61,2.444,2.91 +2023-11-15 23:59:32.195198+01:00,9.83,10.685,2.88 +2023-11-16 23:59:32.501044+01:00,12.8,1.955,1.28 +2023-11-17 23:59:32.316366+01:00,13.35,8.742,2.97 +2023-11-18 23:59:32.082785+01:00,11.84,0.849,3.42 +2023-11-19 23:59:32.077198+01:00,7.3,10.85,1.9 +2023-11-20 23:59:32.431964+01:00,9.91,6.395,2.48 +2023-11-21 23:59:32.295705+01:00,11.44,2.678,2.91 +2023-11-22 23:59:32.377740+01:00,16.14,2.994,2.96 +2023-11-23 23:59:32.385890+01:00,9.31,5.346,2.91 +2023-11-24 23:59:32.376194+01:00,12.96,8.61,2.9 +2023-11-25 23:59:32.373666+01:00,14.91,12.31,3.47 +2023-11-26 23:59:32.373647+01:00,14.79,2.589,3.69 +2023-11-27 23:59:32.379920+01:00,14.92,0.322,6.05 +2023-11-28 23:59:32.213947+01:00,18.59,20.342,2.94 +2023-11-29 23:59:32.217384+01:00,19.05,5.393,5.41 +2023-11-30 23:59:32.222641+01:00,21.27,1.899,6.77 +2023-12-01 23:59:32.224533+01:00,21.3,1.233,5.75 +2023-12-02 23:59:32.107119+01:00,21.97,14.653,2.96 +2023-12-03 23:59:32.107436+01:00,20.61,4.766,8.89 +2023-12-04 23:59:32.116642+01:00,18.36,1.349,6.73 +2023-12-05 23:59:32.191254+01:00,16.93,0.869,6.17 +2023-12-06 23:59:32.176803+01:00,16.8,5.413,5.38 +2023-12-07 23:59:32.251031+01:00,17.67,8.089,5.98 +2023-12-08 23:59:32.255888+01:00,14.37,1.203,5.63 +2023-12-09 23:59:32.109040+01:00,11.94,0.814,5.08 +2023-12-10 23:59:32.103738+01:00,9.72,6.051,3.42 +2023-12-11 23:59:32.497717+01:00,9.83,1.459,3.87 +2023-12-12 23:59:32.502503+01:00,11.18,4.176,3.31 +2023-12-13 23:59:32.504794+01:00,11.09,2.91,3.1 +2023-12-14 23:59:32.177489+01:00,13.88,7.53,2.89 +2023-12-15 23:59:32.186292+01:00,12.18,2.129,5.68 +2023-12-16 23:59:32.176812+01:00,11.75,1.641,3.46 +2023-12-17 23:59:32.119874+01:00,12.18,14.868,3.46 +2023-12-18 23:59:32.120168+01:00,14.75,1.283,3.12 +2023-12-19 23:59:32.120101+01:00,12.82,0.09,5.07 +2023-12-20 23:59:32.249731+01:00,12.8,3.803,3.6 +2023-12-21 23:59:32.249135+01:00,8.73,2.096,3.55 +2023-12-22 23:59:32.385164+01:00,9.12,1.278,0.85 +2023-12-23 23:59:32.382910+01:00,8.99,1.848,0.0 +2023-12-24 23:59:32.382457+01:00,8.04,0.165,7.42 +2023-12-25 23:59:32.303520+01:00,7.56,1.028,2.93 +2023-12-26 23:59:32.105788+01:00,10.55,9.274,2.92 +2023-12-27 23:59:32.183107+01:00,11.78,2.026,3.39 +2023-12-28 23:59:32.183405+01:00,8.91,3.68,3.19 +2023-12-29 23:59:32.399740+01:00,9.35,2.464,2.95 +2023-12-30 23:59:32.091110+01:00,11.07,7.948,3.44 +2023-12-31 23:59:32.257530+01:00,10.51,3.5,3.48 +2024-01-01 
23:59:32.106161+01:00,12.75,4.046,3.08 +2024-01-02 23:59:32.103187+01:00,8.81,0.562,4.46 +2024-01-03 23:59:32.429947+01:00,10.03,2.184,3.26 +2024-01-04 23:59:32.436773+01:00,11.22,5.662,2.97 +2024-01-05 23:59:32.165969+01:00,12.42,1.199,3.6 +2024-01-06 23:59:32.110208+01:00,15.35,0.295,4.32 +2024-01-07 23:59:32.147775+01:00,19.88,0.896,6.19 +2024-01-08 23:59:32.242815+01:00,22.74,6.468,5.82 +2024-01-09 23:59:32.201342+01:00,24.38,21.307,6.92 +2024-01-10 23:59:32.411136+01:00,24.84,18.89,1.53 +2024-01-11 23:59:32.399433+01:00,23.57,19.27,3.05 +2024-01-12 23:59:32.467622+01:00,18.22,1.977,13.98 +2024-01-13 23:59:32.077428+01:00,17.9,0.472,6.93 +2024-01-14 23:59:32.127844+01:00,19.65,1.346,6.95 +2024-01-15 23:59:32.125062+01:00,19.49,4.35,7.82 +2024-01-16 23:59:32.280474+01:00,21.21,9.238,5.7 +2024-01-17 23:59:32.283951+01:00,23.17,1.193,7.37 +2024-01-18 23:59:32.361241+01:00,21.61,17.307,6.67 +2024-01-19 23:59:32.341654+01:00,22.06,21.004,6.24 +2024-01-20 23:59:32.359151+01:00,21.95,12.912,6.43 +2024-01-21 23:59:32.126221+01:00,17.38,3.28,7.45 +2024-01-22 23:59:32.126346+01:00,9.47,7.645,6.1 +2024-01-23 23:59:32.417727+01:00,11.87,7.689,4.76 +2024-01-24 23:59:32.420933+01:00,8.15,10.052,3.62 +2024-01-25 23:59:32.419138+01:00,12.38,3.785,3.98 +2024-01-26 23:59:32.422066+01:00,11.4,11.94,3.1 +2024-01-27 23:59:32.176538+01:00,17.96,19.741,3.45 +2024-01-28 23:59:32.168328+01:00,16.72,20.366,4.85 +2024-01-29 23:59:32.173916+01:00,13.11,16.972,4.51 +2024-01-30 23:59:32.503034+01:00,11.21,4.013,3.99 +2024-01-31 23:59:32.179265+01:00,12.79,4.766,3.73 +2024-02-01 23:59:32.487147+01:00,12.74,23.924,2.98 +2024-02-02 23:59:32.570084+01:00,13.0,2.98,5.04 +2024-02-03 23:59:32.484878+01:00,9.26,1.413,3.48 +2024-02-04 23:59:32.472168+01:00,8.35,4.306,3.47 +2024-02-05 23:59:32.409856+01:00,9.78,5.704,0.0 +2024-02-06 23:59:32.439147+01:00,9.15,2.431,6.56 +2024-02-07 23:59:32.235231+01:00,14.42,3.839,3.07 +2024-02-08 23:59:32.441543+01:00,13.9,1.412,5.94 +2024-02-09 23:59:32.443230+01:00,8.2,7.246,2.96 +2024-02-10 23:59:32.504326+01:00,8.37,8.567,3.48 +2024-02-11 23:59:32.452959+01:00,10.44,5.304,0.0 +2024-02-12 23:59:32.450999+01:00,12.65,16.004,3.42 +2024-02-13 23:59:32.343162+01:00,13.84,19.809,3.16 +2024-02-14 23:59:32.339408+01:00,8.48,1.98,4.52 +2024-02-15 23:59:32.339971+01:00,6.13,9.952,2.98 +2024-02-16 23:59:32.455273+01:00,7.66,3.675,3.06 +2024-02-17 23:59:32.097937+01:00,8.56,12.269,3.48 +2024-02-18 23:59:32.126377+01:00,9.59,2.205,3.04 +2024-02-19 23:59:32.421243+01:00,10.22,3.731,2.97 +2024-02-20 23:59:32.421985+01:00,11.61,13.775,0.0 +2024-02-21 23:59:32.371300+01:00,10.52,4.856,3.02 +2024-02-22 23:59:32.373153+01:00,9.53,4.256,3.48 +2024-02-23 23:59:32.372545+01:00,13.66,8.743,4.09 +2024-02-24 23:59:32.197044+01:00,14.44,7.842,4.3 +2024-02-25 23:59:32.196386+01:00,12.41,16.235,3.48 +2024-02-26 23:59:32.409648+01:00,14.63,2.096,5.05 +2024-02-27 23:59:32.373347+01:00,14.5,29.437,3.21 +2024-02-28 23:59:32.407538+01:00,15.38,6.475,4.88 +2024-02-29 23:59:32.194724+01:00,11.83,3.238,4.68 +2024-03-01 23:59:32.084520+01:00,10.56,14.352,3.8 +2024-03-02 23:59:32.066434+01:00,9.94,25.356,3.49 +2024-03-03 23:59:32.270878+01:00,8.9,10.577,3.19 +2024-03-04 23:59:32.274918+01:00,10.67,28.096,2.08 +2024-03-05 23:59:32.315023+01:00,12.19,10.553,2.95 +2024-03-06 23:59:32.441001+01:00,11.38,32.597,2.91 +2024-03-07 23:59:32.440044+01:00,12.39,28.856,2.96 +2024-03-08 23:59:32.228265+01:00,12.01,37.395,2.96 +2024-03-09 23:59:32.081874+01:00,8.72,17.66,3.5 +2024-03-10 23:59:32.335321+01:00,8.0,12.207,3.47 +2024-03-11 
23:59:32.139531+01:00,10.39,2.526,2.96 +2024-03-12 23:59:32.136709+01:00,10.24,8.211,2.98 +2024-03-13 23:59:32.407174+01:00,7.19,6.425,2.95 +2024-03-14 23:59:32.342436+01:00,6.06,33.389,1.64 +2024-03-15 23:59:32.266278+01:00,5.63,12.628,2.96 +2024-03-16 23:59:32.155245+01:00,9.57,12.103,3.0 +2024-03-17 23:59:32.366155+01:00,8.43,14.302,0.25 From b9ec17ada28ca9d006e3cf89cac5174b2ca2c3b5 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Apr 2024 09:17:07 +0200 Subject: [PATCH 109/111] Adapt to the latest path changes --- src/emhass/command_line.py | 11 +++++------ tests/test_command_line_utils.py | 14 ++++---------- tests/test_machine_learning_regressor.py | 15 ++++++++------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 2190aadf..b669e584 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -215,11 +215,11 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, timestamp = params["passed_data"]["timestamp"] if csv_file: if get_data_from_file: - base_path = base_path + "/data" + base_path = emhass_conf["data_path"] # + "/data" filename_path = pathlib.Path(base_path) / csv_file else: - filename_path = pathlib.Path(base_path) / csv_file + filename_path = emhass_conf["data_path"] / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -621,7 +621,6 @@ def regressor_model_fit( target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] date_features = input_data_dict["params"]["passed_data"]["date_features"] - root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( @@ -638,7 +637,8 @@ def regressor_model_fit( # Save model if not debug: filename = model_type + "_mlr.pkl" - with open(pathlib.Path(root) / filename, "wb") as outp: + filename_path = input_data_dict["emhass_conf"]["data_path"] / filename + with open(filename_path, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) return mlr @@ -659,9 +659,8 @@ def regressor_model_predict( :type debug: Optional[bool], optional """ model_type = input_data_dict["params"]["passed_data"]["model_type"] - root = input_data_dict["root"] filename = model_type + "_mlr.pkl" - filename_path = pathlib.Path(root) / filename + filename_path = input_data_dict["emhass_conf"]["data_path"] / filename if not debug: if filename_path.is_file(): with open(filename_path, "rb") as inp: diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index c4482c07..a8194c62 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -329,8 +329,6 @@ def test_forecast_model_fit_predict_tune(self): self.assertIsInstance(injection_dict["figure_0"], str) def test_regressor_model_fit_predict(self): - config_path = pathlib.Path(root + "/config_emhass.yaml") - base_path = str(config_path.parent) costfun = "profit" action = "regressor-model-fit" # fit and predict methods params = TestCommandLineUtils.get_test_params() @@ -350,8 +348,7 @@ def test_regressor_model_fit_predict(self): runtimeparams_json = json.dumps(runtimeparams) params_json = json.dumps(params) input_data_dict = set_input_data_dict( - config_path, - base_path, + emhass_conf, costfun, params_json, runtimeparams_json, @@ -374,8 +371,6 @@ def test_regressor_model_fit_predict(self): mlr = regressor_model_fit(input_data_dict, logger, debug=True) # def test_regressor_model_predict(self): - config_path = pathlib.Path(root + 
"/config_emhass.yaml") - base_path = str(config_path.parent) # + "/data" costfun = "profit" action = "regressor-model-predict" # predict methods params = TestCommandLineUtils.get_test_params() @@ -397,8 +392,7 @@ def test_regressor_model_fit_predict(self): params_json = json.dumps(params) input_data_dict = set_input_data_dict( - config_path, - base_path, + emhass_conf, costfun, params_json, runtimeparams_json, @@ -540,7 +534,7 @@ def test_main_regressor_model_fit(self): "--action", "regressor-model-fit", "--config", - str(pathlib.Path(root + "/config_emhass.yaml")), + str(emhass_conf["config_path"]), "--params", params_json, "--runtimeparams", @@ -574,7 +568,7 @@ def test_main_regressor_model_predict(self): "--action", "regressor-model-predict", "--config", - str(pathlib.Path(root + "/config_emhass.yaml")), + str(emhass_conf["config_path"]), "--params", params_json, "--runtimeparams", diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 74702b6f..4201199a 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -15,15 +15,19 @@ # the root folder root = str(utils.get_root(__file__, num_parent=2)) +emhass_conf = {} +emhass_conf["config_path"] = pathlib.Path(root) / "config_emhass.yaml" +emhass_conf["data_path"] = pathlib.Path(root) / "data/" +emhass_conf["root_path"] = pathlib.Path(root) # create logger -logger, ch = utils.get_logger(__name__, root, save_to_file=False) +logger, ch = utils.get_logger(__name__, emhass_conf, save_to_file=False) class TestMLRegressor(unittest.TestCase): @staticmethod def get_test_params(): - with open(root + "/config_emhass.yaml", "r") as file: - params = yaml.load(file, Loader=yaml.FullLoader) + with open(emhass_conf["config_path"]) as file: + params = yaml.safe_load(file) params.update( { "params_secrets": { @@ -41,8 +45,6 @@ def get_test_params(): def setUp(self): params = TestMLRegressor.get_test_params() params_json = json.dumps(params) - config_path = pathlib.Path(root + "/config_emhass.yaml") - base_path = str(config_path.parent) # + "/data" costfun = "profit" action = "regressor-model-fit" # fit and predict methods params = copy.deepcopy(json.loads(params_json)) @@ -61,8 +63,7 @@ def setUp(self): params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) self.input_data_dict = set_input_data_dict( - config_path, - base_path, + emhass_conf, costfun, params_json, runtimeparams_json, From 69a114090f60f96d9ee4ad00c2573568bc2c186f Mon Sep 17 00:00:00 2001 From: GeoDerp <18461782+GeoDerp@users.noreply.github.com> Date: Fri, 19 Apr 2024 14:33:15 +0000 Subject: [PATCH 110/111] mlregressor, add web buttons for mlregressor, add some suggestions --- docs/develop.md | 15 ++++- docs/mlregressor.md | 59 +++++++++++++----- src/emhass/command_line.py | 77 ++++++++++++++++-------- src/emhass/static/advanced.html | 3 + src/emhass/static/script.js | 2 + src/emhass/utils.py | 23 ++++--- src/emhass/web_server.py | 14 +++-- tests/test_machine_learning_regressor.py | 2 +- 8 files changed, 139 insertions(+), 56 deletions(-) diff --git a/docs/develop.md b/docs/develop.md index 6716f5ec..da81ef25 100644 --- a/docs/develop.md +++ b/docs/develop.md @@ -221,6 +221,11 @@ For those who wish to mount/sync the local `data` folder with the data folder fr docker run ... -v $(pwd)/data/:/app/data ... ``` +You can also mount data (ex .csv) files separately +```bash +docker run... -v $(pwd)/data/heating_prediction.csv:/app/data/ ... 
+``` + #### Issue with TARGETARCH If your docker build fails with an error related to `TARGETARCH`. It may be best to add your devices architecture manually: @@ -301,7 +306,7 @@ git checkout $branch ```bash #testing addon (build and run) docker build -t emhass/docker --build-arg build_version=addon-local . -docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/options.json:/app/options.json -e LAT="45.83" -e LON="6.86" -e ALT="4807.8" -e TIME_ZONE="Europe/Paris" emhass/docker --url $HAURL --key $HAKEY +docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/options.json:/app/options.json -e LAT="45.83" -e LON="6.86" -e ALT="4807.8" -e TIME_ZONE="Europe/Paris" emhass/docker --url $HAURL --key $HAKEY ``` ```bash #run actions on a separate terminal @@ -311,6 +316,8 @@ curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/a curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-fit curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-tune +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/publish-data ``` @@ -326,7 +333,7 @@ lat: 45.83 lon: 6.86 alt: 4807.8 EOT -docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml emhass/docker +docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml emhass/docker ``` ```bash #run actions on a separate terminal @@ -336,10 +343,12 @@ curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/a curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-fit curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-tune +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", 
"mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/publish-data ``` -User may wish to re-test with tweaked parameters such as `lp_solver` and `weather_forecast_method`, in `config_emhass.yaml` *(standalone)* or `options.json` *(addon)*, to broaden the testing scope. +User may wish to re-test with tweaked parameters such as `lp_solver`, `weather_forecast_method` and `load_forecast_method`, in `config_emhass.yaml` *(standalone)* or `options.json` *(addon)*, to broaden the testing scope. *see [EMHASS & EMHASS-Add-on differences](https://emhass.readthedocs.io/en/latest/differences.html) for more information on how these config_emhass & options files differ* *Note: may need to set `--build-arg TARGETARCH=YOUR-ARCH` in docker build* diff --git a/docs/mlregressor.md b/docs/mlregressor.md index dee5fccd..7746e985 100644 --- a/docs/mlregressor.md +++ b/docs/mlregressor.md @@ -4,10 +4,9 @@ Starting with v0.9.0, a new framework is proposed within EMHASS. It provides a m This API provides two main methods: -- fit: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point. - -- predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. +- **fit**: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point. +- **predict**: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. ## A basic model fit @@ -29,10 +28,11 @@ Some paramters can be optionally defined at runtime: - `date_features`: A list of 'date_features' to take into account when fitting the model. 
Possibilities are `year`, `month`, `day_of_week` (monday=0, sunday=6), `day_of_year`, `day`(day_of_month) and `hour` -``` +### Examples: +```yaml runtimeparams = { "csv_file": "heating_prediction.csv", - "features":["degreeday", "solar"], + "features": ["degreeday", "solar"], "target": "heating_hours", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", @@ -43,12 +43,17 @@ runtimeparams = { A correct `curl` call to launch a model fit can look like this: +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "heating_hours"}' http://localhost:5000/action/regressor-model-fit ``` -curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit +or +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit ``` + A Home Assistant `rest_command` can look like this: -``` +```yaml fit_heating_hours: url: http://127.0.0.1:5000/action/regressor-model-fit method: POST @@ -56,7 +61,7 @@ fit_heating_hours: payload: >- { "csv_file": "heating_prediction.csv", - "features":["degreeday", "solar"], + "features": ["degreeday", "solar"], "target": "hours", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", @@ -91,7 +96,8 @@ The list of parameters needed to set the data publish task is: - `model_type`: The model type that has to be predicted -``` +### Examples: +```yaml runtimeparams = { "mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": None, @@ -103,13 +109,17 @@ runtimeparams = { Pass the correct `model_type` like this: -``` +```bash curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict ``` +or +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict +``` A Home Assistant `rest_command` can look like this: -``` +```yaml predict_heating_hours: url: http://localhost:5001/action/regressor-model-predict method: POST @@ -136,9 +146,30 @@ After predicting the model the following information is logged by EMHASS: The predict method will publish the result to a Home Assistant sensor. 
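+
+If you prefer Python over `curl`, both endpoints can also be driven from a small script. The following is a minimal sketch using the `requests` library; the host, port and payload values are illustrative assumptions and should be adapted to your own setup:
+
+```python
+import requests
+
+# Assumed EMHASS base URL; adjust host and port to your installation.
+BASE_URL = "http://localhost:5000/action"
+
+# Train the regressor from the CSV file (same runtime parameters as above).
+fit_payload = {
+    "csv_file": "heating_prediction.csv",
+    "features": ["degreeday", "solar"],
+    "target": "hour",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"],
+}
+requests.post(f"{BASE_URL}/regressor-model-fit", json=fit_payload).raise_for_status()
+
+# Predict from new feature values; EMHASS publishes the result to the given sensor.
+predict_payload = {
+    "model_type": "heating_hours_degreeday",
+    "new_values": [12.79, 4.766, 1, 2],
+    "mlr_predict_entity_id": "sensor.predicted_hours",
+    "mlr_predict_unit_of_measurement": "h",
+    "mlr_predict_friendly_name": "Predicted hours",
+}
+requests.post(f"{BASE_URL}/regressor-model-predict", json=predict_payload).raise_for_status()
+```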
-
-## How to store data in a csv file from Home Assistant
-Notify to a file
-```
+
+## Storing CSV files
+
+### Standalone container - how to mount .csv files in the data_path folder
+If running EMHASS as a standalone container, you will need to volume-mount a folder as the `data_path`, or mount a single .csv file inside `data_path`.
+
+Example of mounting a folder as the data_path *(.csv files stored inside)*:
+```bash
+docker run -it --restart always -p 5000:5000 -e LOCAL_COSTFUN="profit" -v $(pwd)/data:/app/data -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml --name DockerEMHASS <REPOSITORY:TAG>
+```
+Example of mounting a single .csv file:
+```bash
+docker run -it --restart always -p 5000:5000 -e LOCAL_COSTFUN="profit" -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml --name DockerEMHASS <REPOSITORY:TAG>
+```
+
+### Add-on - How to store data in a csv file from Home Assistant
+
+#### Change data_path
+If running the EMHASS add-on, you will likely need to change the `data_path` to a folder your Home Assistant can access.
+To do this, set the `data_path` to `/share/` in the addon *Configuration* page.
+
+#### Store sensor data to csv
+
+Notify to a file
+```yaml
 notify:
   - platform: file
     name: heating_hours_prediction
@@ -146,7 +177,7 @@ notify:
     timestamp: false
     filename: /share/heating_prediction.csv
 ```
 Then you need an automation to notify to this file
-```
+```yaml
 alias: "Heating csv"
 id: 157b1d57-73d9-4f39-82c6-13ce0cf42
 trigger:
diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index b669e584..f5b5281d 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -225,8 +225,9 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
             df_input_data = pd.read_csv(filename_path, parse_dates=True)
 
         else:
-            logger.error("The cvs file was not found.")
-            raise ValueError("The CSV file " + csv_file + " was not found.")
+            logger.error("The CSV file " + csv_file + " was not found in path: " + str(emhass_conf["data_path"]))
+            return False
+            #raise ValueError("The CSV file " + csv_file + " was not found.")
         required_columns = []
@@ -236,9 +237,11 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
         if not set(required_columns).issubset(df_input_data.columns):
             logger.error("The cvs file does not contain the required columns.")
             msg = f"CSV file should contain the following columns: {', '.join(required_columns)}"
-            raise ValueError(
-                msg,
-            )
+            logger.error(msg)
+            return False
+            #raise ValueError(
+            #    msg,
+            #)
 
     elif set_type == "publish-data":
         df_input_data, df_input_data_dayahead = None, None
@@ -615,12 +618,36 @@ def regressor_model_fit(
     :type debug: Optional[bool], optional
     """
     data = copy.deepcopy(input_data_dict["df_input_data"])
-    model_type = input_data_dict["params"]["passed_data"]["model_type"]
-    regression_model = input_data_dict["params"]["passed_data"]["regression_model"]
-    features = input_data_dict["params"]["passed_data"]["features"]
-    target = input_data_dict["params"]["passed_data"]["target"]
-    timestamp = input_data_dict["params"]["passed_data"]["timestamp"]
-    date_features = input_data_dict["params"]["passed_data"]["date_features"]
+    if "model_type" in input_data_dict["params"]["passed_data"]:
+        model_type = input_data_dict["params"]["passed_data"]["model_type"]
+    else:
+        logger.error("parameter: 'model_type' not passed")
+        return False
+    if "regression_model" 
in input_data_dict["params"]["passed_data"]: + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] + else: + logger.error("parameter: 'regression_model' not passed") + return False + if "features" in input_data_dict["params"]["passed_data"]: + features = input_data_dict["params"]["passed_data"]["features"] + else: + logger.error("parameter: 'features' not passed") + return False + if "target" in input_data_dict["params"]["passed_data"]: + target = input_data_dict["params"]["passed_data"]["target"] + else: + logger.error("parameter: 'target' not passed") + return False + if "timestamp" in input_data_dict["params"]["passed_data"]: + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + else: + logger.error("parameter: 'timestamp' not passed") + return False + if "date_features" in input_data_dict["params"]["passed_data"]: + date_features = input_data_dict["params"]["passed_data"]["date_features"] + else: + logger.error("parameter: 'date_features' not passed") + return False # The MLRegressor object mlr = MLRegressor( @@ -658,7 +685,11 @@ def regressor_model_predict( :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict["params"]["passed_data"]["model_type"] + if "model_type" in input_data_dict["params"]["passed_data"]: + model_type = input_data_dict["params"]["passed_data"]["model_type"] + else: + logger.error("parameter: 'model_type' not passed") + return False filename = model_type + "_mlr.pkl" filename_path = input_data_dict["emhass_conf"]["data_path"] / filename if not debug: @@ -669,20 +700,18 @@ def regressor_model_predict( logger.error( "The ML forecaster file was not found, please run a model fit method before this predict method", ) - return - new_values = input_data_dict["params"]["passed_data"]["new_values"] + return False + if "new_values" in input_data_dict["params"]["passed_data"]: + new_values = input_data_dict["params"]["passed_data"]["new_values"] + else: + logger.error("parameter: 'new_values' not passed") + return False # Predict from csv file prediction = mlr.predict(new_values) - - mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ - "mlr_predict_entity_id" - ] - mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ - "mlr_predict_unit_of_measurement" - ] - mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ - "mlr_predict_friendly_name" - ] + + mlr_predict_entity_id = input_data_dict["params"]["passed_data"].get("mlr_predict_entity_id","sensor.mlr_predict") + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"].get("mlr_predict_unit_of_measurement","h") + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"].get("mlr_predict_friendly_name","mlr predictor") # Publish prediction idx = 0 if not debug: diff --git a/src/emhass/static/advanced.html b/src/emhass/static/advanced.html index 38371583..6595520c 100644 --- a/src/emhass/static/advanced.html +++ b/src/emhass/static/advanced.html @@ -14,6 +14,9 @@

         <h4>Use the buttons below to fit, predict and tune a machine learning model for Load forecasting:</h4>
+        <h4>Use the buttons below to fit and predict a machine learning regressor model:</h4>
+        <button type="button" id="regressor-model-fit">ML regressor model fit</button>
+        <button type="button" id="regressor-model-predict">ML regressor model predict</button>
         <h4>Input Runtime Parameters</h4>
diff --git a/src/emhass/static/script.js b/src/emhass/static/script.js index 05861814..ad02b578 100644 --- a/src/emhass/static/script.js +++ b/src/emhass/static/script.js @@ -16,6 +16,8 @@ function loadButtons(page) { "forecast-model-fit", "forecast-model-predict", "forecast-model-tune", + "regressor-model-fit", + "regressor-model-predict", "perfect-optim", "publish-data", "naive-mpc-optim" diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 8bb6f101..aa344732 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -216,13 +216,16 @@ def treat_runtimeparams( freq = int(retrieve_hass_conf["freq"].seconds / 60.0) delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "regressor-model-fit": - csv_file = runtimeparams["csv_file"] - features = runtimeparams["features"] - target = runtimeparams["target"] - params["passed_data"]["csv_file"] = csv_file - params["passed_data"]["features"] = features - params["passed_data"]["target"] = target + if set_type == "regressor-model-fit": + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target if "timestamp" not in runtimeparams: params["passed_data"]["timestamp"] = None else: @@ -233,10 +236,10 @@ def treat_runtimeparams( else: date_features = runtimeparams["date_features"] params["passed_data"]["date_features"] = date_features - if set_type == "regressor-model-predict": - new_values = runtimeparams["new_values"] - params["passed_data"]["new_values"] = new_values + if "new_values" in runtimeparams: + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values if "csv_file" in runtimeparams: csv_file = runtimeparams["csv_file"] params["passed_data"]["csv_file"] = csv_file diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 9a100870..afb5370a 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -195,15 +195,21 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a machine learning regressor fit...") + ActionStr = " >> Performing a machine learning regressor fit..." + app.logger.info(ActionStr) regressor_model_fit(input_data_dict, app.logger) msg = f'EMHASS >> Action regressor-model-fit executed... \n' - return make_response(msg, 201) + if not checkFileLog(ActionStr): + return make_response(msg, 201) + return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a machine learning regressor predict...") + ActionStr = " >> Performing a machine learning regressor predict..." + app.logger.info(ActionStr) regressor_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action regressor-model-predict executed... \n' - return make_response(msg, 201) + if not checkFileLog(ActionStr): + return make_response(msg, 201) + return make_response(grabLog(ActionStr), 400) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 4201199a..0d40ac0b 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -56,7 +56,7 @@ def setUp(self): "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], - "new_values": [12.79, 4.766, 1, 2], + "new_values": [12.79, 4.766, 1, 2] } runtimeparams_json = json.dumps(runtimeparams) params["passed_data"] = runtimeparams From a78ae79280202aa1e23553fcc7fc93d4c98456c6 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 22 Apr 2024 09:32:50 +0200 Subject: [PATCH 111/111] is 0 -> == 0 --- src/emhass/retrieve_hass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index 4fb909b1..4bac582f 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -170,7 +170,7 @@ def get_data( df_raw = pd.DataFrame.from_dict(data) # self.logger.info(str(df_raw)) if len(df_raw) == 0: - if x is 0: + if x == 0: self.logger.error( "The retrieved Dataframe is empty, A sensor:" + var