From d9a701c42432303b97fefb42ac9faa6b18fe59d8 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:13:47 +0100 Subject: [PATCH 001/111] add /app to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index de278369..94da1122 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ secrets_emhass.yaml .vscode/launch.json .vscode/settings.json .vscode/tasks.json +**/app # Byte-compiled / optimized / DLL files __pycache__/ From c8b71e11ce0aef8ee3315fc758285cfed7de3c55 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 002/111] Add csv-prediction --- src/emhass/command_line.py | 46 ++++++++++++ src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ src/emhass/retrieve_hass.py | 10 +++ src/emhass/utils.py | 25 +++++++ src/emhass/web_server.py | 6 ++ 5 files changed, 226 insertions(+) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index f05aa42e..4d4f75c9 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,6 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization +from emhass.csv_predictor import CsvPredictor from emhass import utils @@ -153,6 +154,12 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() + elif set_type == "csv-predict": + df_input_data, df_input_data_dayahead = None, None + P_PV_forecast, P_load_forecast = None, None + days_list = None + params = json.loads(params) + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -433,6 +440,45 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf +def csv_predict(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
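+
+    A hypothetical runtime payload that would populate ``passed_data`` for this
+    action (the keys mirror those read in the body below; all values are
+    illustrative only, not defaults):
+
+    .. code-block:: python
+
+        runtimeparams = {"model_type": "heating_hours", "csv_file": "prediction.csv",
+                         "sklearn_model": "LinearRegression", "perform_backtest": False,
+                         "independent_variables": ["solar", "degree_days"],
+                         "dependent_variable": "hours", "new_values": [2.24, 5.68],
+                         "csv_predict_entity_id": "sensor.csv_predictor",
+                         "csv_predict_unit_of_measurement": "h",
+                         "csv_predict_friendly_name": "Csv predictor"}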
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + model_type = input_data_dict['params']['passed_data']['model_type'] + csv_file = input_data_dict['params']['passed_data']['csv_file'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + new_values = input_data_dict['params']['passed_data']['new_values'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # Fit the ML model + prediction = csv.predict(perform_backtest=perform_backtest) + + csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] + csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] + csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + # Publish Load forecast + idx = 0 + input_data_dict['rh'].post_data(prediction, idx, + csv_predict_entity_id, + csv_predict_unit_of_measurement, + csv_predict_friendly_name, + type_var = 'csv_predictor') + return prediction + def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. 
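+
+        A minimal instantiation sketch, with purely illustrative argument values
+        matching the signature above:
+
+        >>> predictor = CsvPredictor(data, "heating_hours", "prediction.csv",
+        ...                          ["solar", "degree_days"], "hours",
+        ...                          "LinearRegression", [2.24, 5.68], "/app", logger)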
+ + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. + :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. 
+ + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index 5aeba613..b4708f78 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -303,6 +303,8 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, state = np.round(data_df.loc[data_df.index[idx]],4) elif type_var == 'optim_status': state = data_df.loc[data_df.index[idx]] + elif type_var == 'csv_predictor': + state = data_df[idx] else: state = np.round(data_df.loc[data_df.index[idx]],2) if type_var == 'power': @@ -334,6 +336,14 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, "friendly_name": friendly_name } } + elif type_var == 'csv_predictor': + data = { + "state": state, + "attributes": { + "unit_of_measurement": unit_of_measurement, + "friendly_name": friendly_name + } + } else: data = { "state": "{:.2f}".format(state), diff --git a/src/emhass/utils.py b/src/emhass/utils.py index fb3b8a9c..22043d54 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,6 +155,16 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) + if set_type == "csv-predict": + csv_file = runtimeparams['csv_file'] + independent_variables = runtimeparams['independent_variables'] + dependent_variable = runtimeparams['dependent_variable'] + new_values = runtimeparams['new_values'] + params['passed_data']['csv_file'] = csv_file + params['passed_data']['independent_variables'] = independent_variables + params['passed_data']['dependent_variable'] = dependent_variable + params['passed_data']['new_values'] = new_values + # Treating special data passed for MPC control case if set_type == 'naive-mpc-optim': if 'prediction_horizon' not in runtimeparams.keys(): @@ -320,6 +330,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = 
runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name + if 'csv_predict_entity_id' not in runtimeparams.keys(): + csv_predict_entity_id = "sensor.csv_predictor" + else: + csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] + params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id + if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): + csv_predict_unit_of_measurement = None + else: + csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] + params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement + if 'csv_predict_friendly_name' not in runtimeparams.keys(): + csv_predict_friendly_name = "Csv predictor" + else: + csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] + params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 3f7099e2..40274095 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,6 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import csv_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -189,6 +190,11 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) + elif action_name == 'csv-predict': + app.logger.info(" >> Performing a csv predict...") + csv_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-predict executed... \n' + return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' From 0e2f535a31e51b718724adce548dda48e340e9a4 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 003/111] cleanup --- src/emhass/command_line.py | 12 ++++++---- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 4d4f75c9..b255872e 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -453,19 +453,21 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] + # data = copy.deepcopy(input_data_dict['df_input_data']) + # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) # Fit the ML model - prediction = csv.predict(perform_backtest=perform_backtest) + prediction = csv.predict() + # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. 
:type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From 61f64fd3f41e458db6c6fa9f8af7083c8416663f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 004/111] more cleanup --- src/emhass/command_line.py | 17 +++---- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 2 files changed, 40 insertions(+), 69 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index b255872e..60c342b6 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -441,8 +441,8 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: - """Perform a forecast model fit from training data retrieved from Home Assistant. + debug: Optional[bool] = False) -> np.ndarray: + """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict @@ -450,29 +450,24 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ - # data = copy.deepcopy(input_data_dict['df_input_data']) - # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) - # Fit the ML model + # Predict from csv file prediction = csv.predict() - # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] - # Publish Load forecast + # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, csv_predict_entity_id, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 11b3a3d2d3ca77ab3e13a322928e594c5d527ace Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 005/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From 779c6b8eaabdaecb5a184d1c02eb3720830f90db Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 006/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
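+
+        Note that no pre-trained model is loaded: this method fits the configured
+        `sklearn_model` on the CSV data and then evaluates it on `self.new_values`,
+        e.g. (the return value shown is illustrative only):
+
+        >>> prediction = predictor.predict()
+        >>> prediction  # doctest: +SKIP
+        array([3.58])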
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray: data = self.load_data() if data is not None: X, y = self.prepare_data(data) - + if self.sklearn_model == 'LinearRegression': base_model = LinearRegression() elif self.sklearn_model == 'ElasticNet': @@ -127,9 +126,5 @@ def predict(self) -> np.ndarray: self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) - + return prediction - - - - \ No newline at end of file From 2d0c460e02649366d55a4280eb31b6b3a1a28bd2 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 007/111] Use gridsearchcv and split up fit and predict --- src/emhass/command_line.py | 87 +++++++++++++++--- src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++------------- src/emhass/utils.py | 16 +++- src/emhass/web_server.py | 11 ++- 4 files changed, 210 insertions(+), 77 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 60c342b6..e2b86335 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -154,7 +154,36 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - elif set_type == "csv-predict": + + elif set_type == "csv-model-fit": + + df_input_data_dayahead = None + P_PV_forecast, P_load_forecast = None, None + params = json.loads(params) + days_list = None + csv_file = params['passed_data']['csv_file'] + independent_variables = params['passed_data']['independent_variables'] + dependent_variable = params['passed_data']['dependent_variable'] + timestamp = params['passed_data']['timestamp'] + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) + + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(independent_variables) + required_columns.append(dependent_variable) + if timestamp is not None: + required_columns.append(timestamp) + + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + elif set_type == "csv-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -440,7 +469,41 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_predict(input_data_dict: dict, logger: logging.Logger, +def csv_model_fit(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + # csv_file = input_data_dict['params']['passed_data']['csv_file'] + model_type = input_data_dict['params']['passed_data']['model_type'] + # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + timestamp = input_data_dict['params']['passed_data']['timestamp'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + date_features = input_data_dict['params']['passed_data']['date_features'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + # Fit the ML model + df_pred = csv.fit(date_features=date_features) + # Save model + if not debug: + filename = model_type+'_csv.pkl' + with open(pathlib.Path(root) / filename, 'wb') as outp: + pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + # return df_pred, csv + +def csv_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> np.ndarray: """Perform a prediction from csv file. @@ -453,16 +516,20 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - csv_file = input_data_dict['params']['passed_data']['csv_file'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - new_values = input_data_dict['params']['passed_data']['new_values'] + model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - # The ML forecaster object - csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + filename = model_type+'_csv.pkl' + filename_path = pathlib.Path(root) / filename + if not debug: + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + csv = pickle.load(inp) + else: + logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + return + new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict() + prediction = csv.predict(new_values) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
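+
+        A minimal instantiation sketch under this new signature (argument values
+        are illustrative only):
+
+        >>> predictor = CsvPredictor(data, "heating_hours",
+        ...                          ["solar", "degree_days"], "hours",
+        ...                          "timestamp", logger)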
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - self.csv_file = csv_file + self.data = data self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp - def load_data(self) -> pd.DataFrame: - """Load the data.""" - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(inp) - else: - self.logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + self.csv_file + " was not found.") - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: + :param data: The input DataFrame + :type data: pd.DataFrame + :return: The DataFrame with the added features + :rtype: pd.DataFrame + """ + df = copy.deepcopy(data) + df['timestamp']= pd.to_datetime(df['timestamp']) + if 'year' in date_features: + df['year'] = [i.month for i in df['timestamp']] + if 'month' in date_features: + df['month'] = [i.month for i in df['timestamp']] + if 'day_of_week' in date_features: + df['day_of_week'] = [i.dayofweek for i in df['timestamp']] + if 'day_of_year' in date_features: + df['day_of_year'] = [i.dayofyear for i in df['timestamp']] + if 'day' in date_features: + df['day'] = [i.day for i in df['timestamp']] + if 'hour' in date_features: + df['hour'] = [i.day for i in df['timestamp']] + + return df + + def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Prepare the data. + Fit the model using the provided data. :param data: Input Data :type data: pd.DataFrame - :return: A tuple containing the train data. 
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 22043d54..ddc834ae 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,14 +155,26 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-predict": + if set_type == "csv-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] - new_values = runtimeparams['new_values'] params['passed_data']['csv_file'] = csv_file params['passed_data']['independent_variables'] = independent_variables params['passed_data']['dependent_variable'] = dependent_variable + if 'timestamp' not in runtimeparams.keys(): + params['passed_data']['timestamp'] = None + else: + timestamp = runtimeparams['timestamp'] + params['passed_data']['timestamp'] = timestamp + if 'date_features' not in runtimeparams.keys(): + params['passed_data']['date_features'] = [] + else: + date_features = runtimeparams['date_features'] + params['passed_data']['date_features'] = date_features + + if set_type == "csv-model-predict": + new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values # Treating special data passed for MPC control case diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 40274095..8db64091 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_predict +from emhass.command_line import csv_model_fit, csv_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -190,9 +190,14 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-predict': + elif action_name == 'csv-model-fit': + app.logger.info(" >> Performing a csv fit...") + 
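+        # This branch would be exercised with e.g. (hypothetical call; the
+        # endpoint path, port and payload values are illustrative only):
+        #   curl -i -H "Content-Type: application/json" -X POST \
+        #     -d '{"csv_file": "prediction.csv", "independent_variables": ["solar", "degree_days"], "dependent_variable": "hours"}' \
+        #     http://localhost:5000/action/csv-model-fit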
csv_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-fit executed... \n' + return make_response(msg, 201) + elif action_name == 'csv-model-predict': app.logger.info(" >> Performing a csv predict...") - csv_predict(input_data_dict, app.logger) + csv_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action csv-predict executed... \n' return make_response(msg, 201) else: From 19337101bbba1a134379b26a20edf565e7b44606 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 008/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. From 0bf50a2b3c7cac5166c7b939668c5bd5aefb4105 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 009/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From ac45455f57eabe4205da8043481c1f0710dd7703 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 010/111] cleanup + docstrings --- src/emhass/command_line.py | 17 ++++---------- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 34 
deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index e2b86335..33af80c0 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -470,7 +470,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -479,32 +479,26 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ data = copy.deepcopy(input_data_dict['df_input_data']) - # csv_file = input_data_dict['params']['passed_data']['csv_file'] model_type = input_data_dict['params']['passed_data']['model_type'] - # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The ML forecaster object + # The CSV forecaster object csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - df_pred = csv.fit(date_features=date_features) + csv.fit(date_features=date_features) # Save model if not debug: filename = model_type+'_csv.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) - # return df_pred, csv def csv_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> np.ndarray: + debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -513,8 +507,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The np.ndarray containing the predicted value. - :rtype: np.ndarray """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] @@ -541,7 +533,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, csv_predict_unit_of_measurement, csv_predict_friendly_name, type_var = 'csv_predictor') - return prediction def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1e46927d..57d61791 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -25,32 +25,30 @@ class CsvPredictor: This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. 
-    It exposes one main method:
+    It exposes two main methods:
 
-    - `predict`: to obtain a forecast from a csv file.
+    - `fit`: to train a model with the passed data.
+
+    - `predict`: to obtain a forecast from a pre-trained model.
 
     """
 
    def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str,
                 logger: logging.Logger) -> None:
        r"""Define constructor for the forecast class.
 
-        :param csv_file: The name of the csv file to retrieve data from. \
-            Example: `input_train_data.csv`.
-        :type csv_file: str
+        :param data: The data that will be used for train/test
+        :type data: pd.DataFrame
+        :param model_type: A unique name defining this model and useful to identify \
+            what it will be used for.
+        :type model_type: str
         :param independent_variables: A list of independent variables. \
             Example: [`solar`, `degree_days`].
         :type independent_variables: list
         :param dependent_variable: The dependent variable (to be predicted). \
             Example: `hours`.
         :type dependent_variable: str
-        :param sklearn_model: The `scikit-learn` model that will be used. For now only \
-            this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`.
-        :type sklearn_model: str
-        :param new_values: The new values for the independent variables(in the same order as the independent variables list). \
-            Example: [2.24, 5.68].
-        :type new_values: list
-        :param root: The parent folder of the path where the config.yaml file is located
-        :type root: str
+        :param timestamp: If defined, the column key that has to be used as the timestamp.
+        :type timestamp: str
         :param logger: The passed logger object
         :type logger: logging.Logger
         """
@@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent
         self.dependent_variable = dependent_variable
         self.timestamp = timestamp
         self.model_type = model_type
         self.logger = logger
-        self.is_tuned = False
         self.data.sort_index(inplace=True)
         self.data = self.data[~self.data.index.duplicated(keep='first')]
 
     @staticmethod
-    def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame:
+    def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame:
         """Add date features from the input DataFrame timestamp
 
         :param data: The input DataFrame
         :type data: pd.DataFrame
+        :param timestamp: The column containing the timestamp
+        :type timestamp: str
         :return: The DataFrame with the added features
         :rtype: pd.DataFrame
         """
         df = copy.deepcopy(data)
-        df['timestamp']= pd.to_datetime(df['timestamp'])
+        df[timestamp] = pd.to_datetime(df['timestamp'])
         if 'year' in date_features:
-            df['year'] = [i.month for i in df['timestamp']]
+            df['year'] = [i.year for i in df['timestamp']]
         if 'month' in date_features:
             df['month'] = [i.month for i in df['timestamp']]
         if 'day_of_week' in date_features:
@@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None:
         """
         Fit the model using the provided data.
 
-        :param data: Input Data
-        :type data: pd.DataFrame
+        :param date_features: A list of 'date_features' to take into account when fitting the model.
+        :type date_features: list
         """
-        self.logger.info("Performing a forecast model fit for "+self.model_type)
+        self.logger.info("Performing a csv model fit for "+self.model_type)
         self.data_exo = pd.DataFrame(self.data)
         self.data_exo[self.independent_variables] = self.data[self.independent_variables]
         self.data_exo[self.dependent_variable] = self.data[self.dependent_variable]
@@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None:
         self.data_exo.reset_index(drop=True, inplace=True)
         if len(date_features) > 0:
             if self.timestamp is not None:
-                self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features)
+                self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp)
             else:
                 self.logger.error("If no timestamp is provided, date_features can't be used; continuing without date_features.")
@@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None:
     def predict(self, new_values:list) -> np.ndarray:
         r"""The predict method to generate a forecast from a csv file.
+
+        :param new_values: The new values for the independent variables (in the same order as the independent variables list). \
+            Example: [2.24, 5.68].
+        :type new_values: list
         :return: The np.ndarray containing the predicted value.
         :rtype: np.ndarray
         """

From f3574c8db0ff4034ddb1af603df57c782c13f203 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Mon, 11 Mar 2024 09:59:27 +0100
Subject: [PATCH 011/111] add other regression methods

---
 src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------
 1 file changed, 59 insertions(+), 28 deletions(-)

diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py
index 57d61791..2b6fb86a 100644
--- a/src/emhass/csv_predictor.py
+++ b/src/emhass/csv_predictor.py
@@ -9,9 +9,10 @@
 import pandas as pd
 import numpy as np
 
+from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor
 from sklearn.metrics import r2_score
 
-from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Lasso, LinearRegression, Ridge
 from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
@@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None:
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         self.steps = len(X_test)
 
-        # Define the model
-        self.model = Pipeline([
-            ('scaler', StandardScaler()),
-            ('regressor', LinearRegression())
-        ])
-        # Define the parameters to tune
-        param_grid = {
-            'regressor__fit_intercept': [True, False],
-            'regressor__positive': [True, False],
-        }
-
-        # Create a grid search object
-        self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1)
-        # Fit the grid search object to the data
-        self.logger.info("Fitting the model...")
-        start_time = time.time()
-        self.grid_search.fit(X_train.values, y_train.values)
-        self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}")
-
-        self.model = self.grid_search.best_estimator_
-
-
-        # Make predictions
-        predictions = self.model.predict(X_test.values)
-        predictions = pd.Series(predictions, index=X_test.index)
-        pred_metric = r2_score(y_test,predictions)
-        self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}")
+        regression_methods = [
+            ('Linear Regression', LinearRegression(), {}),
+            ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1,
1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid = { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From b9fa914ca93733156d802ac17c95228a3276562c Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:10:15 +0100 Subject: [PATCH 012/111] add --editable --- .vscode/tasks.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 11a92388..0b25f4f1 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,7 +9,11 @@ "isDefault": true }, "args": [ - "install", "--no-deps", "--force-reinstall", "." + "install", + "--no-deps", + "--force-reinstall", + "--editable", + "." 
], "presentation": { "echo": true, From f7fc59ffeecf862601d0dd896e73da4eaf6c6739 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:17 +0100 Subject: [PATCH 013/111] Add sklearn model --- src/emhass/command_line.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 33af80c0..ae98aa59 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -482,13 +482,14 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, """ data = copy.deepcopy(input_data_dict['df_input_data']) model_type = input_data_dict['params']['passed_data']['model_type'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] # The CSV forecaster object - csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model csv.fit(date_features=date_features) # Save model From 7177ad2cdcba20c2982dcf1a5d86fccc2c11bad0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 014/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
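The param grids in this patch follow the naming convention implied by scikit-learn's make_pipeline: each pipeline step is keyed by the lowercased class name of the estimator, so a hyperparameter is addressed as '<step>__<parameter>' (hence 'ridge__alpha', 'adaboostregressor__n_estimators', and so on). A minimal sketch of that convention, with made-up training data (the feature count and coefficients are assumptions for illustration only):

    # The Ridge() step created by make_pipeline() is auto-named 'ridge',
    # so its alpha hyperparameter is addressed as 'ridge__alpha'.
    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X = np.random.rand(100, 2)  # e.g. degree days and solar radiation
    y = X @ np.array([1.5, -0.7]) + 0.1 * np.random.rand(100)
    pipe = make_pipeline(StandardScaler(), Ridge())
    grid = GridSearchCV(pipe, {'ridge__alpha': [0.1, 1.0, 10.0]},
                        scoring='neg_mean_squared_error', cv=5)
    grid.fit(X, y)
    print(grid.best_params_)  # e.g. {'ridge__alpha': 0.1}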
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}") + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 11664bf628cc1910f745c0d193a6125fff888960 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:42:27 +0100 Subject: [PATCH 015/111] change to MLRegressor --- src/emhass/command_line.py | 40 +++++++++---------- ...ictor.py => machine_learning_regressor.py} | 4 +- src/emhass/utils.py | 28 ++++++------- src/emhass/web_server.py | 18 ++++----- 4 files changed, 45 insertions(+), 45 deletions(-) rename src/emhass/{csv_predictor.py => machine_learning_regressor.py} (98%) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index ae98aa59..17be0098 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,7 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization -from emhass.csv_predictor import CsvPredictor +from emhass.machine_learning_regressor import MLRegressor from emhass import utils @@ -155,7 +155,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, return False df_input_data = rh.df_final.copy() - elif set_type == "csv-model-fit": + elif set_type == "regressor-model-fit": df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None @@ -183,7 +183,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - elif set_type == "csv-model-predict": + elif set_type == "regressor-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -469,7 +469,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_model_fit(input_data_dict: dict, logger: logging.Logger, +def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. @@ -488,17 +488,17 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The CSV forecaster object - csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + # The MLRegressor object + mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - csv.fit(date_features=date_features) + mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: - pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def csv_model_predict(input_data_dict: dict, logger: logging.Logger, +def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. 
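After this rename, the two actions are served by the Flask web server as regressor-model-fit and regressor-model-predict. A hypothetical fit call from Python, assuming a standalone EMHASS server on localhost:5000 and an illustrative CSV next to the configuration (the file name, column names and model_type below are made-up values, not part of the patch):

    import requests

    runtimeparams = {
        "csv_file": "prediction.csv",
        "independent_variables": ["degreeday", "solar"],
        "dependent_variable": "hours",
        "timestamp": "timestamp",
        "model_type": "hours_degreeday",
        "sklearn_model": "RidgeRegression",
    }
    # POST the runtime parameters to the renamed action endpoint
    response = requests.post(
        "http://localhost:5000/action/regressor-model-fit", json=runtimeparams
    )
    print(response.status_code, response.text)  # expect 201 and the "executed" message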
@@ -511,29 +511,29 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
     """
     model_type = input_data_dict['params']['passed_data']['model_type']
     root = input_data_dict['root']
-    filename = model_type+'_csv.pkl'
+    filename = model_type+'_mlr.pkl'
     filename_path = pathlib.Path(root) / filename
     if not debug:
         if filename_path.is_file():
             with open(filename_path, 'rb') as inp:
-                csv = pickle.load(inp)
+                mlr = pickle.load(inp)
         else:
             logger.error("The ML regressor file was not found, please run a model fit method before this predict method")
             return
     new_values = input_data_dict['params']['passed_data']['new_values']
     # Predict from csv file
-    prediction = csv.predict(new_values)
+    prediction = mlr.predict(new_values)
 
-    csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id']
-    csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement']
-    csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name']
+    mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id']
+    mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement']
+    mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name']
     # Publish prediction
     idx = 0
     input_data_dict['rh'].post_data(prediction, idx,
-                                    csv_predict_entity_id,
-                                    csv_predict_unit_of_measurement,
-                                    csv_predict_friendly_name,
-                                    type_var = 'csv_predictor')
+                                    mlr_predict_entity_id,
+                                    mlr_predict_unit_of_measurement,
+                                    mlr_predict_friendly_name,
+                                    type_var = 'mlregressor')
 
 def publish_data(input_data_dict: dict, logger: logging.Logger,
     save_data_to_file: Optional[bool] = False,

diff --git a/src/emhass/csv_predictor.py b/src/emhass/machine_learning_regressor.py
similarity index 98%
rename from src/emhass/csv_predictor.py
rename to src/emhass/machine_learning_regressor.py
index 3ffeba27..d70df3ec 100644
--- a/src/emhass/csv_predictor.py
+++ b/src/emhass/machine_learning_regressor.py
@@ -20,7 +20,7 @@
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 
-class CsvPredictor:
+class MLRegressor:
     r"""
     A forecaster class using machine learning models.
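On the predict side, regressor_model_predict above unpickles the fitted pipeline and feeds it the new feature values. A sketch of that same flow outside the web server, assuming a model previously saved by regressor-model-fit (the path and values here are illustrative, not part of the patch):

    import pickle
    import pathlib

    root = pathlib.Path("/app/data")  # assumed location of the saved model
    with open(root / "hours_degreeday_mlr.pkl", "rb") as inp:
        mlr = pickle.load(inp)
    # new_values must follow the order of the independent variables list,
    # here [degreeday, solar]:
    prediction = mlr.predict([12.4, 4.2])
    print(prediction)  # np.ndarray containing the predicted value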
@@ -115,7 +115,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") diff --git a/src/emhass/utils.py b/src/emhass/utils.py index ddc834ae..5f9f249b 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,7 +155,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-model-fit": + if set_type == "regressor-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] @@ -173,7 +173,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic date_features = runtimeparams['date_features'] params['passed_data']['date_features'] = date_features - if set_type == "csv-model-predict": + if set_type == "regressor-model-predict": new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values @@ -342,21 +342,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'csv_predict_entity_id' not in runtimeparams.keys(): - csv_predict_entity_id = "sensor.csv_predictor" + if 'mlr_predict_entity_id' not in runtimeparams.keys(): + mlr_predict_entity_id = "sensor.mlr_predict" else: - csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] - params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id - if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): - csv_predict_unit_of_measurement = None + mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] + params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id + if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = None else: - csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] - params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement - if 'csv_predict_friendly_name' not in runtimeparams.keys(): - csv_predict_friendly_name = "Csv predictor" + mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] + params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement + if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_friendly_name = "mlr predictor" else: - csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] - params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff 
--git a/src/emhass/web_server.py b/src/emhass/web_server.py index 8db64091..cdb98b00 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_model_fit, csv_model_predict +from emhass.command_line import regressor_model_fit, regressor_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -190,15 +190,15 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-model-fit': - app.logger.info(" >> Performing a csv fit...") - csv_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-fit executed... \n' + elif action_name == 'regressor-model-fit': + app.logger.info(" >> Performing a regressor fit...") + regressor_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-fit executed... \n' return make_response(msg, 201) - elif action_name == 'csv-model-predict': - app.logger.info(" >> Performing a csv predict...") - csv_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-predict executed... \n' + elif action_name == 'regressor-model-predict': + app.logger.info(" >> Performing a regressor predict...") + regressor_model_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 5b168cd68c3a978a4d4e1c2a008185122db6b1ef Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 016/111] change naming and some formatting --- src/emhass/command_line.py | 1019 +++++++++++------- src/emhass/machine_learning_regressor.py | 285 +++-- src/emhass/retrieve_hass.py | 372 +++++-- src/emhass/utils.py | 1238 ++++++++++++++-------- 4 files changed, 1882 insertions(+), 1032 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 17be0098..b4a9050c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -8,14 +8,15 @@ import json import copy import pickle -import time -import numpy as np -import pandas as pd from datetime import datetime, timezone from typing import Optional, Tuple +from importlib.metadata import version +import numpy as np +import pandas as pd + from distutils.util import strtobool -from importlib.metadata import version + from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster @@ -24,12 +25,19 @@ from emhass import utils -def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, - params: str, runtimeparams: str, set_type: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> dict: +def set_input_data_dict( + config_path: pathlib.Path, + base_path: str, + costfun: str, + params: str, + runtimeparams: str, + set_type: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, +) -> dict: """ Set up some of the data needed for the different actions. 
- + :param config_path: The complete absolute path where the config.yaml file is located :type config_path: pathlib.Path :param base_path: The parent folder of the config_path @@ -53,118 +61,196 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.info("Setting up needed data") # Parsing yaml retrieve_hass_conf, optim_conf, plant_conf = utils.get_yaml_parse( - config_path, use_secrets=not(get_data_from_file), params=params) + config_path, use_secrets=not (get_data_from_file), params=params + ) # Treat runtimeparams params, retrieve_hass_conf, optim_conf, plant_conf = utils.treat_runtimeparams( - runtimeparams, params, retrieve_hass_conf, - optim_conf, plant_conf, set_type, logger) + runtimeparams, + params, + retrieve_hass_conf, + optim_conf, + plant_conf, + set_type, + logger, + ) # Define main objects - rh = RetrieveHass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'], - retrieve_hass_conf['freq'], retrieve_hass_conf['time_zone'], - params, base_path, logger, get_data_from_file=get_data_from_file) - fcst = Forecast(retrieve_hass_conf, optim_conf, plant_conf, - params, base_path, logger, get_data_from_file=get_data_from_file) - opt = Optimization(retrieve_hass_conf, optim_conf, plant_conf, - fcst.var_load_cost, fcst.var_prod_price, - costfun, base_path, logger) + rh = RetrieveHass( + retrieve_hass_conf["hass_url"], + retrieve_hass_conf["long_lived_token"], + retrieve_hass_conf["freq"], + retrieve_hass_conf["time_zone"], + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + fcst = Forecast( + retrieve_hass_conf, + optim_conf, + plant_conf, + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + opt = Optimization( + retrieve_hass_conf, + optim_conf, + plant_conf, + fcst.var_load_cost, + fcst.var_prod_price, + costfun, + base_path, + logger, + ) # Perform setup based on type of action if set_type == "perfect-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: - days_list = utils.get_days_list(retrieve_hass_conf['days_to_retrieve']) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): - return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + days_list = utils.get_days_list(retrieve_hass_conf["days_to_retrieve"]) + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): + return False + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # What we don't need for this type of action P_PV_forecast, P_load_forecast, df_input_data_dayahead = None, None, None elif set_type == "dayahead-optim": # Get PV and load 
forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) P_PV_forecast = fcst.get_power_from_weather(df_weather) P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method']) if isinstance(P_load_forecast,bool) and not P_load_forecast: logger.error("Unable to get sensor power photovoltaics, or sensor power load no var loads. Check HA sensors and their daily data") return False - df_input_data_dayahead = pd.DataFrame(np.transpose(np.vstack([P_PV_forecast.values,P_load_forecast.values])), - index=P_PV_forecast.index, - columns=['P_PV_forecast', 'P_load_forecast']) + df_input_data_dayahead = pd.DataFrame( + np.transpose(np.vstack([P_PV_forecast.values, P_load_forecast.values])), + index=P_PV_forecast.index, + columns=["P_PV_forecast", "P_load_forecast"], + ) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] # What we don't need for this type of action df_input_data, days_list = None, None elif set_type == "naive-mpc-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: days_list = utils.get_days_list(1) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # Get PV and load forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) - P_PV_forecast = fcst.get_power_from_weather(df_weather, set_mix_forecast=True, df_now=df_input_data) - P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method'], set_mix_forecast=True, df_now=df_input_data) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) + 
P_PV_forecast = fcst.get_power_from_weather( + df_weather, set_mix_forecast=True, df_now=df_input_data + ) + P_load_forecast = fcst.get_load_forecast( + method=optim_conf["load_forecast_method"], + set_mix_forecast=True, + df_now=df_input_data, + ) df_input_data_dayahead = pd.concat([P_PV_forecast, P_load_forecast], axis=1) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) - df_input_data_dayahead.columns = ['P_PV_forecast', 'P_load_forecast'] + df_input_data_dayahead.columns = ["P_PV_forecast", "P_load_forecast"] params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] - elif set_type == "forecast-model-fit" or set_type == "forecast-model-predict" or set_type == "forecast-model-tune": + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] + elif ( + set_type == "forecast-model-fit" + or set_type == "forecast-model-predict" + or set_type == "forecast-model-tune" + ): df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) # Retrieve data from hass - days_to_retrieve = params['passed_data']['days_to_retrieve'] - model_type = params['passed_data']['model_type'] - var_model = params['passed_data']['var_model'] + days_to_retrieve = params["passed_data"]["days_to_retrieve"] + model_type = params["passed_data"]["model_type"] + var_model = params["passed_data"]["var_model"] if get_data_from_file: days_list = None - filename = 'data_train_'+model_type+'.pkl' - data_path = pathlib.Path(base_path) / 'data' / filename - with open(data_path, 'rb') as inp: + filename = "data_train_" + model_type + ".pkl" + data_path = pathlib.Path(base_path) / "data" / filename + with open(data_path, "rb") as inp: df_input_data, _ = pickle.load(inp) - df_input_data = df_input_data[df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve):] + df_input_data = df_input_data[ + df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve) : + ] else: days_list = utils.get_days_list(days_to_retrieve) var_list = [var_model] if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - + elif set_type == "regressor-model-fit": - + df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params['passed_data']['csv_file'] - independent_variables = params['passed_data']['independent_variables'] - dependent_variable = params['passed_data']['dependent_variable'] - timestamp = params['passed_data']['timestamp'] + csv_file = params["passed_data"]["csv_file"] + features = params["passed_data"]["features"] + target = params["passed_data"]["target"] + timestamp = params["passed_data"]["timestamp"] filename_path = pathlib.Path(base_path) / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -173,8 +259,8 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.error("The cvs file was not found.") 
raise ValueError("The CSV file " + csv_file + " was not found.") required_columns = [] - required_columns.extend(independent_variables) - required_columns.append(dependent_variable) + required_columns.extend(features) + required_columns.append(target) if timestamp is not None: required_columns.append(timestamp) @@ -188,39 +274,46 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, P_PV_forecast, P_load_forecast = None, None days_list = None params = json.loads(params) - + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None else: - logger.error("The passed action argument and hence the set_type parameter for setup is not valid") + logger.error( + "The passed action argument and hence the set_type parameter for setup is not valid" + ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None # The input data dictionnary to return input_data_dict = { - 'root': base_path, - 'retrieve_hass_conf': retrieve_hass_conf, - 'rh': rh, - 'opt': opt, - 'fcst': fcst, - 'df_input_data': df_input_data, - 'df_input_data_dayahead': df_input_data_dayahead, - 'P_PV_forecast': P_PV_forecast, - 'P_load_forecast': P_load_forecast, - 'costfun': costfun, - 'params': params, - 'days_list': days_list + "root": base_path, + "retrieve_hass_conf": retrieve_hass_conf, + "rh": rh, + "opt": opt, + "fcst": fcst, + "df_input_data": df_input_data, + "df_input_data_dayahead": df_input_data_dayahead, + "P_PV_forecast": P_PV_forecast, + "P_load_forecast": P_load_forecast, + "costfun": costfun, + "params": params, + "days_list": days_list, } return input_data_dict - -def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = True, debug: Optional[bool] = False) -> pd.DataFrame: + + +def perfect_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = True, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the perfect forecast optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -235,26 +328,38 @@ def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing perfect forecast optimization") # Load cost and prod price forecast - df_input_data = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) - opt_res = input_data_dict['opt'].perform_perfect_forecast_optim(df_input_data, input_data_dict['days_list']) + df_input_data = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res = input_data_dict["opt"].perform_perfect_forecast_optim( + df_input_data, input_data_dict["days_list"] + ) # Save CSV file for analysis if save_data_to_file: - filename = 'opt_res_perfect_optim_'+input_data_dict['costfun']+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res - -def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + + +def dayahead_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the day-ahead optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -269,29 +374,43 @@ def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing day-ahead forecast optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, - method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) - opt_res_dayahead = input_data_dict['opt'].perform_dayahead_forecast_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res_dayahead = input_data_dict["opt"].perform_dayahead_forecast_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_dayahead.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_dayahead.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_dayahead -def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + +def naive_mpc_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the naive Model Predictive Controller optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -306,33 +425,50 @@ def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing naive MPC optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) # The specifics params for the MPC at runtime - prediction_horizon = input_data_dict['params']['passed_data']['prediction_horizon'] - soc_init = input_data_dict['params']['passed_data']['soc_init'] - soc_final = input_data_dict['params']['passed_data']['soc_final'] - def_total_hours = input_data_dict['params']['passed_data']['def_total_hours'] - def_start_timestep = input_data_dict['params']['passed_data']['def_start_timestep'] - def_end_timestep = input_data_dict['params']['passed_data']['def_end_timestep'] - opt_res_naive_mpc = input_data_dict['opt'].perform_naive_mpc_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast'], - prediction_horizon, soc_init, soc_final, def_total_hours, def_start_timestep, def_end_timestep) + prediction_horizon = input_data_dict["params"]["passed_data"]["prediction_horizon"] + soc_init = input_data_dict["params"]["passed_data"]["soc_init"] + soc_final = input_data_dict["params"]["passed_data"]["soc_final"] + def_total_hours = input_data_dict["params"]["passed_data"]["def_total_hours"] + def_start_timestep = input_data_dict["params"]["passed_data"]["def_start_timestep"] + def_end_timestep = input_data_dict["params"]["passed_data"]["def_end_timestep"] + opt_res_naive_mpc = input_data_dict["opt"].perform_naive_mpc_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + prediction_horizon, + soc_init, + soc_final, + def_total_hours, + def_start_timestep, + def_end_timestep, + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_naive_mpc_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_naive_mpc.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_naive_mpc.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_naive_mpc -def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, 
MLForecaster]: + +def forecast_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -344,29 +480,37 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - var_model = input_data_dict['params']['passed_data']['var_model'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - num_lags = input_data_dict['params']['passed_data']['num_lags'] - split_date_delta = input_data_dict['params']['passed_data']['split_date_delta'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + var_model = input_data_dict["params"]["passed_data"]["var_model"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + num_lags = input_data_dict["params"]["passed_data"]["num_lags"] + split_date_delta = input_data_dict["params"]["passed_data"]["split_date_delta"] + perform_backtest = input_data_dict["params"]["passed_data"]["perform_backtest"] + root = input_data_dict["root"] # The ML forecaster object - mlf = MLForecaster(data, model_type, var_model, sklearn_model, num_lags, root, logger) + mlf = MLForecaster( + data, model_type, var_model, sklearn_model, num_lags, root, logger + ) # Fit the ML model - df_pred, df_pred_backtest = mlf.fit(split_date_delta=split_date_delta, - perform_backtest=perform_backtest) + df_pred, df_pred_backtest = mlf.fit( + split_date_delta=split_date_delta, perform_backtest=perform_backtest + ) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred, df_pred_backtest, mlf -def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, - use_last_window: Optional[bool] = True, debug: Optional[bool] = False, - mlf: Optional[MLForecaster] = None) -> pd.DataFrame: + +def forecast_model_predict( + input_data_dict: dict, + logger: logging.Logger, + use_last_window: Optional[bool] = True, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> pd.DataFrame: r"""Perform a forecast model predict using a previously trained skforecast model. 
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -388,52 +532,79 @@ def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, :rtype: pd.DataFrame """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return # Make predictions if use_last_window: - data_last_window = copy.deepcopy(input_data_dict['df_input_data']) + data_last_window = copy.deepcopy(input_data_dict["df_input_data"]) else: data_last_window = None predictions = mlf.predict(data_last_window) # Publish data to a Home Assistant sensor - model_predict_publish = input_data_dict['params']['passed_data']['model_predict_publish'] - model_predict_entity_id = input_data_dict['params']['passed_data']['model_predict_entity_id'] - model_predict_unit_of_measurement = input_data_dict['params']['passed_data']['model_predict_unit_of_measurement'] - model_predict_friendly_name = input_data_dict['params']['passed_data']['model_predict_friendly_name'] - publish_prefix = input_data_dict['params']['passed_data']['publish_prefix'] + model_predict_publish = input_data_dict["params"]["passed_data"][ + "model_predict_publish" + ] + model_predict_entity_id = input_data_dict["params"]["passed_data"][ + "model_predict_entity_id" + ] + model_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "model_predict_unit_of_measurement" + ] + model_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "model_predict_friendly_name" + ] + publish_prefix = input_data_dict["params"]["passed_data"]["publish_prefix"] if model_predict_publish is True: # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = predictions.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = predictions.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = predictions.index.get_indexer([now_precise], method="ffill")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = predictions.index.get_indexer([now_precise], method="bfill")[ + 0 + ] if idx_closest == -1: - idx_closest = 
predictions.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] # Publish Load forecast - input_data_dict['rh'].post_data(predictions, idx_closest, - model_predict_entity_id, - model_predict_unit_of_measurement, - model_predict_friendly_name, - type_var = 'mlforecaster', - publish_prefix=publish_prefix) + input_data_dict["rh"].post_data( + predictions, + idx_closest, + model_predict_entity_id, + model_predict_unit_of_measurement, + model_predict_friendly_name, + type_var="mlforecaster", + publish_prefix=publish_prefix, + ) return predictions -def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False, mlf: Optional[MLForecaster] = None - ) -> Tuple[pd.DataFrame, MLForecaster]: + +def forecast_model_tune( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> Tuple[pd.DataFrame, MLForecaster]: """Tune a forecast model hyperparameters using bayesian optimization. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -449,28 +620,32 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, :rtype: pd.DataFrame """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this tune method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this tune method" + ) return None, None # Tune the model df_pred_optim = mlf.tune(debug=debug) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. 
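The regressor fit entry point defined above takes everything it needs from params['passed_data']; an illustrative payload, where the CSV name, column names and model choice are assumptions:

# Illustrative inputs for regressor_model_fit (all concrete values are assumptions).
import pandas as pd

df = pd.read_csv("heating_prediction.csv")  # assumed training data with the columns below
passed_data = {
    "model_type": "heating_hours_degreeday",  # free-form tag, reused for the pickle filename
    "sklearn_model": "LinearRegression",  # one of the models handled by MLRegressor
    "features": ["degreeday", "solar"],  # regressor inputs
    "target": "hours",  # column to predict
    "timestamp": "timestamp",  # optional timestamp column
    "date_features": ["month", "day_of_week"],  # optional derived calendar features
}
input_data_dict = {
    "params": {"passed_data": passed_data},
    "df_input_data": df,
    "root": "/app/data",  # assumed
}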
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -480,26 +655,30 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - timestamp = input_data_dict['params']['passed_data']['timestamp'] - date_features = input_data_dict['params']['passed_data']['date_features'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + features = input_data_dict["params"]["passed_data"]["features"] + target = input_data_dict["params"]["passed_data"]["target"] + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + date_features = input_data_dict["params"]["passed_data"]["date_features"] + root = input_data_dict["root"] # The MLRegressor object - mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + mlr = MLRegressor( + data, model_type, sklearn_model, features, target, timestamp, logger + ) # Fit the ML model mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_mlr.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlr.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_predict( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a prediction from csv file. 
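The predict path below hands params['passed_data']['new_values'] directly to MLRegressor.predict, so the list must follow the fit-time feature order; a short sketch with illustrative numbers:

# new_values must be ordered like the features list used at fit time (values assumed).
import numpy as np

features = ["degreeday", "solar"]  # assumed fit-time order
new_values = [12.4, 4.2]  # degreeday first, then solar
X_new = np.array([new_values])  # shape (1, n_features), as built inside MLRegressor.predict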
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -509,38 +688,53 @@ def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlr.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlr.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlr = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML regressor file was not found, please run a model fit method before this predict method" + ) return - new_values = input_data_dict['params']['passed_data']['new_values'] + new_values = input_data_dict["params"]["passed_data"]["new_values"] # Predict from csv file prediction = mlr.predict(new_values) - mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] - mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] - mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ + "mlr_predict_entity_id" + ] + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "mlr_predict_unit_of_measurement" + ] + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "mlr_predict_friendly_name" + ] # Publish prediction idx = 0 - input_data_dict['rh'].post_data(prediction, idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var = 'mlregressor') - -def publish_data(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, - opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + + +def publish_data( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + opt_res_latest: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: """ Publish the data obtained from the optimization results. 
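publish_data below snaps the current time onto the results index with DatetimeIndex.get_indexer; a self-contained sketch of how the three method_ts_round options behave:

import pandas as pd

idx = pd.date_range("2024-01-07 00:00", periods=4, freq="30min")
now = pd.Timestamp("2024-01-07 00:40")
print(idx.get_indexer([now], method="nearest")[0])  # 1 -> 00:30, the 'nearest' option
print(idx.get_indexer([now], method="ffill")[0])  # 1 -> 00:30, the 'first' option
print(idx.get_indexer([now], method="bfill")[0])  # 2 -> 01:00, the 'last' option
# A target outside the index returns -1, hence the extra fallback to method="nearest" in the code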
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -554,166 +748,245 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, logger.info("Publishing data to HASS instance") # Check if a day ahead optimization has been performed (read CSV file) if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: - filename = 'opt_res_latest.csv' + filename = "opt_res_latest.csv" if opt_res_latest is None: - if not os.path.isfile(pathlib.Path(input_data_dict['root']) / filename): + if not os.path.isfile(pathlib.Path(input_data_dict["root"]) / filename): logger.error("File not found error, run an optimization task first.") return else: - opt_res_latest = pd.read_csv(pathlib.Path(input_data_dict['root']) / filename, index_col='timestamp') + opt_res_latest = pd.read_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_col="timestamp" + ) opt_res_latest.index = pd.to_datetime(opt_res_latest.index) - opt_res_latest.index.freq = input_data_dict['retrieve_hass_conf']['freq'] + opt_res_latest.index.freq = input_data_dict["retrieve_hass_conf"]["freq"] # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="ffill")[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="bfill")[0] if idx_closest == -1: - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] # Publish the data - params = json.loads(input_data_dict['params']) - publish_prefix = params['passed_data']['publish_prefix'] + params = json.loads(input_data_dict["params"]) + publish_prefix = params["passed_data"]["publish_prefix"] # Publish PV forecast - custom_pv_forecast_id = params['passed_data']['custom_pv_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_PV'], idx_closest, - custom_pv_forecast_id["entity_id"], - custom_pv_forecast_id["unit_of_measurement"], - custom_pv_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) + custom_pv_forecast_id = params["passed_data"]["custom_pv_forecast_id"] + 
input_data_dict["rh"].post_data( + opt_res_latest["P_PV"], + idx_closest, + custom_pv_forecast_id["entity_id"], + custom_pv_forecast_id["unit_of_measurement"], + custom_pv_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) # Publish Load forecast - custom_load_forecast_id = params['passed_data']['custom_load_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_Load'], idx_closest, - custom_load_forecast_id["entity_id"], - custom_load_forecast_id["unit_of_measurement"], - custom_load_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = ['P_PV', 'P_Load'] + custom_load_forecast_id = params["passed_data"]["custom_load_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_Load"], + idx_closest, + custom_load_forecast_id["entity_id"], + custom_load_forecast_id["unit_of_measurement"], + custom_load_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = ["P_PV", "P_Load"] # Publish deferrable loads - custom_deferrable_forecast_id = params['passed_data']['custom_deferrable_forecast_id'] - for k in range(input_data_dict['opt'].optim_conf['num_def_loads']): + custom_deferrable_forecast_id = params["passed_data"][ + "custom_deferrable_forecast_id" + ] + for k in range(input_data_dict["opt"].optim_conf["num_def_loads"]): if "P_deferrable{}".format(k) not in opt_res_latest.columns: - logger.error("P_deferrable{}".format(k)+" was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + logger.error( + "P_deferrable{}".format(k) + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + ) else: - input_data_dict['rh'].post_data(opt_res_latest["P_deferrable{}".format(k)], idx_closest, - custom_deferrable_forecast_id[k]["entity_id"], - custom_deferrable_forecast_id[k]["unit_of_measurement"], - custom_deferrable_forecast_id[k]["friendly_name"], - type_var = 'deferrable', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_deferrable{}".format(k)] + input_data_dict["rh"].post_data( + opt_res_latest["P_deferrable{}".format(k)], + idx_closest, + custom_deferrable_forecast_id[k]["entity_id"], + custom_deferrable_forecast_id[k]["unit_of_measurement"], + custom_deferrable_forecast_id[k]["friendly_name"], + type_var="deferrable", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_deferrable{}".format(k)] # Publish battery power - if input_data_dict['opt'].optim_conf['set_use_battery']: - if 'P_batt' not in opt_res_latest.columns: - logger.error("P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + if input_data_dict["opt"].optim_conf["set_use_battery"]: + if "P_batt" not in opt_res_latest.columns: + logger.error( + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." 
+ ) else: - custom_batt_forecast_id = params['passed_data']['custom_batt_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_batt'], idx_closest, - custom_batt_forecast_id["entity_id"], - custom_batt_forecast_id["unit_of_measurement"], - custom_batt_forecast_id["friendly_name"], - type_var = 'batt', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_batt"] - custom_batt_soc_forecast_id = params['passed_data']['custom_batt_soc_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['SOC_opt']*100, idx_closest, - custom_batt_soc_forecast_id["entity_id"], - custom_batt_soc_forecast_id["unit_of_measurement"], - custom_batt_soc_forecast_id["friendly_name"], - type_var = 'SOC', - publish_prefix = publish_prefix) - cols_published = cols_published+["SOC_opt"] + custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_batt"], + idx_closest, + custom_batt_forecast_id["entity_id"], + custom_batt_forecast_id["unit_of_measurement"], + custom_batt_forecast_id["friendly_name"], + type_var="batt", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_batt"] + custom_batt_soc_forecast_id = params["passed_data"][ + "custom_batt_soc_forecast_id" + ] + input_data_dict["rh"].post_data( + opt_res_latest["SOC_opt"] * 100, + idx_closest, + custom_batt_soc_forecast_id["entity_id"], + custom_batt_soc_forecast_id["unit_of_measurement"], + custom_batt_soc_forecast_id["friendly_name"], + type_var="SOC", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["SOC_opt"] # Publish grid power - custom_grid_forecast_id = params['passed_data']['custom_grid_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_grid'], idx_closest, - custom_grid_forecast_id["entity_id"], - custom_grid_forecast_id["unit_of_measurement"], - custom_grid_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_grid"] + custom_grid_forecast_id = params["passed_data"]["custom_grid_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_grid"], + idx_closest, + custom_grid_forecast_id["entity_id"], + custom_grid_forecast_id["unit_of_measurement"], + custom_grid_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_grid"] # Publish total value of cost function - custom_cost_fun_id = params['passed_data']['custom_cost_fun_id'] - col_cost_fun = [i for i in opt_res_latest.columns if 'cost_fun_' in i] - input_data_dict['rh'].post_data(opt_res_latest[col_cost_fun], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'cost_fun', - publish_prefix = publish_prefix) + custom_cost_fun_id = params["passed_data"]["custom_cost_fun_id"] + col_cost_fun = [i for i in opt_res_latest.columns if "cost_fun_" in i] + input_data_dict["rh"].post_data( + opt_res_latest[col_cost_fun], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="cost_fun", + publish_prefix=publish_prefix, + ) # Publish the optimization status - custom_cost_fun_id = params['passed_data']['custom_optim_status_id'] + custom_cost_fun_id = params["passed_data"]["custom_optim_status_id"] if "optim_status" not in opt_res_latest: - opt_res_latest["optim_status"] = 'Optimal' - logger.warning("no optim_status 
in opt_res_latest, run an optimization task first") - input_data_dict['rh'].post_data(opt_res_latest['optim_status'], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'optim_status', - publish_prefix = publish_prefix) - cols_published = cols_published+["optim_status"] + opt_res_latest["optim_status"] = "Optimal" + logger.warning( + "no optim_status in opt_res_latest, run an optimization task first" + ) + input_data_dict["rh"].post_data( + opt_res_latest["optim_status"], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="optim_status", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["optim_status"] # Publish unit_load_cost - custom_unit_load_cost_id = params['passed_data']['custom_unit_load_cost_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_load_cost'], idx_closest, - custom_unit_load_cost_id["entity_id"], - custom_unit_load_cost_id["unit_of_measurement"], - custom_unit_load_cost_id["friendly_name"], - type_var = 'unit_load_cost', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_load_cost"] + custom_unit_load_cost_id = params["passed_data"]["custom_unit_load_cost_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_load_cost"], + idx_closest, + custom_unit_load_cost_id["entity_id"], + custom_unit_load_cost_id["unit_of_measurement"], + custom_unit_load_cost_id["friendly_name"], + type_var="unit_load_cost", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_load_cost"] # Publish unit_prod_price - custom_unit_prod_price_id = params['passed_data']['custom_unit_prod_price_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_prod_price'], idx_closest, - custom_unit_prod_price_id["entity_id"], - custom_unit_prod_price_id["unit_of_measurement"], - custom_unit_prod_price_id["friendly_name"], - type_var = 'unit_prod_price', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_prod_price"] + custom_unit_prod_price_id = params["passed_data"]["custom_unit_prod_price_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_prod_price"], + idx_closest, + custom_unit_prod_price_id["entity_id"], + custom_unit_prod_price_id["unit_of_measurement"], + custom_unit_prod_price_id["friendly_name"], + type_var="unit_prod_price", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_prod_price"] # Create a DF resuming what has been published opt_res = opt_res_latest[cols_published].loc[[opt_res_latest.index[idx_closest]]] return opt_res - - + + def main(): r"""Define the main command line entry function. This function may take several arguments as inputs. 
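Both boolean flags below are parsed with strtobool; assuming the module-level import is distutils.util.strtobool, which matches the type=strtobool usage, a small sketch of the accepted strings:

from distutils.util import strtobool  # assumed to be the import used by command_line.py

# Returns 1 for "y", "yes", "t", "true", "on", "1" and 0 for "n", "no", "f",
# "false", "off", "0" (case-insensitive); any other string raises ValueError.
print(bool(strtobool("True")))  # True
print(bool(strtobool("0")))  # False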
You can type `emhass --help` to see the list of options: - + - action: Set the desired action, options are: perfect-optim, dayahead-optim, naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune - + - config: Define path to the config.yaml file - + - costfun: Define the type of cost function, options are: profit, cost, self-consumption - + - log2file: Define if we should log to a file or not - + - params: Configuration parameters passed from data/options.json if using the add-on - + - runtimeparams: Pass runtime optimization parameters as dictionnary - + - debug: Use True for testing purposes - + """ # Parsing arguments parser = argparse.ArgumentParser() - parser.add_argument('--action', type=str, help='Set the desired action, options are: perfect-optim, dayahead-optim,\ - naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune') - parser.add_argument('--config', type=str, help='Define path to the config.yaml file') - parser.add_argument('--costfun', type=str, default='profit', help='Define the type of cost function, options are: profit, cost, self-consumption') - parser.add_argument('--log2file', type=strtobool, default='False', help='Define if we should log to a file or not') - parser.add_argument('--params', type=str, default=None, help='Configuration parameters passed from data/options.json') - parser.add_argument('--runtimeparams', type=str, default=None, help='Pass runtime optimization parameters as dictionnary') - parser.add_argument('--debug', type=strtobool, default='False', help='Use True for testing purposes') + parser.add_argument( + "--action", + type=str, + help="Set the desired action, options are: perfect-optim, dayahead-optim,\ + naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune", + ) + parser.add_argument( + "--config", type=str, help="Define path to the config.yaml file" + ) + parser.add_argument( + "--costfun", + type=str, + default="profit", + help="Define the type of cost function, options are: profit, cost, self-consumption", + ) + parser.add_argument( + "--log2file", + type=strtobool, + default="False", + help="Define if we should log to a file or not", + ) + parser.add_argument( + "--params", + type=str, + default=None, + help="Configuration parameters passed from data/options.json", + ) + parser.add_argument( + "--runtimeparams", + type=str, + default=None, + help="Pass runtime optimization parameters as dictionnary", + ) + parser.add_argument( + "--debug", type=strtobool, default="False", help="Use True for testing purposes" + ) args = parser.parse_args() # The path to the configuration files config_path = pathlib.Path(args.config) @@ -722,39 +995,56 @@ def main(): logger, ch = utils.get_logger(__name__, base_path, save_to_file=bool(args.log2file)) # Additionnal argument try: - parser.add_argument('--version', action='version', version='%(prog)s '+version('emhass')) + parser.add_argument( + "--version", action="version", version="%(prog)s " + version("emhass") + ) args = parser.parse_args() except Exception: - logger.info("Version not found for emhass package. Or importlib exited with PackageNotFoundError.") + logger.info( + "Version not found for emhass package. Or importlib exited with PackageNotFoundError." 
+ ) # Setup parameters - input_data_dict = set_input_data_dict(config_path, base_path, - args.costfun, args.params, args.runtimeparams, args.action, - logger, args.debug) + input_data_dict = set_input_data_dict( + config_path, + base_path, + args.costfun, + args.params, + args.runtimeparams, + args.action, + logger, + args.debug, + ) # Perform selected action - if args.action == 'perfect-optim': + if args.action == "perfect-optim": opt_res = perfect_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'dayahead-optim': + elif args.action == "dayahead-optim": opt_res = dayahead_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'naive-mpc-optim': + elif args.action == "naive-mpc-optim": opt_res = naive_mpc_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'forecast-model-fit': - df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) + elif args.action == "forecast-model-fit": + df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit( + input_data_dict, logger, debug=args.debug + ) opt_res = None - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred = forecast_model_predict(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred = forecast_model_predict( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'publish-data': + elif args.action == "publish-data": opt_res = publish_data(input_data_dict, logger) else: logger.error("The passed action argument is not valid") @@ -763,15 +1053,20 @@ def main(): # Flush the logger ch.close() logger.removeHandler(ch) - if args.action == 'perfect-optim' or args.action == 'dayahead-optim' or \ - args.action == 'naive-mpc-optim' or args.action == 'publish-data': + if ( + args.action == "perfect-optim" + or args.action == "dayahead-optim" + or args.action == "naive-mpc-optim" + or args.action == "publish-data" + ): return opt_res - elif args.action == 'forecast-model-fit': + elif args.action == "forecast-model-fit": return df_fit_pred, df_fit_pred_backtest, mlf - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": return df_pred - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": return df_pred_optim, mlf -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index d70df3ec..80ddd74f 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -9,8 +9,12 @@ import pandas as pd import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score +from sklearn.ensemble import ( + AdaBoostRegressor, + GradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.metrics import r2_score from sklearn.linear_model 
import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split @@ -20,21 +24,31 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) + class MLRegressor: r""" A forecaster class using machine learning models. - + This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - + It exposes two main methods: - + - `fit`: to train a model with the passed data. - + - `predict`: to obtain a forecast from a pre-trained model. - + """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: + + def __init__( + self, + data, + model_type: str, + sklearn_model: str, + features: list, + target: str, + timestamp: str, + logger: logging.Logger, + ) -> None: r"""Define constructor for the forecast class. :param data: The data that will be used for train/test @@ -42,33 +56,35 @@ def __init__(self, data, model_type: str, sklearn_model: str, independent_variab :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param independent_variables: A list of independent variables. \ + :param features: A list of features. \ Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ + :type features: list + :param target: The target(to be predicted). \ Example: `hours`. - :type dependent_variable: str + :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable + self.features = features + self.target = target self.timestamp = timestamp self.model_type = model_type self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None self.model = None - self.grid_search =None - + self.grid_search = None + @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: + def add_date_features( + data: pd.DataFrame, date_features: list, timestamp: str + ) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame @@ -79,179 +95,162 @@ def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) - :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] + df[timestamp] = pd.to_datetime(df["timestamp"]) + if "year" in date_features: + df["year"] = [i.year for i in df["timestamp"]] + if "month" in date_features: + df["month"] = [i.month for i in df["timestamp"]] + if 
"day_of_week" in date_features: + df["day_of_week"] = [i.dayofweek for i in df["timestamp"]] + if "day_of_year" in date_features: + df["day_of_year"] = [i.dayofyear for i in df["timestamp"]] + if "day" in date_features: + df["day"] = [i.day for i in df["timestamp"]] + if "hour" in date_features: + df["hour"] = [i.day for i in df["timestamp"]] return df def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - + :param date_features: A list of 'date_features' to take into account when fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for " + self.model_type) self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + self.data_exo[self.features] = self.data[self.features] + self.data_exo[self.target] = self.data[self.target] keep_columns = [] - keep_columns.extend(self.independent_variables) + keep_columns.extend(self.features) if self.timestamp is not None: keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) + keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features( + self.data_exo, date_features, self.timestamp + ) else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") + self.logger.error( + "If no timestamp provided, you can't use date_features, going further without date_features." 
+ ) - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + y = self.data_exo[self.target] + self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + self.data_exo = self.data_exo.drop(self.timestamp, axis=1) X = self.data_exo - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 + ) self.steps = len(X_test) regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = 
regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] + + if self.sklearn_model == "LinearRegression": + base_model = regression_methods["LinearRegression"]["model"] + param_grid = regression_methods["LinearRegression"]["param_grid"] + elif self.sklearn_model == "RidgeRegression": + base_model = regression_methods["RidgeRegression"]["model"] + param_grid = regression_methods["RidgeRegression"]["param_grid"] + elif self.sklearn_model == "LassoRegression": + base_model = regression_methods["LassoRegression"]["model"] + param_grid = regression_methods["LassoRegression"]["param_grid"] + elif self.sklearn_model == "RandomForestRegression": + base_model = regression_methods["RandomForestRegression"]["model"] + param_grid = regression_methods["RandomForestRegression"]["param_grid"] + elif self.sklearn_model == "GradientBoostingRegression": + base_model = regression_methods["GradientBoostingRegression"]["model"] + param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] + elif self.sklearn_model == "AdaBoostRegression": + base_model = regression_methods["AdaBoostRegression"]["model"] + param_grid = regression_methods["AdaBoostRegression"]["param_grid"] else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } + self.logger.error( + "Passed sklearn model " + self.sklearn_model + " is not valid" + ) + + self.model = make_pipeline(StandardScaler(), base_model) # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) - + self.grid_search = GridSearchCV( + self.model, + param_grid, + cv=5, + scoring="neg_mean_squared_error", + 
refit=True, + verbose=0, + n_jobs=-1, + ) + # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") + self.logger.info("Training a " + self.sklearn_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) + print("Best value for lambda : ", self.grid_search.best_params_) print("Best score for cost function: ", self.grid_search.best_score_) self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") self.model = self.grid_search.best_estimator_ - # Make predictions predictions = self.model.predict(X_test.values) predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - + pred_metric = r2_score(y_test, predictions) + self.logger.info( + f"Prediction R2 score of fitted model on test data: {pred_metric}" + ) - def predict(self, new_values:list) -> np.ndarray: + def predict(self, new_values: list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + :param new_values: The new values for the features(in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.model_type) + self.logger.info("Performing a prediction for " + self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index b4708f78..9f47efef 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -30,12 +30,20 @@ class RetrieveHass: """ - def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, - time_zone: datetime.timezone, params: str, base_path: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> None: + def __init__( + self, + hass_url: str, + long_lived_token: str, + freq: pd.Timedelta, + time_zone: datetime.timezone, + params: str, + base_path: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, + ) -> None: """ Define constructor for RetrieveHass class. 
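A minimal sketch of constructing the class below by hand; the URL, token, frequency and paths are placeholders (get_data concatenates "api/..." onto hass_url, so a Home Assistant Core URL should end with a slash):

import logging

import pandas as pd
import pytz

from emhass.retrieve_hass import RetrieveHass

logger = logging.getLogger(__name__)
rh = RetrieveHass(
    hass_url="http://localhost:8123/",  # placeholder; trailing slash expected by get_data
    long_lived_token="<long-lived-token>",  # placeholder
    freq=pd.to_timedelta(30, "minutes"),
    time_zone=pytz.timezone("Europe/Brussels"),  # assumed
    params=None,  # assumed acceptable here; only stored on the instance
    base_path="/app/data",  # assumed
    logger=logger,
)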
- + :param hass_url: The URL of the Home Assistant instance :type hass_url: str :param long_lived_token: The long lived token retrieved from the configuration pane @@ -50,7 +58,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, :type base_path: str :param logger: The passed logger object :type logger: logging object - :param get_data_from_file: Select if data should be retrieved from a + :param get_data_from_file: Select if data should be retrieved from a previously saved pickle useful for testing or directly from connection to hass database :type get_data_from_file: bool, optional @@ -65,9 +73,14 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, self.logger = logger self.get_data_from_file = get_data_from_file - def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: Optional[bool] = False, - significant_changes_only: Optional[bool] = False, - test_url: Optional[str] = 'empty') -> None: + def get_data( + self, + days_list: pd.date_range, + var_list: list, + minimal_response: Optional[bool] = False, + significant_changes_only: Optional[bool] = False, + test_url: Optional[str] = "empty", + ) -> None: r""" Retrieve the actual data from hass. @@ -92,20 +105,36 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O """ self.logger.info("Retrieve hass get data method initiated...") self.df_final = pd.DataFrame() - x = 0 #iterate based on days + x = 0 # iterate based on days # Looping on each day from days list for day in days_list: - + for i, var in enumerate(var_list): - - if test_url == 'empty': - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/history/period/"+day.isoformat()+"?filter_entity_id="+var - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/history/period/"+day.isoformat()+"?filter_entity_id="+var - if minimal_response: # A support for minimal response + + if test_url == "empty": + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = ( + self.hass_url + + "/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + else: # Otherwise the Home Assistant Core API it is + url = ( + self.hass_url + + "api/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + if minimal_response: # A support for minimal response url = url + "?minimal_response" - if significant_changes_only: # And for signicant changes only (check the HASS restful API for more info) + if ( + significant_changes_only + ): # And for signicant changes only (check the HASS restful API for more info) url = url + "?significant_changes_only" else: url = test_url @@ -116,59 +145,96 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O try: response = get(url, headers=headers) except Exception: - self.logger.error("Unable to access Home Assistance instance, check URL") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistance instance, check URL" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'" + ) return False else: if response.status_code == 401: - self.logger.error("Unable to access Home Assistance instance, TOKEN/KEY") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistance instance, TOKEN/KEY" + ) + self.logger.error( + "If 
using addon, try setting url and token to 'empty'" + ) return False if response.status_code > 299: return f"Request Get Error: {response.status_code}" - '''import bz2 # Uncomment to save a serialized data for tests + """import bz2 # Uncomment to save a serialized data for tests import _pickle as cPickle with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f: - cPickle.dump(response, f)''' - try: # Sometimes when there are connection problems we need to catch empty retrieved json + cPickle.dump(response, f)""" + try: # Sometimes when there are connection problems we need to catch empty retrieved json data = response.json()[0] except IndexError: if x is 0: - self.logger.error("The retrieved JSON is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + self.logger.error( + "The retrieved JSON is empty, A sensor:" + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("The retrieved JSON is empty for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False df_raw = pd.DataFrame.from_dict(data) if len(df_raw) == 0: if x is 0: - self.logger.error("The retrieved Dataframe is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + self.logger.error( + "The retrieved Dataframe is empty, A sensor:" + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("Retrieved empty Dataframe for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False - if i == 0: # Defining the DataFrame container - from_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").min() - to_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").max() - ts = pd.to_datetime(pd.date_range(start=from_date, end=to_date, freq=self.freq), - format='%Y-%d-%m %H:%M').round(self.freq, ambiguous='infer', nonexistent=self.freq) - df_day = pd.DataFrame(index = ts) + if i == 0: # Defining the DataFrame container + from_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).min() + to_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).max() + ts = pd.to_datetime( + pd.date_range(start=from_date, end=to_date, freq=self.freq), + format="%Y-%d-%m %H:%M", + ).round(self.freq, ambiguous="infer", nonexistent=self.freq) + df_day = pd.DataFrame(index=ts) # Caution with undefined string data: unknown, unavailable, etc. 
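For context on the cleaning step that follows, each element of response.json()[0] is a state dict from the history API; an illustrative payload (values assumed) and the same NaN conversion applied below:

import numpy as np
import pandas as pd

# Illustrative shape of the history payload for one sensor
data = [
    {"state": "1250.3", "last_changed": "2024-01-07T00:00:12+00:00"},
    {"state": "unavailable", "last_changed": "2024-01-07T00:05:41+00:00"},
    {"state": "1301.0", "last_changed": "2024-01-07T00:10:07+00:00"},
]
df_raw = pd.DataFrame.from_dict(data)
# Placeholder states become NaN so they do not poison the resample().mean() that follows
df_tp = df_raw[["state"]].replace(["unknown", "unavailable", ""], np.nan).astype(float)
print(df_tp)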
- df_tp = df_raw.copy()[['state']].replace( - ['unknown', 'unavailable', ''], np.nan).astype(float).rename(columns={'state': var}) + df_tp = ( + df_raw.copy()[["state"]] + .replace(["unknown", "unavailable", ""], np.nan) + .astype(float) + .rename(columns={"state": var}) + ) # Setting index, resampling and concatenation - df_tp.set_index(pd.to_datetime(df_raw['last_changed'], format="ISO8601"), inplace=True) + df_tp.set_index( + pd.to_datetime(df_raw["last_changed"], format="ISO8601"), + inplace=True, + ) df_tp = df_tp.resample(self.freq).mean() df_day = pd.concat([df_day, df_tp], axis=1) - + x += 1 self.df_final = pd.concat([self.df_final, df_day], axis=0) self.df_final = set_df_index_freq(self.df_final) if self.df_final.index.freq != self.freq: - self.logger.error("The inferred freq from data is not equal to the defined freq in passed parameters") + self.logger.error( + "The inferred freq from data is not equal to the defined freq in passed parameters" + ) return False return True - - def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set_zero_min: Optional[bool] = True, - var_replace_zero: Optional[list] = None, var_interp: Optional[list] = None) -> None: + + def prepare_data( + self, + var_load: str, + load_negative: Optional[bool] = False, + set_zero_min: Optional[bool] = True, + var_replace_zero: Optional[list] = None, + var_interp: Optional[list] = None, + ) -> None: r""" Apply some data treatment in preparation for the optimization task. @@ -192,18 +258,24 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set """ try: - if load_negative: # Apply the correct sign to load power - self.df_final[var_load+'_positive'] = -self.df_final[var_load] + if load_negative: # Apply the correct sign to load power + self.df_final[var_load + "_positive"] = -self.df_final[var_load] else: - self.df_final[var_load+'_positive'] = self.df_final[var_load] + self.df_final[var_load + "_positive"] = self.df_final[var_load] self.df_final.drop([var_load], inplace=True, axis=1) except KeyError: - self.logger.error("Variable "+var_load+" was not found. This is typically because no data could be retrieved from Home Assistant") + self.logger.error( + "Variable " + + var_load + + " was not found. 
This is typically because no data could be retrieved from Home Assistant" + ) return False except ValueError: - self.logger.error("sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same") - return False - if set_zero_min: # Apply minimum values + self.logger.error( + "sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same" + ) + return False + if set_zero_min: # Apply minimum values self.df_final.clip(lower=0.0, inplace=True, axis=1) self.df_final.replace(to_replace=0.0, value=np.nan, inplace=True) new_var_replace_zero = [] @@ -211,59 +283,74 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set # Just changing the names of variables to contain the fact that they are considered positive if var_replace_zero is not None: for string in var_replace_zero: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_replace_zero.append(new_string) else: new_var_replace_zero = None if var_interp is not None: for string in var_interp: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_interp.append(new_string) else: new_var_interp = None # Treating NaN replacement: either by zeros or by linear interpolation if new_var_replace_zero is not None: - self.df_final[new_var_replace_zero] = self.df_final[new_var_replace_zero].fillna(0.0) + self.df_final[new_var_replace_zero] = self.df_final[ + new_var_replace_zero + ].fillna(0.0) if new_var_interp is not None: self.df_final[new_var_interp] = self.df_final[new_var_interp].interpolate( - method='linear', axis=0, limit=None) + method="linear", axis=0, limit=None + ) self.df_final[new_var_interp] = self.df_final[new_var_interp].fillna(0.0) # Setting the correct time zone on DF index if self.time_zone is not None: self.df_final.index = self.df_final.index.tz_convert(self.time_zone) # Drop datetimeindex duplicates on final DF - self.df_final = self.df_final[~self.df_final.index.duplicated(keep='first')] + self.df_final = self.df_final[~self.df_final.index.duplicated(keep="first")] return True - + @staticmethod - def get_attr_data_dict(data_df: pd.DataFrame, idx: int, entity_id: str, - unit_of_measurement: str, friendly_name: str, - list_name: str, state: float) -> dict: - list_df = copy.deepcopy(data_df).loc[data_df.index[idx]:].reset_index() - list_df.columns = ['timestamps', entity_id] - ts_list = [str(i) for i in list_df['timestamps'].tolist()] - vals_list = [str(np.round(i,2)) for i in list_df[entity_id].tolist()] + def get_attr_data_dict( + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + list_name: str, + state: float, + ) -> dict: + list_df = copy.deepcopy(data_df).loc[data_df.index[idx] :].reset_index() + list_df.columns = ["timestamps", entity_id] + ts_list = [str(i) for i in list_df["timestamps"].tolist()] + vals_list = [str(np.round(i, 2)) for i in list_df[entity_id].tolist()] forecast_list = [] for i, ts in enumerate(ts_list): datum = {} datum["date"] = ts - datum[entity_id.split('sensor.')[1]] = vals_list[i] + datum[entity_id.split("sensor.")[1]] = vals_list[i] forecast_list.append(datum) data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, "friendly_name": friendly_name, - list_name: forecast_list - } + list_name: forecast_list, + }, } return data - - def post_data(self, data_df: pd.DataFrame, idx: int, 
entity_id: str, - unit_of_measurement: str, friendly_name: str, - type_var: str, - from_mlforecaster: Optional[bool]=False, - publish_prefix: Optional[str]="") -> None: + + def post_data( + self, + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + type_var: str, + from_mlforecaster: Optional[bool] = False, + publish_prefix: Optional[str] = "", + ) -> None: r""" Post passed data to hass. @@ -286,82 +373,139 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, """ # Add a possible prefix to the entity ID - entity_id = entity_id.replace('sensor.', 'sensor.'+publish_prefix) + entity_id = entity_id.replace("sensor.", "sensor." + publish_prefix) # Set the URL - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/states/"+entity_id - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/states/"+entity_id + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = self.hass_url + "/states/" + entity_id + else: # Otherwise the Home Assistant Core API it is + url = self.hass_url + "api/states/" + entity_id headers = { "Authorization": "Bearer " + self.long_lived_token, "content-type": "application/json", } # Preparing the data dict to be published - if type_var == 'cost_fun': - state = np.round(data_df.sum()[0],2) - elif type_var == 'unit_load_cost' or type_var == 'unit_prod_price': - state = np.round(data_df.loc[data_df.index[idx]],4) - elif type_var == 'optim_status': + if type_var == "cost_fun": + state = np.round(data_df.sum()[0], 2) + elif type_var == "unit_load_cost" or type_var == "unit_prod_price": + state = np.round(data_df.loc[data_df.index[idx]], 4) + elif type_var == "optim_status": state = data_df.loc[data_df.index[idx]] - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": state = data_df[idx] else: - state = np.round(data_df.loc[data_df.index[idx]],2) - if type_var == 'power': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "forecasts", state) - elif type_var == 'deferrable': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "deferrables_schedule", state) - elif type_var == 'batt': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_power", state) - elif type_var == 'SOC': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_soc", state) - elif type_var == 'unit_load_cost': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_load_cost_forecasts", state) - elif type_var == 'unit_prod_price': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_prod_price_forecasts", state) - elif type_var == 'mlforecaster': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "scheduled_forecast", state) - elif type_var == 'optim_status': + state = np.round(data_df.loc[data_df.index[idx]], 2) + if type_var == "power": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "forecasts", + state, + ) + elif type_var == "deferrable": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + 
unit_of_measurement, + friendly_name, + "deferrables_schedule", + state, + ) + elif type_var == "batt": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_power", + state, + ) + elif type_var == "SOC": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_soc", + state, + ) + elif type_var == "unit_load_cost": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_load_cost_forecasts", + state, + ) + elif type_var == "unit_prod_price": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_prod_price_forecasts", + state, + ) + elif type_var == "mlforecaster": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "scheduled_forecast", + state, + ) + elif type_var == "optim_status": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } else: data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } # Actually post the data if self.get_data_from_file: - class response: pass + + class response: + pass + response.status_code = 200 response.ok = True else: response = post(url, headers=headers, data=json.dumps(data)) # Treating the response status and posting them on the logger if response.ok: - self.logger.info("Successfully posted to "+entity_id+" = "+str(state)) + self.logger.info("Successfully posted to " + entity_id + " = " + str(state)) else: - self.logger.info("The status code for received curl command response is: "+str(response.status_code)) + self.logger.info( + "The status code for received curl command response is: " + + str(response.status_code) + ) return response, data diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 5f9f249b..02db0e09 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -2,10 +2,19 @@ # -*- coding: utf-8 -*- from typing import Tuple, Optional -import numpy as np, pandas as pd -import yaml, pytz, logging, pathlib, json, copy from datetime import datetime, timedelta, timezone +import logging +import pathlib +import json +import copy +import numpy as np +import pandas as pd +import yaml +import pytz + + import plotly.express as px + pd.options.plotting.backend = "plotly" from emhass.machine_learning_forecaster import MLForecaster @@ -14,13 +23,13 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: """ Get the root absolute path of the working directory. 
- + :param file: The passed file path with __file__ :return: The root path :param num_parent: The number of parents levels up to desired root folder :type num_parent: int, optional :rtype: str - + """ if num_parent == 3: root = pathlib.Path(file).resolve().parent.parent.parent @@ -32,11 +41,16 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: raise ValueError("num_parent value not valid, must be between 1 and 3") return root -def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = True, - logging_level: Optional[str] = "DEBUG") -> Tuple[logging.Logger, logging.StreamHandler]: + +def get_logger( + fun_name: str, + config_path: str, + save_to_file: Optional[bool] = True, + logging_level: Optional[str] = "DEBUG", +) -> Tuple[logging.Logger, logging.StreamHandler]: """ Create a simple logger object. - + :param fun_name: The Python function object name where the logger will be used :type fun_name: str :param config_path: The path to the yaml configuration file @@ -45,14 +59,14 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T :type save_to_file: bool, optional :return: The logger object and the handler :rtype: object - + """ - # create logger object + # create logger object logger = logging.getLogger(fun_name) logger.propagate = True logger.fileSetting = save_to_file if save_to_file: - ch = logging.FileHandler(config_path + '/data/logger_emhass.log') + ch = logging.FileHandler(config_path + "/data/logger_emhass.log") else: ch = logging.StreamHandler() if logging_level == "DEBUG": @@ -70,14 +84,18 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T else: logger.setLevel(logging.DEBUG) ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) return logger, ch -def get_forecast_dates(freq: int, delta_forecast: int, - timedelta_days: Optional[int] = 0) -> pd.core.indexes.datetimes.DatetimeIndex: + +def get_forecast_dates( + freq: int, delta_forecast: int, timedelta_days: Optional[int] = 0 +) -> pd.core.indexes.datetimes.DatetimeIndex: """ Get the date_range list of the needed future dates using the delta_forecast parameter. @@ -89,7 +107,7 @@ def get_forecast_dates(freq: int, delta_forecast: int, :type timedelta_days: Optional[int], optional :return: A list of future forecast dates. :rtype: pd.core.indexes.datetimes.DatetimeIndex - + """ freq = pd.to_timedelta(freq, "minutes") start_forecast = pd.Timestamp(datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) @@ -99,11 +117,19 @@ def get_forecast_dates(freq: int, delta_forecast: int, freq=freq).round(freq, ambiguous='infer', nonexistent=freq) return forecast_dates -def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict, - set_type: str, logger: logging.Logger) -> Tuple[str, dict]: + +def treat_runtimeparams( + runtimeparams: str, + params: str, + retrieve_hass_conf: dict, + optim_conf: dict, + plant_conf: dict, + set_type: str, + logger: logging.Logger, +) -> Tuple[str, dict]: """ - Treat the passed optimization runtime parameters. - + Treat the passed optimization runtime parameters. + :param runtimeparams: Json string containing the runtime parameters dict. 
:type runtimeparams: str :param params: Configuration parameters passed from data/options.json @@ -120,310 +146,479 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic :type logger: logging.Logger :return: Returning the params and optimization parameter container. :rtype: Tuple[str, dict] - + """ - if (params != None) and (params != 'null'): + if (params != None) and (params != "null"): params = json.loads(params) else: params = {} # Some default data needed custom_deferrable_forecast_id = [] - for k in range(optim_conf['num_def_loads']): - custom_deferrable_forecast_id.append({ - "entity_id": "sensor.p_deferrable{}".format(k), - "unit_of_measurement": "W", - "friendly_name": "Deferrable Load {}".format(k) - }) - default_passed_dict = {'custom_pv_forecast_id': {"entity_id": "sensor.p_pv_forecast", "unit_of_measurement": "W", "friendly_name": "PV Power Forecast"}, - 'custom_load_forecast_id': {"entity_id": "sensor.p_load_forecast", "unit_of_measurement": "W", "friendly_name": "Load Power Forecast"}, - 'custom_batt_forecast_id': {"entity_id": "sensor.p_batt_forecast", "unit_of_measurement": "W", "friendly_name": "Battery Power Forecast"}, - 'custom_batt_soc_forecast_id': {"entity_id": "sensor.soc_batt_forecast", "unit_of_measurement": "%", "friendly_name": "Battery SOC Forecast"}, - 'custom_grid_forecast_id': {"entity_id": "sensor.p_grid_forecast", "unit_of_measurement": "W", "friendly_name": "Grid Power Forecast"}, - 'custom_cost_fun_id': {"entity_id": "sensor.total_cost_fun_value", "unit_of_measurement": "", "friendly_name": "Total cost function value"}, - 'custom_optim_status_id': {"entity_id": "sensor.optim_status", "unit_of_measurement": "", "friendly_name": "EMHASS optimization status"}, - 'custom_unit_load_cost_id': {"entity_id": "sensor.unit_load_cost", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Load Cost"}, - 'custom_unit_prod_price_id': {"entity_id": "sensor.unit_prod_price", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Prod Price"}, - 'custom_deferrable_forecast_id': custom_deferrable_forecast_id, - 'publish_prefix': ""} - if 'passed_data' in params.keys(): + for k in range(optim_conf["num_def_loads"]): + custom_deferrable_forecast_id.append( + { + "entity_id": "sensor.p_deferrable{}".format(k), + "unit_of_measurement": "W", + "friendly_name": "Deferrable Load {}".format(k), + } + ) + default_passed_dict = { + "custom_pv_forecast_id": { + "entity_id": "sensor.p_pv_forecast", + "unit_of_measurement": "W", + "friendly_name": "PV Power Forecast", + }, + "custom_load_forecast_id": { + "entity_id": "sensor.p_load_forecast", + "unit_of_measurement": "W", + "friendly_name": "Load Power Forecast", + }, + "custom_batt_forecast_id": { + "entity_id": "sensor.p_batt_forecast", + "unit_of_measurement": "W", + "friendly_name": "Battery Power Forecast", + }, + "custom_batt_soc_forecast_id": { + "entity_id": "sensor.soc_batt_forecast", + "unit_of_measurement": "%", + "friendly_name": "Battery SOC Forecast", + }, + "custom_grid_forecast_id": { + "entity_id": "sensor.p_grid_forecast", + "unit_of_measurement": "W", + "friendly_name": "Grid Power Forecast", + }, + "custom_cost_fun_id": { + "entity_id": "sensor.total_cost_fun_value", + "unit_of_measurement": "", + "friendly_name": "Total cost function value", + }, + "custom_optim_status_id": { + "entity_id": "sensor.optim_status", + "unit_of_measurement": "", + "friendly_name": "EMHASS optimization status", + }, + "custom_unit_load_cost_id": { + "entity_id": "sensor.unit_load_cost", + 
"unit_of_measurement": "€/kWh", + "friendly_name": "Unit Load Cost", + }, + "custom_unit_prod_price_id": { + "entity_id": "sensor.unit_prod_price", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Prod Price", + }, + "custom_deferrable_forecast_id": custom_deferrable_forecast_id, + "publish_prefix": "", + } + if "passed_data" in params.keys(): for key, value in default_passed_dict.items(): - params['passed_data'][key] = value + params["passed_data"][key] = value else: - params['passed_data'] = default_passed_dict + params["passed_data"] = default_passed_dict if runtimeparams is not None: runtimeparams = json.loads(runtimeparams) - freq = int(retrieve_hass_conf['freq'].seconds/60.0) - delta_forecast = int(optim_conf['delta_forecast'].days) + freq = int(retrieve_hass_conf["freq"].seconds / 60.0) + delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) if set_type == "regressor-model-fit": - csv_file = runtimeparams['csv_file'] - independent_variables = runtimeparams['independent_variables'] - dependent_variable = runtimeparams['dependent_variable'] - params['passed_data']['csv_file'] = csv_file - params['passed_data']['independent_variables'] = independent_variables - params['passed_data']['dependent_variable'] = dependent_variable - if 'timestamp' not in runtimeparams.keys(): - params['passed_data']['timestamp'] = None + csv_file = runtimeparams["csv_file"] + features = runtimeparams["features"] + target = runtimeparams["target"] + params["passed_data"]["csv_file"] = csv_file + params["passed_data"]["features"] = features + params["passed_data"]["target"] = target + if "timestamp" not in runtimeparams.keys(): + params["passed_data"]["timestamp"] = None else: - timestamp = runtimeparams['timestamp'] - params['passed_data']['timestamp'] = timestamp - if 'date_features' not in runtimeparams.keys(): - params['passed_data']['date_features'] = [] + timestamp = runtimeparams["timestamp"] + params["passed_data"]["timestamp"] = timestamp + if "date_features" not in runtimeparams.keys(): + params["passed_data"]["date_features"] = [] else: - date_features = runtimeparams['date_features'] - params['passed_data']['date_features'] = date_features - + date_features = runtimeparams["date_features"] + params["passed_data"]["date_features"] = date_features + if set_type == "regressor-model-predict": - new_values = runtimeparams['new_values'] - params['passed_data']['new_values'] = new_values + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values # Treating special data passed for MPC control case - if set_type == 'naive-mpc-optim': - if 'prediction_horizon' not in runtimeparams.keys(): - prediction_horizon = 10 # 10 time steps by default + if set_type == "naive-mpc-optim": + if "prediction_horizon" not in runtimeparams.keys(): + prediction_horizon = 10 # 10 time steps by default else: - prediction_horizon = runtimeparams['prediction_horizon'] - params['passed_data']['prediction_horizon'] = prediction_horizon - if 'soc_init' not in runtimeparams.keys(): - soc_init = plant_conf['SOCtarget'] + prediction_horizon = runtimeparams["prediction_horizon"] + params["passed_data"]["prediction_horizon"] = prediction_horizon + if "soc_init" not in runtimeparams.keys(): + soc_init = plant_conf["SOCtarget"] else: - soc_init = runtimeparams['soc_init'] - params['passed_data']['soc_init'] = soc_init - if 'soc_final' not in runtimeparams.keys(): - soc_final = plant_conf['SOCtarget'] + soc_init = 
runtimeparams["soc_init"] + params["passed_data"]["soc_init"] = soc_init + if "soc_final" not in runtimeparams.keys(): + soc_final = plant_conf["SOCtarget"] else: - soc_final = runtimeparams['soc_final'] - params['passed_data']['soc_final'] = soc_final - if 'def_total_hours' not in runtimeparams.keys(): - def_total_hours = optim_conf['def_total_hours'] + soc_final = runtimeparams["soc_final"] + params["passed_data"]["soc_final"] = soc_final + if "def_total_hours" not in runtimeparams.keys(): + def_total_hours = optim_conf["def_total_hours"] else: - def_total_hours = runtimeparams['def_total_hours'] - params['passed_data']['def_total_hours'] = def_total_hours - if 'def_start_timestep' not in runtimeparams.keys(): - def_start_timestep = optim_conf['def_start_timestep'] + def_total_hours = runtimeparams["def_total_hours"] + params["passed_data"]["def_total_hours"] = def_total_hours + if "def_start_timestep" not in runtimeparams.keys(): + def_start_timestep = optim_conf["def_start_timestep"] else: - def_start_timestep = runtimeparams['def_start_timestep'] - params['passed_data']['def_start_timestep'] = def_start_timestep - if 'def_end_timestep' not in runtimeparams.keys(): - def_end_timestep = optim_conf['def_end_timestep'] + def_start_timestep = runtimeparams["def_start_timestep"] + params["passed_data"]["def_start_timestep"] = def_start_timestep + if "def_end_timestep" not in runtimeparams.keys(): + def_end_timestep = optim_conf["def_end_timestep"] else: - def_end_timestep = runtimeparams['def_end_timestep'] - params['passed_data']['def_end_timestep'] = def_end_timestep - if 'alpha' not in runtimeparams.keys(): + def_end_timestep = runtimeparams["def_end_timestep"] + params["passed_data"]["def_end_timestep"] = def_end_timestep + if "alpha" not in runtimeparams.keys(): alpha = 0.5 else: - alpha = runtimeparams['alpha'] - params['passed_data']['alpha'] = alpha - if 'beta' not in runtimeparams.keys(): + alpha = runtimeparams["alpha"] + params["passed_data"]["alpha"] = alpha + if "beta" not in runtimeparams.keys(): beta = 0.5 else: - beta = runtimeparams['beta'] - params['passed_data']['beta'] = beta + beta = runtimeparams["beta"] + params["passed_data"]["beta"] = beta forecast_dates = copy.deepcopy(forecast_dates)[0:prediction_horizon] else: - params['passed_data']['prediction_horizon'] = None - params['passed_data']['soc_init'] = None - params['passed_data']['soc_final'] = None - params['passed_data']['def_total_hours'] = None - params['passed_data']['def_start_timestep'] = None - params['passed_data']['def_end_timestep'] = None - params['passed_data']['alpha'] = None - params['passed_data']['beta'] = None + params["passed_data"]["prediction_horizon"] = None + params["passed_data"]["soc_init"] = None + params["passed_data"]["soc_final"] = None + params["passed_data"]["def_total_hours"] = None + params["passed_data"]["def_start_timestep"] = None + params["passed_data"]["def_end_timestep"] = None + params["passed_data"]["alpha"] = None + params["passed_data"]["beta"] = None # Treat passed forecast data lists - if 'pv_power_forecast' in runtimeparams.keys(): - if type(runtimeparams['pv_power_forecast']) == list and len(runtimeparams['pv_power_forecast']) >= len(forecast_dates): - params['passed_data']['pv_power_forecast'] = runtimeparams['pv_power_forecast'] - optim_conf['weather_forecast_method'] = 'list' + if "pv_power_forecast" in runtimeparams.keys(): + if type(runtimeparams["pv_power_forecast"]) == list and len( + runtimeparams["pv_power_forecast"] + ) >= len(forecast_dates): + 
params["passed_data"]["pv_power_forecast"] = runtimeparams[ + "pv_power_forecast" + ] + optim_conf["weather_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['pv_power_forecast']))+" and length is "+str(len(runtimeparams['pv_power_forecast']))) - list_non_digits = [x for x in runtimeparams['pv_power_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + str(type(runtimeparams["pv_power_forecast"])) + + " and length is " + + str(len(runtimeparams["pv_power_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["pv_power_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data for pv_power_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data for pv_power_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in pv_power_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in pv_power_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['pv_power_forecast'] = None - if 'load_power_forecast' in runtimeparams.keys(): - if type(runtimeparams['load_power_forecast']) == list and len(runtimeparams['load_power_forecast']) >= len(forecast_dates): - params['passed_data']['load_power_forecast'] = runtimeparams['load_power_forecast'] - optim_conf['load_forecast_method'] = 'list' + params["passed_data"]["pv_power_forecast"] = None + if "load_power_forecast" in runtimeparams.keys(): + if type(runtimeparams["load_power_forecast"]) == list and len( + runtimeparams["load_power_forecast"] + ) >= len(forecast_dates): + params["passed_data"]["load_power_forecast"] = runtimeparams[ + "load_power_forecast" + ] + optim_conf["load_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['load_power_forecast']))+" and length is "+str(len(runtimeparams['load_power_forecast']))) - list_non_digits = [x for x in runtimeparams['load_power_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + str(type(runtimeparams["load_power_forecast"])) + + " and length is " + + str(len(runtimeparams["load_power_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["load_power_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data for load_power_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data for load_power_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in load_power_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in 
load_power_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['load_power_forecast'] = None - if 'load_cost_forecast' in runtimeparams.keys(): - if type(runtimeparams['load_cost_forecast']) == list and len(runtimeparams['load_cost_forecast']) >= len(forecast_dates): - params['passed_data']['load_cost_forecast'] = runtimeparams['load_cost_forecast'] - optim_conf['load_cost_forecast_method'] = 'list' + params["passed_data"]["load_power_forecast"] = None + if "load_cost_forecast" in runtimeparams.keys(): + if type(runtimeparams["load_cost_forecast"]) == list and len( + runtimeparams["load_cost_forecast"] + ) >= len(forecast_dates): + params["passed_data"]["load_cost_forecast"] = runtimeparams[ + "load_cost_forecast" + ] + optim_conf["load_cost_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['load_cost_forecast']))+" and length is "+str(len(runtimeparams['load_cost_forecast']))) - list_non_digits = [x for x in runtimeparams['load_cost_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + str(type(runtimeparams["load_cost_forecast"])) + + " and length is " + + str(len(runtimeparams["load_cost_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["load_cost_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data or load_cost_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data or load_cost_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in load_cost_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in load_cost_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['load_cost_forecast'] = None - if 'prod_price_forecast' in runtimeparams.keys(): - if type(runtimeparams['prod_price_forecast']) == list and len(runtimeparams['prod_price_forecast']) >= len(forecast_dates): - params['passed_data']['prod_price_forecast'] = runtimeparams['prod_price_forecast'] - optim_conf['prod_price_forecast_method'] = 'list' + params["passed_data"]["load_cost_forecast"] = None + if "prod_price_forecast" in runtimeparams.keys(): + if type(runtimeparams["prod_price_forecast"]) == list and len( + runtimeparams["prod_price_forecast"] + ) >= len(forecast_dates): + params["passed_data"]["prod_price_forecast"] = runtimeparams[ + "prod_price_forecast" + ] + optim_conf["prod_price_forecast_method"] = "list" else: - logger.error("ERROR: The passed data is either not a list or the length is not correct, length should be "+str(len(forecast_dates))) - logger.error("Passed type is "+str(type(runtimeparams['prod_price_forecast']))+" and length is "+str(len(runtimeparams['prod_price_forecast']))) - list_non_digits = [x for x in runtimeparams['prod_price_forecast'] if not (isinstance(x, int) or isinstance(x, float))] + logger.error( + "ERROR: The passed data is either not a list or the length is not correct, length should be " + + str(len(forecast_dates)) + ) + logger.error( + "Passed type is " + + 
str(type(runtimeparams["prod_price_forecast"])) + + " and length is " + + str(len(runtimeparams["prod_price_forecast"])) + ) + list_non_digits = [ + x + for x in runtimeparams["prod_price_forecast"] + if not (isinstance(x, int) or isinstance(x, float)) + ] if len(list_non_digits) > 0: - logger.warning("There are non numeric values on the passed data for prod_price_forecast, check for missing values (nans, null, etc)") + logger.warning( + "There are non numeric values on the passed data for prod_price_forecast, check for missing values (nans, null, etc)" + ) for x in list_non_digits: - logger.warning("This value in prod_price_forecast was detected as non digits: "+str(x)) + logger.warning( + "This value in prod_price_forecast was detected as non digits: " + + str(x) + ) else: - params['passed_data']['prod_price_forecast'] = None + params["passed_data"]["prod_price_forecast"] = None # Treat passed data for forecast model fit/predict/tune at runtime - if 'days_to_retrieve' not in runtimeparams.keys(): + if "days_to_retrieve" not in runtimeparams.keys(): days_to_retrieve = 9 else: - days_to_retrieve = runtimeparams['days_to_retrieve'] - params['passed_data']['days_to_retrieve'] = days_to_retrieve - if 'model_type' not in runtimeparams.keys(): + days_to_retrieve = runtimeparams["days_to_retrieve"] + params["passed_data"]["days_to_retrieve"] = days_to_retrieve + if "model_type" not in runtimeparams.keys(): model_type = "load_forecast" else: - model_type = runtimeparams['model_type'] - params['passed_data']['model_type'] = model_type - if 'var_model' not in runtimeparams.keys(): + model_type = runtimeparams["model_type"] + params["passed_data"]["model_type"] = model_type + if "var_model" not in runtimeparams.keys(): var_model = "sensor.power_load_no_var_loads" else: - var_model = runtimeparams['var_model'] - params['passed_data']['var_model'] = var_model - if 'sklearn_model' not in runtimeparams.keys(): + var_model = runtimeparams["var_model"] + params["passed_data"]["var_model"] = var_model + if "sklearn_model" not in runtimeparams.keys(): sklearn_model = "KNeighborsRegressor" else: - sklearn_model = runtimeparams['sklearn_model'] - params['passed_data']['sklearn_model'] = sklearn_model - if 'num_lags' not in runtimeparams.keys(): + sklearn_model = runtimeparams["sklearn_model"] + params["passed_data"]["sklearn_model"] = sklearn_model + if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: - num_lags = runtimeparams['num_lags'] - params['passed_data']['num_lags'] = num_lags - if 'split_date_delta' not in runtimeparams.keys(): - split_date_delta = '48h' + num_lags = runtimeparams["num_lags"] + params["passed_data"]["num_lags"] = num_lags + if "split_date_delta" not in runtimeparams.keys(): + split_date_delta = "48h" else: - split_date_delta = runtimeparams['split_date_delta'] - params['passed_data']['split_date_delta'] = split_date_delta - if 'perform_backtest' not in runtimeparams.keys(): + split_date_delta = runtimeparams["split_date_delta"] + params["passed_data"]["split_date_delta"] = split_date_delta + if "perform_backtest" not in runtimeparams.keys(): perform_backtest = False else: - perform_backtest = eval(str(runtimeparams['perform_backtest']).capitalize()) - params['passed_data']['perform_backtest'] = perform_backtest - if 'model_predict_publish' not in runtimeparams.keys(): + perform_backtest = eval(str(runtimeparams["perform_backtest"]).capitalize()) + params["passed_data"]["perform_backtest"] = perform_backtest + if "model_predict_publish" not in runtimeparams.keys(): 
model_predict_publish = False else: - model_predict_publish = eval(str(runtimeparams['model_predict_publish']).capitalize()) - params['passed_data']['model_predict_publish'] = model_predict_publish - if 'model_predict_entity_id' not in runtimeparams.keys(): + model_predict_publish = eval( + str(runtimeparams["model_predict_publish"]).capitalize() + ) + params["passed_data"]["model_predict_publish"] = model_predict_publish + if "model_predict_entity_id" not in runtimeparams.keys(): model_predict_entity_id = "sensor.p_load_forecast_custom_model" else: - model_predict_entity_id = runtimeparams['model_predict_entity_id'] - params['passed_data']['model_predict_entity_id'] = model_predict_entity_id - if 'model_predict_unit_of_measurement' not in runtimeparams.keys(): + model_predict_entity_id = runtimeparams["model_predict_entity_id"] + params["passed_data"]["model_predict_entity_id"] = model_predict_entity_id + if "model_predict_unit_of_measurement" not in runtimeparams.keys(): model_predict_unit_of_measurement = "W" else: - model_predict_unit_of_measurement = runtimeparams['model_predict_unit_of_measurement'] - params['passed_data']['model_predict_unit_of_measurement'] = model_predict_unit_of_measurement - if 'model_predict_friendly_name' not in runtimeparams.keys(): + model_predict_unit_of_measurement = runtimeparams[ + "model_predict_unit_of_measurement" + ] + params["passed_data"][ + "model_predict_unit_of_measurement" + ] = model_predict_unit_of_measurement + if "model_predict_friendly_name" not in runtimeparams.keys(): model_predict_friendly_name = "Load Power Forecast custom ML model" else: - model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] - params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'mlr_predict_entity_id' not in runtimeparams.keys(): + model_predict_friendly_name = runtimeparams["model_predict_friendly_name"] + params["passed_data"][ + "model_predict_friendly_name" + ] = model_predict_friendly_name + if "mlr_predict_entity_id" not in runtimeparams.keys(): mlr_predict_entity_id = "sensor.mlr_predict" else: - mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] - params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id - if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_entity_id = runtimeparams["mlr_predict_entity_id"] + params["passed_data"]["mlr_predict_entity_id"] = mlr_predict_entity_id + if "mlr_predict_unit_of_measurement" not in runtimeparams.keys(): mlr_predict_unit_of_measurement = None else: - mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] - params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement - if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = runtimeparams[ + "mlr_predict_unit_of_measurement" + ] + params["passed_data"][ + "mlr_predict_unit_of_measurement" + ] = mlr_predict_unit_of_measurement + if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = 
runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant 
configuration parameters passed at runtime - if 'SOCtarget' in runtimeparams.keys(): - plant_conf['SOCtarget'] = runtimeparams['SOCtarget'] + if "SOCtarget" in runtimeparams.keys(): + plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] # Treat custom entities id's and friendly names for variables - if 'custom_pv_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_pv_forecast_id'] = runtimeparams['custom_pv_forecast_id'] - if 'custom_load_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_load_forecast_id'] = runtimeparams['custom_load_forecast_id'] - if 'custom_batt_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_forecast_id'] = runtimeparams['custom_batt_forecast_id'] - if 'custom_batt_soc_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_soc_forecast_id'] = runtimeparams['custom_batt_soc_forecast_id'] - if 'custom_grid_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_grid_forecast_id'] = runtimeparams['custom_grid_forecast_id'] - if 'custom_cost_fun_id' in runtimeparams.keys(): - params['passed_data']['custom_cost_fun_id'] = runtimeparams['custom_cost_fun_id'] - if 'custom_optim_status_id' in runtimeparams.keys(): - params['passed_data']['custom_optim_status_id'] = runtimeparams['custom_optim_status_id'] - if 'custom_unit_load_cost_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_load_cost_id'] = runtimeparams['custom_unit_load_cost_id'] - if 'custom_unit_prod_price_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_prod_price_id'] = runtimeparams['custom_unit_prod_price_id'] - if 'custom_deferrable_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_deferrable_forecast_id'] = runtimeparams['custom_deferrable_forecast_id'] + if "custom_pv_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_pv_forecast_id"] = runtimeparams[ + "custom_pv_forecast_id" + ] + if "custom_load_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_load_forecast_id"] = runtimeparams[ + "custom_load_forecast_id" + ] + if "custom_batt_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_forecast_id"] = runtimeparams[ + "custom_batt_forecast_id" + ] + if "custom_batt_soc_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_soc_forecast_id"] = runtimeparams[ + "custom_batt_soc_forecast_id" + ] + if "custom_grid_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_grid_forecast_id"] = runtimeparams[ + "custom_grid_forecast_id" + ] + if "custom_cost_fun_id" in runtimeparams.keys(): + params["passed_data"]["custom_cost_fun_id"] = runtimeparams[ + "custom_cost_fun_id" + ] + if "custom_optim_status_id" in runtimeparams.keys(): + params["passed_data"]["custom_optim_status_id"] = runtimeparams[ + "custom_optim_status_id" + ] + if "custom_unit_load_cost_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_load_cost_id"] = runtimeparams[ + "custom_unit_load_cost_id" + ] + if "custom_unit_prod_price_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_prod_price_id"] = runtimeparams[ + "custom_unit_prod_price_id" + ] + if "custom_deferrable_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_deferrable_forecast_id"] = runtimeparams[ + "custom_deferrable_forecast_id" + ] # A condition to put a prefix on all published data - if 'publish_prefix' not in runtimeparams.keys(): + if "publish_prefix" not in runtimeparams.keys(): 
publish_prefix = "" else: - publish_prefix = runtimeparams['publish_prefix'] - params['passed_data']['publish_prefix'] = publish_prefix + publish_prefix = runtimeparams["publish_prefix"] + params["passed_data"]["publish_prefix"] = publish_prefix # Serialize the final params params = json.dumps(params) return params, retrieve_hass_conf, optim_conf, plant_conf -def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, - params: Optional[str] = None) -> Tuple[dict, dict, dict]: + +def get_yaml_parse( + config_path: str, use_secrets: Optional[bool] = True, params: Optional[str] = None +) -> Tuple[dict, dict, dict]: """ Perform parsing of the config.yaml file. - + :param config_path: The path to the yaml configuration file :type config_path: str :param use_secrets: Indicate if we should use a secrets file or not. @@ -437,49 +632,54 @@ def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, """ base = config_path.parent if params is None: - with open(config_path, 'r') as file: + with open(config_path, "r") as file: input_conf = yaml.load(file, Loader=yaml.FullLoader) else: input_conf = json.loads(params) if use_secrets: if params is None: - with open(base / 'secrets_emhass.yaml', 'r') as file: + with open(base / "secrets_emhass.yaml", "r") as file: input_secrets = yaml.load(file, Loader=yaml.FullLoader) else: - input_secrets = input_conf.pop('params_secrets', None) - - if (type(input_conf['retrieve_hass_conf']) == list): #if using old config version - retrieve_hass_conf = dict({key:d[key] for d in input_conf['retrieve_hass_conf'] for key in d}) + input_secrets = input_conf.pop("params_secrets", None) + + if type(input_conf["retrieve_hass_conf"]) == list: # if using old config version + retrieve_hass_conf = dict( + {key: d[key] for d in input_conf["retrieve_hass_conf"] for key in d} + ) else: - retrieve_hass_conf = input_conf.get('retrieve_hass_conf', {}) - + retrieve_hass_conf = input_conf.get("retrieve_hass_conf", {}) + if use_secrets: retrieve_hass_conf.update(input_secrets) else: - retrieve_hass_conf['hass_url'] = 'http://supervisor/core/api' - retrieve_hass_conf['long_lived_token'] = '${SUPERVISOR_TOKEN}' - retrieve_hass_conf['time_zone'] = 'Europe/Paris' - retrieve_hass_conf['lat'] = 45.83 - retrieve_hass_conf['lon'] = 6.86 - retrieve_hass_conf['alt'] = 4807.8 - retrieve_hass_conf['freq'] = pd.to_timedelta(retrieve_hass_conf['freq'], "minutes") - retrieve_hass_conf['time_zone'] = pytz.timezone(retrieve_hass_conf['time_zone']) - - if (type(input_conf['optim_conf']) == list): - optim_conf = dict({key:d[key] for d in input_conf['optim_conf'] for key in d}) + retrieve_hass_conf["hass_url"] = "http://supervisor/core/api" + retrieve_hass_conf["long_lived_token"] = "${SUPERVISOR_TOKEN}" + retrieve_hass_conf["time_zone"] = "Europe/Paris" + retrieve_hass_conf["lat"] = 45.83 + retrieve_hass_conf["lon"] = 6.86 + retrieve_hass_conf["alt"] = 4807.8 + retrieve_hass_conf["freq"] = pd.to_timedelta(retrieve_hass_conf["freq"], "minutes") + retrieve_hass_conf["time_zone"] = pytz.timezone(retrieve_hass_conf["time_zone"]) + + if type(input_conf["optim_conf"]) == list: + optim_conf = dict({key: d[key] for d in input_conf["optim_conf"] for key in d}) else: - optim_conf = input_conf.get('optim_conf', {}) + optim_conf = input_conf.get("optim_conf", {}) - optim_conf['list_hp_periods'] = dict((key,d[key]) for d in optim_conf['list_hp_periods'] for key in d) - optim_conf['delta_forecast'] = pd.Timedelta(days=optim_conf['delta_forecast']) - - if (type(input_conf['plant_conf']) == list): 
- plant_conf = dict({key:d[key] for d in input_conf['plant_conf'] for key in d}) + optim_conf["list_hp_periods"] = dict( + (key, d[key]) for d in optim_conf["list_hp_periods"] for key in d + ) + optim_conf["delta_forecast"] = pd.Timedelta(days=optim_conf["delta_forecast"]) + + if type(input_conf["plant_conf"]) == list: + plant_conf = dict({key: d[key] for d in input_conf["plant_conf"] for key in d}) else: - plant_conf = input_conf.get('plant_conf', {}) - + plant_conf = input_conf.get("plant_conf", {}) + return retrieve_hass_conf, optim_conf, plant_conf + def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dict: """ Build a dictionary with graphs and tables for the webui. @@ -490,61 +690,86 @@ def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dic :type plot_size: Optional[int], optional :return: A dictionary containing the graphs and tables in html format :rtype: dict - + """ - cols_p = [i for i in df.columns.to_list() if 'P_' in i] + cols_p = [i for i in df.columns.to_list() if "P_" in i] # Let's round the data in the DF - optim_status = df['optim_status'].unique().item() - df.drop('optim_status', axis=1, inplace=True) - cols_else = [i for i in df.columns.to_list() if 'P_' not in i] + optim_status = df["optim_status"].unique().item() + df.drop("optim_status", axis=1, inplace=True) + cols_else = [i for i in df.columns.to_list() if "P_" not in i] df = df.apply(pd.to_numeric) df[cols_p] = df[cols_p].astype(int) df[cols_else] = df[cols_else].round(3) # Create plots n_colors = len(cols_p) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_0 = px.line(df[cols_p], title='Systems powers schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_0.update_layout(xaxis_title='Timestamp', yaxis_title='System powers (W)') - if 'SOC_opt' in df.columns.to_list(): - fig_1 = px.line(df['SOC_opt'], title='Battery state of charge schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_1.update_layout(xaxis_title='Timestamp', yaxis_title='Battery SOC (%)') - cols_cost = [i for i in df.columns.to_list() if 'cost_' in i or 'unit_' in i] + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_0 = px.line( + df[cols_p], + title="Systems powers schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_0.update_layout(xaxis_title="Timestamp", yaxis_title="System powers (W)") + if "SOC_opt" in df.columns.to_list(): + fig_1 = px.line( + df["SOC_opt"], + title="Battery state of charge schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_1.update_layout(xaxis_title="Timestamp", yaxis_title="Battery SOC (%)") + cols_cost = [i for i in df.columns.to_list() if "cost_" in i or "unit_" in i] n_colors = len(cols_cost) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_2 = px.line(df[cols_cost], title='Systems costs obtained from optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_2.update_layout(xaxis_title='Timestamp', yaxis_title='System costs (currency)') + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_2 = px.line( + df[cols_cost], + title="Systems costs 
obtained from optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_2.update_layout(xaxis_title="Timestamp", yaxis_title="System costs (currency)") # Get full path to image - image_path_0 = fig_0.to_html(full_html=False, default_width='75%') - if 'SOC_opt' in df.columns.to_list(): - image_path_1 = fig_1.to_html(full_html=False, default_width='75%') - image_path_2 = fig_2.to_html(full_html=False, default_width='75%') + image_path_0 = fig_0.to_html(full_html=False, default_width="75%") + if "SOC_opt" in df.columns.to_list(): + image_path_1 = fig_1.to_html(full_html=False, default_width="75%") + image_path_2 = fig_2.to_html(full_html=False, default_width="75%") # The tables - table1 = df.reset_index().to_html(classes='mystyle', index=False) - cost_cols = [i for i in df.columns if 'cost_' in i] + table1 = df.reset_index().to_html(classes="mystyle", index=False) + cost_cols = [i for i in df.columns if "cost_" in i] table2 = df[cost_cols].reset_index().sum(numeric_only=True) - table2['optim_status'] = optim_status - table2 = table2.to_frame(name='Value').reset_index(names='Variable').to_html(classes='mystyle', index=False) + table2["optim_status"] = optim_status + table2 = ( + table2.to_frame(name="Value") + .reset_index(names="Variable") + .to_html(classes="mystyle", index=False) + ) # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>EMHASS optimization results</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting latest optimization results</h4>'
-    injection_dict['figure_0'] = image_path_0
-    if 'SOC_opt' in df.columns.to_list():
-        injection_dict['figure_1'] = image_path_1
-    injection_dict['figure_2'] = image_path_2
-    injection_dict['subsubtitle1'] = '<h4>Last run optimization results table</h4>'
-    injection_dict['table1'] = table1
-    injection_dict['subsubtitle2'] = '<h4>Summary table for latest optimization results</h4>'
-    injection_dict['table2'] = table2
+    injection_dict["title"] = "<h2>EMHASS optimization results</h2>"
+    injection_dict["subsubtitle0"] = "<h4>Plotting latest optimization results</h4>"
+    injection_dict["figure_0"] = image_path_0
+    if "SOC_opt" in df.columns.to_list():
+        injection_dict["figure_1"] = image_path_1
+    injection_dict["figure_2"] = image_path_2
+    injection_dict["subsubtitle1"] = "<h4>Last run optimization results table</h4>"
+    injection_dict["table1"] = table1
+    injection_dict["subsubtitle2"] = (
+        "<h4>Summary table for latest optimization results</h4>
" + ) + injection_dict["table2"] = table2 return injection_dict -def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLForecaster) -> dict: + +def get_injection_dict_forecast_model_fit( + df_fit_pred: pd.DataFrame, mlf: MLForecaster +) -> dict: """ Build a dictionary with graphs and tables for the webui for special MLF fit case. @@ -556,19 +781,26 @@ def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLFore :rtype: dict """ fig = df_fit_pred.plot() - fig.layout.template = 'presentation' - fig.update_yaxes(title_text = mlf.model_type) - fig.update_xaxes(title_text = "Time") - image_path_0 = fig.to_html(full_html=False, default_width='75%') + fig.layout.template = "presentation" + fig.update_yaxes(title_text=mlf.model_type) + fig.update_xaxes(title_text="Time") + image_path_0 = fig.to_html(full_html=False, default_width="75%") # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>Custom machine learning forecast model fit</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting train/test forecast model results for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model fit</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Plotting train/test forecast model results for " + mlf.model_type + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>
" + ) + injection_dict["figure_0"] = image_path_0 return injection_dict -def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLForecaster) -> dict: + +def get_injection_dict_forecast_model_tune( + df_pred_optim: pd.DataFrame, mlf: MLForecaster +) -> dict: """ Build a dictionary with graphs and tables for the webui for special MLF tune case. @@ -580,19 +812,32 @@ def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLF :rtype: dict """ fig = df_pred_optim.plot() - fig.layout.template = 'presentation' - fig.update_yaxes(title_text = mlf.model_type) - fig.update_xaxes(title_text = "Time") - image_path_0 = fig.to_html(full_html=False, default_width='75%') + fig.layout.template = "presentation" + fig.update_yaxes(title_text=mlf.model_type) + fig.update_xaxes(title_text="Time") + image_path_0 = fig.to_html(full_html=False, default_width="75%") # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>Custom machine learning forecast model tune</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Performed a tuning routine using bayesian optimization for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model tune</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Performed a tuning routine using bayesian optimization for "
+        + mlf.model_type
+        + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>
" + ) + injection_dict["figure_0"] = image_path_0 return injection_dict -def build_params(params: dict, params_secrets: dict, options: dict, addon: int, logger: logging.Logger) -> dict: + +def build_params( + params: dict, + params_secrets: dict, + options: dict, + addon: int, + logger: logging.Logger, +) -> dict: """ Build the main params dictionary from the loaded options.json when using the add-on. @@ -611,90 +856,241 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, """ if addon == 1: # Updating variables in retrieve_hass_conf - params['retrieve_hass_conf']['freq'] = options.get('optimization_time_step',params['retrieve_hass_conf']['freq']) - params['retrieve_hass_conf']['days_to_retrieve'] = options.get('historic_days_to_retrieve',params['retrieve_hass_conf']['days_to_retrieve']) - params['retrieve_hass_conf']['var_PV'] = options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']) - params['retrieve_hass_conf']['var_load'] = options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load']) - params['retrieve_hass_conf']['load_negative'] = options.get('load_negative',params['retrieve_hass_conf']['load_negative']) - params['retrieve_hass_conf']['set_zero_min'] = options.get('set_zero_min',params['retrieve_hass_conf']['set_zero_min']) - params['retrieve_hass_conf']['var_replace_zero'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_replace_zero'])] - params['retrieve_hass_conf']['var_interp'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']), options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])] - params['retrieve_hass_conf']['method_ts_round'] = options.get('method_ts_round',params['retrieve_hass_conf']['method_ts_round']) + params["retrieve_hass_conf"]["freq"] = options.get( + "optimization_time_step", params["retrieve_hass_conf"]["freq"] + ) + params["retrieve_hass_conf"]["days_to_retrieve"] = options.get( + "historic_days_to_retrieve", + params["retrieve_hass_conf"]["days_to_retrieve"], + ) + params["retrieve_hass_conf"]["var_PV"] = options.get( + "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"] + ) + params["retrieve_hass_conf"]["var_load"] = options.get( + "sensor_power_load_no_var_loads", params["retrieve_hass_conf"]["var_load"] + ) + params["retrieve_hass_conf"]["load_negative"] = options.get( + "load_negative", params["retrieve_hass_conf"]["load_negative"] + ) + params["retrieve_hass_conf"]["set_zero_min"] = options.get( + "set_zero_min", params["retrieve_hass_conf"]["set_zero_min"] + ) + params["retrieve_hass_conf"]["var_replace_zero"] = [ + options.get( + "sensor_power_photovoltaics", + params["retrieve_hass_conf"]["var_replace_zero"], + ) + ] + params["retrieve_hass_conf"]["var_interp"] = [ + options.get( + "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"] + ), + options.get( + "sensor_power_load_no_var_loads", + params["retrieve_hass_conf"]["var_load"], + ), + ] + params["retrieve_hass_conf"]["method_ts_round"] = options.get( + "method_ts_round", params["retrieve_hass_conf"]["method_ts_round"] + ) # Update params Secrets if specified - params['params_secrets'] = params_secrets - params['params_secrets']['time_zone'] = options.get('time_zone',params_secrets['time_zone']) - params['params_secrets']['lat'] = options.get('Latitude',params_secrets['lat']) - params['params_secrets']['lon'] = options.get('Longitude',params_secrets['lon']) - params['params_secrets']['alt'] = 
options.get('Altitude',params_secrets['alt']) + params["params_secrets"] = params_secrets + params["params_secrets"]["time_zone"] = options.get( + "time_zone", params_secrets["time_zone"] + ) + params["params_secrets"]["lat"] = options.get("Latitude", params_secrets["lat"]) + params["params_secrets"]["lon"] = options.get( + "Longitude", params_secrets["lon"] + ) + params["params_secrets"]["alt"] = options.get("Altitude", params_secrets["alt"]) # Updating variables in optim_conf - params['optim_conf']['set_use_battery'] = options.get('set_use_battery',params['optim_conf']['set_use_battery']) - params['optim_conf']['num_def_loads'] = options.get('number_of_deferrable_loads',params['optim_conf']['num_def_loads']) - if options.get('list_nominal_power_of_deferrable_loads',None) != None: - params['optim_conf']['P_deferrable_nom'] = [i['nominal_power_of_deferrable_loads'] for i in options.get('list_nominal_power_of_deferrable_loads')] - if options.get('list_operating_hours_of_each_deferrable_load',None) != None: - params['optim_conf']['def_total_hours'] = [i['operating_hours_of_each_deferrable_load'] for i in options.get('list_operating_hours_of_each_deferrable_load')] - if options.get('list_treat_deferrable_load_as_semi_cont',None) != None: - params['optim_conf']['treat_def_as_semi_cont'] = [i['treat_deferrable_load_as_semi_cont'] for i in options.get('list_treat_deferrable_load_as_semi_cont')] - params['optim_conf']['weather_forecast_method'] = options.get('weather_forecast_method',params['optim_conf']['weather_forecast_method']) + params["optim_conf"]["set_use_battery"] = options.get( + "set_use_battery", params["optim_conf"]["set_use_battery"] + ) + params["optim_conf"]["num_def_loads"] = options.get( + "number_of_deferrable_loads", params["optim_conf"]["num_def_loads"] + ) + if options.get("list_nominal_power_of_deferrable_loads", None) != None: + params["optim_conf"]["P_deferrable_nom"] = [ + i["nominal_power_of_deferrable_loads"] + for i in options.get("list_nominal_power_of_deferrable_loads") + ] + if options.get("list_operating_hours_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_total_hours"] = [ + i["operating_hours_of_each_deferrable_load"] + for i in options.get("list_operating_hours_of_each_deferrable_load") + ] + if options.get("list_treat_deferrable_load_as_semi_cont", None) != None: + params["optim_conf"]["treat_def_as_semi_cont"] = [ + i["treat_deferrable_load_as_semi_cont"] + for i in options.get("list_treat_deferrable_load_as_semi_cont") + ] + params["optim_conf"]["weather_forecast_method"] = options.get( + "weather_forecast_method", params["optim_conf"]["weather_forecast_method"] + ) # Update optional param secrets - if params['optim_conf']['weather_forecast_method'] == "solcast": - params['params_secrets']['solcast_api_key'] = options.get('optional_solcast_api_key',params_secrets.get('solcast_api_key',"123456")) - params['params_secrets']['solcast_rooftop_id'] = options.get('optional_solcast_rooftop_id',params_secrets.get('solcast_rooftop_id',"123456")) - elif params['optim_conf']['weather_forecast_method'] == "solar.forecast": - params['params_secrets']['solar_forecast_kwp'] = options.get('optional_solar_forecast_kwp',params_secrets.get('solar_forecast_kwp',5)) - params['optim_conf']['load_forecast_method'] = options.get('load_forecast_method',params['optim_conf']['load_forecast_method']) - params['optim_conf']['delta_forecast'] = options.get('delta_forecast_daily',params['optim_conf']['delta_forecast']) - 
params['optim_conf']['load_cost_forecast_method'] = options.get('load_cost_forecast_method',params['optim_conf']['load_cost_forecast_method']) - if options.get('list_set_deferrable_load_single_constant',None) != None: - params['optim_conf']['set_def_constant'] = [i['set_deferrable_load_single_constant'] for i in options.get('list_set_deferrable_load_single_constant')] - if options.get('list_peak_hours_periods_start_hours',None) != None and options.get('list_peak_hours_periods_end_hours',None) != None: - start_hours_list = [i['peak_hours_periods_start_hours'] for i in options['list_peak_hours_periods_start_hours']] - end_hours_list = [i['peak_hours_periods_end_hours'] for i in options['list_peak_hours_periods_end_hours']] + if params["optim_conf"]["weather_forecast_method"] == "solcast": + params["params_secrets"]["solcast_api_key"] = options.get( + "optional_solcast_api_key", + params_secrets.get("solcast_api_key", "123456"), + ) + params["params_secrets"]["solcast_rooftop_id"] = options.get( + "optional_solcast_rooftop_id", + params_secrets.get("solcast_rooftop_id", "123456"), + ) + elif params["optim_conf"]["weather_forecast_method"] == "solar.forecast": + params["params_secrets"]["solar_forecast_kwp"] = options.get( + "optional_solar_forecast_kwp", + params_secrets.get("solar_forecast_kwp", 5), + ) + params["optim_conf"]["load_forecast_method"] = options.get( + "load_forecast_method", params["optim_conf"]["load_forecast_method"] + ) + params["optim_conf"]["delta_forecast"] = options.get( + "delta_forecast_daily", params["optim_conf"]["delta_forecast"] + ) + params["optim_conf"]["load_cost_forecast_method"] = options.get( + "load_cost_forecast_method", + params["optim_conf"]["load_cost_forecast_method"], + ) + if options.get("list_set_deferrable_load_single_constant", None) != None: + params["optim_conf"]["set_def_constant"] = [ + i["set_deferrable_load_single_constant"] + for i in options.get("list_set_deferrable_load_single_constant") + ] + if ( + options.get("list_peak_hours_periods_start_hours", None) != None + and options.get("list_peak_hours_periods_end_hours", None) != None + ): + start_hours_list = [ + i["peak_hours_periods_start_hours"] + for i in options["list_peak_hours_periods_start_hours"] + ] + end_hours_list = [ + i["peak_hours_periods_end_hours"] + for i in options["list_peak_hours_periods_end_hours"] + ] num_peak_hours = len(start_hours_list) - list_hp_periods_list = [{'period_hp_'+str(i+1):[{'start':start_hours_list[i]},{'end':end_hours_list[i]}]} for i in range(num_peak_hours)] - params['optim_conf']['list_hp_periods'] = list_hp_periods_list - params['optim_conf']['load_cost_hp'] = options.get('load_peak_hours_cost',params['optim_conf']['load_cost_hp']) - params['optim_conf']['load_cost_hc'] = options.get('load_offpeak_hours_cost', params['optim_conf']['load_cost_hc']) - params['optim_conf']['prod_price_forecast_method'] = options.get('production_price_forecast_method', params['optim_conf']['prod_price_forecast_method']) - params['optim_conf']['prod_sell_price'] = options.get('photovoltaic_production_sell_price',params['optim_conf']['prod_sell_price']) - params['optim_conf']['set_total_pv_sell'] = options.get('set_total_pv_sell',params['optim_conf']['set_total_pv_sell']) - params['optim_conf']['lp_solver'] = options.get('lp_solver',params['optim_conf']['lp_solver']) - params['optim_conf']['lp_solver_path'] = options.get('lp_solver_path',params['optim_conf']['lp_solver_path']) - params['optim_conf']['set_nocharge_from_grid'] = 
options.get('set_nocharge_from_grid',params['optim_conf']['set_nocharge_from_grid']) - params['optim_conf']['set_nodischarge_to_grid'] = options.get('set_nodischarge_to_grid',params['optim_conf']['set_nodischarge_to_grid']) - params['optim_conf']['set_battery_dynamic'] = options.get('set_battery_dynamic',params['optim_conf']['set_battery_dynamic']) - params['optim_conf']['battery_dynamic_max'] = options.get('battery_dynamic_max',params['optim_conf']['battery_dynamic_max']) - params['optim_conf']['battery_dynamic_min'] = options.get('battery_dynamic_min',params['optim_conf']['battery_dynamic_min']) - params['optim_conf']['weight_battery_discharge'] = options.get('weight_battery_discharge',params['optim_conf']['weight_battery_discharge']) - params['optim_conf']['weight_battery_charge'] = options.get('weight_battery_charge',params['optim_conf']['weight_battery_charge']) - if options.get('list_start_timesteps_of_each_deferrable_load',None) != None: - params['optim_conf']['def_start_timestep'] = [i['start_timesteps_of_each_deferrable_load'] for i in options.get('list_start_timesteps_of_each_deferrable_load')] - if options.get('list_end_timesteps_of_each_deferrable_load',None) != None: - params['optim_conf']['def_end_timestep'] = [i['end_timesteps_of_each_deferrable_load'] for i in options.get('list_end_timesteps_of_each_deferrable_load')] - # Updating variables in plant_con - params['plant_conf']['P_grid_max'] = options.get('maximum_power_from_grid',params['plant_conf']['P_grid_max']) - if options.get('list_pv_module_model',None) != None: - params['plant_conf']['module_model'] = [i['pv_module_model'] for i in options.get('list_pv_module_model')] - if options.get('list_pv_inverter_model',None) != None: - params['plant_conf']['inverter_model'] = [i['pv_inverter_model'] for i in options.get('list_pv_inverter_model')] - if options.get('list_surface_tilt',None) != None: - params['plant_conf']['surface_tilt'] = [i['surface_tilt'] for i in options.get('list_surface_tilt')] - if options.get('list_surface_azimuth',None) != None: - params['plant_conf']['surface_azimuth'] = [i['surface_azimuth'] for i in options.get('list_surface_azimuth')] - if options.get('list_modules_per_string',None) != None: - params['plant_conf']['modules_per_string'] = [i['modules_per_string'] for i in options.get('list_modules_per_string')] - if options.get('list_strings_per_inverter',None) != None: - params['plant_conf']['strings_per_inverter'] = [i['strings_per_inverter'] for i in options.get('list_strings_per_inverter')] - params['plant_conf']['Pd_max'] = options.get('battery_discharge_power_max',params['plant_conf']['Pd_max']) - params['plant_conf']['Pc_max'] = options.get('battery_charge_power_max',params['plant_conf']['Pc_max']) - params['plant_conf']['eta_disch'] = options.get('battery_discharge_efficiency',params['plant_conf']['eta_disch']) - params['plant_conf']['eta_ch'] = options.get('battery_charge_efficiency',params['plant_conf']['eta_ch']) - params['plant_conf']['Enom'] = options.get('battery_nominal_energy_capacity',params['plant_conf']['Enom']) - params['plant_conf']['SOCmin'] = options.get('battery_minimum_state_of_charge',params['plant_conf']['SOCmin']) - params['plant_conf']['SOCmax'] = options.get('battery_maximum_state_of_charge',params['plant_conf']['SOCmax']) - params['plant_conf']['SOCtarget'] = options.get('battery_target_state_of_charge',params['plant_conf']['SOCtarget']) - - # Check parameter lists have the same amounts as deferrable loads + list_hp_periods_list = [ + { + "period_hp_" + + str(i + 1): [ 
+ {"start": start_hours_list[i]}, + {"end": end_hours_list[i]}, + ] + } + for i in range(num_peak_hours) + ] + params["optim_conf"]["list_hp_periods"] = list_hp_periods_list + params["optim_conf"]["load_cost_hp"] = options.get( + "load_peak_hours_cost", params["optim_conf"]["load_cost_hp"] + ) + params["optim_conf"]["load_cost_hc"] = options.get( + "load_offpeak_hours_cost", params["optim_conf"]["load_cost_hc"] + ) + params["optim_conf"]["prod_price_forecast_method"] = options.get( + "production_price_forecast_method", + params["optim_conf"]["prod_price_forecast_method"], + ) + params["optim_conf"]["prod_sell_price"] = options.get( + "photovoltaic_production_sell_price", + params["optim_conf"]["prod_sell_price"], + ) + params["optim_conf"]["set_total_pv_sell"] = options.get( + "set_total_pv_sell", params["optim_conf"]["set_total_pv_sell"] + ) + params["optim_conf"]["lp_solver"] = options.get( + "lp_solver", params["optim_conf"]["lp_solver"] + ) + params["optim_conf"]["lp_solver_path"] = options.get( + "lp_solver_path", params["optim_conf"]["lp_solver_path"] + ) + params["optim_conf"]["set_nocharge_from_grid"] = options.get( + "set_nocharge_from_grid", params["optim_conf"]["set_nocharge_from_grid"] + ) + params["optim_conf"]["set_nodischarge_to_grid"] = options.get( + "set_nodischarge_to_grid", params["optim_conf"]["set_nodischarge_to_grid"] + ) + params["optim_conf"]["set_battery_dynamic"] = options.get( + "set_battery_dynamic", params["optim_conf"]["set_battery_dynamic"] + ) + params["optim_conf"]["battery_dynamic_max"] = options.get( + "battery_dynamic_max", params["optim_conf"]["battery_dynamic_max"] + ) + params["optim_conf"]["battery_dynamic_min"] = options.get( + "battery_dynamic_min", params["optim_conf"]["battery_dynamic_min"] + ) + params["optim_conf"]["weight_battery_discharge"] = options.get( + "weight_battery_discharge", params["optim_conf"]["weight_battery_discharge"] + ) + params["optim_conf"]["weight_battery_charge"] = options.get( + "weight_battery_charge", params["optim_conf"]["weight_battery_charge"] + ) + if options.get("list_start_timesteps_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_start_timestep"] = [ + i["start_timesteps_of_each_deferrable_load"] + for i in options.get("list_start_timesteps_of_each_deferrable_load") + ] + if options.get("list_end_timesteps_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_end_timestep"] = [ + i["end_timesteps_of_each_deferrable_load"] + for i in options.get("list_end_timesteps_of_each_deferrable_load") + ] + # Updating variables in plant_con + params["plant_conf"]["P_grid_max"] = options.get( + "maximum_power_from_grid", params["plant_conf"]["P_grid_max"] + ) + if options.get("list_pv_module_model", None) != None: + params["plant_conf"]["module_model"] = [ + i["pv_module_model"] for i in options.get("list_pv_module_model") + ] + if options.get("list_pv_inverter_model", None) != None: + params["plant_conf"]["inverter_model"] = [ + i["pv_inverter_model"] for i in options.get("list_pv_inverter_model") + ] + if options.get("list_surface_tilt", None) != None: + params["plant_conf"]["surface_tilt"] = [ + i["surface_tilt"] for i in options.get("list_surface_tilt") + ] + if options.get("list_surface_azimuth", None) != None: + params["plant_conf"]["surface_azimuth"] = [ + i["surface_azimuth"] for i in options.get("list_surface_azimuth") + ] + if options.get("list_modules_per_string", None) != None: + params["plant_conf"]["modules_per_string"] = [ + i["modules_per_string"] for i in 
options.get("list_modules_per_string") + ] + if options.get("list_strings_per_inverter", None) != None: + params["plant_conf"]["strings_per_inverter"] = [ + i["strings_per_inverter"] + for i in options.get("list_strings_per_inverter") + ] + params["plant_conf"]["Pd_max"] = options.get( + "battery_discharge_power_max", params["plant_conf"]["Pd_max"] + ) + params["plant_conf"]["Pc_max"] = options.get( + "battery_charge_power_max", params["plant_conf"]["Pc_max"] + ) + params["plant_conf"]["eta_disch"] = options.get( + "battery_discharge_efficiency", params["plant_conf"]["eta_disch"] + ) + params["plant_conf"]["eta_ch"] = options.get( + "battery_charge_efficiency", params["plant_conf"]["eta_ch"] + ) + params["plant_conf"]["Enom"] = options.get( + "battery_nominal_energy_capacity", params["plant_conf"]["Enom"] + ) + params["plant_conf"]["SOCmin"] = options.get( + "battery_minimum_state_of_charge", params["plant_conf"]["SOCmin"] + ) + params["plant_conf"]["SOCmax"] = options.get( + "battery_maximum_state_of_charge", params["plant_conf"]["SOCmax"] + ) + params["plant_conf"]["SOCtarget"] = options.get( + "battery_target_state_of_charge", params["plant_conf"]["SOCtarget"] + ) + + # Check parameter lists have the same amounts as deferrable loads # If not, set defaults it fill in gaps if params['optim_conf']['num_def_loads'] is not len(params['optim_conf']['def_start_timestep']): logger.warning("def_start_timestep / list_start_timesteps_of_each_deferrable_load does not match number in num_def_loads, adding default values to parameter") @@ -721,20 +1117,35 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, for x in range(len(params['optim_conf']['P_deferrable_nom']), params['optim_conf']['num_def_loads']): params['optim_conf']['P_deferrable_nom'].append(0) # days_to_retrieve should be no less then 2 - if params['retrieve_hass_conf']['days_to_retrieve'] < 2: - params['retrieve_hass_conf']['days_to_retrieve'] = 2 - logger.warning("days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history") + if params["retrieve_hass_conf"]["days_to_retrieve"] < 2: + params["retrieve_hass_conf"]["days_to_retrieve"] = 2 + logger.warning( + "days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history" + ) else: - params['params_secrets'] = params_secrets + params["params_secrets"] = params_secrets # The params dict - params['passed_data'] = {'pv_power_forecast':None,'load_power_forecast':None,'load_cost_forecast':None,'prod_price_forecast':None, - 'prediction_horizon':None,'soc_init':None,'soc_final':None,'def_total_hours':None,'def_start_timestep':None,'def_end_timestep':None,'alpha':None,'beta':None} + params["passed_data"] = { + "pv_power_forecast": None, + "load_power_forecast": None, + "load_cost_forecast": None, + "prod_price_forecast": None, + "prediction_horizon": None, + "soc_init": None, + "soc_final": None, + "def_total_hours": None, + "def_start_timestep": None, + "def_end_timestep": None, + "alpha": None, + "beta": None, + } return params + def get_days_list(days_to_retrieve: int) -> pd.date_range: """ Get list of past days from today to days_to_retrieve. 
- + :param days_to_retrieve: Total number of days to retrieve from the past :type days_to_retrieve: int :return: The list of days @@ -743,19 +1154,20 @@ def get_days_list(days_to_retrieve: int) -> pd.date_range: """ today = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0) d = (today - timedelta(days=days_to_retrieve)).isoformat() - days_list = pd.date_range(start=d, end=today.isoformat(), freq='D') - + days_list = pd.date_range(start=d, end=today.isoformat(), freq="D") + return days_list + def set_df_index_freq(df: pd.DataFrame) -> pd.DataFrame: """ Set the freq of a DataFrame DateTimeIndex. - + :param df: Input DataFrame :type df: pd.DataFrame :return: Input DataFrame with freq defined :rtype: pd.DataFrame - + """ idx_diff = np.diff(df.index) sampling = pd.to_timedelta(np.median(idx_diff)) From 4ced7571faab7a11030270e7e5a97e2f89ad2788 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 14:55:30 +0100 Subject: [PATCH 017/111] sklearn-model -> regression-model --- src/emhass/command_line.py | 4 ++-- src/emhass/utils.py | 5 +++++ src/emhass/web_server.py | 8 ++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index b4a9050c..1706d34c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -657,7 +657,7 @@ def regressor_model_fit( """ data = copy.deepcopy(input_data_dict["df_input_data"]) model_type = input_data_dict["params"]["passed_data"]["model_type"] - sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] features = input_data_dict["params"]["passed_data"]["features"] target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] @@ -665,7 +665,7 @@ def regressor_model_fit( root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( - data, model_type, sklearn_model, features, target, timestamp, logger + data, model_type, regression_model, features, target, timestamp, logger ) # Fit the ML model mlr.fit(date_features=date_features) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 02db0e09..3886686f 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -457,6 +457,11 @@ def treat_runtimeparams( else: sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model + if "regression_model" not in runtimeparams.keys(): + regression_model = "LinearRegression" + else: + regression_model = runtimeparams["regression_model"] + params["passed_data"]["regression_model"] = regression_model if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index cdb98b00..64c690a9 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -191,14 +191,14 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a regressor fit...") + app.logger.info(" >> Performing a machine learning regressor fit...") regressor_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-fit executed... \n' + msg = f'EMHASS >> Action regressor-model-fit executed... 
\n' return make_response(msg, 201) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a regressor predict...") + app.logger.info(" >> Performing a machine learning regressor predict...") regressor_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-predict executed... \n' + msg = f'EMHASS >> Action regressor-model-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 1cb2ed5682ee9831a9c62da069c9d71f4efe83c7 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 15:18:57 +0100 Subject: [PATCH 018/111] REGRESSION_METHODS const --- src/emhass/machine_learning_regressor.py | 127 ++++++++++++----------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 80ddd74f..9e7795d0 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -24,6 +24,41 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) +REGRESSION_METHODS = { + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + } class MLRegressor: r""" @@ -43,7 +78,7 @@ def __init__( self, data, model_type: str, - sklearn_model: str, + regression_model: str, features: list, target: str, timestamp: str, @@ -56,11 +91,15 @@ def __init__( :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str + :param regression_model: The model that will be used. For now only \ + this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ + `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + :type regression_model: str :param features: A list of features. \ - Example: [`solar`, `degree_days`]. + Example: [`solar_production`, `degree_days`]. :type features: list :param target: The target(to be predicted). \ - Example: `hours`. + Example: `heating_hours`. :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. 
:type timestamp: str @@ -72,7 +111,7 @@ def __init__( self.target = target self.timestamp = timestamp self.model_type = model_type - self.sklearn_model = sklearn_model + self.regression_model = regression_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep="first")] @@ -111,7 +150,7 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = []) -> None: + def fit(self, date_features: Optional[list] = None) -> None: """ Fit the model using the provided data. @@ -129,7 +168,7 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: + if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( self.data_exo, date_features, self.timestamp @@ -150,63 +189,27 @@ def fit(self, date_features: Optional[list] = []) -> None: ) self.steps = len(X_test) - regression_methods = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } - - if self.sklearn_model == "LinearRegression": - base_model = regression_methods["LinearRegression"]["model"] - param_grid = regression_methods["LinearRegression"]["param_grid"] - elif self.sklearn_model == "RidgeRegression": - base_model = regression_methods["RidgeRegression"]["model"] - param_grid = regression_methods["RidgeRegression"]["param_grid"] - elif self.sklearn_model == "LassoRegression": - base_model = regression_methods["LassoRegression"]["model"] - param_grid = regression_methods["LassoRegression"]["param_grid"] - elif self.sklearn_model == "RandomForestRegression": - base_model = regression_methods["RandomForestRegression"]["model"] - param_grid = regression_methods["RandomForestRegression"]["param_grid"] - elif self.sklearn_model == "GradientBoostingRegression": - base_model = regression_methods["GradientBoostingRegression"]["model"] - param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] - elif self.sklearn_model == "AdaBoostRegression": - base_model = regression_methods["AdaBoostRegression"]["model"] - param_grid = regression_methods["AdaBoostRegression"]["param_grid"] + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + 
elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model " + self.sklearn_model + " is not valid" + "Passed sklearn model " + self.regression_model + " is not valid" ) self.model = make_pipeline(StandardScaler(), base_model) @@ -223,7 +226,7 @@ def fit(self, date_features: Optional[list] = []) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.sklearn_model + " model") + self.logger.info("Training a " + self.regression_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) print("Best value for lambda : ", self.grid_search.best_params_) From 40adc0fff5baa1d8b865dd3378ca0a867eeb6c45 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:12:28 +0100 Subject: [PATCH 019/111] Some cleanup --- src/emhass/machine_learning_regressor.py | 220 +++++++++++++---------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 9e7795d0..95f624b3 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -1,70 +1,72 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +"""Machine learning regressor module.""" + +from __future__ import annotations import copy -import logging import time -from typing import Optional import warnings +from typing import TYPE_CHECKING -import pandas as pd import numpy as np +import pandas as pd from sklearn.ensemble import ( AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, ) -from sklearn.metrics import r2_score - from sklearn.linear_model import Lasso, LinearRegression, Ridge +from sklearn.metrics import r2_score from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +if TYPE_CHECKING: + import logging warnings.filterwarnings("ignore", category=DeprecationWarning) REGRESSION_METHODS = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - 
"model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, +} + class MLRegressor: - r""" - A forecaster class using machine learning models. + r"""A forecaster class using machine learning models. - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. + This class uses the `sklearn` module and the machine learning models are \ + from `scikit-learn`. It exposes two main methods: @@ -74,9 +76,9 @@ class MLRegressor: """ - def __init__( - self, - data, + def __init__( # noqa: PLR0913 + self: MLRegressor, + data: pd.DataFrame, model_type: str, regression_model: str, features: list, @@ -92,8 +94,9 @@ def __init__( for what it will be used for. :type model_type: str :param regression_model: The model that will be used. For now only \ - this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ - `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + this options are possible: `LinearRegression`, `RidgeRegression`, \ + `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ Example: [`solar_production`, `degree_days`]. @@ -113,7 +116,7 @@ def __init__( self.model_type = model_type self.regression_model = regression_model self.logger = logger - self.data.sort_index(inplace=True) + self.data = self.data.sort_index() self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None @@ -122,9 +125,11 @@ def __init__( @staticmethod def add_date_features( - data: pd.DataFrame, date_features: list, timestamp: str + data: pd.DataFrame, + date_features: list, + timestamp: str, ) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp + """Add date features from the input DataFrame timestamp. 
:param data: The input DataFrame :type data: pd.DataFrame @@ -133,7 +138,7 @@ def add_date_features( :return: The DataFrame with the added features :rtype: pd.DataFrame """ - df = copy.deepcopy(data) + df = copy.deepcopy(data) # noqa: PD901 df[timestamp] = pd.to_datetime(df["timestamp"]) if "year" in date_features: df["year"] = [i.year for i in df["timestamp"]] @@ -150,14 +155,54 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = None) -> None: + def get_regression_model(self: MLRegressor) -> tuple[str, str]: + """Get the base model and parameter grid for the specified regression model. + + Returns a tuple containing the base model and parameter grid corresponding to \ + the specified regression model. + + Args: + ---- + self: The instance of the MLRegressor class. + + Returns: + ------- + A tuple containing the base model and parameter grid. + """ - Fit the model using the provided data. + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] + else: + self.logger.error( + "Passed sklearn model %s is not valid", + self.regression_model, + ) + return base_model, param_grid - :param date_features: A list of 'date_features' to take into account when fitting the model. + def fit(self: MLRegressor, date_features: list | None = None) -> None: + """Fit the model using the provided data. + + :param date_features: A list of 'date_features' to take into account when \ + fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for " + self.model_type) + self.logger.info("Performing a MLRegressor fit for %s", self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.features] = self.data[self.features] self.data_exo[self.target] = self.data[self.target] @@ -167,50 +212,36 @@ def fit(self, date_features: Optional[list] = None) -> None: keep_columns.append(self.timestamp) keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) + self.data_exo = self.data_exo.reset_index(drop=True) if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( - self.data_exo, date_features, self.timestamp + self.data_exo, + date_features, + self.timestamp, ) else: self.logger.error( - "If no timestamp provided, you can't use date_features, going further without date_features." 
+ "If no timestamp provided, you can't use date_features, going \ + further without date_features.", ) y = self.data_exo[self.target] self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: self.data_exo = self.data_exo.drop(self.timestamp, axis=1) - X = self.data_exo + X = self.data_exo # noqa: N806 - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 + X_train, X_test, y_train, y_test = train_test_split( # noqa: N806 + X, + y, + test_size=0.2, + random_state=42, ) + self.steps = len(X_test) - if self.regression_model == "LinearRegression": - base_model = REGRESSION_METHODS["LinearRegression"]["model"] - param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] - elif self.regression_model == "RidgeRegression": - base_model = REGRESSION_METHODS["RidgeRegression"]["model"] - param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] - elif self.regression_model == "LassoRegression": - base_model = REGRESSION_METHODS["LassoRegression"]["model"] - param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] - elif self.regression_model == "RandomForestRegression": - base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] - param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] - elif self.regression_model == "GradientBoostingRegression": - base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] - param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] - elif self.regression_model == "AdaBoostRegression": - base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] - param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] - else: - self.logger.error( - "Passed sklearn model " + self.regression_model + " is not valid" - ) + base_model, param_grid = self.get_regression_model() self.model = make_pipeline(StandardScaler(), base_model) @@ -226,12 +257,10 @@ def fit(self, date_features: Optional[list] = None) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.regression_model + " model") + self.logger.info("Training a %s model", self.regression_model) start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ", self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + self.logger.info("Elapsed time for model fit: %s", time.time() - start_time) self.model = self.grid_search.best_estimator_ @@ -240,20 +269,21 @@ def fit(self, date_features: Optional[list] = None) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test, predictions) self.logger.info( - f"Prediction R2 score of fitted model on test data: {pred_metric}" + "Prediction R2 score of fitted model on test data: %s", + pred_metric, ) - def predict(self, new_values: list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - + def predict(self: MLRegressor, new_values: list) -> np.ndarray: + """Predict a new value. - :param new_values: The new values for the features(in the same order as the features list). \ + :param new_values: The new values for the features \ + (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for " + self.model_type) + self.logger.info("Performing a prediction for %s", self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) From bbfbc3ab2922f4f1d4958a86daafc43ea45b1651 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 020/111] Add csv-prediction --- src/emhass/command_line.py | 1 - src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 1706d34c..e6940518 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -16,7 +16,6 @@ from distutils.util import strtobool - from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From b5c2b95e6d1a47b6fc5440db787f62a90f6cb7a0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 021/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From 9eaf4883ee77fa28ae149de5793eaa6a2914b60f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 022/111] gitignore fun --- .vscode/launch.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 10313c97..b953c7d3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,10 +10,11 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "purpose":["debug-in-terminal"], "justMyCode": true, "env": { @@ -21,15 +22,15 @@ "OPTIONS_PATH": "/workspaces/emhass/options.json", "SECRETS_PATH": "/workspaces/emhass/secrets_emhass.yaml", "DATA_PATH": "/workspaces/emhass/data/", - "LOGGING_LEVEL": "DEBUG" } }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "args": ["--addon", "true", "--no_response", "true"], "purpose":["debug-in-terminal"], "justMyCode": true, @@ -44,7 +45,6 @@ "LAT": "45.83", //optional change "LON": "6.86", //optional change "ALT": "4807.8", //optional change - "LOGGING_LEVEL": "DEBUG" //optional change }, } From 8f0cab3914dfb9f2c1117216ce454d276ea26f24 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 023/111] python -> debugpy --- .vscode/launch.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b953c7d3..ec6c6987 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,12 +10,14 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "purpose":["debug-in-terminal"], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -26,13 +28,20 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": 
"debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "args": ["--addon", "true", "--no_response", "true"], - "purpose":["debug-in-terminal"], + "args": [ + "--addon", + "true", + "--no_response", + "true" + ], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -46,7 +55,6 @@ "LON": "6.86", //optional change "ALT": "4807.8", //optional change }, - - } + } ] } \ No newline at end of file From c27ea5cae6d1e9dbe74047dd1add150188c13529 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 024/111] launch.json --- .vscode/launch.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ec6c6987..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,9 +12,8 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "purpose": [ "debug-in-terminal" ], @@ -30,9 +29,8 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "args": [ "--addon", "true", From e1543803baff1a57e3ae3ab32bb3c4c9b6cf1a2e Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 025/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. 
- - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. 
- - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From e946d18667abf39accc9394bbe44768fd9746e1b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 18 Mar 2024 09:33:20 +0100 Subject: [PATCH 026/111] remove KNeighborsRegressor --- src/emhass/machine_learning_regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 95f624b3..732b4266 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -95,7 +95,7 @@ def __init__( # noqa: PLR0913 :type model_type: str :param regression_model: The model that will be used. For now only \ this options are possible: `LinearRegression`, `RidgeRegression`, \ - `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `LassoRegression`, `RandomForestRegression`, \ `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ From 06920aad01e43ddd5c0329b43758074ae709a562 Mon Sep 17 00:00:00 2001 From: gieljnssns Date: Tue, 19 Mar 2024 04:40:02 +0100 Subject: [PATCH 027/111] first documentation for mlregressor --- docs/index.md | 4 +- docs/mlregressor.md | 91 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 docs/mlregressor.md diff --git a/docs/index.md b/docs/index.md index cf015a3f..cc9f33a8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,6 +6,7 @@ # EMHASS: Energy Management for Home Assistant ```{image} images/emhass_logo.png + ``` Welcome to the documentation of EMHASS. With this package written in Python you will be able to implement a real Energy Management System for your household. 
This software was designed to be easy configurable and with a fast integration with Home Assistant:
 
@@ -21,6 +22,7 @@ differences.md
 lpems.md
 forecasts.md
 mlforecaster.md
+mlregressor.md
 study_case.md
 config.md
 emhass.md
@@ -32,5 +34,3 @@ develop.md
 
 - {ref}`genindex`
 - {ref}`modindex`
 - {ref}`search`
-
-
diff --git a/docs/mlregressor.md b/docs/mlregressor.md
new file mode 100644
index 00000000..7206af99
--- /dev/null
+++ b/docs/mlregressor.md
@@ -0,0 +1,91 @@
+# The machine learning regressor
+
+Starting with v0.9.0, a new framework is proposed within EMHASS. It provides a machine learning module to predict values from a csv file using different regression models.
+
+This API provides two main methods:
+
+- fit: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point.
+
+- predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point.
+
+## A basic model fit
+
+To train a model, use the `regressor-model-fit` end point.
+
+Some parameters can optionally be defined at runtime:
+
+- `csv_file`: The name of the csv file containing your data.
+
+- `features`: A list of features; you can provide new values for this at runtime.
+
+- `target`: The target, i.e. the value that has to be predicted.
+
+- `model_type`: Define the name of this regressor model, useful to identify what it will be used for. For example: `heating_hours_degreeday`. This should be a unique name if you are using multiple custom regressor models.
+
+- `regression_model`: The regression model that will be used. For now, only these options are possible: `LinearRegression`, `RidgeRegression`, `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`.
+
+- `timestamp`: If defined, the column key that has to be used as the timestamp.
+
+- `date_features`: A list of 'date_features' to take into account when fitting the model. Possibilities are `year`, `month`, `day_of_week` (monday=0, sunday=6), `day_of_year`, `day` (day of month) and `hour`.
+
+```
+runtimeparams = {
+    "csv_file": "heating_prediction.csv",
+    "features": ["degreeday", "solar"],
+    "target": "heating_hours",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"]
+    }
+```
+
+A correct `curl` call to launch a model fit can look like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit
+```
+
+After applying the `curl` command to fit the model, the following information is logged by EMHASS:
+
+    2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model
+    2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176
+    2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957
+
+## The predict method
+
+To obtain a prediction using a previously trained model, use the `regressor-model-predict` end point.
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-predict
+```
+
+If needed, pass the correct `model_type` like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict
+```
+
+It is possible to publish the predict method results to a Home Assistant sensor.
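+
+For example, a single call that runs a prediction and publishes the result could look like this. This is a hypothetical call: it assumes the `heating_hours_degreeday` model from above has already been fitted, it reuses the publish parameters detailed below, and the `h` unit is only an assumption for an hours-based target.
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday", "new_values": [8.2, 7.23, 2, 6], "mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor"}' http://localhost:5000/action/regressor-model-predict
+```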
+ +The list of parameters needed to set the data publish task is: + +- `mlr_predict_entity_id`: The unique `entity_id` to be used. + +- `mlr_predict_unit_of_measurement`: The `unit_of_measurement` to be used. + +- `mlr_predict_friendly_name`: The `friendly_name` to be used. + +- `new_values`: The new values for the features (in the same order as the features list). Also when using date_features, add these to the new values. + +- `model_type`: The model type that has to be predicted + +``` +runtimeparams = { + "mlr_predict_entity_id": "sensor.mlr_predict", + "mlr_predict_unit_of_measurement": None, + "mlr_predict_friendly_name": "mlr predictor", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" +} +``` From a5be2b8df7a0336849f8e6a85619d61d55fdc5d5 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:13:47 +0100 Subject: [PATCH 028/111] add /app to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5dc21af8..581080c8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ secrets_emhass.yaml *.html *.pkl data/actionLogs.txt +**/app # Byte-compiled / optimized / DLL files From 7c712038c13d06a83e97a0dffab0ed81913ea747 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 029/111] Add csv-prediction --- src/emhass/command_line.py | 46 ++++++++++++ src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ src/emhass/retrieve_hass.py | 10 +++ src/emhass/utils.py | 25 +++++++ src/emhass/web_server.py | 6 ++ 5 files changed, 226 insertions(+) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 605c19e3..0eb69e4a 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,6 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization +from emhass.csv_predictor import CsvPredictor from emhass import utils @@ -153,6 +154,12 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() + elif set_type == "csv-predict": + df_input_data, df_input_data_dayahead = None, None + P_PV_forecast, P_load_forecast = None, None + days_list = None + params = json.loads(params) + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -435,6 +442,45 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf +def csv_predict(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + model_type = input_data_dict['params']['passed_data']['model_type'] + csv_file = input_data_dict['params']['passed_data']['csv_file'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + new_values = input_data_dict['params']['passed_data']['new_values'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # Fit the ML model + prediction = csv.predict(perform_backtest=perform_backtest) + + csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] + csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] + csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + # Publish Load forecast + idx = 0 + input_data_dict['rh'].post_data(prediction, idx, + csv_predict_entity_id, + csv_predict_unit_of_measurement, + csv_predict_friendly_name, + type_var = 'csv_predictor') + return prediction + def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. 
+ + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. + :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. 
+ + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index f3f0649a..ca20ce40 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -303,6 +303,8 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, state = np.round(data_df.loc[data_df.index[idx]],4) elif type_var == 'optim_status': state = data_df.loc[data_df.index[idx]] + elif type_var == 'csv_predictor': + state = data_df[idx] else: state = np.round(data_df.loc[data_df.index[idx]],2) if type_var == 'power': @@ -334,6 +336,14 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, "friendly_name": friendly_name } } + elif type_var == 'csv_predictor': + data = { + "state": state, + "attributes": { + "unit_of_measurement": unit_of_measurement, + "friendly_name": friendly_name + } + } else: data = { "state": "{:.2f}".format(state), diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 61acab3b..a5d3002c 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,6 +155,16 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) + if set_type == "csv-predict": + csv_file = runtimeparams['csv_file'] + independent_variables = runtimeparams['independent_variables'] + dependent_variable = runtimeparams['dependent_variable'] + new_values = runtimeparams['new_values'] + params['passed_data']['csv_file'] = csv_file + params['passed_data']['independent_variables'] = independent_variables + params['passed_data']['dependent_variable'] = dependent_variable + params['passed_data']['new_values'] = new_values + # Treating special data passed for MPC control case if set_type == 'naive-mpc-optim': if 'prediction_horizon' not in runtimeparams.keys(): @@ -281,6 +291,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = 
runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name + if 'csv_predict_entity_id' not in runtimeparams.keys(): + csv_predict_entity_id = "sensor.csv_predictor" + else: + csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] + params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id + if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): + csv_predict_unit_of_measurement = None + else: + csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] + params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement + if 'csv_predict_friendly_name' not in runtimeparams.keys(): + csv_predict_friendly_name = "Csv predictor" + else: + csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] + params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 989298d4..886f9304 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,6 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import csv_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -193,6 +194,11 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) + elif action_name == 'csv-predict': + app.logger.info(" >> Performing a csv predict...") + csv_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-predict executed... \n' + return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' From 9b7472a7bb3033f6404ad4623dcd28becc62bae1 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 030/111] cleanup --- src/emhass/command_line.py | 12 ++++++---- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 0eb69e4a..528efcfb 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -455,19 +455,21 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] + # data = copy.deepcopy(input_data_dict['df_input_data']) + # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) # Fit the ML model - prediction = csv.predict(perform_backtest=perform_backtest) + prediction = csv.predict() + # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. 
:type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From b975b74b9b51ab20daf92054ca3ab5efe367f721 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 031/111] more cleanup --- src/emhass/command_line.py | 17 +++---- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 2 files changed, 40 insertions(+), 69 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 528efcfb..ea249f8d 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -443,8 +443,8 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: - """Perform a forecast model fit from training data retrieved from Home Assistant. + debug: Optional[bool] = False) -> np.ndarray: + """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict @@ -452,29 +452,24 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ - # data = copy.deepcopy(input_data_dict['df_input_data']) - # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) - # Fit the ML model + # Predict from csv file prediction = csv.predict() - # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] - # Publish Load forecast + # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, csv_predict_entity_id, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 36ba25f1a5138f8ddc13917926c2e3e35738aa22 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 032/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From 259ced37b9e6aa0725f57ff590e18d29e427b272 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 033/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray: data = self.load_data() if data is not None: X, y = self.prepare_data(data) - + if self.sklearn_model == 'LinearRegression': base_model = LinearRegression() elif self.sklearn_model == 'ElasticNet': @@ -127,9 +126,5 @@ def predict(self) -> np.ndarray: self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) - + return prediction - - - - \ No newline at end of file From e200dc841452697b48d1e006395c64ddd2ed2913 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 034/111] Use gridsearchcv and split up fit and predict --- src/emhass/command_line.py | 87 +++++++++++++++--- src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++------------- src/emhass/utils.py | 16 +++- src/emhass/web_server.py | 11 ++- 4 files changed, 210 insertions(+), 77 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index ea249f8d..4dd795ae 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -154,7 +154,36 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - elif set_type == "csv-predict": + + elif set_type == "csv-model-fit": + + df_input_data_dayahead = None + P_PV_forecast, P_load_forecast = None, None + params = json.loads(params) + days_list = None + csv_file = params['passed_data']['csv_file'] + independent_variables = params['passed_data']['independent_variables'] + dependent_variable = params['passed_data']['dependent_variable'] + timestamp = params['passed_data']['timestamp'] + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) + + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(independent_variables) + required_columns.append(dependent_variable) + if timestamp is not None: + required_columns.append(timestamp) + + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + elif set_type == "csv-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -442,7 +471,41 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_predict(input_data_dict: dict, logger: logging.Logger, +def csv_model_fit(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. 
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + # csv_file = input_data_dict['params']['passed_data']['csv_file'] + model_type = input_data_dict['params']['passed_data']['model_type'] + # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + timestamp = input_data_dict['params']['passed_data']['timestamp'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + date_features = input_data_dict['params']['passed_data']['date_features'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + # Fit the ML model + df_pred = csv.fit(date_features=date_features) + # Save model + if not debug: + filename = model_type+'_csv.pkl' + with open(pathlib.Path(root) / filename, 'wb') as outp: + pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + # return df_pred, csv + +def csv_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> np.ndarray: """Perform a prediction from csv file. @@ -455,16 +518,20 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - csv_file = input_data_dict['params']['passed_data']['csv_file'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - new_values = input_data_dict['params']['passed_data']['new_values'] + model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - # The ML forecaster object - csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + filename = model_type+'_csv.pkl' + filename_path = pathlib.Path(root) / filename + if not debug: + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + csv = pickle.load(inp) + else: + logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + return + new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict() + prediction = csv.predict(new_values) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - self.csv_file = csv_file + self.data = data self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp - def load_data(self) -> pd.DataFrame: - """Load the data.""" - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(inp) - else: - self.logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + self.csv_file + " was not found.") - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: + :param data: The input DataFrame + :type data: pd.DataFrame + :return: The DataFrame with the added features + :rtype: pd.DataFrame + """ + df = copy.deepcopy(data) + df['timestamp']= pd.to_datetime(df['timestamp']) + if 'year' in date_features: + df['year'] = [i.month for i in df['timestamp']] + if 'month' in date_features: + df['month'] = [i.month for i in df['timestamp']] + if 'day_of_week' in date_features: + df['day_of_week'] = [i.dayofweek for i in df['timestamp']] + if 'day_of_year' in date_features: + df['day_of_year'] = [i.dayofyear for i in df['timestamp']] + if 'day' in date_features: + df['day'] = [i.day for i in df['timestamp']] + if 'hour' in date_features: + df['hour'] = [i.day for i in df['timestamp']] + + return df + + def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Prepare the data. + Fit the model using the provided data. :param data: Input Data :type data: pd.DataFrame - :return: A tuple containing the train data. 
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index a5d3002c..b57528b2 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,14 +155,26 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-predict": + if set_type == "csv-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] - new_values = runtimeparams['new_values'] params['passed_data']['csv_file'] = csv_file params['passed_data']['independent_variables'] = independent_variables params['passed_data']['dependent_variable'] = dependent_variable + if 'timestamp' not in runtimeparams.keys(): + params['passed_data']['timestamp'] = None + else: + timestamp = runtimeparams['timestamp'] + params['passed_data']['timestamp'] = timestamp + if 'date_features' not in runtimeparams.keys(): + params['passed_data']['date_features'] = [] + else: + date_features = runtimeparams['date_features'] + params['passed_data']['date_features'] = date_features + + if set_type == "csv-model-predict": + new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values # Treating special data passed for MPC control case diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 886f9304..db8d0b13 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_predict +from emhass.command_line import csv_model_fit, csv_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,9 +194,14 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-predict': + elif action_name == 'csv-model-fit': + app.logger.info(" >> Performing a csv fit...") + 
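+        # fit the regressor on the csv data passed at runtime; the trained model is pickled for later predict calls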
csv_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-fit executed... \n' + return make_response(msg, 201) + elif action_name == 'csv-model-predict': app.logger.info(" >> Performing a csv predict...") - csv_predict(input_data_dict, app.logger) + csv_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action csv-predict executed... \n' return make_response(msg, 201) else: From d09cbe30c0f08ba8d6f2f3cfd4b38d1d153030cc Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 035/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. From cb2050a245ccd89ad000ee2c9a66b0c36c1d756c Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 036/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From 264a8ca312b0ef6192625e36f2357f887611d594 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 037/111] cleanup + docstrings --- src/emhass/command_line.py | 17 ++++---------- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 34 
deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 4dd795ae..f1135527 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -472,7 +472,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -481,32 +481,26 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ data = copy.deepcopy(input_data_dict['df_input_data']) - # csv_file = input_data_dict['params']['passed_data']['csv_file'] model_type = input_data_dict['params']['passed_data']['model_type'] - # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The ML forecaster object + # The CSV forecaster object csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - df_pred = csv.fit(date_features=date_features) + csv.fit(date_features=date_features) # Save model if not debug: filename = model_type+'_csv.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) - # return df_pred, csv def csv_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> np.ndarray: + debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -515,8 +509,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The np.ndarray containing the predicted value. - :rtype: np.ndarray """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] @@ -543,7 +535,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, csv_predict_unit_of_measurement, csv_predict_friendly_name, type_var = 'csv_predictor') - return prediction def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1e46927d..57d61791 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -25,32 +25,30 @@ class CsvPredictor: This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. 
-    It exposes one main method:
+    It exposes two main methods:
 
-    - `predict`: to obtain a forecast from a csv file.
+    - `fit`: to train a model with the passed data.
+
+    - `predict`: to obtain a forecast from a pre-trained model.
 
     """
 
     def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str,
                  logger: logging.Logger) -> None:
         r"""Define constructor for the forecast class.
 
-        :param csv_file: The name of the csv file to retrieve data from. \
-            Example: `input_train_data.csv`.
-        :type csv_file: str
+        :param data: The data that will be used for train/test.
+        :type data: pd.DataFrame
+        :param model_type: A unique name defining this model, useful to identify \
+            what it will be used for.
+        :type model_type: str
         :param independent_variables: A list of independent variables. \
             Example: [`solar`, `degree_days`].
         :type independent_variables: list
         :param dependent_variable: The dependent variable (to be predicted). \
             Example: `hours`.
         :type dependent_variable: str
-        :param sklearn_model: The `scikit-learn` model that will be used. For now only \
-            this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`.
-        :type sklearn_model: str
-        :param new_values: The new values for the independent variables(in the same order as the independent variables list). \
-            Example: [2.24, 5.68].
-        :type new_values: list
-        :param root: The parent folder of the path where the config.yaml file is located
-        :type root: str
+        :param timestamp: If defined, the name of the column to use as the timestamp.
+        :type timestamp: str
         :param logger: The passed logger object
         :type logger: logging.Logger
         """
@@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent
         self.dependent_variable = dependent_variable
         self.timestamp = timestamp
         self.model_type = model_type
         self.logger = logger
-        self.is_tuned = False
         self.data.sort_index(inplace=True)
         self.data = self.data[~self.data.index.duplicated(keep='first')]
 
     @staticmethod
-    def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame:
+    def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame:
         """Add date features from the input DataFrame timestamp
 
         :param data: The input DataFrame
         :type data: pd.DataFrame
+        :param timestamp: The column containing the timestamp
+        :type timestamp: str
         :return: The DataFrame with the added features
         :rtype: pd.DataFrame
         """
         df = copy.deepcopy(data)
-        df['timestamp']= pd.to_datetime(df['timestamp'])
+        df[timestamp] = pd.to_datetime(df[timestamp])
         if 'year' in date_features:
-            df['year'] = [i.month for i in df['timestamp']]
+            df['year'] = [i.year for i in df[timestamp]]
         if 'month' in date_features:
             df['month'] = [i.month for i in df[timestamp]]
         if 'day_of_week' in date_features:
@@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None:
         """
         Fit the model using the provided data.
 
-        :param data: Input Data
-        :type data: pd.DataFrame
+        :param date_features: A list of 'date_features' to take into account when fitting the model.
+ :type data: list """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for "+self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.independent_variables] = self.data[self.independent_variables] self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] @@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") @@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None: def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. + + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ From c375042c2acfdd652e0a4d118a496a0770fdb178 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 11 Mar 2024 09:59:27 +0100 Subject: [PATCH 038/111] add other regression methods --- src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 57d61791..2b6fb86a 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -9,9 +9,10 @@ import pandas as pd import numpy as np +from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor from sklearn.metrics import r2_score -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - # Define the model - self.model = Pipeline([ - ('scaler', StandardScaler()), - ('regressor', LinearRegression()) - ]) - # Define the parameters to tune - param_grid = { - 'regressor__fit_intercept': [True, False], - 'regressor__positive': [True, False], - } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # Fit the grid search object to the data - self.logger.info("Fitting the model...") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + regression_methods = [ + ('Linear Regression', LinearRegression(), {}), + ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 
1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid = { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 79752dfae93396dd20e520c6cc8ab7ec138fc6c7 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:10:15 +0100 Subject: [PATCH 039/111] add --editable --- .vscode/tasks.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 11a92388..0b25f4f1 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,7 +9,11 @@ "isDefault": true }, "args": [ - "install", "--no-deps", "--force-reinstall", "." + "install", + "--no-deps", + "--force-reinstall", + "--editable", + "." 
], "presentation": { "echo": true, From 79c478e4ce4ad080bce1ea755db293f332577da1 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:17 +0100 Subject: [PATCH 040/111] Add sklearn model --- src/emhass/command_line.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index f1135527..1845c857 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -484,13 +484,14 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, """ data = copy.deepcopy(input_data_dict['df_input_data']) model_type = input_data_dict['params']['passed_data']['model_type'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] # The CSV forecaster object - csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model csv.fit(date_features=date_features) # Save model From d4c36f09c2ede5c6a4c4a4518d013c8312686418 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 041/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}") + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From bf64255238dfe8a8bc955eb3c27fd8fe603a9fe0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:42:27 +0100 Subject: [PATCH 042/111] change to MLRegressor --- src/emhass/command_line.py | 40 +++++++++---------- ...ictor.py => machine_learning_regressor.py} | 4 +- src/emhass/utils.py | 28 ++++++------- src/emhass/web_server.py | 18 ++++----- 4 files changed, 45 insertions(+), 45 deletions(-) rename src/emhass/{csv_predictor.py => machine_learning_regressor.py} (98%) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 1845c857..5c5b4483 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,7 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization -from emhass.csv_predictor import CsvPredictor +from emhass.machine_learning_regressor import MLRegressor from emhass import utils @@ -155,7 +155,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, return False df_input_data = rh.df_final.copy() - elif set_type == "csv-model-fit": + elif set_type == "regressor-model-fit": df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None @@ -183,7 +183,7 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - elif set_type == "csv-model-predict": + elif set_type == "regressor-model-predict": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -471,7 +471,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def csv_model_fit(input_data_dict: dict, logger: logging.Logger, +def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. @@ -490,17 +490,17 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] - # The CSV forecaster object - csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + # The MLRegressor object + mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model - csv.fit(date_features=date_features) + mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' with open(pathlib.Path(root) / filename, 'wb') as outp: - pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL) + pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def csv_model_predict(input_data_dict: dict, logger: logging.Logger, +def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False) -> None: """Perform a prediction from csv file. 
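Stepping out of the diff for a moment: the fit path that patches 038 and 041 converge on is a StandardScaler feeding one grid-searched regressor. A minimal, self-contained sketch of that pattern on synthetic data (the Ridge model and grid values are illustrative stand-ins, not EMHASS defaults):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(42)
X = rng.random((200, 2))  # two features, e.g. degree days and solar yield
y = 3.0 * X[:, 0] - 1.5 * X[:, 1] + rng.normal(0.0, 0.1, 200)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# make_pipeline names each step after its lowercased class, hence the 'ridge__' prefix
model = make_pipeline(StandardScaler(), Ridge())
grid_search = GridSearchCV(model, {"ridge__alpha": [0.1, 1.0, 10.0]}, cv=5,
                           scoring="neg_mean_squared_error", refit=True, n_jobs=-1)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)                            # e.g. {'ridge__alpha': 0.1}
print(grid_search.best_estimator_.score(X_test, y_test))   # R2 on held-out data

refit=True is what makes best_estimator_ a pipeline already retrained on the full training split, which is why the fit method above can simply assign it to self.model.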
@@ -513,29 +513,29 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): with open(filename_path, 'rb') as inp: - csv = pickle.load(inp) + mlr = pickle.load(inp) else: logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") return new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict(new_values) + prediction = mlr.predict(new_values) - csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] - csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] - csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] + mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] + mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, - csv_predict_entity_id, - csv_predict_unit_of_measurement, - csv_predict_friendly_name, - type_var = 'csv_predictor') + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var = 'mlregressor') def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/machine_learning_regressor.py similarity index 98% rename from src/emhass/csv_predictor.py rename to src/emhass/machine_learning_regressor.py index 3ffeba27..d70df3ec 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/machine_learning_regressor.py @@ -20,7 +20,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) -class CsvPredictor: +class MLRegressor: r""" A forecaster class using machine learning models. 
@@ -115,7 +115,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") diff --git a/src/emhass/utils.py b/src/emhass/utils.py index b57528b2..4bbac11c 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,7 +155,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-model-fit": + if set_type == "regressor-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] @@ -173,7 +173,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic date_features = runtimeparams['date_features'] params['passed_data']['date_features'] = date_features - if set_type == "csv-model-predict": + if set_type == "regressor-model-predict": new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values @@ -303,21 +303,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'csv_predict_entity_id' not in runtimeparams.keys(): - csv_predict_entity_id = "sensor.csv_predictor" + if 'mlr_predict_entity_id' not in runtimeparams.keys(): + mlr_predict_entity_id = "sensor.mlr_predict" else: - csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] - params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id - if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): - csv_predict_unit_of_measurement = None + mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] + params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id + if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = None else: - csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] - params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement - if 'csv_predict_friendly_name' not in runtimeparams.keys(): - csv_predict_friendly_name = "Csv predictor" + mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] + params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement + if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_friendly_name = "mlr predictor" else: - csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] - params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff 
--git a/src/emhass/web_server.py b/src/emhass/web_server.py index db8d0b13..e72022fe 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_model_fit, csv_model_predict +from emhass.command_line import regressor_model_fit, regressor_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,15 +194,15 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-model-fit': - app.logger.info(" >> Performing a csv fit...") - csv_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-fit executed... \n' + elif action_name == 'regressor-model-fit': + app.logger.info(" >> Performing a regressor fit...") + regressor_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-fit executed... \n' return make_response(msg, 201) - elif action_name == 'csv-model-predict': - app.logger.info(" >> Performing a csv predict...") - csv_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-predict executed... \n' + elif action_name == 'regressor-model-predict': + app.logger.info(" >> Performing a regressor predict...") + regressor_model_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From d5adde325856134dc70e68ae46075dc2df182179 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 043/111] change naming and some formatting --- src/emhass/command_line.py | 1021 ++++++++++++++-------- src/emhass/machine_learning_regressor.py | 285 +++--- src/emhass/retrieve_hass.py | 376 +++++--- src/emhass/utils.py | 910 ++++++++++++------- 4 files changed, 1640 insertions(+), 952 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 5c5b4483..b4a9050c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -8,14 +8,15 @@ import json import copy import pickle -import time -import numpy as np -import pandas as pd from datetime import datetime, timezone from typing import Optional, Tuple +from importlib.metadata import version +import numpy as np +import pandas as pd + from distutils.util import strtobool -from importlib.metadata import version + from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster @@ -24,12 +25,19 @@ from emhass import utils -def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, - params: str, runtimeparams: str, set_type: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> dict: +def set_input_data_dict( + config_path: pathlib.Path, + base_path: str, + costfun: str, + params: str, + runtimeparams: str, + set_type: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, +) -> dict: """ Set up some of the data needed for the different actions. 
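With the regressor routes registered in web_server.py just above, the whole loop can be exercised over HTTP. A hypothetical client-side call, assuming a local EMHASS instance on port 5000 and the payload keys as they stand after this series' rename to features/target (file name, feature names and values are placeholders):

import requests

base_url = "http://localhost:5000/action"  # assumed host and port

fit_payload = {
    "csv_file": "prediction.csv",          # placeholder training file
    "features": ["degreeday", "solar"],
    "target": "hours",
    "sklearn_model": "LinearRegression",
    "model_type": "heating_hours",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
}
requests.post(f"{base_url}/regressor-model-fit", json=fit_payload)

predict_payload = {
    "model_type": "heating_hours",
    "new_values": [12.79, 4.24],           # same order as the features list
    "mlr_predict_entity_id": "sensor.mlr_predict",
    "mlr_predict_unit_of_measurement": "h",
    "mlr_predict_friendly_name": "Predicted hours",
}
requests.post(f"{base_url}/regressor-model-predict", json=predict_payload)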
- + :param config_path: The complete absolute path where the config.yaml file is located :type config_path: pathlib.Path :param base_path: The parent folder of the config_path @@ -53,118 +61,196 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.info("Setting up needed data") # Parsing yaml retrieve_hass_conf, optim_conf, plant_conf = utils.get_yaml_parse( - config_path, use_secrets=not(get_data_from_file), params=params) + config_path, use_secrets=not (get_data_from_file), params=params + ) # Treat runtimeparams params, retrieve_hass_conf, optim_conf, plant_conf = utils.treat_runtimeparams( - runtimeparams, params, retrieve_hass_conf, - optim_conf, plant_conf, set_type, logger) + runtimeparams, + params, + retrieve_hass_conf, + optim_conf, + plant_conf, + set_type, + logger, + ) # Define main objects - rh = RetrieveHass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'], - retrieve_hass_conf['freq'], retrieve_hass_conf['time_zone'], - params, base_path, logger, get_data_from_file=get_data_from_file) - fcst = Forecast(retrieve_hass_conf, optim_conf, plant_conf, - params, base_path, logger, get_data_from_file=get_data_from_file) - opt = Optimization(retrieve_hass_conf, optim_conf, plant_conf, - fcst.var_load_cost, fcst.var_prod_price, - costfun, base_path, logger) + rh = RetrieveHass( + retrieve_hass_conf["hass_url"], + retrieve_hass_conf["long_lived_token"], + retrieve_hass_conf["freq"], + retrieve_hass_conf["time_zone"], + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + fcst = Forecast( + retrieve_hass_conf, + optim_conf, + plant_conf, + params, + base_path, + logger, + get_data_from_file=get_data_from_file, + ) + opt = Optimization( + retrieve_hass_conf, + optim_conf, + plant_conf, + fcst.var_load_cost, + fcst.var_prod_price, + costfun, + base_path, + logger, + ) # Perform setup based on type of action if set_type == "perfect-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: - days_list = utils.get_days_list(retrieve_hass_conf['days_to_retrieve']) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): - return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + days_list = utils.get_days_list(retrieve_hass_conf["days_to_retrieve"]) + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): + return False + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # What we don't need for this type of action P_PV_forecast, P_load_forecast, df_input_data_dayahead = None, None, None elif set_type == "dayahead-optim": # Get PV and load 
forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) P_PV_forecast = fcst.get_power_from_weather(df_weather) P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method']) if isinstance(P_load_forecast,bool) and not P_load_forecast: logger.error("Unable to get sensor power photovoltaics, or sensor power load no var loads. Check HA sensors and their daily data") return False - df_input_data_dayahead = pd.DataFrame(np.transpose(np.vstack([P_PV_forecast.values,P_load_forecast.values])), - index=P_PV_forecast.index, - columns=['P_PV_forecast', 'P_load_forecast']) + df_input_data_dayahead = pd.DataFrame( + np.transpose(np.vstack([P_PV_forecast.values, P_load_forecast.values])), + index=P_PV_forecast.index, + columns=["P_PV_forecast", "P_load_forecast"], + ) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] # What we don't need for this type of action df_input_data, days_list = None, None elif set_type == "naive-mpc-optim": # Retrieve data from hass if get_data_from_file: - with open(pathlib.Path(base_path) / 'data' / 'test_df_final.pkl', 'rb') as inp: + with open( + pathlib.Path(base_path) / "data" / "test_df_final.pkl", "rb" + ) as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: days_list = utils.get_days_list(1) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # Get PV and load forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) - P_PV_forecast = fcst.get_power_from_weather(df_weather, set_mix_forecast=True, df_now=df_input_data) - P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method'], set_mix_forecast=True, df_now=df_input_data) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) + 
P_PV_forecast = fcst.get_power_from_weather( + df_weather, set_mix_forecast=True, df_now=df_input_data + ) + P_load_forecast = fcst.get_load_forecast( + method=optim_conf["load_forecast_method"], + set_mix_forecast=True, + df_now=df_input_data, + ) df_input_data_dayahead = pd.concat([P_PV_forecast, P_load_forecast], axis=1) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) - df_input_data_dayahead.columns = ['P_PV_forecast', 'P_load_forecast'] + df_input_data_dayahead.columns = ["P_PV_forecast", "P_load_forecast"] params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] - elif set_type == "forecast-model-fit" or set_type == "forecast-model-predict" or set_type == "forecast-model-tune": + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] + elif ( + set_type == "forecast-model-fit" + or set_type == "forecast-model-predict" + or set_type == "forecast-model-tune" + ): df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) # Retrieve data from hass - days_to_retrieve = params['passed_data']['days_to_retrieve'] - model_type = params['passed_data']['model_type'] - var_model = params['passed_data']['var_model'] + days_to_retrieve = params["passed_data"]["days_to_retrieve"] + model_type = params["passed_data"]["model_type"] + var_model = params["passed_data"]["var_model"] if get_data_from_file: days_list = None - filename = 'data_train_'+model_type+'.pkl' - data_path = pathlib.Path(base_path) / 'data' / filename - with open(data_path, 'rb') as inp: + filename = "data_train_" + model_type + ".pkl" + data_path = pathlib.Path(base_path) / "data" / filename + with open(data_path, "rb") as inp: df_input_data, _ = pickle.load(inp) - df_input_data = df_input_data[df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve):] + df_input_data = df_input_data[ + df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve) : + ] else: days_list = utils.get_days_list(days_to_retrieve) var_list = [var_model] if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - + elif set_type == "regressor-model-fit": - + df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params['passed_data']['csv_file'] - independent_variables = params['passed_data']['independent_variables'] - dependent_variable = params['passed_data']['dependent_variable'] - timestamp = params['passed_data']['timestamp'] + csv_file = params["passed_data"]["csv_file"] + features = params["passed_data"]["features"] + target = params["passed_data"]["target"] + timestamp = params["passed_data"]["timestamp"] filename_path = pathlib.Path(base_path) / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -173,8 +259,8 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, logger.error("The cvs file was not found.") 
raise ValueError("The CSV file " + csv_file + " was not found.") required_columns = [] - required_columns.extend(independent_variables) - required_columns.append(dependent_variable) + required_columns.extend(features) + required_columns.append(target) if timestamp is not None: required_columns.append(timestamp) @@ -188,39 +274,46 @@ def set_input_data_dict(config_path: pathlib.Path, base_path: str, costfun: str, P_PV_forecast, P_load_forecast = None, None days_list = None params = json.loads(params) - + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None else: - logger.error("The passed action argument and hence the set_type parameter for setup is not valid") + logger.error( + "The passed action argument and hence the set_type parameter for setup is not valid" + ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None # The input data dictionnary to return input_data_dict = { - 'root': base_path, - 'retrieve_hass_conf': retrieve_hass_conf, - 'rh': rh, - 'opt': opt, - 'fcst': fcst, - 'df_input_data': df_input_data, - 'df_input_data_dayahead': df_input_data_dayahead, - 'P_PV_forecast': P_PV_forecast, - 'P_load_forecast': P_load_forecast, - 'costfun': costfun, - 'params': params, - 'days_list': days_list + "root": base_path, + "retrieve_hass_conf": retrieve_hass_conf, + "rh": rh, + "opt": opt, + "fcst": fcst, + "df_input_data": df_input_data, + "df_input_data_dayahead": df_input_data_dayahead, + "P_PV_forecast": P_PV_forecast, + "P_load_forecast": P_load_forecast, + "costfun": costfun, + "params": params, + "days_list": days_list, } return input_data_dict - -def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = True, debug: Optional[bool] = False) -> pd.DataFrame: + + +def perfect_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = True, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the perfect forecast optimization routine. 
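An aside before the optimization wrappers: the column contract that set_input_data_dict enforces for regressor-model-fit above is easy to reproduce in isolation. A minimal sketch (file and column names are placeholders):

import pandas as pd

features, target, timestamp = ["degreeday", "solar"], "hours", "timestamp"
df_input_data = pd.read_csv("prediction.csv", parse_dates=True)

required_columns = [*features, target]
if timestamp is not None:
    required_columns.append(timestamp)
if not set(required_columns).issubset(df_input_data.columns):
    raise ValueError(
        f"CSV file should contain the following columns: {', '.join(required_columns)}"
    )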
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -235,28 +328,38 @@ def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing perfect forecast optimization") # Load cost and prod price forecast - df_input_data = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method'], - list_and_perfect=True) - df_input_data = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method'], - list_and_perfect=True) - opt_res = input_data_dict['opt'].perform_perfect_forecast_optim(df_input_data, input_data_dict['days_list']) + df_input_data = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res = input_data_dict["opt"].perform_perfect_forecast_optim( + df_input_data, input_data_dict["days_list"] + ) # Save CSV file for analysis if save_data_to_file: - filename = 'opt_res_perfect_optim_'+input_data_dict['costfun']+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res - -def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + + +def dayahead_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the day-ahead optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -271,29 +374,43 @@ def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing day-ahead forecast optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, - method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) - opt_res_dayahead = input_data_dict['opt'].perform_dayahead_forecast_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) + opt_res_dayahead = input_data_dict["opt"].perform_dayahead_forecast_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_dayahead.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_dayahead.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_dayahead -def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + +def naive_mpc_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the naive Model Predictive Controller optimization routine. 
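For reference while reading the reformatted body below: naive_mpc_optim consumes these passed_data keys at runtime. A hypothetical payload, with illustrative values and two deferrable loads assumed:

runtimeparams = {
    "prediction_horizon": 10,     # number of timesteps to optimize over
    "soc_init": 0.5,              # initial battery state of charge
    "soc_final": 0.6,             # target state of charge at the horizon end
    "def_total_hours": [1, 3],    # operating hours per deferrable load
    "def_start_timestep": [0, 0],
    "def_end_timestep": [0, 0],
}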
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -308,33 +425,50 @@ def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, """ logger.info("Performing naive MPC optimization") # Load cost and prod price forecast - df_input_data_dayahead = input_data_dict['fcst'].get_load_cost_forecast( - input_data_dict['df_input_data_dayahead'], - method=input_data_dict['fcst'].optim_conf['load_cost_forecast_method']) - df_input_data_dayahead = input_data_dict['fcst'].get_prod_price_forecast( - df_input_data_dayahead, method=input_data_dict['fcst'].optim_conf['prod_price_forecast_method']) + df_input_data_dayahead = input_data_dict["fcst"].get_load_cost_forecast( + input_data_dict["df_input_data_dayahead"], + method=input_data_dict["fcst"].optim_conf["load_cost_forecast_method"], + ) + df_input_data_dayahead = input_data_dict["fcst"].get_prod_price_forecast( + df_input_data_dayahead, + method=input_data_dict["fcst"].optim_conf["prod_price_forecast_method"], + ) # The specifics params for the MPC at runtime - prediction_horizon = input_data_dict['params']['passed_data']['prediction_horizon'] - soc_init = input_data_dict['params']['passed_data']['soc_init'] - soc_final = input_data_dict['params']['passed_data']['soc_final'] - def_total_hours = input_data_dict['params']['passed_data']['def_total_hours'] - def_start_timestep = input_data_dict['params']['passed_data']['def_start_timestep'] - def_end_timestep = input_data_dict['params']['passed_data']['def_end_timestep'] - opt_res_naive_mpc = input_data_dict['opt'].perform_naive_mpc_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast'], - prediction_horizon, soc_init, soc_final, def_total_hours, def_start_timestep, def_end_timestep) + prediction_horizon = input_data_dict["params"]["passed_data"]["prediction_horizon"] + soc_init = input_data_dict["params"]["passed_data"]["soc_init"] + soc_final = input_data_dict["params"]["passed_data"]["soc_final"] + def_total_hours = input_data_dict["params"]["passed_data"]["def_total_hours"] + def_start_timestep = input_data_dict["params"]["passed_data"]["def_start_timestep"] + def_end_timestep = input_data_dict["params"]["passed_data"]["def_end_timestep"] + opt_res_naive_mpc = input_data_dict["opt"].perform_naive_mpc_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + prediction_horizon, + soc_init, + soc_final, + def_total_hours, + def_start_timestep, + def_end_timestep, + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_naive_mpc_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: - opt_res_naive_mpc.to_csv(pathlib.Path(input_data_dict['root']) / filename, index_label='timestamp') + opt_res_naive_mpc.to_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_label="timestamp" + ) return opt_res_naive_mpc -def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, 
MLForecaster]: + +def forecast_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -346,29 +480,37 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - var_model = input_data_dict['params']['passed_data']['var_model'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - num_lags = input_data_dict['params']['passed_data']['num_lags'] - split_date_delta = input_data_dict['params']['passed_data']['split_date_delta'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + var_model = input_data_dict["params"]["passed_data"]["var_model"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + num_lags = input_data_dict["params"]["passed_data"]["num_lags"] + split_date_delta = input_data_dict["params"]["passed_data"]["split_date_delta"] + perform_backtest = input_data_dict["params"]["passed_data"]["perform_backtest"] + root = input_data_dict["root"] # The ML forecaster object - mlf = MLForecaster(data, model_type, var_model, sklearn_model, num_lags, root, logger) + mlf = MLForecaster( + data, model_type, var_model, sklearn_model, num_lags, root, logger + ) # Fit the ML model - df_pred, df_pred_backtest = mlf.fit(split_date_delta=split_date_delta, - perform_backtest=perform_backtest) + df_pred, df_pred_backtest = mlf.fit( + split_date_delta=split_date_delta, perform_backtest=perform_backtest + ) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred, df_pred_backtest, mlf -def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, - use_last_window: Optional[bool] = True, debug: Optional[bool] = False, - mlf: Optional[MLForecaster] = None) -> pd.DataFrame: + +def forecast_model_predict( + input_data_dict: dict, + logger: logging.Logger, + use_last_window: Optional[bool] = True, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> pd.DataFrame: r"""Perform a forecast model predict using a previously trained skforecast model. 
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -390,52 +532,79 @@ def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, :rtype: pd.DataFrame """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return # Make predictions if use_last_window: - data_last_window = copy.deepcopy(input_data_dict['df_input_data']) + data_last_window = copy.deepcopy(input_data_dict["df_input_data"]) else: data_last_window = None predictions = mlf.predict(data_last_window) # Publish data to a Home Assistant sensor - model_predict_publish = input_data_dict['params']['passed_data']['model_predict_publish'] - model_predict_entity_id = input_data_dict['params']['passed_data']['model_predict_entity_id'] - model_predict_unit_of_measurement = input_data_dict['params']['passed_data']['model_predict_unit_of_measurement'] - model_predict_friendly_name = input_data_dict['params']['passed_data']['model_predict_friendly_name'] - publish_prefix = input_data_dict['params']['passed_data']['publish_prefix'] + model_predict_publish = input_data_dict["params"]["passed_data"][ + "model_predict_publish" + ] + model_predict_entity_id = input_data_dict["params"]["passed_data"][ + "model_predict_entity_id" + ] + model_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "model_predict_unit_of_measurement" + ] + model_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "model_predict_friendly_name" + ] + publish_prefix = input_data_dict["params"]["passed_data"]["publish_prefix"] if model_predict_publish is True: # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = predictions.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = predictions.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = predictions.index.get_indexer([now_precise], method="ffill")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = predictions.index.get_indexer([now_precise], method="bfill")[ + 0 + ] if idx_closest == -1: - idx_closest = 
-def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False, mlf: Optional[MLForecaster] = None - ) -> Tuple[pd.DataFrame, MLForecaster]: + +def forecast_model_tune( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> Tuple[pd.DataFrame, MLForecaster]: """Tune a forecast model's hyperparameters using Bayesian optimization. :param input_data_dict: A dictionary with multiple data used by the action functions @@ -451,28 +620,32 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, :rtype: Tuple[pd.DataFrame, MLForecaster] """ # Load model - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlf.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlf.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this tune method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this tune method" + ) return None, None # Tune the model df_pred_optim = mlf.tune(debug=debug) # Save model if not debug: - filename = model_type+'_mlf.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlf.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf
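The regressor functions that follow read all of their inputs from `params['passed_data']`. A hypothetical runtime-parameters payload covering both actions (values are illustrative only; the keys mirror the lookups in the code below):

```python
# Illustrative only: the values are made up, the keys mirror the code below
passed_data = {
    "model_type": "heating_hours_degreeday",
    "sklearn_model": "LinearRegression",
    "features": ["degreeday", "solar"],
    "target": "heating_hours",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
    # one value per feature plus one per date feature, in fit order
    "new_values": [8.2, 7.23, 2, 6],
    "mlr_predict_entity_id": "sensor.mlr_predict",
    "mlr_predict_unit_of_measurement": "h",
    "mlr_predict_friendly_name": "mlr predictor",
}
```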
-def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a regressor model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionary with multiple data used by the action functions @@ -482,26 +655,30 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - timestamp = input_data_dict['params']['passed_data']['timestamp'] - date_features = input_data_dict['params']['passed_data']['date_features'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + features = input_data_dict["params"]["passed_data"]["features"] + target = input_data_dict["params"]["passed_data"]["target"] + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + date_features = input_data_dict["params"]["passed_data"]["date_features"] + root = input_data_dict["root"] # The MLRegressor object - mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + mlr = MLRegressor( + data, model_type, sklearn_model, features, target, timestamp, logger + ) # Fit the ML model mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_mlr.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlr.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL)
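To exercise `MLRegressor` in isolation, a minimal sketch on synthetic data (feature and target names are hypothetical; no timestamp column, so no date features):

```python
import logging

import numpy as np
import pandas as pd

from emhass.machine_learning_regressor import MLRegressor

logger = logging.getLogger(__name__)
rng = np.random.default_rng(42)
# Synthetic data: the target is a noisy linear mix of the two features
solar = rng.uniform(0, 5, 200)
degreeday = rng.uniform(0, 15, 200)
data = pd.DataFrame(
    {
        "solar": solar,
        "degreeday": degreeday,
        "heating_hours": 0.5 * solar + 0.2 * degreeday + rng.normal(0, 0.1, 200),
    }
)
mlr = MLRegressor(
    data,
    "heating_hours_degreeday",  # hypothetical model_type name
    "LinearRegression",
    ["solar", "degreeday"],
    "heating_hours",
    None,  # no timestamp column in this sketch
    logger,
)
mlr.fit()  # train/test split plus grid search, as in the class code
print(mlr.predict([2.24, 5.68]))  # one prediction for new feature values
```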
-def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_predict( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a prediction from a csv file. :param input_data_dict: A dictionary with multiple data used by the action functions @@ -511,38 +688,53 @@ def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlr.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlr.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlr = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML regressor file was not found, please run a model fit method before this predict method" + ) return - new_values = input_data_dict['params']['passed_data']['new_values'] + new_values = input_data_dict["params"]["passed_data"]["new_values"] # Predict from csv file prediction = mlr.predict(new_values) - mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] - mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] - mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ + "mlr_predict_entity_id" + ] + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "mlr_predict_unit_of_measurement" + ] + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "mlr_predict_friendly_name" + ] # Publish prediction idx = 0 - input_data_dict['rh'].post_data(prediction, idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var = 'mlregressor') + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + )
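`publish_data`, which follows, republishes results that a previous optimization task wrote to CSV. Its reload pattern boils down to this sketch (assumes `opt_res_latest.csv` exists and a 30-minute optimization step):

```python
import pandas as pd

# 'opt_res_latest.csv' is written by the optimization actions; the assigned
# frequency must match retrieve_hass_conf["freq"] (30 min is assumed here)
opt_res_latest = pd.read_csv("opt_res_latest.csv", index_col="timestamp")
opt_res_latest.index = pd.to_datetime(opt_res_latest.index)
opt_res_latest.index.freq = pd.Timedelta("30min")
print(opt_res_latest[["P_PV", "P_Load", "P_grid"]].head())
```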
-def publish_data(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, - opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: + +def publish_data( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + opt_res_latest: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: """ Publish the data obtained from the optimization results. - + :param input_data_dict: A dictionary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -556,166 +748,245 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, logger.info("Publishing data to HASS instance") # Check if a day ahead optimization has been performed (read CSV file) if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: - filename = 'opt_res_latest.csv' + filename = "opt_res_latest.csv" if opt_res_latest is None: - if not os.path.isfile(pathlib.Path(input_data_dict['root']) / filename): + if not os.path.isfile(pathlib.Path(input_data_dict["root"]) / filename): logger.error("File not found error, run an optimization task first.") return else: - opt_res_latest = pd.read_csv(pathlib.Path(input_data_dict['root']) / filename, index_col='timestamp') + opt_res_latest = pd.read_csv( + pathlib.Path(input_data_dict["root"]) / filename, index_col="timestamp" + ) opt_res_latest.index = pd.to_datetime(opt_res_latest.index) - opt_res_latest.index.freq = input_data_dict['retrieve_hass_conf']['freq'] + opt_res_latest.index.freq = input_data_dict["retrieve_hass_conf"]["freq"] # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="ffill")[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="bfill")[0] if idx_closest == -1: - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] # Publish the data - params = json.loads(input_data_dict['params']) - publish_prefix = params['passed_data']['publish_prefix'] + params = json.loads(input_data_dict["params"]) + publish_prefix = params["passed_data"]["publish_prefix"] # Publish PV forecast - custom_pv_forecast_id = params['passed_data']['custom_pv_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_PV'], idx_closest, - custom_pv_forecast_id["entity_id"], - custom_pv_forecast_id["unit_of_measurement"], - custom_pv_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) + custom_pv_forecast_id = params["passed_data"]["custom_pv_forecast_id"] +
input_data_dict["rh"].post_data( + opt_res_latest["P_PV"], + idx_closest, + custom_pv_forecast_id["entity_id"], + custom_pv_forecast_id["unit_of_measurement"], + custom_pv_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) # Publish Load forecast - custom_load_forecast_id = params['passed_data']['custom_load_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_Load'], idx_closest, - custom_load_forecast_id["entity_id"], - custom_load_forecast_id["unit_of_measurement"], - custom_load_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = ['P_PV', 'P_Load'] + custom_load_forecast_id = params["passed_data"]["custom_load_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_Load"], + idx_closest, + custom_load_forecast_id["entity_id"], + custom_load_forecast_id["unit_of_measurement"], + custom_load_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = ["P_PV", "P_Load"] # Publish deferrable loads - custom_deferrable_forecast_id = params['passed_data']['custom_deferrable_forecast_id'] - for k in range(input_data_dict['opt'].optim_conf['num_def_loads']): + custom_deferrable_forecast_id = params["passed_data"][ + "custom_deferrable_forecast_id" + ] + for k in range(input_data_dict["opt"].optim_conf["num_def_loads"]): if "P_deferrable{}".format(k) not in opt_res_latest.columns: - logger.error("P_deferrable{}".format(k)+" was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + logger.error( + "P_deferrable{}".format(k) + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + ) else: - input_data_dict['rh'].post_data(opt_res_latest["P_deferrable{}".format(k)], idx_closest, - custom_deferrable_forecast_id[k]["entity_id"], - custom_deferrable_forecast_id[k]["unit_of_measurement"], - custom_deferrable_forecast_id[k]["friendly_name"], - type_var = 'deferrable', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_deferrable{}".format(k)] + input_data_dict["rh"].post_data( + opt_res_latest["P_deferrable{}".format(k)], + idx_closest, + custom_deferrable_forecast_id[k]["entity_id"], + custom_deferrable_forecast_id[k]["unit_of_measurement"], + custom_deferrable_forecast_id[k]["friendly_name"], + type_var="deferrable", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_deferrable{}".format(k)] # Publish battery power - if input_data_dict['opt'].optim_conf['set_use_battery']: - if 'P_batt' not in opt_res_latest.columns: - logger.error("P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + if input_data_dict["opt"].optim_conf["set_use_battery"]: + if "P_batt" not in opt_res_latest.columns: + logger.error( + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." 
+ ) else: - custom_batt_forecast_id = params['passed_data']['custom_batt_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_batt'], idx_closest, - custom_batt_forecast_id["entity_id"], - custom_batt_forecast_id["unit_of_measurement"], - custom_batt_forecast_id["friendly_name"], - type_var = 'batt', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_batt"] - custom_batt_soc_forecast_id = params['passed_data']['custom_batt_soc_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['SOC_opt']*100, idx_closest, - custom_batt_soc_forecast_id["entity_id"], - custom_batt_soc_forecast_id["unit_of_measurement"], - custom_batt_soc_forecast_id["friendly_name"], - type_var = 'SOC', - publish_prefix = publish_prefix) - cols_published = cols_published+["SOC_opt"] + custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_batt"], + idx_closest, + custom_batt_forecast_id["entity_id"], + custom_batt_forecast_id["unit_of_measurement"], + custom_batt_forecast_id["friendly_name"], + type_var="batt", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_batt"] + custom_batt_soc_forecast_id = params["passed_data"][ + "custom_batt_soc_forecast_id" + ] + input_data_dict["rh"].post_data( + opt_res_latest["SOC_opt"] * 100, + idx_closest, + custom_batt_soc_forecast_id["entity_id"], + custom_batt_soc_forecast_id["unit_of_measurement"], + custom_batt_soc_forecast_id["friendly_name"], + type_var="SOC", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["SOC_opt"] # Publish grid power - custom_grid_forecast_id = params['passed_data']['custom_grid_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_grid'], idx_closest, - custom_grid_forecast_id["entity_id"], - custom_grid_forecast_id["unit_of_measurement"], - custom_grid_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_grid"] + custom_grid_forecast_id = params["passed_data"]["custom_grid_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_grid"], + idx_closest, + custom_grid_forecast_id["entity_id"], + custom_grid_forecast_id["unit_of_measurement"], + custom_grid_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_grid"] # Publish total value of cost function - custom_cost_fun_id = params['passed_data']['custom_cost_fun_id'] - col_cost_fun = [i for i in opt_res_latest.columns if 'cost_fun_' in i] - input_data_dict['rh'].post_data(opt_res_latest[col_cost_fun], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'cost_fun', - publish_prefix = publish_prefix) + custom_cost_fun_id = params["passed_data"]["custom_cost_fun_id"] + col_cost_fun = [i for i in opt_res_latest.columns if "cost_fun_" in i] + input_data_dict["rh"].post_data( + opt_res_latest[col_cost_fun], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="cost_fun", + publish_prefix=publish_prefix, + ) # Publish the optimization status - custom_cost_fun_id = params['passed_data']['custom_optim_status_id'] + custom_cost_fun_id = params["passed_data"]["custom_optim_status_id"] if "optim_status" not in opt_res_latest: - opt_res_latest["optim_status"] = 'Optimal' - logger.warning("no optim_status 
in opt_res_latest, run an optimization task first") - input_data_dict['rh'].post_data(opt_res_latest['optim_status'], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'optim_status', - publish_prefix = publish_prefix) - cols_published = cols_published+["optim_status"] + opt_res_latest["optim_status"] = "Optimal" + logger.warning( + "no optim_status in opt_res_latest, run an optimization task first" + ) + input_data_dict["rh"].post_data( + opt_res_latest["optim_status"], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="optim_status", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["optim_status"] # Publish unit_load_cost - custom_unit_load_cost_id = params['passed_data']['custom_unit_load_cost_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_load_cost'], idx_closest, - custom_unit_load_cost_id["entity_id"], - custom_unit_load_cost_id["unit_of_measurement"], - custom_unit_load_cost_id["friendly_name"], - type_var = 'unit_load_cost', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_load_cost"] + custom_unit_load_cost_id = params["passed_data"]["custom_unit_load_cost_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_load_cost"], + idx_closest, + custom_unit_load_cost_id["entity_id"], + custom_unit_load_cost_id["unit_of_measurement"], + custom_unit_load_cost_id["friendly_name"], + type_var="unit_load_cost", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_load_cost"] # Publish unit_prod_price - custom_unit_prod_price_id = params['passed_data']['custom_unit_prod_price_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_prod_price'], idx_closest, - custom_unit_prod_price_id["entity_id"], - custom_unit_prod_price_id["unit_of_measurement"], - custom_unit_prod_price_id["friendly_name"], - type_var = 'unit_prod_price', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_prod_price"] + custom_unit_prod_price_id = params["passed_data"]["custom_unit_prod_price_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_prod_price"], + idx_closest, + custom_unit_prod_price_id["entity_id"], + custom_unit_prod_price_id["unit_of_measurement"], + custom_unit_prod_price_id["friendly_name"], + type_var="unit_prod_price", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_prod_price"] # Create a DF summarizing what has been published opt_res = opt_res_latest[cols_published].loc[[opt_res_latest.index[idx_closest]]] return opt_res
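Everything that `main()` below wires together can also be driven directly from Python. A minimal sketch (hypothetical paths; a valid configuration file is assumed):

```python
import pathlib

from emhass import utils
from emhass.command_line import (
    dayahead_forecast_optim,
    publish_data,
    set_input_data_dict,
)

config_path = pathlib.Path("/app/config_emhass.yaml")  # hypothetical path
base_path = str(config_path.parent)
logger, ch = utils.get_logger(__name__, base_path, save_to_file=False)
# Same argument order as the main() wiring below
input_data_dict = set_input_data_dict(
    config_path, base_path, "profit", None, None, "dayahead-optim", logger, False
)
opt_res = dayahead_forecast_optim(input_data_dict, logger)
opt_res_published = publish_data(input_data_dict, logger)
```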
- - + + def main(): r"""Define the main command line entry function. This function may take several arguments as inputs. You can type `emhass --help` to see the list of options: - + - action: Set the desired action, options are: perfect-optim, dayahead-optim, naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune - + - config: Define path to the config.yaml file - + - costfun: Define the type of cost function, options are: profit, cost, self-consumption - + - log2file: Define if we should log to a file or not - + - params: Configuration parameters passed from data/options.json if using the add-on - + - runtimeparams: Pass runtime optimization parameters as dictionary - + - debug: Use True for testing purposes - + """ # Parsing arguments parser = argparse.ArgumentParser() - parser.add_argument('--action', type=str, help='Set the desired action, options are: perfect-optim, dayahead-optim,\ naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune') - parser.add_argument('--config', type=str, help='Define path to the config.yaml file') - parser.add_argument('--costfun', type=str, default='profit', help='Define the type of cost function, options are: profit, cost, self-consumption') - parser.add_argument('--log2file', type=strtobool, default='False', help='Define if we should log to a file or not') - parser.add_argument('--params', type=str, default=None, help='Configuration parameters passed from data/options.json') - parser.add_argument('--runtimeparams', type=str, default=None, help='Pass runtime optimization parameters as dictionnary') - parser.add_argument('--debug', type=strtobool, default='False', help='Use True for testing purposes') + parser.add_argument( + "--action", + type=str, + help="Set the desired action, options are: perfect-optim, dayahead-optim,\ naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune", + ) + parser.add_argument( + "--config", type=str, help="Define path to the config.yaml file" + ) + parser.add_argument( + "--costfun", + type=str, + default="profit", + help="Define the type of cost function, options are: profit, cost, self-consumption", + ) + parser.add_argument( + "--log2file", + type=strtobool, + default="False", + help="Define if we should log to a file or not", + ) + parser.add_argument( + "--params", + type=str, + default=None, + help="Configuration parameters passed from data/options.json", + ) + parser.add_argument( + "--runtimeparams", + type=str, + default=None, + help="Pass runtime optimization parameters as dictionary", + ) + parser.add_argument( + "--debug", type=strtobool, default="False", help="Use True for testing purposes" + ) args = parser.parse_args() # The path to the configuration files config_path = pathlib.Path(args.config) @@ -724,39 +995,56 @@ def main(): logger, ch = utils.get_logger(__name__, base_path, save_to_file=bool(args.log2file)) # Additional argument try: - parser.add_argument('--version', action='version', version='%(prog)s '+version('emhass')) + parser.add_argument( + "--version", action="version", version="%(prog)s " + version("emhass") + ) args = parser.parse_args() except Exception: - logger.info("Version not found for emhass package. Or importlib exited with PackageNotFoundError.") + logger.info( + "Version not found for emhass package. Or importlib exited with PackageNotFoundError."
+ ) # Setup parameters - input_data_dict = set_input_data_dict(config_path, base_path, - args.costfun, args.params, args.runtimeparams, args.action, - logger, args.debug) + input_data_dict = set_input_data_dict( + config_path, + base_path, + args.costfun, + args.params, + args.runtimeparams, + args.action, + logger, + args.debug, + ) # Perform selected action - if args.action == 'perfect-optim': + if args.action == "perfect-optim": opt_res = perfect_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'dayahead-optim': + elif args.action == "dayahead-optim": opt_res = dayahead_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'naive-mpc-optim': + elif args.action == "naive-mpc-optim": opt_res = naive_mpc_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'forecast-model-fit': - df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) + elif args.action == "forecast-model-fit": + df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit( + input_data_dict, logger, debug=args.debug + ) opt_res = None - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred = forecast_model_predict(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred = forecast_model_predict( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'publish-data': + elif args.action == "publish-data": opt_res = publish_data(input_data_dict, logger) else: logger.error("The passed action argument is not valid") @@ -765,15 +1053,20 @@ def main(): # Flush the logger ch.close() logger.removeHandler(ch) - if args.action == 'perfect-optim' or args.action == 'dayahead-optim' or \ - args.action == 'naive-mpc-optim' or args.action == 'publish-data': + if ( + args.action == "perfect-optim" + or args.action == "dayahead-optim" + or args.action == "naive-mpc-optim" + or args.action == "publish-data" + ): return opt_res - elif args.action == 'forecast-model-fit': + elif args.action == "forecast-model-fit": return df_fit_pred, df_fit_pred_backtest, mlf - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": return df_pred - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": return df_pred_optim, mlf -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index d70df3ec..80ddd74f 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -9,8 +9,12 @@ import pandas as pd import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score +from sklearn.ensemble import ( + AdaBoostRegressor, + GradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.metrics import r2_score from sklearn.linear_model 
import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split @@ -20,21 +24,31 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) + class MLRegressor: r""" A regressor class using machine learning models. - + This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - + It exposes two main methods: - + - `fit`: to train a model with the passed data. - + - `predict`: to obtain a forecast from a pre-trained model. - + """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: + + def __init__( + self, + data, + model_type: str, + sklearn_model: str, + features: list, + target: str, + timestamp: str, + logger: logging.Logger, + ) -> None: r"""Define constructor for the regressor class. :param data: The data that will be used for train/test @@ -42,33 +56,35 @@ def __init__(self, data, model_type: str, sklearn_model: str, independent_variab :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param independent_variables: A list of independent variables. \ + :param features: A list of features. \ Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ + :type features: list + :param target: The target (to be predicted). \ Example: `hours`. - :type dependent_variable: str + :type target: str :param timestamp: If defined, the column key that has to be used as timestamp. :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable + self.features = features + self.target = target self.timestamp = timestamp self.model_type = model_type self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None self.model = None - self.grid_search =None - + self.grid_search = None + @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: + def add_date_features( + data: pd.DataFrame, date_features: list, timestamp: str + ) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame @@ -79,179 +95,162 @@ def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) - :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] + df[timestamp] = pd.to_datetime(df["timestamp"]) + if "year" in date_features: + df["year"] = [i.year for i in df["timestamp"]] + if "month" in date_features: + df["month"] = [i.month for i in df["timestamp"]] + if "day_of_week" in date_features: + df["day_of_week"] = [i.dayofweek for i in df["timestamp"]] + if "day_of_year" in date_features: + df["day_of_year"] = [i.dayofyear for i in df["timestamp"]] + if "day" in date_features: + df["day"] = [i.day for i in df["timestamp"]] + if "hour" in date_features: + df["hour"] = [i.hour for i in df["timestamp"]] return df def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - + :param date_features: A list of 'date_features' to take into account when fitting the model. :type date_features: list """ - self.logger.info("Performing a csv model fit for "+self.model_type) + self.logger.info("Performing a MLRegressor fit for " + self.model_type) self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + self.data_exo[self.features] = self.data[self.features] + self.data_exo[self.target] = self.data[self.target] keep_columns = [] - keep_columns.extend(self.independent_variables) + keep_columns.extend(self.features) if self.timestamp is not None: keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) + keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features( + self.data_exo, date_features, self.timestamp + ) else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") + self.logger.error( + "If no timestamp is provided, date_features cannot be used; continuing without date_features."
+ ) - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + y = self.data_exo[self.target] + self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + self.data_exo = self.data_exo.drop(self.timestamp, axis=1) X = self.data_exo - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 + ) self.steps = len(X_test) regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = 
regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] + + if self.sklearn_model == "LinearRegression": + base_model = regression_methods["LinearRegression"]["model"] + param_grid = regression_methods["LinearRegression"]["param_grid"] + elif self.sklearn_model == "RidgeRegression": + base_model = regression_methods["RidgeRegression"]["model"] + param_grid = regression_methods["RidgeRegression"]["param_grid"] + elif self.sklearn_model == "LassoRegression": + base_model = regression_methods["LassoRegression"]["model"] + param_grid = regression_methods["LassoRegression"]["param_grid"] + elif self.sklearn_model == "RandomForestRegression": + base_model = regression_methods["RandomForestRegression"]["model"] + param_grid = regression_methods["RandomForestRegression"]["param_grid"] + elif self.sklearn_model == "GradientBoostingRegression": + base_model = regression_methods["GradientBoostingRegression"]["model"] + param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] + elif self.sklearn_model == "AdaBoostRegression": + base_model = regression_methods["AdaBoostRegression"]["model"] + param_grid = regression_methods["AdaBoostRegression"]["param_grid"] else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } + self.logger.error( + "Passed sklearn model " + self.sklearn_model + " is not valid" + ) + + self.model = make_pipeline(StandardScaler(), base_model) # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) - + self.grid_search = GridSearchCV( + self.model, + param_grid, + cv=5, + scoring="neg_mean_squared_error", + 
refit=True, + verbose=0, + n_jobs=-1, + ) + # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") + self.logger.info("Training a " + self.sklearn_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) + print("Best hyperparameters: ", self.grid_search.best_params_) print("Best score for cost function: ", self.grid_search.best_score_) self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") self.model = self.grid_search.best_estimator_ - # Make predictions predictions = self.model.predict(X_test.values) predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - + pred_metric = r2_score(y_test, predictions) + self.logger.info( + f"Prediction R2 score of fitted model on test data: {pred_metric}" + ) - def predict(self, new_values:list) -> np.ndarray: + def predict(self, new_values: list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + :param new_values: The new values for the features (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.model_type) + self.logger.info("Performing a prediction for " + self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values)
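One detail worth noting in the grid search above: with `make_pipeline`, parameter-grid keys must carry the lowercased step name as a prefix, which is why the grids use keys like `linearregression__fit_intercept`. A standalone sketch of the same scaler-plus-model tuning pattern on synthetic data:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = rng.random((100, 2))
y = X @ np.array([0.5, 0.2]) + 0.1

# make_pipeline names the step 'linearregression', hence the grid-key prefix
model = make_pipeline(StandardScaler(), LinearRegression())
grid_search = GridSearchCV(
    model,
    {"linearregression__fit_intercept": [True, False]},
    cv=5,
    scoring="neg_mean_squared_error",
    refit=True,
    n_jobs=-1,
)
grid_search.fit(X, y)
print(grid_search.best_params_)
print(grid_search.best_estimator_.predict(np.array([[2.24, 5.68]])))
```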
diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index ca20ce40..9f47efef 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -30,12 +30,20 @@ class RetrieveHass: """ - def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, - time_zone: datetime.timezone, params: str, base_path: str, logger: logging.Logger, - get_data_from_file: Optional[bool] = False) -> None: + def __init__( + self, + hass_url: str, + long_lived_token: str, + freq: pd.Timedelta, + time_zone: datetime.timezone, + params: str, + base_path: str, + logger: logging.Logger, + get_data_from_file: Optional[bool] = False, + ) -> None: """ Define constructor for RetrieveHass class. - + :param hass_url: The URL of the Home Assistant instance :type hass_url: str :param long_lived_token: The long lived token retrieved from the configuration pane @@ -50,7 +58,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, :type base_path: str :param logger: The passed logger object :type logger: logging object - :param get_data_from_file: Select if data should be retrieved from a + :param get_data_from_file: Select if data should be retrieved from a previously saved pickle useful for testing or directly from connection to hass database :type get_data_from_file: bool, optional @@ -65,9 +73,14 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, self.logger = logger self.get_data_from_file = get_data_from_file - def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: Optional[bool] = False, - significant_changes_only: Optional[bool] = False, - test_url: Optional[str] = 'empty') -> None: + def get_data( + self, + days_list: pd.date_range, + var_list: list, + minimal_response: Optional[bool] = False, + significant_changes_only: Optional[bool] = False, + test_url: Optional[str] = "empty", + ) -> None: r""" Retrieve the actual data from hass. @@ -92,20 +105,36 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O """ self.logger.info("Retrieve hass get data method initiated...") self.df_final = pd.DataFrame() - x = 0 #iterate based on days + x = 0 # iterate based on days # Looping on each day from days list for day in days_list: - + for i, var in enumerate(var_list): - + - if test_url == 'empty': - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/history/period/"+day.isoformat()+"?filter_entity_id="+var - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/history/period/"+day.isoformat()+"?filter_entity_id="+var - if minimal_response: # A support for minimal response + + if test_url == "empty": + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = ( + self.hass_url + + "/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + else: # Otherwise the Home Assistant Core API it is + url = ( + self.hass_url + + "api/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + if minimal_response: # A support for minimal response url = url + "?minimal_response" - if significant_changes_only: # And for signicant changes only (check the HASS restful API for more info) + if ( + significant_changes_only + ): # And for significant changes only (check the HASS restful API for more info) url = url + "?significant_changes_only" else: url = test_url @@ -116,59 +145,96 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O try: response = get(url, headers=headers) except Exception: - self.logger.error("Unable to access Home Assistance instance, check URL") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistant instance, check URL" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'" + ) return False else: if response.status_code == 401: - self.logger.error("Unable to access Home Assistance instance, TOKEN/KEY") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistant instance, TOKEN/KEY" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'"
+ ) return False if response.status_code > 299: return f"Request Get Error: {response.status_code}" - '''import bz2 # Uncomment to save a serialized data for tests import _pickle as cPickle with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f: - cPickle.dump(response, f)''' + """import bz2 # Uncomment to save a serialized data for tests import _pickle as cPickle with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f: + cPickle.dump(response, f)""" try: # Sometimes when there are connection problems we need to catch empty retrieved json data = response.json()[0] except IndexError: - if x == 0: - self.logger.error("The retrieved JSON is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + if x == 0: + self.logger.error( + "The retrieved JSON is empty, a sensor: " + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("The retrieved JSON is empty for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False df_raw = pd.DataFrame.from_dict(data) if len(df_raw) == 0: - if x == 0: - self.logger.error("The retrieved Dataframe is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + if x == 0: + self.logger.error( + "The retrieved Dataframe is empty, a sensor: " + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("Retrieved empty Dataframe for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False - if i == 0: # Defining the DataFrame container - from_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").min() - to_date = pd.to_datetime(df_raw['last_changed'], format="ISO8601").max() - ts = pd.to_datetime(pd.date_range(start=from_date, end=to_date, freq=self.freq), - format='%Y-%d-%m %H:%M').round(self.freq, ambiguous='infer', nonexistent='shift_forward') - df_day = pd.DataFrame(index = ts) + if i == 0: # Defining the DataFrame container + from_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).min() + to_date = pd.to_datetime( + df_raw["last_changed"], format="ISO8601" + ).max() + ts = pd.to_datetime( + pd.date_range(start=from_date, end=to_date, freq=self.freq), + format="%Y-%d-%m %H:%M", + ).round(self.freq, ambiguous="infer", nonexistent="shift_forward") + df_day = pd.DataFrame(index=ts) # Caution with undefined string data: unknown, unavailable, etc.
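That caution is handled by the replace/astype chain just below; as a standalone sketch with hypothetical raw states:

```python
import numpy as np
import pandas as pd

# Hypothetical raw history states as returned by the Home Assistant API
df_raw = pd.DataFrame({"state": ["1.2", "unknown", "3.4", "unavailable", ""]})
df_tp = (
    df_raw.copy()[["state"]]
    .replace(["unknown", "unavailable", ""], np.nan)
    .astype(float)
    .rename(columns={"state": "sensor.power_load_no_var_loads"})
)
print(df_tp)  # non-numeric states become NaN before resampling
```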
- df_tp = df_raw.copy()[['state']].replace( - ['unknown', 'unavailable', ''], np.nan).astype(float).rename(columns={'state': var}) + df_tp = ( + df_raw.copy()[["state"]] + .replace(["unknown", "unavailable", ""], np.nan) + .astype(float) + .rename(columns={"state": var}) + ) # Setting index, resampling and concatenation - df_tp.set_index(pd.to_datetime(df_raw['last_changed'], format="ISO8601"), inplace=True) + df_tp.set_index( + pd.to_datetime(df_raw["last_changed"], format="ISO8601"), + inplace=True, + ) df_tp = df_tp.resample(self.freq).mean() df_day = pd.concat([df_day, df_tp], axis=1) - + x += 1 self.df_final = pd.concat([self.df_final, df_day], axis=0) self.df_final = set_df_index_freq(self.df_final) if self.df_final.index.freq != self.freq: - self.logger.error("The inferred freq from data is not equal to the defined freq in passed parameters") + self.logger.error( + "The inferred freq from data is not equal to the defined freq in passed parameters" + ) return False return True - - def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set_zero_min: Optional[bool] = True, - var_replace_zero: Optional[list] = None, var_interp: Optional[list] = None) -> None: + + def prepare_data( + self, + var_load: str, + load_negative: Optional[bool] = False, + set_zero_min: Optional[bool] = True, + var_replace_zero: Optional[list] = None, + var_interp: Optional[list] = None, + ) -> None: r""" Apply some data treatment in preparation for the optimization task. @@ -192,18 +258,24 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set """ try: - if load_negative: # Apply the correct sign to load power - self.df_final[var_load+'_positive'] = -self.df_final[var_load] + if load_negative: # Apply the correct sign to load power + self.df_final[var_load + "_positive"] = -self.df_final[var_load] else: - self.df_final[var_load+'_positive'] = self.df_final[var_load] + self.df_final[var_load + "_positive"] = self.df_final[var_load] self.df_final.drop([var_load], inplace=True, axis=1) except KeyError: - self.logger.error("Variable "+var_load+" was not found. This is typically because no data could be retrieved from Home Assistant") + self.logger.error( + "Variable " + + var_load + + " was not found. 
This is typically because no data could be retrieved from Home Assistant" + ) return False except ValueError: - self.logger.error("sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same") - return False - if set_zero_min: # Apply minimum values + self.logger.error( + "sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same" + ) + return False + if set_zero_min: # Apply minimum values self.df_final.clip(lower=0.0, inplace=True, axis=1) self.df_final.replace(to_replace=0.0, value=np.nan, inplace=True) new_var_replace_zero = [] @@ -211,59 +283,74 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set # Just changing the names of variables to contain the fact that they are considered positive if var_replace_zero is not None: for string in var_replace_zero: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_replace_zero.append(new_string) else: new_var_replace_zero = None if var_interp is not None: for string in var_interp: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_interp.append(new_string) else: new_var_interp = None # Treating NaN replacement: either by zeros or by linear interpolation if new_var_replace_zero is not None: - self.df_final[new_var_replace_zero] = self.df_final[new_var_replace_zero].fillna(0.0) + self.df_final[new_var_replace_zero] = self.df_final[ + new_var_replace_zero + ].fillna(0.0) if new_var_interp is not None: self.df_final[new_var_interp] = self.df_final[new_var_interp].interpolate( - method='linear', axis=0, limit=None) + method="linear", axis=0, limit=None + ) self.df_final[new_var_interp] = self.df_final[new_var_interp].fillna(0.0) # Setting the correct time zone on DF index if self.time_zone is not None: self.df_final.index = self.df_final.index.tz_convert(self.time_zone) # Drop datetimeindex duplicates on final DF - self.df_final = self.df_final[~self.df_final.index.duplicated(keep='first')] + self.df_final = self.df_final[~self.df_final.index.duplicated(keep="first")] return True - + @staticmethod - def get_attr_data_dict(data_df: pd.DataFrame, idx: int, entity_id: str, - unit_of_measurement: str, friendly_name: str, - list_name: str, state: float) -> dict: - list_df = copy.deepcopy(data_df).loc[data_df.index[idx]:].reset_index() - list_df.columns = ['timestamps', entity_id] - ts_list = [str(i) for i in list_df['timestamps'].tolist()] - vals_list = [str(np.round(i,2)) for i in list_df[entity_id].tolist()] + def get_attr_data_dict( + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + list_name: str, + state: float, + ) -> dict: + list_df = copy.deepcopy(data_df).loc[data_df.index[idx] :].reset_index() + list_df.columns = ["timestamps", entity_id] + ts_list = [str(i) for i in list_df["timestamps"].tolist()] + vals_list = [str(np.round(i, 2)) for i in list_df[entity_id].tolist()] forecast_list = [] for i, ts in enumerate(ts_list): datum = {} datum["date"] = ts - datum[entity_id.split('sensor.')[1]] = vals_list[i] + datum[entity_id.split("sensor.")[1]] = vals_list[i] forecast_list.append(datum) data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, "friendly_name": friendly_name, - list_name: forecast_list - } + list_name: forecast_list, + }, } return data - - def post_data(self, data_df: pd.DataFrame, idx: int, 
entity_id: str, - unit_of_measurement: str, friendly_name: str, - type_var: str, - from_mlforecaster: Optional[bool]=False, - publish_prefix: Optional[str]="") -> None: + + def post_data( + self, + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + type_var: str, + from_mlforecaster: Optional[bool] = False, + publish_prefix: Optional[str] = "", + ) -> None: r""" Post passed data to hass. @@ -286,82 +373,139 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, """ # Add a possible prefix to the entity ID - entity_id = entity_id.replace('sensor.', 'sensor.'+publish_prefix) + entity_id = entity_id.replace("sensor.", "sensor." + publish_prefix) # Set the URL - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/states/"+entity_id - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/states/"+entity_id + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = self.hass_url + "/states/" + entity_id + else: # Otherwise the Home Assistant Core API it is + url = self.hass_url + "api/states/" + entity_id headers = { "Authorization": "Bearer " + self.long_lived_token, "content-type": "application/json", } # Preparing the data dict to be published - if type_var == 'cost_fun': - state = np.round(data_df.sum()[0],2) - elif type_var == 'unit_load_cost' or type_var == 'unit_prod_price': - state = np.round(data_df.loc[data_df.index[idx]],4) - elif type_var == 'optim_status': + if type_var == "cost_fun": + state = np.round(data_df.sum()[0], 2) + elif type_var == "unit_load_cost" or type_var == "unit_prod_price": + state = np.round(data_df.loc[data_df.index[idx]], 4) + elif type_var == "optim_status": state = data_df.loc[data_df.index[idx]] - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": state = data_df[idx] else: - state = np.round(data_df.loc[data_df.index[idx]],2) - if type_var == 'power': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "forecasts", state) - elif type_var == 'deferrable': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "deferrables_schedule", state) - elif type_var == 'batt': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_power", state) - elif type_var == 'SOC': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_soc", state) - elif type_var == 'unit_load_cost': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_load_cost_forecasts", state) - elif type_var == 'unit_prod_price': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_prod_price_forecasts", state) - elif type_var == 'mlforecaster': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "scheduled_forecast", state) - elif type_var == 'optim_status': + state = np.round(data_df.loc[data_df.index[idx]], 2) + if type_var == "power": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "forecasts", + state, + ) + elif type_var == "deferrable": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + 
unit_of_measurement, + friendly_name, + "deferrables_schedule", + state, + ) + elif type_var == "batt": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_power", + state, + ) + elif type_var == "SOC": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_soc", + state, + ) + elif type_var == "unit_load_cost": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_load_cost_forecasts", + state, + ) + elif type_var == "unit_prod_price": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_prod_price_forecasts", + state, + ) + elif type_var == "mlforecaster": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "scheduled_forecast", + state, + ) + elif type_var == "optim_status": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } else: data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } # Actually post the data if self.get_data_from_file: - class response: pass + + class response: + pass + response.status_code = 200 response.ok = True else: response = post(url, headers=headers, data=json.dumps(data)) # Treating the response status and posting them on the logger if response.ok: - self.logger.info("Successfully posted to "+entity_id+" = "+str(state)) + self.logger.info("Successfully posted to " + entity_id + " = " + str(state)) else: - self.logger.info("The status code for received curl command response is: "+str(response.status_code)) + self.logger.info( + "The status code for received curl command response is: " + + str(response.status_code) + ) return response, data diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 4bbac11c..3a2cadd3 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -2,10 +2,19 @@ # -*- coding: utf-8 -*- from typing import Tuple, Optional -import numpy as np, pandas as pd -import yaml, pytz, logging, pathlib, json, copy from datetime import datetime, timedelta, timezone +import logging +import pathlib +import json +import copy +import numpy as np +import pandas as pd +import yaml +import pytz + + import plotly.express as px + pd.options.plotting.backend = "plotly" from emhass.machine_learning_forecaster import MLForecaster @@ -14,13 +23,13 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: """ Get the root absolute path of the working directory. 
- + :param file: The passed file path with __file__ :return: The root path :param num_parent: The number of parents levels up to desired root folder :type num_parent: int, optional :rtype: str - + """ if num_parent == 3: root = pathlib.Path(file).resolve().parent.parent.parent @@ -32,11 +41,16 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: raise ValueError("num_parent value not valid, must be between 1 and 3") return root -def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = True, - logging_level: Optional[str] = "DEBUG") -> Tuple[logging.Logger, logging.StreamHandler]: + +def get_logger( + fun_name: str, + config_path: str, + save_to_file: Optional[bool] = True, + logging_level: Optional[str] = "DEBUG", +) -> Tuple[logging.Logger, logging.StreamHandler]: """ Create a simple logger object. - + :param fun_name: The Python function object name where the logger will be used :type fun_name: str :param config_path: The path to the yaml configuration file @@ -45,14 +59,14 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T :type save_to_file: bool, optional :return: The logger object and the handler :rtype: object - + """ - # create logger object + # create logger object logger = logging.getLogger(fun_name) logger.propagate = True logger.fileSetting = save_to_file if save_to_file: - ch = logging.FileHandler(config_path + '/data/logger_emhass.log') + ch = logging.FileHandler(config_path + "/data/logger_emhass.log") else: ch = logging.StreamHandler() if logging_level == "DEBUG": @@ -70,14 +84,18 @@ def get_logger(fun_name: str, config_path: str, save_to_file: Optional[bool] = T else: logger.setLevel(logging.DEBUG) ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) return logger, ch -def get_forecast_dates(freq: int, delta_forecast: int, - timedelta_days: Optional[int] = 0) -> pd.core.indexes.datetimes.DatetimeIndex: + +def get_forecast_dates( + freq: int, delta_forecast: int, timedelta_days: Optional[int] = 0 +) -> pd.core.indexes.datetimes.DatetimeIndex: """ Get the date_range list of the needed future dates using the delta_forecast parameter. @@ -89,7 +107,7 @@ def get_forecast_dates(freq: int, delta_forecast: int, :type timedelta_days: Optional[int], optional :return: A list of future forecast dates. :rtype: pd.core.indexes.datetimes.DatetimeIndex - + """ freq = pd.to_timedelta(freq, "minutes") start_forecast = pd.Timestamp(datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) @@ -99,11 +117,19 @@ def get_forecast_dates(freq: int, delta_forecast: int, freq=freq).round(freq, ambiguous='infer', nonexistent='shift_forward') return forecast_dates -def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict, - set_type: str, logger: logging.Logger) -> Tuple[str, dict]: + +def treat_runtimeparams( + runtimeparams: str, + params: str, + retrieve_hass_conf: dict, + optim_conf: dict, + plant_conf: dict, + set_type: str, + logger: logging.Logger, +) -> Tuple[str, dict]: """ - Treat the passed optimization runtime parameters. - + Treat the passed optimization runtime parameters. + :param runtimeparams: Json string containing the runtime parameters dict. 
:type runtimeparams: str :param params: Configuration parameters passed from data/options.json @@ -120,115 +146,155 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic :type logger: logging.Logger :return: Returning the params and optimization parameter container. :rtype: Tuple[str, dict] - + """ - if (params != None) and (params != 'null'): + if (params != None) and (params != "null"): params = json.loads(params) else: params = {} # Some default data needed custom_deferrable_forecast_id = [] - for k in range(optim_conf['num_def_loads']): - custom_deferrable_forecast_id.append({ - "entity_id": "sensor.p_deferrable{}".format(k), - "unit_of_measurement": "W", - "friendly_name": "Deferrable Load {}".format(k) - }) - default_passed_dict = {'custom_pv_forecast_id': {"entity_id": "sensor.p_pv_forecast", "unit_of_measurement": "W", "friendly_name": "PV Power Forecast"}, - 'custom_load_forecast_id': {"entity_id": "sensor.p_load_forecast", "unit_of_measurement": "W", "friendly_name": "Load Power Forecast"}, - 'custom_batt_forecast_id': {"entity_id": "sensor.p_batt_forecast", "unit_of_measurement": "W", "friendly_name": "Battery Power Forecast"}, - 'custom_batt_soc_forecast_id': {"entity_id": "sensor.soc_batt_forecast", "unit_of_measurement": "%", "friendly_name": "Battery SOC Forecast"}, - 'custom_grid_forecast_id': {"entity_id": "sensor.p_grid_forecast", "unit_of_measurement": "W", "friendly_name": "Grid Power Forecast"}, - 'custom_cost_fun_id': {"entity_id": "sensor.total_cost_fun_value", "unit_of_measurement": "", "friendly_name": "Total cost function value"}, - 'custom_optim_status_id': {"entity_id": "sensor.optim_status", "unit_of_measurement": "", "friendly_name": "EMHASS optimization status"}, - 'custom_unit_load_cost_id': {"entity_id": "sensor.unit_load_cost", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Load Cost"}, - 'custom_unit_prod_price_id': {"entity_id": "sensor.unit_prod_price", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Prod Price"}, - 'custom_deferrable_forecast_id': custom_deferrable_forecast_id, - 'publish_prefix': ""} - if 'passed_data' in params.keys(): + for k in range(optim_conf["num_def_loads"]): + custom_deferrable_forecast_id.append( + { + "entity_id": "sensor.p_deferrable{}".format(k), + "unit_of_measurement": "W", + "friendly_name": "Deferrable Load {}".format(k), + } + ) + default_passed_dict = { + "custom_pv_forecast_id": { + "entity_id": "sensor.p_pv_forecast", + "unit_of_measurement": "W", + "friendly_name": "PV Power Forecast", + }, + "custom_load_forecast_id": { + "entity_id": "sensor.p_load_forecast", + "unit_of_measurement": "W", + "friendly_name": "Load Power Forecast", + }, + "custom_batt_forecast_id": { + "entity_id": "sensor.p_batt_forecast", + "unit_of_measurement": "W", + "friendly_name": "Battery Power Forecast", + }, + "custom_batt_soc_forecast_id": { + "entity_id": "sensor.soc_batt_forecast", + "unit_of_measurement": "%", + "friendly_name": "Battery SOC Forecast", + }, + "custom_grid_forecast_id": { + "entity_id": "sensor.p_grid_forecast", + "unit_of_measurement": "W", + "friendly_name": "Grid Power Forecast", + }, + "custom_cost_fun_id": { + "entity_id": "sensor.total_cost_fun_value", + "unit_of_measurement": "", + "friendly_name": "Total cost function value", + }, + "custom_optim_status_id": { + "entity_id": "sensor.optim_status", + "unit_of_measurement": "", + "friendly_name": "EMHASS optimization status", + }, + "custom_unit_load_cost_id": { + "entity_id": "sensor.unit_load_cost", + 
"unit_of_measurement": "€/kWh", + "friendly_name": "Unit Load Cost", + }, + "custom_unit_prod_price_id": { + "entity_id": "sensor.unit_prod_price", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Prod Price", + }, + "custom_deferrable_forecast_id": custom_deferrable_forecast_id, + "publish_prefix": "", + } + if "passed_data" in params.keys(): for key, value in default_passed_dict.items(): - params['passed_data'][key] = value + params["passed_data"][key] = value else: - params['passed_data'] = default_passed_dict + params["passed_data"] = default_passed_dict if runtimeparams is not None: runtimeparams = json.loads(runtimeparams) - freq = int(retrieve_hass_conf['freq'].seconds/60.0) - delta_forecast = int(optim_conf['delta_forecast'].days) + freq = int(retrieve_hass_conf["freq"].seconds / 60.0) + delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) if set_type == "regressor-model-fit": - csv_file = runtimeparams['csv_file'] - independent_variables = runtimeparams['independent_variables'] - dependent_variable = runtimeparams['dependent_variable'] - params['passed_data']['csv_file'] = csv_file - params['passed_data']['independent_variables'] = independent_variables - params['passed_data']['dependent_variable'] = dependent_variable - if 'timestamp' not in runtimeparams.keys(): - params['passed_data']['timestamp'] = None + csv_file = runtimeparams["csv_file"] + features = runtimeparams["features"] + target = runtimeparams["target"] + params["passed_data"]["csv_file"] = csv_file + params["passed_data"]["features"] = features + params["passed_data"]["target"] = target + if "timestamp" not in runtimeparams.keys(): + params["passed_data"]["timestamp"] = None else: - timestamp = runtimeparams['timestamp'] - params['passed_data']['timestamp'] = timestamp - if 'date_features' not in runtimeparams.keys(): - params['passed_data']['date_features'] = [] + timestamp = runtimeparams["timestamp"] + params["passed_data"]["timestamp"] = timestamp + if "date_features" not in runtimeparams.keys(): + params["passed_data"]["date_features"] = [] else: - date_features = runtimeparams['date_features'] - params['passed_data']['date_features'] = date_features - + date_features = runtimeparams["date_features"] + params["passed_data"]["date_features"] = date_features + if set_type == "regressor-model-predict": - new_values = runtimeparams['new_values'] - params['passed_data']['new_values'] = new_values + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values # Treating special data passed for MPC control case - if set_type == 'naive-mpc-optim': - if 'prediction_horizon' not in runtimeparams.keys(): - prediction_horizon = 10 # 10 time steps by default + if set_type == "naive-mpc-optim": + if "prediction_horizon" not in runtimeparams.keys(): + prediction_horizon = 10 # 10 time steps by default else: - prediction_horizon = runtimeparams['prediction_horizon'] - params['passed_data']['prediction_horizon'] = prediction_horizon - if 'soc_init' not in runtimeparams.keys(): - soc_init = plant_conf['SOCtarget'] + prediction_horizon = runtimeparams["prediction_horizon"] + params["passed_data"]["prediction_horizon"] = prediction_horizon + if "soc_init" not in runtimeparams.keys(): + soc_init = plant_conf["SOCtarget"] else: - soc_init = runtimeparams['soc_init'] - params['passed_data']['soc_init'] = soc_init - if 'soc_final' not in runtimeparams.keys(): - soc_final = plant_conf['SOCtarget'] + soc_init = 
runtimeparams["soc_init"] + params["passed_data"]["soc_init"] = soc_init + if "soc_final" not in runtimeparams.keys(): + soc_final = plant_conf["SOCtarget"] else: - soc_final = runtimeparams['soc_final'] - params['passed_data']['soc_final'] = soc_final - if 'def_total_hours' not in runtimeparams.keys(): - def_total_hours = optim_conf['def_total_hours'] + soc_final = runtimeparams["soc_final"] + params["passed_data"]["soc_final"] = soc_final + if "def_total_hours" not in runtimeparams.keys(): + def_total_hours = optim_conf["def_total_hours"] else: - def_total_hours = runtimeparams['def_total_hours'] - params['passed_data']['def_total_hours'] = def_total_hours - if 'def_start_timestep' not in runtimeparams.keys(): - def_start_timestep = optim_conf['def_start_timestep'] + def_total_hours = runtimeparams["def_total_hours"] + params["passed_data"]["def_total_hours"] = def_total_hours + if "def_start_timestep" not in runtimeparams.keys(): + def_start_timestep = optim_conf["def_start_timestep"] else: - def_start_timestep = runtimeparams['def_start_timestep'] - params['passed_data']['def_start_timestep'] = def_start_timestep - if 'def_end_timestep' not in runtimeparams.keys(): - def_end_timestep = optim_conf['def_end_timestep'] + def_start_timestep = runtimeparams["def_start_timestep"] + params["passed_data"]["def_start_timestep"] = def_start_timestep + if "def_end_timestep" not in runtimeparams.keys(): + def_end_timestep = optim_conf["def_end_timestep"] else: - def_end_timestep = runtimeparams['def_end_timestep'] - params['passed_data']['def_end_timestep'] = def_end_timestep - if 'alpha' not in runtimeparams.keys(): + def_end_timestep = runtimeparams["def_end_timestep"] + params["passed_data"]["def_end_timestep"] = def_end_timestep + if "alpha" not in runtimeparams.keys(): alpha = 0.5 else: - alpha = runtimeparams['alpha'] - params['passed_data']['alpha'] = alpha - if 'beta' not in runtimeparams.keys(): + alpha = runtimeparams["alpha"] + params["passed_data"]["alpha"] = alpha + if "beta" not in runtimeparams.keys(): beta = 0.5 else: - beta = runtimeparams['beta'] - params['passed_data']['beta'] = beta + beta = runtimeparams["beta"] + params["passed_data"]["beta"] = beta forecast_dates = copy.deepcopy(forecast_dates)[0:prediction_horizon] else: - params['passed_data']['prediction_horizon'] = None - params['passed_data']['soc_init'] = None - params['passed_data']['soc_final'] = None - params['passed_data']['def_total_hours'] = None - params['passed_data']['def_start_timestep'] = None - params['passed_data']['def_end_timestep'] = None - params['passed_data']['alpha'] = None - params['passed_data']['beta'] = None + params["passed_data"]["prediction_horizon"] = None + params["passed_data"]["soc_init"] = None + params["passed_data"]["soc_final"] = None + params["passed_data"]["def_total_hours"] = None + params["passed_data"]["def_start_timestep"] = None + params["passed_data"]["def_end_timestep"] = None + params["passed_data"]["alpha"] = None + params["passed_data"]["beta"] = None # Treat passed forecast data lists list_forecast_key = ['pv_power_forecast', 'load_power_forecast', 'load_cost_forecast', 'prod_price_forecast'] forecast_methods = ['weather_forecast_method', 'load_forecast_method', 'load_cost_forecast_method', 'prod_price_forecast_method'] @@ -248,143 +314,188 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: params['passed_data'][forecast_key] = None # Treat passed data for forecast model fit/predict/tune at runtime - if 'days_to_retrieve' not in 
runtimeparams.keys(): + if "days_to_retrieve" not in runtimeparams.keys(): days_to_retrieve = 9 else: - days_to_retrieve = runtimeparams['days_to_retrieve'] - params['passed_data']['days_to_retrieve'] = days_to_retrieve - if 'model_type' not in runtimeparams.keys(): + days_to_retrieve = runtimeparams["days_to_retrieve"] + params["passed_data"]["days_to_retrieve"] = days_to_retrieve + if "model_type" not in runtimeparams.keys(): model_type = "load_forecast" else: - model_type = runtimeparams['model_type'] - params['passed_data']['model_type'] = model_type - if 'var_model' not in runtimeparams.keys(): + model_type = runtimeparams["model_type"] + params["passed_data"]["model_type"] = model_type + if "var_model" not in runtimeparams.keys(): var_model = "sensor.power_load_no_var_loads" else: - var_model = runtimeparams['var_model'] - params['passed_data']['var_model'] = var_model - if 'sklearn_model' not in runtimeparams.keys(): + var_model = runtimeparams["var_model"] + params["passed_data"]["var_model"] = var_model + if "sklearn_model" not in runtimeparams.keys(): sklearn_model = "KNeighborsRegressor" else: - sklearn_model = runtimeparams['sklearn_model'] - params['passed_data']['sklearn_model'] = sklearn_model - if 'num_lags' not in runtimeparams.keys(): + sklearn_model = runtimeparams["sklearn_model"] + params["passed_data"]["sklearn_model"] = sklearn_model + if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: - num_lags = runtimeparams['num_lags'] - params['passed_data']['num_lags'] = num_lags - if 'split_date_delta' not in runtimeparams.keys(): - split_date_delta = '48h' + num_lags = runtimeparams["num_lags"] + params["passed_data"]["num_lags"] = num_lags + if "split_date_delta" not in runtimeparams.keys(): + split_date_delta = "48h" else: - split_date_delta = runtimeparams['split_date_delta'] - params['passed_data']['split_date_delta'] = split_date_delta - if 'perform_backtest' not in runtimeparams.keys(): + split_date_delta = runtimeparams["split_date_delta"] + params["passed_data"]["split_date_delta"] = split_date_delta + if "perform_backtest" not in runtimeparams.keys(): perform_backtest = False else: - perform_backtest = eval(str(runtimeparams['perform_backtest']).capitalize()) - params['passed_data']['perform_backtest'] = perform_backtest - if 'model_predict_publish' not in runtimeparams.keys(): + perform_backtest = eval(str(runtimeparams["perform_backtest"]).capitalize()) + params["passed_data"]["perform_backtest"] = perform_backtest + if "model_predict_publish" not in runtimeparams.keys(): model_predict_publish = False else: - model_predict_publish = eval(str(runtimeparams['model_predict_publish']).capitalize()) - params['passed_data']['model_predict_publish'] = model_predict_publish - if 'model_predict_entity_id' not in runtimeparams.keys(): + model_predict_publish = eval( + str(runtimeparams["model_predict_publish"]).capitalize() + ) + params["passed_data"]["model_predict_publish"] = model_predict_publish + if "model_predict_entity_id" not in runtimeparams.keys(): model_predict_entity_id = "sensor.p_load_forecast_custom_model" else: - model_predict_entity_id = runtimeparams['model_predict_entity_id'] - params['passed_data']['model_predict_entity_id'] = model_predict_entity_id - if 'model_predict_unit_of_measurement' not in runtimeparams.keys(): + model_predict_entity_id = runtimeparams["model_predict_entity_id"] + params["passed_data"]["model_predict_entity_id"] = model_predict_entity_id + if "model_predict_unit_of_measurement" not in runtimeparams.keys(): 
model_predict_unit_of_measurement = "W" else: - model_predict_unit_of_measurement = runtimeparams['model_predict_unit_of_measurement'] - params['passed_data']['model_predict_unit_of_measurement'] = model_predict_unit_of_measurement - if 'model_predict_friendly_name' not in runtimeparams.keys(): + model_predict_unit_of_measurement = runtimeparams[ + "model_predict_unit_of_measurement" + ] + params["passed_data"][ + "model_predict_unit_of_measurement" + ] = model_predict_unit_of_measurement + if "model_predict_friendly_name" not in runtimeparams.keys(): model_predict_friendly_name = "Load Power Forecast custom ML model" else: - model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] - params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'mlr_predict_entity_id' not in runtimeparams.keys(): + model_predict_friendly_name = runtimeparams["model_predict_friendly_name"] + params["passed_data"][ + "model_predict_friendly_name" + ] = model_predict_friendly_name + if "mlr_predict_entity_id" not in runtimeparams.keys(): mlr_predict_entity_id = "sensor.mlr_predict" else: - mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] - params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id - if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_entity_id = runtimeparams["mlr_predict_entity_id"] + params["passed_data"]["mlr_predict_entity_id"] = mlr_predict_entity_id + if "mlr_predict_unit_of_measurement" not in runtimeparams.keys(): mlr_predict_unit_of_measurement = None else: - mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] - params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement - if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = runtimeparams[ + "mlr_predict_unit_of_measurement" + ] + params["passed_data"][ + "mlr_predict_unit_of_measurement" + ] = mlr_predict_unit_of_measurement + if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = 
runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant configuration parameters passed at runtime - if 'SOCtarget' in runtimeparams.keys(): - plant_conf['SOCtarget'] = runtimeparams['SOCtarget'] + if "SOCtarget" in runtimeparams.keys(): + plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] # Treat custom entities id's and friendly names for variables - if 'custom_pv_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_pv_forecast_id'] = runtimeparams['custom_pv_forecast_id'] - if 'custom_load_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_load_forecast_id'] = runtimeparams['custom_load_forecast_id'] - if 'custom_batt_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_forecast_id'] = runtimeparams['custom_batt_forecast_id'] - if 'custom_batt_soc_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_soc_forecast_id'] = runtimeparams['custom_batt_soc_forecast_id'] - if 'custom_grid_forecast_id' in runtimeparams.keys(): - 
params['passed_data']['custom_grid_forecast_id'] = runtimeparams['custom_grid_forecast_id'] - if 'custom_cost_fun_id' in runtimeparams.keys(): - params['passed_data']['custom_cost_fun_id'] = runtimeparams['custom_cost_fun_id'] - if 'custom_optim_status_id' in runtimeparams.keys(): - params['passed_data']['custom_optim_status_id'] = runtimeparams['custom_optim_status_id'] - if 'custom_unit_load_cost_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_load_cost_id'] = runtimeparams['custom_unit_load_cost_id'] - if 'custom_unit_prod_price_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_prod_price_id'] = runtimeparams['custom_unit_prod_price_id'] - if 'custom_deferrable_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_deferrable_forecast_id'] = runtimeparams['custom_deferrable_forecast_id'] + if "custom_pv_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_pv_forecast_id"] = runtimeparams[ + "custom_pv_forecast_id" + ] + if "custom_load_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_load_forecast_id"] = runtimeparams[ + "custom_load_forecast_id" + ] + if "custom_batt_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_forecast_id"] = runtimeparams[ + "custom_batt_forecast_id" + ] + if "custom_batt_soc_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_soc_forecast_id"] = runtimeparams[ + "custom_batt_soc_forecast_id" + ] + if "custom_grid_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_grid_forecast_id"] = runtimeparams[ + "custom_grid_forecast_id" + ] + if "custom_cost_fun_id" in runtimeparams.keys(): + params["passed_data"]["custom_cost_fun_id"] = runtimeparams[ + "custom_cost_fun_id" + ] + if "custom_optim_status_id" in runtimeparams.keys(): + params["passed_data"]["custom_optim_status_id"] = runtimeparams[ + "custom_optim_status_id" + ] + if "custom_unit_load_cost_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_load_cost_id"] = runtimeparams[ + "custom_unit_load_cost_id" + ] + if "custom_unit_prod_price_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_prod_price_id"] = runtimeparams[ + "custom_unit_prod_price_id" + ] + if "custom_deferrable_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_deferrable_forecast_id"] = runtimeparams[ + "custom_deferrable_forecast_id" + ] # A condition to put a prefix on all published data - if 'publish_prefix' not in runtimeparams.keys(): + if "publish_prefix" not in runtimeparams.keys(): publish_prefix = "" else: - publish_prefix = runtimeparams['publish_prefix'] - params['passed_data']['publish_prefix'] = publish_prefix + publish_prefix = runtimeparams["publish_prefix"] + params["passed_data"]["publish_prefix"] = publish_prefix # Serialize the final params params = json.dumps(params) return params, retrieve_hass_conf, optim_conf, plant_conf -def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, - params: Optional[str] = None) -> Tuple[dict, dict, dict]: + +def get_yaml_parse( + config_path: str, use_secrets: Optional[bool] = True, params: Optional[str] = None +) -> Tuple[dict, dict, dict]: """ Perform parsing of the config.yaml file. - + :param config_path: The path to the yaml configuration file :type config_path: str :param use_secrets: Indicate if we should use a secrets file or not. 
@@ -398,49 +509,54 @@ def get_yaml_parse(config_path: str, use_secrets: Optional[bool] = True, """ base = config_path.parent if params is None: - with open(config_path, 'r') as file: + with open(config_path, "r") as file: input_conf = yaml.load(file, Loader=yaml.FullLoader) else: input_conf = json.loads(params) if use_secrets: if params is None: - with open(base / 'secrets_emhass.yaml', 'r') as file: + with open(base / "secrets_emhass.yaml", "r") as file: input_secrets = yaml.load(file, Loader=yaml.FullLoader) else: - input_secrets = input_conf.pop('params_secrets', None) - - if (type(input_conf['retrieve_hass_conf']) == list): #if using old config version - retrieve_hass_conf = dict({key:d[key] for d in input_conf['retrieve_hass_conf'] for key in d}) + input_secrets = input_conf.pop("params_secrets", None) + + if type(input_conf["retrieve_hass_conf"]) == list: # if using old config version + retrieve_hass_conf = dict( + {key: d[key] for d in input_conf["retrieve_hass_conf"] for key in d} + ) else: - retrieve_hass_conf = input_conf.get('retrieve_hass_conf', {}) - + retrieve_hass_conf = input_conf.get("retrieve_hass_conf", {}) + if use_secrets: retrieve_hass_conf.update(input_secrets) else: - retrieve_hass_conf['hass_url'] = 'http://supervisor/core/api' - retrieve_hass_conf['long_lived_token'] = '${SUPERVISOR_TOKEN}' - retrieve_hass_conf['time_zone'] = 'Europe/Paris' - retrieve_hass_conf['lat'] = 45.83 - retrieve_hass_conf['lon'] = 6.86 - retrieve_hass_conf['alt'] = 4807.8 - retrieve_hass_conf['freq'] = pd.to_timedelta(retrieve_hass_conf['freq'], "minutes") - retrieve_hass_conf['time_zone'] = pytz.timezone(retrieve_hass_conf['time_zone']) - - if (type(input_conf['optim_conf']) == list): - optim_conf = dict({key:d[key] for d in input_conf['optim_conf'] for key in d}) + retrieve_hass_conf["hass_url"] = "http://supervisor/core/api" + retrieve_hass_conf["long_lived_token"] = "${SUPERVISOR_TOKEN}" + retrieve_hass_conf["time_zone"] = "Europe/Paris" + retrieve_hass_conf["lat"] = 45.83 + retrieve_hass_conf["lon"] = 6.86 + retrieve_hass_conf["alt"] = 4807.8 + retrieve_hass_conf["freq"] = pd.to_timedelta(retrieve_hass_conf["freq"], "minutes") + retrieve_hass_conf["time_zone"] = pytz.timezone(retrieve_hass_conf["time_zone"]) + + if type(input_conf["optim_conf"]) == list: + optim_conf = dict({key: d[key] for d in input_conf["optim_conf"] for key in d}) else: - optim_conf = input_conf.get('optim_conf', {}) + optim_conf = input_conf.get("optim_conf", {}) + + optim_conf["list_hp_periods"] = dict( + (key, d[key]) for d in optim_conf["list_hp_periods"] for key in d + ) + optim_conf["delta_forecast"] = pd.Timedelta(days=optim_conf["delta_forecast"]) - optim_conf['list_hp_periods'] = dict((key,d[key]) for d in optim_conf['list_hp_periods'] for key in d) - optim_conf['delta_forecast'] = pd.Timedelta(days=optim_conf['delta_forecast']) - - if (type(input_conf['plant_conf']) == list): - plant_conf = dict({key:d[key] for d in input_conf['plant_conf'] for key in d}) + if type(input_conf["plant_conf"]) == list: + plant_conf = dict({key: d[key] for d in input_conf["plant_conf"] for key in d}) else: - plant_conf = input_conf.get('plant_conf', {}) - + plant_conf = input_conf.get("plant_conf", {}) + return retrieve_hass_conf, optim_conf, plant_conf + def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dict: """ Build a dictionary with graphs and tables for the webui. 
@@ -451,61 +567,86 @@ def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dic :type plot_size: Optional[int], optional :return: A dictionary containing the graphs and tables in html format :rtype: dict - + """ - cols_p = [i for i in df.columns.to_list() if 'P_' in i] + cols_p = [i for i in df.columns.to_list() if "P_" in i] # Let's round the data in the DF - optim_status = df['optim_status'].unique().item() - df.drop('optim_status', axis=1, inplace=True) - cols_else = [i for i in df.columns.to_list() if 'P_' not in i] + optim_status = df["optim_status"].unique().item() + df.drop("optim_status", axis=1, inplace=True) + cols_else = [i for i in df.columns.to_list() if "P_" not in i] df = df.apply(pd.to_numeric) df[cols_p] = df[cols_p].astype(int) df[cols_else] = df[cols_else].round(3) # Create plots n_colors = len(cols_p) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_0 = px.line(df[cols_p], title='Systems powers schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_0.update_layout(xaxis_title='Timestamp', yaxis_title='System powers (W)') - if 'SOC_opt' in df.columns.to_list(): - fig_1 = px.line(df['SOC_opt'], title='Battery state of charge schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_1.update_layout(xaxis_title='Timestamp', yaxis_title='Battery SOC (%)') - cols_cost = [i for i in df.columns.to_list() if 'cost_' in i or 'unit_' in i] + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_0 = px.line( + df[cols_p], + title="Systems powers schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_0.update_layout(xaxis_title="Timestamp", yaxis_title="System powers (W)") + if "SOC_opt" in df.columns.to_list(): + fig_1 = px.line( + df["SOC_opt"], + title="Battery state of charge schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_1.update_layout(xaxis_title="Timestamp", yaxis_title="Battery SOC (%)") + cols_cost = [i for i in df.columns.to_list() if "cost_" in i or "unit_" in i] n_colors = len(cols_cost) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_2 = px.line(df[cols_cost], title='Systems costs obtained from optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_2.update_layout(xaxis_title='Timestamp', yaxis_title='System costs (currency)') + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_2 = px.line( + df[cols_cost], + title="Systems costs obtained from optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_2.update_layout(xaxis_title="Timestamp", yaxis_title="System costs (currency)") # Get full path to image - image_path_0 = fig_0.to_html(full_html=False, default_width='75%') - if 'SOC_opt' in df.columns.to_list(): - image_path_1 = fig_1.to_html(full_html=False, default_width='75%') - image_path_2 = fig_2.to_html(full_html=False, default_width='75%') + image_path_0 = fig_0.to_html(full_html=False, default_width="75%") + if "SOC_opt" in df.columns.to_list(): + image_path_1 = fig_1.to_html(full_html=False, default_width="75%") + image_path_2 = fig_2.to_html(full_html=False, 
default_width="75%") # The tables - table1 = df.reset_index().to_html(classes='mystyle', index=False) - cost_cols = [i for i in df.columns if 'cost_' in i] + table1 = df.reset_index().to_html(classes="mystyle", index=False) + cost_cols = [i for i in df.columns if "cost_" in i] table2 = df[cost_cols].reset_index().sum(numeric_only=True) - table2['optim_status'] = optim_status - table2 = table2.to_frame(name='Value').reset_index(names='Variable').to_html(classes='mystyle', index=False) + table2["optim_status"] = optim_status + table2 = ( + table2.to_frame(name="Value") + .reset_index(names="Variable") + .to_html(classes="mystyle", index=False) + ) # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>EMHASS optimization results</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting latest optimization results</h4>'
-    injection_dict['figure_0'] = image_path_0
-    if 'SOC_opt' in df.columns.to_list():
-        injection_dict['figure_1'] = image_path_1
-    injection_dict['figure_2'] = image_path_2
-    injection_dict['subsubtitle1'] = '<h4>Last run optimization results table</h4>'
-    injection_dict['table1'] = table1
-    injection_dict['subsubtitle2'] = '<h4>Summary table for latest optimization results</h4>'
-    injection_dict['table2'] = table2
+    injection_dict["title"] = "<h2>EMHASS optimization results</h2>"
+    injection_dict["subsubtitle0"] = "<h4>Plotting latest optimization results</h4>"
+    injection_dict["figure_0"] = image_path_0
+    if "SOC_opt" in df.columns.to_list():
+        injection_dict["figure_1"] = image_path_1
+    injection_dict["figure_2"] = image_path_2
+    injection_dict["subsubtitle1"] = "<h4>Last run optimization results table</h4>"
+    injection_dict["table1"] = table1
+    injection_dict["subsubtitle2"] = (
+        "<h4>Summary table for latest optimization results</h4>"
+    )
+    injection_dict["table2"] = table2
     return injection_dict

-def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_fit(
+    df_fit_pred: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF fit case.

@@ -517,19 +658,26 @@ def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLFore
     :rtype: dict
     """
     fig = df_fit_pred.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model fit</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting train/test forecast model results for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model fit</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Plotting train/test forecast model results for " + mlf.model_type + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict

-def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_tune(
+    df_pred_optim: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF tune case.

@@ -541,19 +689,32 @@ def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLF
     :rtype: dict
     """
     fig = df_pred_optim.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model tune</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Performed a tuning routine using bayesian optimization for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model tune</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Performed a tuning routine using bayesian optimization for "
+        + mlf.model_type
+        + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict

-def build_params(params: dict, params_secrets: dict, options: dict, addon: int, logger: logging.Logger) -> dict:
+
+def build_params(
+    params: dict,
+    params_secrets: dict,
+    options: dict,
+    addon: int,
+    logger: logging.Logger,
+) -> dict:
     """
     Build the main params dictionary from the loaded options.json when using the add-on.

@@ -572,45 +733,120 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int,
     """
     if addon == 1:
         # Updating variables in retrieve_hass_conf
-        params['retrieve_hass_conf']['freq'] = options.get('optimization_time_step',params['retrieve_hass_conf']['freq'])
-        params['retrieve_hass_conf']['days_to_retrieve'] = options.get('historic_days_to_retrieve',params['retrieve_hass_conf']['days_to_retrieve'])
-        params['retrieve_hass_conf']['var_PV'] = options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV'])
-        params['retrieve_hass_conf']['var_load'] = options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])
-        params['retrieve_hass_conf']['load_negative'] = options.get('load_negative',params['retrieve_hass_conf']['load_negative'])
-        params['retrieve_hass_conf']['set_zero_min'] = options.get('set_zero_min',params['retrieve_hass_conf']['set_zero_min'])
-        params['retrieve_hass_conf']['var_replace_zero'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_replace_zero'])]
-        params['retrieve_hass_conf']['var_interp'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']), options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])]
-        params['retrieve_hass_conf']['method_ts_round'] = options.get('method_ts_round',params['retrieve_hass_conf']['method_ts_round'])
+        params["retrieve_hass_conf"]["freq"] = options.get(
+            "optimization_time_step", params["retrieve_hass_conf"]["freq"]
+        )
+        params["retrieve_hass_conf"]["days_to_retrieve"] = options.get(
+            "historic_days_to_retrieve",
+            params["retrieve_hass_conf"]["days_to_retrieve"],
+        )
+        params["retrieve_hass_conf"]["var_PV"] = options.get(
+            "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+        )
+        params["retrieve_hass_conf"]["var_load"] = options.get(
+            "sensor_power_load_no_var_loads", params["retrieve_hass_conf"]["var_load"]
+        )
+        params["retrieve_hass_conf"]["load_negative"] = options.get(
+            "load_negative", params["retrieve_hass_conf"]["load_negative"]
+        )
+        params["retrieve_hass_conf"]["set_zero_min"] = options.get(
+            "set_zero_min", params["retrieve_hass_conf"]["set_zero_min"]
+        )
+        params["retrieve_hass_conf"]["var_replace_zero"] = [
+            options.get(
+                "sensor_power_photovoltaics",
+                params["retrieve_hass_conf"]["var_replace_zero"],
+            )
+        ]
+        params["retrieve_hass_conf"]["var_interp"] = [
+            options.get(
+                "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+            ),
+            options.get(
+                "sensor_power_load_no_var_loads",
+                params["retrieve_hass_conf"]["var_load"],
+            ),
+        ]
+        params["retrieve_hass_conf"]["method_ts_round"] = options.get(
+            "method_ts_round", params["retrieve_hass_conf"]["method_ts_round"]
+        )
        # Update params Secrets if specified
-        params['params_secrets'] = params_secrets
-        params['params_secrets']['time_zone'] = options.get('time_zone',params_secrets['time_zone'])
-        params['params_secrets']['lat'] = options.get('Latitude',params_secrets['lat'])
-        params['params_secrets']['lon'] = options.get('Longitude',params_secrets['lon'])
-        params['params_secrets']['alt'] = 
options.get('Altitude',params_secrets['alt']) + params["params_secrets"] = params_secrets + params["params_secrets"]["time_zone"] = options.get( + "time_zone", params_secrets["time_zone"] + ) + params["params_secrets"]["lat"] = options.get("Latitude", params_secrets["lat"]) + params["params_secrets"]["lon"] = options.get( + "Longitude", params_secrets["lon"] + ) + params["params_secrets"]["alt"] = options.get("Altitude", params_secrets["alt"]) # Updating variables in optim_conf - params['optim_conf']['set_use_battery'] = options.get('set_use_battery',params['optim_conf']['set_use_battery']) - params['optim_conf']['num_def_loads'] = options.get('number_of_deferrable_loads',params['optim_conf']['num_def_loads']) - if options.get('list_nominal_power_of_deferrable_loads',None) != None: - params['optim_conf']['P_deferrable_nom'] = [i['nominal_power_of_deferrable_loads'] for i in options.get('list_nominal_power_of_deferrable_loads')] - if options.get('list_operating_hours_of_each_deferrable_load',None) != None: - params['optim_conf']['def_total_hours'] = [i['operating_hours_of_each_deferrable_load'] for i in options.get('list_operating_hours_of_each_deferrable_load')] - if options.get('list_treat_deferrable_load_as_semi_cont',None) != None: - params['optim_conf']['treat_def_as_semi_cont'] = [i['treat_deferrable_load_as_semi_cont'] for i in options.get('list_treat_deferrable_load_as_semi_cont')] - params['optim_conf']['weather_forecast_method'] = options.get('weather_forecast_method',params['optim_conf']['weather_forecast_method']) + params["optim_conf"]["set_use_battery"] = options.get( + "set_use_battery", params["optim_conf"]["set_use_battery"] + ) + params["optim_conf"]["num_def_loads"] = options.get( + "number_of_deferrable_loads", params["optim_conf"]["num_def_loads"] + ) + if options.get("list_nominal_power_of_deferrable_loads", None) != None: + params["optim_conf"]["P_deferrable_nom"] = [ + i["nominal_power_of_deferrable_loads"] + for i in options.get("list_nominal_power_of_deferrable_loads") + ] + if options.get("list_operating_hours_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_total_hours"] = [ + i["operating_hours_of_each_deferrable_load"] + for i in options.get("list_operating_hours_of_each_deferrable_load") + ] + if options.get("list_treat_deferrable_load_as_semi_cont", None) != None: + params["optim_conf"]["treat_def_as_semi_cont"] = [ + i["treat_deferrable_load_as_semi_cont"] + for i in options.get("list_treat_deferrable_load_as_semi_cont") + ] + params["optim_conf"]["weather_forecast_method"] = options.get( + "weather_forecast_method", params["optim_conf"]["weather_forecast_method"] + ) # Update optional param secrets - if params['optim_conf']['weather_forecast_method'] == "solcast": - params['params_secrets']['solcast_api_key'] = options.get('optional_solcast_api_key',params_secrets.get('solcast_api_key',"123456")) - params['params_secrets']['solcast_rooftop_id'] = options.get('optional_solcast_rooftop_id',params_secrets.get('solcast_rooftop_id',"123456")) - elif params['optim_conf']['weather_forecast_method'] == "solar.forecast": - params['params_secrets']['solar_forecast_kwp'] = options.get('optional_solar_forecast_kwp',params_secrets.get('solar_forecast_kwp',5)) - params['optim_conf']['load_forecast_method'] = options.get('load_forecast_method',params['optim_conf']['load_forecast_method']) - params['optim_conf']['delta_forecast'] = options.get('delta_forecast_daily',params['optim_conf']['delta_forecast']) - 
params['optim_conf']['load_cost_forecast_method'] = options.get('load_cost_forecast_method',params['optim_conf']['load_cost_forecast_method']) - if options.get('list_set_deferrable_load_single_constant',None) != None: - params['optim_conf']['set_def_constant'] = [i['set_deferrable_load_single_constant'] for i in options.get('list_set_deferrable_load_single_constant')] - if options.get('list_peak_hours_periods_start_hours',None) != None and options.get('list_peak_hours_periods_end_hours',None) != None: - start_hours_list = [i['peak_hours_periods_start_hours'] for i in options['list_peak_hours_periods_start_hours']] - end_hours_list = [i['peak_hours_periods_end_hours'] for i in options['list_peak_hours_periods_end_hours']] + if params["optim_conf"]["weather_forecast_method"] == "solcast": + params["params_secrets"]["solcast_api_key"] = options.get( + "optional_solcast_api_key", + params_secrets.get("solcast_api_key", "123456"), + ) + params["params_secrets"]["solcast_rooftop_id"] = options.get( + "optional_solcast_rooftop_id", + params_secrets.get("solcast_rooftop_id", "123456"), + ) + elif params["optim_conf"]["weather_forecast_method"] == "solar.forecast": + params["params_secrets"]["solar_forecast_kwp"] = options.get( + "optional_solar_forecast_kwp", + params_secrets.get("solar_forecast_kwp", 5), + ) + params["optim_conf"]["load_forecast_method"] = options.get( + "load_forecast_method", params["optim_conf"]["load_forecast_method"] + ) + params["optim_conf"]["delta_forecast"] = options.get( + "delta_forecast_daily", params["optim_conf"]["delta_forecast"] + ) + params["optim_conf"]["load_cost_forecast_method"] = options.get( + "load_cost_forecast_method", + params["optim_conf"]["load_cost_forecast_method"], + ) + if options.get("list_set_deferrable_load_single_constant", None) != None: + params["optim_conf"]["set_def_constant"] = [ + i["set_deferrable_load_single_constant"] + for i in options.get("list_set_deferrable_load_single_constant") + ] + if ( + options.get("list_peak_hours_periods_start_hours", None) != None + and options.get("list_peak_hours_periods_end_hours", None) != None + ): + start_hours_list = [ + i["peak_hours_periods_start_hours"] + for i in options["list_peak_hours_periods_start_hours"] + ] + end_hours_list = [ + i["peak_hours_periods_end_hours"] + for i in options["list_peak_hours_periods_end_hours"] + ] num_peak_hours = len(start_hours_list) list_hp_periods_list = [{'period_hp_'+str(i+1):[{'start':start_hours_list[i]},{'end':end_hours_list[i]}]} for i in range(num_peak_hours)] params['optim_conf']['list_hp_periods'] = list_hp_periods_list @@ -682,20 +918,35 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, for x in range(len(params['optim_conf']['P_deferrable_nom']), params['optim_conf']['num_def_loads']): params['optim_conf']['P_deferrable_nom'].append(0) # days_to_retrieve should be no less then 2 - if params['retrieve_hass_conf']['days_to_retrieve'] < 2: - params['retrieve_hass_conf']['days_to_retrieve'] = 2 - logger.warning("days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history") + if params["retrieve_hass_conf"]["days_to_retrieve"] < 2: + params["retrieve_hass_conf"]["days_to_retrieve"] = 2 + logger.warning( + "days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. 
Make sure your sensors also have at least 2 days of history" + ) else: - params['params_secrets'] = params_secrets + params["params_secrets"] = params_secrets # The params dict - params['passed_data'] = {'pv_power_forecast':None,'load_power_forecast':None,'load_cost_forecast':None,'prod_price_forecast':None, - 'prediction_horizon':None,'soc_init':None,'soc_final':None,'def_total_hours':None,'def_start_timestep':None,'def_end_timestep':None,'alpha':None,'beta':None} + params["passed_data"] = { + "pv_power_forecast": None, + "load_power_forecast": None, + "load_cost_forecast": None, + "prod_price_forecast": None, + "prediction_horizon": None, + "soc_init": None, + "soc_final": None, + "def_total_hours": None, + "def_start_timestep": None, + "def_end_timestep": None, + "alpha": None, + "beta": None, + } return params + def get_days_list(days_to_retrieve: int) -> pd.date_range: """ Get list of past days from today to days_to_retrieve. - + :param days_to_retrieve: Total number of days to retrieve from the past :type days_to_retrieve: int :return: The list of days @@ -704,19 +955,20 @@ def get_days_list(days_to_retrieve: int) -> pd.date_range: """ today = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0) d = (today - timedelta(days=days_to_retrieve)).isoformat() - days_list = pd.date_range(start=d, end=today.isoformat(), freq='D') - + days_list = pd.date_range(start=d, end=today.isoformat(), freq="D") + return days_list + def set_df_index_freq(df: pd.DataFrame) -> pd.DataFrame: """ Set the freq of a DataFrame DateTimeIndex. - + :param df: Input DataFrame :type df: pd.DataFrame :return: Input DataFrame with freq defined :rtype: pd.DataFrame - + """ idx_diff = np.diff(df.index) sampling = pd.to_timedelta(np.median(idx_diff)) From b15914dfb0893f0a9c71d386474475b43d8bcf20 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 14:55:30 +0100 Subject: [PATCH 044/111] sklearn-model -> regression-model --- src/emhass/command_line.py | 4 ++-- src/emhass/utils.py | 5 +++++ src/emhass/web_server.py | 8 ++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index b4a9050c..1706d34c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -657,7 +657,7 @@ def regressor_model_fit( """ data = copy.deepcopy(input_data_dict["df_input_data"]) model_type = input_data_dict["params"]["passed_data"]["model_type"] - sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] features = input_data_dict["params"]["passed_data"]["features"] target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] @@ -665,7 +665,7 @@ def regressor_model_fit( root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( - data, model_type, sklearn_model, features, target, timestamp, logger + data, model_type, regression_model, features, target, timestamp, logger ) # Fit the ML model mlr.fit(date_features=date_features) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 3a2cadd3..836f1085 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -334,6 +334,11 @@ def treat_runtimeparams( else: sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model + if "regression_model" not in runtimeparams.keys(): + regression_model = "LinearRegression" + else: + regression_model = 
runtimeparams["regression_model"] + params["passed_data"]["regression_model"] = regression_model if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index e72022fe..39afe939 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -195,14 +195,14 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a regressor fit...") + app.logger.info(" >> Performing a machine learning regressor fit...") regressor_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-fit executed... \n' + msg = f'EMHASS >> Action regressor-model-fit executed... \n' return make_response(msg, 201) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a regressor predict...") + app.logger.info(" >> Performing a machine learning regressor predict...") regressor_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-predict executed... \n' + msg = f'EMHASS >> Action regressor-model-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 9807ed6db2be141a99bbe93e7877bd8a66019a31 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 15:18:57 +0100 Subject: [PATCH 045/111] REGRESSION_METHODS const --- src/emhass/machine_learning_regressor.py | 127 ++++++++++++----------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 80ddd74f..9e7795d0 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -24,6 +24,41 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) +REGRESSION_METHODS = { + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + } class MLRegressor: r""" @@ -43,7 +78,7 @@ def __init__( self, data, model_type: str, - sklearn_model: str, + regression_model: str, features: list, target: str, timestamp: str, @@ -56,11 +91,15 @@ def __init__( :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str + :param regression_model: The model that will be used. For now only \ + this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ + `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + :type regression_model: str :param features: A list of features. 
\ - Example: [`solar`, `degree_days`]. + Example: [`solar_production`, `degree_days`]. :type features: list :param target: The target(to be predicted). \ - Example: `hours`. + Example: `heating_hours`. :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. :type timestamp: str @@ -72,7 +111,7 @@ def __init__( self.target = target self.timestamp = timestamp self.model_type = model_type - self.sklearn_model = sklearn_model + self.regression_model = regression_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep="first")] @@ -111,7 +150,7 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = []) -> None: + def fit(self, date_features: Optional[list] = None) -> None: """ Fit the model using the provided data. @@ -129,7 +168,7 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: + if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( self.data_exo, date_features, self.timestamp @@ -150,63 +189,27 @@ def fit(self, date_features: Optional[list] = []) -> None: ) self.steps = len(X_test) - regression_methods = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } - - if self.sklearn_model == "LinearRegression": - base_model = regression_methods["LinearRegression"]["model"] - param_grid = regression_methods["LinearRegression"]["param_grid"] - elif self.sklearn_model == "RidgeRegression": - base_model = regression_methods["RidgeRegression"]["model"] - param_grid = regression_methods["RidgeRegression"]["param_grid"] - elif self.sklearn_model == "LassoRegression": - base_model = regression_methods["LassoRegression"]["model"] - param_grid = regression_methods["LassoRegression"]["param_grid"] - elif self.sklearn_model == "RandomForestRegression": - base_model = regression_methods["RandomForestRegression"]["model"] - param_grid = regression_methods["RandomForestRegression"]["param_grid"] - elif self.sklearn_model == "GradientBoostingRegression": - base_model = regression_methods["GradientBoostingRegression"]["model"] - param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] - elif self.sklearn_model == "AdaBoostRegression": - base_model = regression_methods["AdaBoostRegression"]["model"] - param_grid = regression_methods["AdaBoostRegression"]["param_grid"] + if self.regression_model == "LinearRegression": + base_model 
= REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model " + self.sklearn_model + " is not valid" + "Passed sklearn model " + self.regression_model + " is not valid" ) self.model = make_pipeline(StandardScaler(), base_model) @@ -223,7 +226,7 @@ def fit(self, date_features: Optional[list] = []) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.sklearn_model + " model") + self.logger.info("Training a " + self.regression_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) print("Best value for lambda : ", self.grid_search.best_params_) From 063ccf2b140dd2e0ba8a6093084838e775a9fd2a Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:12:28 +0100 Subject: [PATCH 046/111] Some cleanup --- src/emhass/machine_learning_regressor.py | 220 +++++++++++++---------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 9e7795d0..95f624b3 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -1,70 +1,72 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +"""Machine learning regressor module.""" + +from __future__ import annotations import copy -import logging import time -from typing import Optional import warnings +from typing import TYPE_CHECKING -import pandas as pd import numpy as np +import pandas as pd from sklearn.ensemble import ( AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, ) -from sklearn.metrics import r2_score - from sklearn.linear_model import Lasso, LinearRegression, Ridge +from sklearn.metrics import r2_score from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +if TYPE_CHECKING: + import logging warnings.filterwarnings("ignore", category=DeprecationWarning) REGRESSION_METHODS = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": 
{"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, +} + class MLRegressor: - r""" - A forecaster class using machine learning models. + r"""A forecaster class using machine learning models. - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. + This class uses the `sklearn` module and the machine learning models are \ + from `scikit-learn`. It exposes two main methods: @@ -74,9 +76,9 @@ class MLRegressor: """ - def __init__( - self, - data, + def __init__( # noqa: PLR0913 + self: MLRegressor, + data: pd.DataFrame, model_type: str, regression_model: str, features: list, @@ -92,8 +94,9 @@ def __init__( for what it will be used for. :type model_type: str :param regression_model: The model that will be used. For now only \ - this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ - `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + this options are possible: `LinearRegression`, `RidgeRegression`, \ + `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ Example: [`solar_production`, `degree_days`]. @@ -113,7 +116,7 @@ def __init__( self.model_type = model_type self.regression_model = regression_model self.logger = logger - self.data.sort_index(inplace=True) + self.data = self.data.sort_index() self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None @@ -122,9 +125,11 @@ def __init__( @staticmethod def add_date_features( - data: pd.DataFrame, date_features: list, timestamp: str + data: pd.DataFrame, + date_features: list, + timestamp: str, ) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp + """Add date features from the input DataFrame timestamp. 
:param data: The input DataFrame :type data: pd.DataFrame @@ -133,7 +138,7 @@ def add_date_features( :return: The DataFrame with the added features :rtype: pd.DataFrame """ - df = copy.deepcopy(data) + df = copy.deepcopy(data) # noqa: PD901 df[timestamp] = pd.to_datetime(df["timestamp"]) if "year" in date_features: df["year"] = [i.year for i in df["timestamp"]] @@ -150,14 +155,54 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = None) -> None: + def get_regression_model(self: MLRegressor) -> tuple[str, str]: + """Get the base model and parameter grid for the specified regression model. + + Returns a tuple containing the base model and parameter grid corresponding to \ + the specified regression model. + + Args: + ---- + self: The instance of the MLRegressor class. + + Returns: + ------- + A tuple containing the base model and parameter grid. + """ - Fit the model using the provided data. + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] + else: + self.logger.error( + "Passed sklearn model %s is not valid", + self.regression_model, + ) + return base_model, param_grid - :param date_features: A list of 'date_features' to take into account when fitting the model. + def fit(self: MLRegressor, date_features: list | None = None) -> None: + """Fit the model using the provided data. + + :param date_features: A list of 'date_features' to take into account when \ + fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for " + self.model_type) + self.logger.info("Performing a MLRegressor fit for %s", self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.features] = self.data[self.features] self.data_exo[self.target] = self.data[self.target] @@ -167,50 +212,36 @@ def fit(self, date_features: Optional[list] = None) -> None: keep_columns.append(self.timestamp) keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) + self.data_exo = self.data_exo.reset_index(drop=True) if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( - self.data_exo, date_features, self.timestamp + self.data_exo, + date_features, + self.timestamp, ) else: self.logger.error( - "If no timestamp provided, you can't use date_features, going further without date_features." 
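# add_date_features() above extracts calendar features with per-element list
# comprehensions; an equivalent, vectorized sketch using the pandas .dt
# accessor (same result, assuming the column already holds datetimes):
import pandas as pd

df = pd.DataFrame({"timestamp": pd.date_range("2024-01-01", periods=3, freq="D")})
df["year"] = df["timestamp"].dt.year
df["month"] = df["timestamp"].dt.month
df["day_of_week"] = df["timestamp"].dt.dayofweek
df["day"] = df["timestamp"].dt.day
print(df)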
+ "If no timestamp provided, you can't use date_features, going \ + further without date_features.", ) y = self.data_exo[self.target] self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: self.data_exo = self.data_exo.drop(self.timestamp, axis=1) - X = self.data_exo + X = self.data_exo # noqa: N806 - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 + X_train, X_test, y_train, y_test = train_test_split( # noqa: N806 + X, + y, + test_size=0.2, + random_state=42, ) + self.steps = len(X_test) - if self.regression_model == "LinearRegression": - base_model = REGRESSION_METHODS["LinearRegression"]["model"] - param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] - elif self.regression_model == "RidgeRegression": - base_model = REGRESSION_METHODS["RidgeRegression"]["model"] - param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] - elif self.regression_model == "LassoRegression": - base_model = REGRESSION_METHODS["LassoRegression"]["model"] - param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] - elif self.regression_model == "RandomForestRegression": - base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] - param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] - elif self.regression_model == "GradientBoostingRegression": - base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] - param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] - elif self.regression_model == "AdaBoostRegression": - base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] - param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] - else: - self.logger.error( - "Passed sklearn model " + self.regression_model + " is not valid" - ) + base_model, param_grid = self.get_regression_model() self.model = make_pipeline(StandardScaler(), base_model) @@ -226,12 +257,10 @@ def fit(self, date_features: Optional[list] = None) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.regression_model + " model") + self.logger.info("Training a %s model", self.regression_model) start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ", self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + self.logger.info("Elapsed time for model fit: %s", time.time() - start_time) self.model = self.grid_search.best_estimator_ @@ -240,20 +269,21 @@ def fit(self, date_features: Optional[list] = None) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test, predictions) self.logger.info( - f"Prediction R2 score of fitted model on test data: {pred_metric}" + "Prediction R2 score of fitted model on test data: %s", + pred_metric, ) - def predict(self, new_values: list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - + def predict(self: MLRegressor, new_values: list) -> np.ndarray: + """Predict a new value. - :param new_values: The new values for the features(in the same order as the features list). \ + :param new_values: The new values for the features \ + (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. 
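# A short sketch of the input-shape contract this predict() method assumes:
# scikit-learn estimators expect a 2-D array of shape (n_samples, n_features),
# so a single observation must be wrapped before calling model.predict(). The
# values below mirror the [12.79, 4.766, 1, 2] payload used in the tests later
# in this series (degreeday, solar, month, day_of_week).
import numpy as np

new_values = [12.79, 4.766, 1, 2]
X_new = np.array([new_values])  # shape (1, 4): one row, four features
assert X_new.shape == (1, 4)
# prediction = mlr.model.predict(X_new)  # would return an ndarray of length 1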
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for " + self.model_type) + self.logger.info("Performing a prediction for %s", self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) From 6fae7a4280043240c07bbdb941a0c03e20ef0d8e Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 047/111] Add csv-prediction --- src/emhass/command_line.py | 1 - src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 1706d34c..e6940518 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -16,7 +16,6 @@ from distutils.util import strtobool - from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
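# The num_lags rule of thumb in the docstring above is simple arithmetic: one
# day of lags equals 24 hours divided by the time step. A tiny hypothetical
# helper (not in the patch) makes the two quoted cases explicit:
def num_lags_for_one_day(timestep_minutes: int) -> int:
    # 24 h * 60 min divided by the optimization time step in minutes
    return int(24 * 60 / timestep_minutes)

assert num_lags_for_one_day(30) == 48  # 30-minute time step
assert num_lags_for_one_day(60) == 24  # 1-hour time step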
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From b4293c009588a47c2bed748b6dd4c6ba15472020 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 048/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
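# A hypothetical, minimal sketch of the "split up fit and predict" idea this
# commit moves toward (names here are illustrative, not from the patch): train
# once in fit(), keep the fitted estimator on the instance, and let predict()
# reuse it instead of retraining on every call.
import numpy as np
from sklearn.linear_model import LinearRegression

class TinyPredictor:
    def __init__(self) -> None:
        self.model = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        # Train once and keep the fitted estimator around
        self.model = LinearRegression().fit(X, y)

    def predict(self, new_values: list) -> np.ndarray:
        if self.model is None:
            msg = "call fit() before predict()"
            raise RuntimeError(msg)
        return self.model.predict(np.array([new_values]))

p = TinyPredictor()
p.fit(np.array([[1.0], [2.0], [3.0]]), np.array([2.0, 4.0, 6.0]))
print(p.predict([4.0]))  # ~[8.0]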
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From fa8f6c0cb8e6dc430eacdd54cb37fb87daea3afc Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 049/111] gitignore fun --- .vscode/launch.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 10313c97..b953c7d3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,10 +10,11 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "purpose":["debug-in-terminal"], "justMyCode": true, "env": { @@ -21,15 +22,15 @@ "OPTIONS_PATH": "/workspaces/emhass/options.json", "SECRETS_PATH": "/workspaces/emhass/secrets_emhass.yaml", "DATA_PATH": "/workspaces/emhass/data/", - "LOGGING_LEVEL": "DEBUG" } }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "args": ["--addon", "true", "--no_response", "true"], "purpose":["debug-in-terminal"], "justMyCode": true, @@ -44,7 +45,6 @@ "LAT": "45.83", //optional change "LON": "6.86", //optional change "ALT": "4807.8", //optional change - "LOGGING_LEVEL": "DEBUG" //optional change }, } From 54966d45987ac21ba69ef6f0633b346e37154b5b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 050/111] python -> debugpy --- .vscode/launch.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b953c7d3..ec6c6987 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,12 +10,14 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "purpose":["debug-in-terminal"], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -26,13 +28,20 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": 
"debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "args": ["--addon", "true", "--no_response", "true"], - "purpose":["debug-in-terminal"], + "args": [ + "--addon", + "true", + "--no_response", + "true" + ], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -46,7 +55,6 @@ "LON": "6.86", //optional change "ALT": "4807.8", //optional change }, - - } + } ] } \ No newline at end of file From ef4e17703503a1e23910faa459a98b4e03b3ad67 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 051/111] launch.json --- .vscode/launch.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ec6c6987..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,9 +12,8 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "purpose": [ "debug-in-terminal" ], @@ -30,9 +29,8 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "args": [ "--addon", "true", From 1a502e1d3ef6c86fee14574a50070beca9005f78 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 052/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. 
- - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. 
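# With csv_predictor.py deleted here, its job is handled by MLRegressor. A
# rough usage sketch under the fixtures used throughout this series (a
# heating_prediction.csv with degreeday/solar/hour/timestamp columns):
import logging

import pandas as pd

from emhass.machine_learning_regressor import MLRegressor

logger = logging.getLogger(__name__)
data = pd.read_csv("heating_prediction.csv", parse_dates=True)
mlr = MLRegressor(
    data,
    "heating_hours_degreeday",  # model_type
    "AdaBoostRegression",       # regression_model
    ["degreeday", "solar"],     # features
    "hour",                     # target
    "timestamp",                # timestamp column
    logger,
)
mlr.fit(date_features=["month", "day_of_week"])
print(mlr.predict([12.79, 4.766, 1, 2]))  # one value per feature + date feature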
- - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From 6095a2c77d361c129db35bdac958b68a8a0ad655 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 18 Mar 2024 09:33:20 +0100 Subject: [PATCH 053/111] remove KNeighborsRegressor --- src/emhass/machine_learning_regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 95f624b3..732b4266 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -95,7 +95,7 @@ def __init__( # noqa: PLR0913 :type model_type: str :param regression_model: The model that will be used. For now only \ this options are possible: `LinearRegression`, `RidgeRegression`, \ - `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `LassoRegression`, `RandomForestRegression`, \ `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. 
\ From 8fd7fbf0d912fe2986034218d0ab4e7fbfb22433 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 14:07:21 +0200 Subject: [PATCH 054/111] add tests --- src/emhass/command_line.py | 109 ++++--- tests/test_command_line_utils.py | 344 ++++++++++++++++++++--- tests/test_machine_learning_regressor.py | 113 ++++++++ 3 files changed, 488 insertions(+), 78 deletions(-) create mode 100644 tests/test_machine_learning_regressor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index e6940518..bafb84f2 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -240,9 +240,9 @@ def set_input_data_dict( return False df_input_data = rh.df_final.copy() - elif set_type == "regressor-model-fit": + elif set_type == "regressor-model-fit" or set_type == "regressor-model-predict": - df_input_data_dayahead = None + df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None @@ -250,7 +250,13 @@ def set_input_data_dict( features = params["passed_data"]["features"] target = params["passed_data"]["target"] timestamp = params["passed_data"]["timestamp"] - filename_path = pathlib.Path(base_path) / csv_file + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file + + else: + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -266,13 +272,8 @@ def set_input_data_dict( if not set(required_columns).issubset(df_input_data.columns): logger.error("The cvs file does not contain the required columns.") raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" + f"CSV file should contain the following columns: {', '.join(required_columns)}", ) - elif set_type == "regressor-model-predict": - df_input_data, df_input_data_dayahead = None, None - P_PV_forecast, P_load_forecast = None, None - days_list = None - params = json.loads(params) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None @@ -280,7 +281,7 @@ def set_input_data_dict( days_list = None else: logger.error( - "The passed action argument and hence the set_type parameter for setup is not valid" + "The passed action argument and hence the set_type parameter for setup is not valid", ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -541,7 +542,7 @@ def forecast_model_predict( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return # Make predictions @@ -629,7 +630,7 @@ def forecast_model_tune( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this tune method" + "The ML forecaster file was not found, please run a model fit method before this tune method", ) return None, None # Tune the model @@ -643,7 +644,9 @@ def forecast_model_tune( def regressor_model_fit( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, ) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. 
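# The issubset() guard added above in set_input_data_dict() fails fast with a
# readable message when a required column is missing from the CSV. A
# self-contained illustration with a toy DataFrame:
import pandas as pd

df_input_data = pd.DataFrame({"degreeday": [10.2], "solar": [3.1]})
required_columns = ["degreeday", "solar", "hour"]
if not set(required_columns).issubset(df_input_data.columns):
    raise ValueError(
        f"CSV file should contain the following columns: {', '.join(required_columns)}",
    )
# -> ValueError: CSV file should contain the following columns: degreeday, solar, hour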
@@ -662,9 +665,16 @@ def regressor_model_fit( timestamp = input_data_dict["params"]["passed_data"]["timestamp"] date_features = input_data_dict["params"]["passed_data"]["date_features"] root = input_data_dict["root"] + # The MLRegressor object mlr = MLRegressor( - data, model_type, regression_model, features, target, timestamp, logger + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, ) # Fit the ML model mlr.fit(date_features=date_features) @@ -673,10 +683,14 @@ def regressor_model_fit( filename = model_type + "_mlr.pkl" with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) + return mlr def regressor_model_predict( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlr: Optional[MLRegressor] = None, ) -> None: """Perform a prediction from csv file. @@ -697,7 +711,7 @@ def regressor_model_predict( mlr = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return new_values = input_data_dict["params"]["passed_data"]["new_values"] @@ -715,14 +729,16 @@ def regressor_model_predict( ] # Publish prediction idx = 0 - input_data_dict["rh"].post_data( - prediction, - idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var="mlregressor", - ) + if not debug: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + return prediction def publish_data( @@ -813,7 +829,7 @@ def publish_data( if "P_deferrable{}".format(k) not in opt_res_latest.columns: logger.error( "P_deferrable{}".format(k) - + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.", ) else: input_data_dict["rh"].post_data( @@ -830,7 +846,7 @@ def publish_data( if input_data_dict["opt"].optim_conf["set_use_battery"]: if "P_batt" not in opt_res_latest.columns: logger.error( - "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + "P_batt was not found in results DataFrame. 
Optimization task may need to be relaunched or it did not converge to a solution.", ) else: custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] @@ -886,7 +902,7 @@ def publish_data( if "optim_status" not in opt_res_latest: opt_res_latest["optim_status"] = "Optimal" logger.warning( - "no optim_status in opt_res_latest, run an optimization task first" + "no optim_status in opt_res_latest, run an optimization task first", ) input_data_dict["rh"].post_data( opt_res_latest["optim_status"], @@ -957,7 +973,9 @@ def main(): naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune", ) parser.add_argument( - "--config", type=str, help="Define path to the config.yaml file" + "--config", + type=str, + help="Define path to the config.yaml file", ) parser.add_argument( "--costfun", @@ -984,7 +1002,10 @@ def main(): help="Pass runtime optimization parameters as dictionnary", ) parser.add_argument( - "--debug", type=strtobool, default="False", help="Use True for testing purposes" + "--debug", + type=strtobool, + default="False", + help="Use True for testing purposes", ) args = parser.parse_args() # The path to the configuration files @@ -995,12 +1016,14 @@ def main(): # Additionnal argument try: parser.add_argument( - "--version", action="version", version="%(prog)s " + version("emhass") + "--version", + action="version", + version="%(prog)s " + version("emhass"), ) args = parser.parse_args() except Exception: logger.info( - "Version not found for emhass package. Or importlib exited with PackageNotFoundError." + "Version not found for emhass package. Or importlib exited with PackageNotFoundError.", ) # Setup parameters input_data_dict = set_input_data_dict( @@ -1040,7 +1063,25 @@ def main(): else: mlf = None df_pred_optim, mlf = forecast_model_tune( - input_data_dict, logger, debug=args.debug, mlf=mlf + input_data_dict, + logger, + debug=args.debug, + mlf=mlf, + ) + opt_res = None + elif args.action == "regressor-model-fit": + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + opt_res = None + elif args.action == "regressor-model-predict": + if args.debug: + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + else: + mlr = None + prediction = regressor_model_predict( + input_data_dict, + logger, + debug=args.debug, + mlr=mlr, ) opt_res = None elif args.action == "publish-data": @@ -1063,6 +1104,10 @@ def main(): return df_fit_pred, df_fit_pred_backtest, mlf elif args.action == "forecast-model-predict": return df_pred + elif args.action == "regressor-model-fit": + return mlr + elif args.action == "regressor-model-predict": + return prediction elif args.action == "forecast-model-tune": return df_pred_optim, mlf diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index d23aeb06..597b20e7 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -5,10 +5,21 @@ from unittest.mock import patch import pandas as pd import pathlib, json, yaml, copy +import numpy as np from emhass.command_line import set_input_data_dict -from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim -from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import ( + perfect_forecast_optim, + dayahead_forecast_optim, + naive_mpc_optim, +) +from emhass.command_line import ( + forecast_model_fit, + forecast_model_predict, + forecast_model_tune, + regressor_model_fit, + 
regressor_model_predict, +) from emhass.command_line import publish_data from emhass.command_line import main from emhass import utils @@ -316,46 +327,183 @@ def test_forecast_model_fit_predict_tune(self): self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) # Test the tune method - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=True, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=True, mlf=mlf + ) self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) - # Test ijection_dict for tune method on webui + # Test injection_dict for tune method on webui injection_dict = utils.get_injection_dict_forecast_model_tune(df_fit_pred, mlf) self.assertIsInstance(injection_dict, dict) - self.assertIsInstance(injection_dict['figure_0'], str) - - @patch('sys.argv', ['main', '--action', 'test', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--debug', 'True']) + self.assertIsInstance(injection_dict["figure_0"], str) + + def test_regressor_model_fit_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params_json = json.dumps(params) + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["regression_model"] + == "AdaBoostRegression", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + ) + mlr = regressor_model_fit(input_data_dict, logger, debug=True) + + # def test_regressor_model_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-predict" # predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + 
input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] + == "Predicted hours", + ) + + regressor_model_predict(input_data_dict, logger, debug=True, mlr=mlr) + + @patch( + "sys.argv", + [ + "main", + "--action", + "test", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--debug", + "True", + ], + ) def test_main_wrong_action(self): opt_res = main() self.assertEqual(opt_res, None) - - @patch('sys.argv', ['main', '--action', 'perfect-optim', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--debug', 'True']) + + @patch( + "sys.argv", + [ + "main", + "--action", + "perfect-optim", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--debug", + "True", + ], + ) def test_main_perfect_forecast_optim(self): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) + self.assertTrue(opt_res.isnull().sum().sum() == 0) self.assertIsInstance(opt_res.index, pd.core.indexes.datetimes.DatetimeIndex) - self.assertIsInstance(opt_res.index.dtype, pd.core.dtypes.dtypes.DatetimeTZDtype) - + self.assertIsInstance( + opt_res.index.dtype, + pd.core.dtypes.dtypes.DatetimeTZDtype, + ) + def test_main_dayahead_forecast_optim(self): - with patch('sys.argv', ['main', '--action', 'dayahead-optim', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, - '--debug', 'True']): + with patch( + "sys.argv", + [ + "main", + "--action", + "dayahead-optim", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + self.params_json, + "--runtimeparams", + self.runtimeparams_json, + "--debug", + "True", + ], + ): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + def test_main_naive_mpc_optim(self): - with patch('sys.argv', ['main', '--action', 'naive-mpc-optim', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, - '--debug', 'True']): + with patch( + "sys.argv", + [ + "main", + "--action", + "naive-mpc-optim", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + self.params_json, + "--runtimeparams", + self.runtimeparams_json, + "--debug", + "True", + ], + ): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - self.assertTrue(len(opt_res)==10) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + self.assertTrue(len(opt_res) == 10) + def test_main_forecast_model_fit(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -386,20 +534,33 @@ def test_main_forecast_model_predict(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) - with patch('sys.argv', ['main', '--action', 'forecast-model-predict', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', params_json, '--runtimeparams', runtimeparams_json, - '--debug', 'True']): + 
with patch( + "sys.argv", + [ + "main", + "--action", + "forecast-model-predict", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): df_pred = main() self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) - + def test_main_forecast_model_tune(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -408,27 +569,118 @@ def test_main_forecast_model_tune(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) - with patch('sys.argv', ['main', '--action', 'forecast-model-tune', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--params', params_json, '--runtimeparams', runtimeparams_json, - '--debug', 'True']): + with patch( + "sys.argv", + [ + "main", + "--action", + "forecast-model-tune", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): df_pred_optim, mlf = main() self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) - - @patch('sys.argv', ['main', '--action', 'publish-data', '--config', str(pathlib.Path(root+'/config_emhass.yaml')), - '--debug', 'True']) + + def test_main_regressor_model_fit(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-fit", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + mlr = main() + + def test_main_regressor_model_predict(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-predict", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + prediction = main() + self.assertIsInstance(prediction, np.ndarray) + + @patch( + "sys.argv", + [ + "main", + "--action", + "publish-data", + "--config", + str(pathlib.Path(root + 
"/config_emhass.yaml")), + "--debug", + "True", + ], + ) def test_main_publish_data(self): opt_res = main() - self.assertTrue(opt_res==None) - -if __name__ == '__main__': + self.assertTrue(opt_res == None) + + +if __name__ == "__main__": unittest.main() ch.close() logger.removeHandler(ch) diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py new file mode 100644 index 00000000..88137b0d --- /dev/null +++ b/tests/test_machine_learning_regressor.py @@ -0,0 +1,113 @@ +"""Machine learning regressor test module.""" + +import copy +import json +import pathlib +import unittest + +import numpy as np +import pandas as pd +from sklearn.pipeline import Pipeline +import yaml +from emhass import utils +from emhass.command_line import set_input_data_dict +from emhass.machine_learning_regressor import MLRegressor +from sklearn.ensemble import ( + AdaBoostRegressor, +) + +# the root folder +root = str(utils.get_root(__file__, num_parent=2)) +# create logger +logger, ch = utils.get_logger(__name__, root, save_to_file=False) + + +class TestMLRegressor(unittest.TestCase): + @staticmethod + def get_test_params(): + with open(root + "/config_emhass.yaml", "r") as file: + params = yaml.load(file, Loader=yaml.FullLoader) + params.update( + { + "params_secrets": { + "hass_url": "http://supervisor/core/api", + "long_lived_token": "${SUPERVISOR_TOKEN}", + "time_zone": "Europe/Paris", + "lat": 45.83, + "lon": 6.86, + "alt": 8000.0, + }, + }, + ) + return params + + def setUp(self): + params = TestMLRegressor.get_test_params() + params_json = json.dumps(params) + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = copy.deepcopy(json.loads(params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + self.input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + data = copy.deepcopy(self.input_data_dict["df_input_data"]) + self.assertIsInstance(data, pd.DataFrame) + self.csv_file = self.input_data_dict["params"]["passed_data"]["csv_file"] + features = self.input_data_dict["params"]["passed_data"]["features"] + target = self.input_data_dict["params"]["passed_data"]["target"] + regression_model = self.input_data_dict["params"]["passed_data"][ + "regression_model" + ] + model_type = self.input_data_dict["params"]["passed_data"]["model_type"] + timestamp = self.input_data_dict["params"]["passed_data"]["timestamp"] + self.date_features = self.input_data_dict["params"]["passed_data"][ + "date_features" + ] + self.new_values = self.input_data_dict["params"]["passed_data"]["new_values"] + self.mlr = MLRegressor( + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, + ) + + def test_fit(self): + self.mlr.fit(self.date_features) + self.assertIsInstance(self.mlr.model, Pipeline) + + def test_predict(self): + self.mlr.fit(self.date_features) + predictions = 
self.mlr.predict(self.new_values) + self.assertIsInstance(predictions, np.ndarray) + + +if __name__ == "__main__": + unittest.main() + ch.close() + logger.removeHandler(ch) From c51d54049f1569a0b8547f8e04af3329f82a18d7 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 15:45:58 +0200 Subject: [PATCH 055/111] Rename paragrams --- tests/test_command_line_utils.py | 33 +++++++++++++----------- tests/test_machine_learning_regressor.py | 11 +++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index 597b20e7..eaad3adb 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -344,11 +344,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-fit" # fit and predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -369,14 +369,16 @@ def test_regressor_model_fit_predict(self): get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["regression_model"] == "AdaBoostRegression", ) self.assertTrue( - input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + input_data_dict["params"]["passed_data"]["csv_file"] + == "heating_prediction.csv", ) mlr = regressor_model_fit(input_data_dict, logger, debug=True) @@ -387,11 +389,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-predict" # predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -414,7 +416,8 @@ def test_regressor_model_fit_predict(self): get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] @@ -599,11 +602,11 @@ def test_main_forecast_model_tune(self): def test_main_regressor_model_fit(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], } @@ -631,11 +634,11 @@ def test_main_regressor_model_fit(self): def test_main_regressor_model_predict(self): params = copy.deepcopy(json.loads(self.params_json)) 
runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 88137b0d..74702b6f 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -7,14 +7,11 @@ import numpy as np import pandas as pd -from sklearn.pipeline import Pipeline import yaml from emhass import utils from emhass.command_line import set_input_data_dict from emhass.machine_learning_regressor import MLRegressor -from sklearn.ensemble import ( - AdaBoostRegressor, -) +from sklearn.pipeline import Pipeline # the root folder root = str(utils.get_root(__file__, num_parent=2)) @@ -50,11 +47,11 @@ def setUp(self): action = "regressor-model-fit" # fit and predict methods params = copy.deepcopy(json.loads(params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], From 1590404ecab146805a9d2103b5d4cf32cbec2783 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 11:40:54 +0200 Subject: [PATCH 056/111] Ready for review (I think) --- docs/mlregressor.md | 106 +++++++++++++++++++---- src/emhass/command_line.py | 53 +++++++----- src/emhass/machine_learning_regressor.py | 3 +- src/emhass/utils.py | 15 +++- 4 files changed, 133 insertions(+), 44 deletions(-) diff --git a/docs/mlregressor.md b/docs/mlregressor.md index 7206af99..dee5fccd 100644 --- a/docs/mlregressor.md +++ b/docs/mlregressor.md @@ -8,6 +8,7 @@ This API provides two main methods: - predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. + ## A basic model fit To train a model use the `regressor-model-fit` end point. @@ -45,28 +46,38 @@ A correct `curl` call to launch a model fit can look like this: ``` curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit ``` - -After applying the `curl` command to fit the model the following information is logged by EMHASS: - - 2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model - 2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176 - 2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957 - -## The predict method - -To obtain a prediction using a previously trained model use the `regressor-model-predict` end point. 
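
The same fit can also be launched from Python rather than curl. This is a sketch only, mirroring the call above with the runtime keys and test values used elsewhere in this patch; it assumes the `requests` package and an EMHASS server on localhost:5000:

```
# Sketch: launching regressor-model-fit from Python (illustrative, not part of EMHASS).
import requests

payload = {
    "csv_file": "heating_prediction.csv",
    "features": ["degreeday", "solar"],
    "target": "hour",
    "regression_model": "AdaBoostRegression",
    "model_type": "heating_hours_degreeday",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
}
response = requests.post("http://localhost:5000/action/regressor-model-fit", json=payload)
print(response.status_code, response.reason)
```
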
+A Home Assistant `rest_command` can look like this: ``` -curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-predict +fit_heating_hours: + url: http://127.0.0.1:5000/action/regressor-model-fit + method: POST + content_type: "application/json" + payload: >- + { + "csv_file": "heating_prediction.csv", + "features":["degreeday", "solar"], + "target": "hours", + "regression_model": "RandomForestRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"] + } ``` +After fitting the model the following information is logged by EMHASS: -If needed pass the correct `model_type` like this: + 2024-04-17 12:41:50,019 - web_server - INFO - Passed runtime parameters: {'csv_file': 'heating_prediction.csv', 'features': ['degreeday', 'solar'], 'target': 'heating_hours', 'regression_model': 'RandomForestRegression', 'model_type': 'heating_hours_degreeday', 'timestamp': 'timestamp', 'date_features': ['month', 'day_of_week']} + 2024-04-17 12:41:50,020 - web_server - INFO - >> Setting input data dict + 2024-04-17 12:41:50,021 - web_server - INFO - Setting up needed data + 2024-04-17 12:41:50,048 - web_server - INFO - >> Performing a machine learning regressor fit... + 2024-04-17 12:41:50,049 - web_server - INFO - Performing a MLRegressor fit for heating_hours_degreeday + 2024-04-17 12:41:50,064 - web_server - INFO - Training a RandomForestRegression model + 2024-04-17 12:41:57,852 - web_server - INFO - Elapsed time for model fit: 7.78800106048584 + 2024-04-17 12:41:57,862 - web_server - INFO - Prediction R2 score of fitted model on test data: -0.5667567505914477 -``` -curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "load_forecast"}' http://localhost:5000/action/regressor-model-predict -``` +## The predict method -It is possible to publish the predict method results to a Home Assistant sensor. +To obtain a prediction using a previously trained model use the `regressor-model-predict` end point. The list of parameters needed to set the data publish task is: @@ -89,3 +100,66 @@ runtimeparams = { "model_type": "heating_hours_degreeday" } ``` + +Pass the correct `model_type` like this: + +``` +curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict +``` + +A Home Assistant `rest_command` can look like this: + +``` +predict_heating_hours: + url: http://localhost:5001/action/regressor-model-predict + method: POST + content_type: "application/json" + payload: >- + { + "mlr_predict_entity_id": "sensor.predicted_hours", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" + } +``` +After predicting the model the following information is logged by EMHASS: + +``` +2024-04-17 14:25:40,695 - web_server - INFO - Passed runtime parameters: {'mlr_predict_entity_id': 'sensor.predicted_hours', 'mlr_predict_unit_of_measurement': 'h', 'mlr_predict_friendly_name': 'Predicted hours', 'new_values': [8.2, 7.23, 2, 6], 'model_type': 'heating_hours_degreeday'} +2024-04-17 14:25:40,696 - web_server - INFO - >> Setting input data dict +2024-04-17 14:25:40,696 - web_server - INFO - Setting up needed data +2024-04-17 14:25:40,700 - web_server - INFO - >> Performing a machine learning regressor predict... 
+2024-04-17 14:25:40,715 - web_server - INFO - Performing a prediction for heating_hours_degreeday +2024-04-17 14:25:40,750 - web_server - INFO - Successfully posted to sensor.predicted_hours = 3.716600000000001 +``` +The predict method will publish the result to a Home Assistant sensor. + + +## How to store data in a csv file from Home Assistant +Notify to a file +``` +notify: + - platform: file + name: heating_hours_prediction + timestamp: false + filename: /share/heating_prediction.csv +``` +Then you need an automation to notify to this file +``` +alias: "Heating csv" +id: 157b1d57-73d9-4f39-82c6-13ce0cf42 +trigger: + - platform: time + at: "23:59:32" +action: + - service: notify.heating_hours_prediction + data: + message: > + {% set degreeday = states('sensor.degree_day_daily') |float %} + {% set heating_hours = states('sensor.heating_hours_today') |float | round(2) %} + {% set solar = states('sensor.solar_daily') |float | round(3) %} + {% set time = now() %} + + {{time}},{{degreeday}},{{solar}},{{heating_hours}} +``` \ No newline at end of file diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 313cf885..feee327f 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -246,34 +246,39 @@ def set_input_data_dict( P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params["passed_data"]["csv_file"] - features = params["passed_data"]["features"] - target = params["passed_data"]["target"] - timestamp = params["passed_data"]["timestamp"] - if get_data_from_file: - base_path = base_path + "/data" - filename_path = pathlib.Path(base_path) / csv_file + csv_file = params["passed_data"].get("csv_file", None) + if "features" in params["passed_data"]: + features = params["passed_data"]["features"] + if "target" in params["passed_data"]: + target = params["passed_data"]["target"] + if "timestamp" in params["passed_data"]: + timestamp = params["passed_data"]["timestamp"] + if csv_file: + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file - else: - filename_path = pathlib.Path(base_path) / csv_file + else: + filename_path = pathlib.Path(base_path) / csv_file - if filename_path.is_file(): - df_input_data = pd.read_csv(filename_path, parse_dates=True) + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) - else: - logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + csv_file + " was not found.") - required_columns = [] - required_columns.extend(features) - required_columns.append(target) - if timestamp is not None: - required_columns.append(timestamp) + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(features) + required_columns.append(target) + if timestamp is not None: + required_columns.append(timestamp) - if not set(required_columns).issubset(df_input_data.columns): - logger.error("The cvs file does not contain the required columns.") - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}", - ) + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + msg = f"CSV file should contain the following columns: {', '.join(required_columns)}" + raise ValueError( + msg, + ) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = 
None, None diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 732b4266..f0d3c532 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -190,9 +190,10 @@ def get_regression_model(self: MLRegressor) -> tuple[str, str]: param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model %s is not valid", + "Passed model %s is not valid", self.regression_model, ) + return None return base_model, param_grid def fit(self: MLRegressor, date_features: list | None = None) -> None: diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 836f1085..2517f4b2 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -228,12 +228,12 @@ def treat_runtimeparams( params["passed_data"]["csv_file"] = csv_file params["passed_data"]["features"] = features params["passed_data"]["target"] = target - if "timestamp" not in runtimeparams.keys(): + if "timestamp" not in runtimeparams: params["passed_data"]["timestamp"] = None else: timestamp = runtimeparams["timestamp"] params["passed_data"]["timestamp"] = timestamp - if "date_features" not in runtimeparams.keys(): + if "date_features" not in runtimeparams: params["passed_data"]["date_features"] = [] else: date_features = runtimeparams["date_features"] @@ -242,6 +242,15 @@ def treat_runtimeparams( if set_type == "regressor-model-predict": new_values = runtimeparams["new_values"] params["passed_data"]["new_values"] = new_values + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target # Treating special data passed for MPC control case if set_type == "naive-mpc-optim": @@ -335,7 +344,7 @@ def treat_runtimeparams( sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model if "regression_model" not in runtimeparams.keys(): - regression_model = "LinearRegression" + regression_model = "AdaBoostRegression" else: regression_model = runtimeparams["regression_model"] params["passed_data"]["regression_model"] = regression_model From f8b43aaf685fe6bc6f4d518d9f94416d22d96a25 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 14:11:23 +0200 Subject: [PATCH 057/111] remove *.csv from .gitignore to upload heating_prediction.csv --- .gitignore | 2 +- data/heating_prediction.csv | 130 ++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 data/heating_prediction.csv diff --git a/.gitignore b/.gitignore index 581080c8..604a82a1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ secrets_emhass.yaml .vscode/launch.json .vscode/settings.json .vscode/tasks.json -*.csv +# *.csv *.html *.pkl data/actionLogs.txt diff --git a/data/heating_prediction.csv b/data/heating_prediction.csv new file mode 100644 index 00000000..f50a8a49 --- /dev/null +++ b/data/heating_prediction.csv @@ -0,0 +1,130 @@ +timestamp,degreeday,solar,hour +2023-11-10 23:59:32.458039+01:00,12.23,3.982,2.87 +2023-11-11 23:59:32.459778+01:00,12.94,13.723,3.14 +2023-11-12 23:59:32.462220+01:00,14.45,4.925,3.5 +2023-11-13 23:59:32.462167+01:00,8.49,2.138,3.19 +2023-11-14 23:59:32.338942+01:00,8.61,2.444,2.91 +2023-11-15 23:59:32.195198+01:00,9.83,10.685,2.88 +2023-11-16 
23:59:32.501044+01:00,12.8,1.955,1.28 +2023-11-17 23:59:32.316366+01:00,13.35,8.742,2.97 +2023-11-18 23:59:32.082785+01:00,11.84,0.849,3.42 +2023-11-19 23:59:32.077198+01:00,7.3,10.85,1.9 +2023-11-20 23:59:32.431964+01:00,9.91,6.395,2.48 +2023-11-21 23:59:32.295705+01:00,11.44,2.678,2.91 +2023-11-22 23:59:32.377740+01:00,16.14,2.994,2.96 +2023-11-23 23:59:32.385890+01:00,9.31,5.346,2.91 +2023-11-24 23:59:32.376194+01:00,12.96,8.61,2.9 +2023-11-25 23:59:32.373666+01:00,14.91,12.31,3.47 +2023-11-26 23:59:32.373647+01:00,14.79,2.589,3.69 +2023-11-27 23:59:32.379920+01:00,14.92,0.322,6.05 +2023-11-28 23:59:32.213947+01:00,18.59,20.342,2.94 +2023-11-29 23:59:32.217384+01:00,19.05,5.393,5.41 +2023-11-30 23:59:32.222641+01:00,21.27,1.899,6.77 +2023-12-01 23:59:32.224533+01:00,21.3,1.233,5.75 +2023-12-02 23:59:32.107119+01:00,21.97,14.653,2.96 +2023-12-03 23:59:32.107436+01:00,20.61,4.766,8.89 +2023-12-04 23:59:32.116642+01:00,18.36,1.349,6.73 +2023-12-05 23:59:32.191254+01:00,16.93,0.869,6.17 +2023-12-06 23:59:32.176803+01:00,16.8,5.413,5.38 +2023-12-07 23:59:32.251031+01:00,17.67,8.089,5.98 +2023-12-08 23:59:32.255888+01:00,14.37,1.203,5.63 +2023-12-09 23:59:32.109040+01:00,11.94,0.814,5.08 +2023-12-10 23:59:32.103738+01:00,9.72,6.051,3.42 +2023-12-11 23:59:32.497717+01:00,9.83,1.459,3.87 +2023-12-12 23:59:32.502503+01:00,11.18,4.176,3.31 +2023-12-13 23:59:32.504794+01:00,11.09,2.91,3.1 +2023-12-14 23:59:32.177489+01:00,13.88,7.53,2.89 +2023-12-15 23:59:32.186292+01:00,12.18,2.129,5.68 +2023-12-16 23:59:32.176812+01:00,11.75,1.641,3.46 +2023-12-17 23:59:32.119874+01:00,12.18,14.868,3.46 +2023-12-18 23:59:32.120168+01:00,14.75,1.283,3.12 +2023-12-19 23:59:32.120101+01:00,12.82,0.09,5.07 +2023-12-20 23:59:32.249731+01:00,12.8,3.803,3.6 +2023-12-21 23:59:32.249135+01:00,8.73,2.096,3.55 +2023-12-22 23:59:32.385164+01:00,9.12,1.278,0.85 +2023-12-23 23:59:32.382910+01:00,8.99,1.848,0.0 +2023-12-24 23:59:32.382457+01:00,8.04,0.165,7.42 +2023-12-25 23:59:32.303520+01:00,7.56,1.028,2.93 +2023-12-26 23:59:32.105788+01:00,10.55,9.274,2.92 +2023-12-27 23:59:32.183107+01:00,11.78,2.026,3.39 +2023-12-28 23:59:32.183405+01:00,8.91,3.68,3.19 +2023-12-29 23:59:32.399740+01:00,9.35,2.464,2.95 +2023-12-30 23:59:32.091110+01:00,11.07,7.948,3.44 +2023-12-31 23:59:32.257530+01:00,10.51,3.5,3.48 +2024-01-01 23:59:32.106161+01:00,12.75,4.046,3.08 +2024-01-02 23:59:32.103187+01:00,8.81,0.562,4.46 +2024-01-03 23:59:32.429947+01:00,10.03,2.184,3.26 +2024-01-04 23:59:32.436773+01:00,11.22,5.662,2.97 +2024-01-05 23:59:32.165969+01:00,12.42,1.199,3.6 +2024-01-06 23:59:32.110208+01:00,15.35,0.295,4.32 +2024-01-07 23:59:32.147775+01:00,19.88,0.896,6.19 +2024-01-08 23:59:32.242815+01:00,22.74,6.468,5.82 +2024-01-09 23:59:32.201342+01:00,24.38,21.307,6.92 +2024-01-10 23:59:32.411136+01:00,24.84,18.89,1.53 +2024-01-11 23:59:32.399433+01:00,23.57,19.27,3.05 +2024-01-12 23:59:32.467622+01:00,18.22,1.977,13.98 +2024-01-13 23:59:32.077428+01:00,17.9,0.472,6.93 +2024-01-14 23:59:32.127844+01:00,19.65,1.346,6.95 +2024-01-15 23:59:32.125062+01:00,19.49,4.35,7.82 +2024-01-16 23:59:32.280474+01:00,21.21,9.238,5.7 +2024-01-17 23:59:32.283951+01:00,23.17,1.193,7.37 +2024-01-18 23:59:32.361241+01:00,21.61,17.307,6.67 +2024-01-19 23:59:32.341654+01:00,22.06,21.004,6.24 +2024-01-20 23:59:32.359151+01:00,21.95,12.912,6.43 +2024-01-21 23:59:32.126221+01:00,17.38,3.28,7.45 +2024-01-22 23:59:32.126346+01:00,9.47,7.645,6.1 +2024-01-23 23:59:32.417727+01:00,11.87,7.689,4.76 +2024-01-24 23:59:32.420933+01:00,8.15,10.052,3.62 +2024-01-25 
23:59:32.419138+01:00,12.38,3.785,3.98 +2024-01-26 23:59:32.422066+01:00,11.4,11.94,3.1 +2024-01-27 23:59:32.176538+01:00,17.96,19.741,3.45 +2024-01-28 23:59:32.168328+01:00,16.72,20.366,4.85 +2024-01-29 23:59:32.173916+01:00,13.11,16.972,4.51 +2024-01-30 23:59:32.503034+01:00,11.21,4.013,3.99 +2024-01-31 23:59:32.179265+01:00,12.79,4.766,3.73 +2024-02-01 23:59:32.487147+01:00,12.74,23.924,2.98 +2024-02-02 23:59:32.570084+01:00,13.0,2.98,5.04 +2024-02-03 23:59:32.484878+01:00,9.26,1.413,3.48 +2024-02-04 23:59:32.472168+01:00,8.35,4.306,3.47 +2024-02-05 23:59:32.409856+01:00,9.78,5.704,0.0 +2024-02-06 23:59:32.439147+01:00,9.15,2.431,6.56 +2024-02-07 23:59:32.235231+01:00,14.42,3.839,3.07 +2024-02-08 23:59:32.441543+01:00,13.9,1.412,5.94 +2024-02-09 23:59:32.443230+01:00,8.2,7.246,2.96 +2024-02-10 23:59:32.504326+01:00,8.37,8.567,3.48 +2024-02-11 23:59:32.452959+01:00,10.44,5.304,0.0 +2024-02-12 23:59:32.450999+01:00,12.65,16.004,3.42 +2024-02-13 23:59:32.343162+01:00,13.84,19.809,3.16 +2024-02-14 23:59:32.339408+01:00,8.48,1.98,4.52 +2024-02-15 23:59:32.339971+01:00,6.13,9.952,2.98 +2024-02-16 23:59:32.455273+01:00,7.66,3.675,3.06 +2024-02-17 23:59:32.097937+01:00,8.56,12.269,3.48 +2024-02-18 23:59:32.126377+01:00,9.59,2.205,3.04 +2024-02-19 23:59:32.421243+01:00,10.22,3.731,2.97 +2024-02-20 23:59:32.421985+01:00,11.61,13.775,0.0 +2024-02-21 23:59:32.371300+01:00,10.52,4.856,3.02 +2024-02-22 23:59:32.373153+01:00,9.53,4.256,3.48 +2024-02-23 23:59:32.372545+01:00,13.66,8.743,4.09 +2024-02-24 23:59:32.197044+01:00,14.44,7.842,4.3 +2024-02-25 23:59:32.196386+01:00,12.41,16.235,3.48 +2024-02-26 23:59:32.409648+01:00,14.63,2.096,5.05 +2024-02-27 23:59:32.373347+01:00,14.5,29.437,3.21 +2024-02-28 23:59:32.407538+01:00,15.38,6.475,4.88 +2024-02-29 23:59:32.194724+01:00,11.83,3.238,4.68 +2024-03-01 23:59:32.084520+01:00,10.56,14.352,3.8 +2024-03-02 23:59:32.066434+01:00,9.94,25.356,3.49 +2024-03-03 23:59:32.270878+01:00,8.9,10.577,3.19 +2024-03-04 23:59:32.274918+01:00,10.67,28.096,2.08 +2024-03-05 23:59:32.315023+01:00,12.19,10.553,2.95 +2024-03-06 23:59:32.441001+01:00,11.38,32.597,2.91 +2024-03-07 23:59:32.440044+01:00,12.39,28.856,2.96 +2024-03-08 23:59:32.228265+01:00,12.01,37.395,2.96 +2024-03-09 23:59:32.081874+01:00,8.72,17.66,3.5 +2024-03-10 23:59:32.335321+01:00,8.0,12.207,3.47 +2024-03-11 23:59:32.139531+01:00,10.39,2.526,2.96 +2024-03-12 23:59:32.136709+01:00,10.24,8.211,2.98 +2024-03-13 23:59:32.407174+01:00,7.19,6.425,2.95 +2024-03-14 23:59:32.342436+01:00,6.06,33.389,1.64 +2024-03-15 23:59:32.266278+01:00,5.63,12.628,2.96 +2024-03-16 23:59:32.155245+01:00,9.57,12.103,3.0 +2024-03-17 23:59:32.366155+01:00,8.43,14.302,0.25 From a7f301cfd6d5a51f5587c1de644f4379a9ca74ff Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:13:47 +0100 Subject: [PATCH 058/111] add /app to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5dc21af8..581080c8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ secrets_emhass.yaml *.html *.pkl data/actionLogs.txt +**/app # Byte-compiled / optimized / DLL files From d40da8622332712d691cf066822c3ca7990a6a3c Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 059/111] Add csv-prediction --- src/emhass/command_line.py | 46 ++++++++++++ src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ src/emhass/retrieve_hass.py | 10 +++ src/emhass/utils.py | 25 +++++++ src/emhass/web_server.py | 6 ++ 5 files changed, 226 insertions(+) 
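
In outline, the csv-prediction module reintroduced below reduces to a short scikit-learn recipe. The following is a simplified sketch of that pattern, using the column names from data/heating_prediction.csv added above and the same train/test split as the patch; it is not the module code itself:

```
# Simplified sketch of the csv-prediction flow (pattern only, not the module).
# Assumes data/heating_prediction.csv from this series is on the current path.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

data = pd.read_csv("data/heating_prediction.csv")  # columns: timestamp, degreeday, solar, hour
X = data[["degreeday", "solar"]].values            # independent variables
y = data["hour"].values                            # dependent variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression().fit(X_train, y_train)
print(model.predict([[12.79, 4.766]]))             # new values for degreeday and solar
```
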
create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 538f4ae6..24ab6132 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -20,6 +20,7 @@ from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization +from emhass.csv_predictor import CsvPredictor from emhass import utils @@ -154,6 +155,12 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() + elif set_type == "csv-predict": + df_input_data, df_input_data_dayahead = None, None + P_PV_forecast, P_load_forecast = None, None + days_list = None + params = json.loads(params) + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -447,6 +454,45 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf +def csv_predict(input_data_dict: dict, logger: logging.Logger, + debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: + """Perform a forecast model fit from training data retrieved from Home Assistant. + + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object + :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + """ + data = copy.deepcopy(input_data_dict['df_input_data']) + model_type = input_data_dict['params']['passed_data']['model_type'] + csv_file = input_data_dict['params']['passed_data']['csv_file'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] + perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + independent_variables = input_data_dict['params']['passed_data']['independent_variables'] + dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] + new_values = input_data_dict['params']['passed_data']['new_values'] + root = input_data_dict['root'] + # The ML forecaster object + csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # Fit the ML model + prediction = csv.predict(perform_backtest=perform_backtest) + + csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] + csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] + csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + # Publish Load forecast + idx = 0 + input_data_dict['rh'].post_data(prediction, idx, + csv_predict_entity_id, + csv_predict_unit_of_measurement, + csv_predict_friendly_name, + type_var = 'csv_predictor') + return prediction + def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ 
b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. + :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. 
+ + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index f5fa82de..52397c87 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -307,6 +307,8 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, state = np.round(data_df.loc[data_df.index[idx]],4) elif type_var == 'optim_status': state = data_df.loc[data_df.index[idx]] + elif type_var == 'csv_predictor': + state = data_df[idx] else: state = np.round(data_df.loc[data_df.index[idx]],2) if type_var == 'power': @@ -338,6 +340,14 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, "friendly_name": friendly_name } } + elif type_var == 'csv_predictor': + data = { + "state": state, + "attributes": { + "unit_of_measurement": unit_of_measurement, + "friendly_name": friendly_name + } + } else: data = { "state": "{:.2f}".format(state), diff --git a/src/emhass/utils.py b/src/emhass/utils.py index a2b0df8e..44152dd4 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,6 +155,16 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) + if set_type == "csv-predict": + csv_file = runtimeparams['csv_file'] + independent_variables = runtimeparams['independent_variables'] + dependent_variable = runtimeparams['dependent_variable'] + new_values = runtimeparams['new_values'] + params['passed_data']['csv_file'] = csv_file + params['passed_data']['independent_variables'] = independent_variables + params['passed_data']['dependent_variable'] = dependent_variable + params['passed_data']['new_values'] = new_values + # Treating special data passed for MPC control case if set_type == 'naive-mpc-optim': if 'prediction_horizon' not in runtimeparams.keys(): @@ -281,6 +291,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = 
runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name + if 'csv_predict_entity_id' not in runtimeparams.keys(): + csv_predict_entity_id = "sensor.csv_predictor" + else: + csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] + params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id + if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): + csv_predict_unit_of_measurement = None + else: + csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] + params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement + if 'csv_predict_friendly_name' not in runtimeparams.keys(): + csv_predict_friendly_name = "Csv predictor" + else: + csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] + params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 2fdd2861..6a4549b8 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,6 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import csv_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -193,6 +194,11 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) + elif action_name == 'csv-predict': + app.logger.info(" >> Performing a csv predict...") + csv_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-predict executed... \n' + return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' From 86b9fec42e9ca8a2219a2cc4cc78d7f4e06b4996 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 060/111] cleanup --- src/emhass/command_line.py | 12 ++++++---- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 24ab6132..0cec14fa 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -467,19 +467,21 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] + # data = copy.deepcopy(input_data_dict['df_input_data']) + # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] + # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) # Fit the ML model - prediction = csv.predict(perform_backtest=perform_backtest) + prediction = csv.predict() + # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. 
:type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From 21e486cd45cc92f805697d689308cc939be16f85 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 061/111] more cleanup --- src/emhass/command_line.py | 17 +++---- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 2 files changed, 40 insertions(+), 69 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 0cec14fa..b82f96b5 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -455,8 +455,8 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, return df_pred_optim, mlf def csv_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]: - """Perform a forecast model fit from training data retrieved from Home Assistant. + debug: Optional[bool] = False) -> np.ndarray: + """Perform a prediction from csv file. :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict @@ -464,29 +464,24 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger, :type logger: logging.Logger :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest and the `CsvPredictor` object - :rtype: Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ - # data = copy.deepcopy(input_data_dict['df_input_data']) - # model_type = input_data_dict['params']['passed_data']['model_type'] csv_file = input_data_dict['params']['passed_data']['csv_file'] sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] new_values = input_data_dict['params']['passed_data']['new_values'] root = input_data_dict['root'] # The ML forecaster object - # csv = CsvPredictor(data, model_type, csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) - # Fit the ML model + # Predict from csv file prediction = csv.predict() - # prediction = csv.predict(perform_backtest=perform_backtest) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] - # Publish Load forecast + # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, csv_predict_entity_id, diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 483898f815b1b9f8449fa64009363b4afadc7089 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 062/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From 3559c2842feed5291b05bde3fe6d4d96e758a45e Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 063/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
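At this stage of the series the predictor is still single-shot: the constructor takes the CSV location plus the regression settings, and `predict` loads the file, fits, and predicts in one call. A minimal sketch of that flow, reusing the example values from the docstrings above (the `root` path is hypothetical):

```python
import logging

from emhass.csv_predictor import CsvPredictor

logger = logging.getLogger(__name__)
predictor = CsvPredictor(
    csv_file="input_train_data.csv",        # resolved relative to `root`
    independent_variables=["solar", "degree_days"],
    dependent_variable="hours",
    sklearn_model="KNeighborsRegressor",
    new_values=[2.24, 5.68],                # same order as independent_variables
    root="/share",                          # hypothetical config folder
    logger=logger,
)
prediction = predictor.predict()            # np.ndarray with one predicted value
```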
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray:
         data = self.load_data()
         if data is not None:
             X, y = self.prepare_data(data)
-        
+
             if self.sklearn_model == 'LinearRegression':
                 base_model = LinearRegression()
             elif self.sklearn_model == 'ElasticNet':
@@ -127,9 +126,5 @@ def predict(self) -> np.ndarray:
             self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}")
             new_values = np.array([self.new_values])
             prediction = self.forecaster.predict(new_values)
-        
+
             return prediction
-    
-    
-    
-    
\ No newline at end of file

From c928f2b7338474a7595d32116b799d1db776a8f5 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Tue, 13 Feb 2024 11:45:02 +0100
Subject: [PATCH 064/111] Use gridsearchcv and split up fit and predict

---
 src/emhass/command_line.py  |  87 +++++++++++++++---
 src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++-------------
 src/emhass/utils.py         |  16 +++-
 src/emhass/web_server.py    |  11 ++-
 4 files changed, 210 insertions(+), 77 deletions(-)

diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index b82f96b5..ae690ae9 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -155,7 +155,36 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
         if not rh.get_data(days_list, var_list):
             return False
         df_input_data = rh.df_final.copy()
-    elif set_type == "csv-predict":
+
+    elif set_type == "csv-model-fit":
+
+        df_input_data_dayahead = None
+        P_PV_forecast, P_load_forecast = None, None
+        params = json.loads(params)
+        days_list = None
+        csv_file = params['passed_data']['csv_file']
+        independent_variables = params['passed_data']['independent_variables']
+        dependent_variable = params['passed_data']['dependent_variable']
+        timestamp = params['passed_data']['timestamp']
+        filename_path = pathlib.Path(base_path) / csv_file
+        if filename_path.is_file():
+            df_input_data = pd.read_csv(filename_path, parse_dates=True)
+
+        else:
+            logger.error("The CSV file was not found.")
+            raise ValueError("The CSV file " + csv_file + " was not found.")
+        required_columns = []
+        required_columns.extend(independent_variables)
+        required_columns.append(dependent_variable)
+        if timestamp is not None:
+            required_columns.append(timestamp)
+
+        if not set(required_columns).issubset(df_input_data.columns):
+            logger.error("The CSV file does not contain the required columns.")
+            raise ValueError(
+                f"CSV file should contain the following columns: {', '.join(required_columns)}"
+            )
+    elif set_type == "csv-model-predict":
         df_input_data, df_input_data_dayahead = None, None
         P_PV_forecast, P_load_forecast = None, None
         days_list = None
         params = json.loads(params)
 
@@ -454,7 +483,41 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger,
         pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL)
     return df_pred_optim, mlf
 
+def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
+                debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]:
+    """Perform a forecast model fit from training data retrieved from a CSV file.
+
+    :param input_data_dict: A dictionary with multiple data used by the action functions
+    :type input_data_dict: dict
+    :param logger: The passed logger object
+    :type logger: logging.Logger
+    :param debug: True to debug, useful for unit testing, defaults to False
+    :type debug: Optional[bool], optional
+    :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object
+    :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster]
+    """
+    data = copy.deepcopy(input_data_dict['df_input_data'])
+    # csv_file = input_data_dict['params']['passed_data']['csv_file']
+    model_type = input_data_dict['params']['passed_data']['model_type']
+    # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model']
+    independent_variables = input_data_dict['params']['passed_data']['independent_variables']
+    dependent_variable = input_data_dict['params']['passed_data']['dependent_variable']
+    timestamp = input_data_dict['params']['passed_data']['timestamp']
+    # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest']
+    date_features = input_data_dict['params']['passed_data']['date_features']
+    root = input_data_dict['root']
+    # The ML forecaster object
+    csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger)
+    # Fit the ML model
+    df_pred = csv.fit(date_features=date_features)
+    # Save model
+    if not debug:
+        filename = model_type+'_csv.pkl'
+        with open(pathlib.Path(root) / filename, 'wb') as outp:
+            pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL)
+    # return df_pred, csv
 
-def csv_predict(input_data_dict: dict, logger: logging.Logger,
+def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
                   debug: Optional[bool] = False) -> np.ndarray:
     """Perform a prediction from a CSV file.
 
     :param input_data_dict: A dictionary with multiple data used by the action functions
@@ -467,16 +530,20 @@ def csv_predict(input_data_dict: dict, logger: logging.Logger,
    :return: The np.ndarray containing the predicted value.
:rtype: np.ndarray """ - csv_file = input_data_dict['params']['passed_data']['csv_file'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - new_values = input_data_dict['params']['passed_data']['new_values'] + model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - # The ML forecaster object - csv = CsvPredictor(csv_file, independent_variables, dependent_variable, sklearn_model, new_values, root, logger) + filename = model_type+'_csv.pkl' + filename_path = pathlib.Path(root) / filename + if not debug: + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + csv = pickle.load(inp) + else: + logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + return + new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict() + prediction = csv.predict(new_values) csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
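Patch 064 turns the one-shot predictor into a fit/predict pair that communicates through a pickle on disk, so training and prediction can be triggered independently. A hedged sketch of the intended two-step call sequence against the web server (the `/action/<name>` endpoint shape and port follow the existing EMHASS actions; file name and values are hypothetical):

```python
import requests

base = "http://localhost:5000/action"
# 1) Fit from a CSV file; persists <model_type>_csv.pkl under the config folder:
fit_params = {
    "csv_file": "heating_hours.csv",             # hypothetical training data
    "model_type": "heating_hours",
    "independent_variables": ["degree_days", "solar"],
    "dependent_variable": "hours",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
}
requests.post(f"{base}/csv-model-fit", json=fit_params, timeout=60)
# 2) Predict later from the persisted model; order matches independent_variables:
predict_params = {"model_type": "heating_hours", "new_values": [12.8, 4.8]}
requests.post(f"{base}/csv-model-predict", json=predict_params, timeout=60)
```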
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl
         :param logger: The passed logger object
         :type logger: logging.Logger
         """
-        self.csv_file = csv_file
+        self.data = data
         self.independent_variables = independent_variables
         self.dependent_variable = dependent_variable
-        self.sklearn_model = sklearn_model
-        self.new_values = new_values
-        self.root = root
+        self.timestamp = timestamp
+        self.model_type = model_type
         self.logger = logger
         self.is_tuned = False
+        self.data.sort_index(inplace=True)
+        self.data = self.data[~self.data.index.duplicated(keep='first')]
+
+    @staticmethod
+    def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame:
+        """Add date features from the input DataFrame timestamp
 
-    def load_data(self) -> pd.DataFrame:
-        """Load the data."""
-        filename_path = pathlib.Path(self.root) / self.csv_file
-        if filename_path.is_file():
-            with open(filename_path, 'rb') as inp:
-                data = pd.read_csv(inp)
-        else:
-            self.logger.error("The cvs file was not found.")
-            raise ValueError("The CSV file " + self.csv_file + " was not found.")
-
-        required_columns = self.independent_variables
-
-        if not set(required_columns).issubset(data.columns):
-            raise ValueError(
-                f"CSV file should contain the following columns: {', '.join(required_columns)}"
-            )
-        return data
-
-    def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]:
+        :param data: The input DataFrame
+        :type data: pd.DataFrame
+        :return: The DataFrame with the added features
+        :rtype: pd.DataFrame
+        """
+        df = copy.deepcopy(data)
+        df['timestamp']= pd.to_datetime(df['timestamp'])
+        if 'year' in date_features:
+            df['year'] = [i.month for i in df['timestamp']]
+        if 'month' in date_features:
+            df['month'] = [i.month for i in df['timestamp']]
+        if 'day_of_week' in date_features:
+            df['day_of_week'] = [i.dayofweek for i in df['timestamp']]
+        if 'day_of_year' in date_features:
+            df['day_of_year'] = [i.dayofyear for i in df['timestamp']]
+        if 'day' in date_features:
+            df['day'] = [i.day for i in df['timestamp']]
+        if 'hour' in date_features:
+            df['hour'] = [i.hour for i in df['timestamp']]
+
+        return df
+
+    def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]:
         """
-        Prepare the data.
+        Fit the model using the provided data.
 
         :param data: Input Data
         :type data: pd.DataFrame
-        :return: A tuple containing the train data.
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 44152dd4..4931d8c2 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,14 +155,26 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-predict": + if set_type == "csv-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] - new_values = runtimeparams['new_values'] params['passed_data']['csv_file'] = csv_file params['passed_data']['independent_variables'] = independent_variables params['passed_data']['dependent_variable'] = dependent_variable + if 'timestamp' not in runtimeparams.keys(): + params['passed_data']['timestamp'] = None + else: + timestamp = runtimeparams['timestamp'] + params['passed_data']['timestamp'] = timestamp + if 'date_features' not in runtimeparams.keys(): + params['passed_data']['date_features'] = [] + else: + date_features = runtimeparams['date_features'] + params['passed_data']['date_features'] = date_features + + if set_type == "csv-model-predict": + new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values # Treating special data passed for MPC control case diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 6a4549b8..ad71bb1e 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_predict +from emhass.command_line import csv_model_fit, csv_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,9 +194,14 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-predict': + elif action_name == 'csv-model-fit': + app.logger.info(" >> Performing a csv fit...") + 
csv_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action csv-fit executed... \n' + return make_response(msg, 201) + elif action_name == 'csv-model-predict': app.logger.info(" >> Performing a csv predict...") - csv_predict(input_data_dict, app.logger) + csv_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action csv-predict executed... \n' return make_response(msg, 201) else: From e744c5e9af9b97cb534824227c3fbfc1457c68ed Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 065/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. From cf5657f85107865978596853a2c3a7578efe943b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 066/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From b2d1eb2177391cc56ef0e2c56e72bfb1a4c3d79b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 067/111] cleanup + docstrings --- src/emhass/command_line.py | 17 ++++---------- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 34 
deletions(-)

diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index ae690ae9..55d8f74e 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -484,7 +484,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger,
     return df_pred_optim, mlf
 
 def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
-                debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, CsvPredictor]:
+                debug: Optional[bool] = False) -> None:
     """Perform a forecast model fit from training data retrieved from a CSV file.
 
     :param input_data_dict: A dictionary with multiple data used by the action functions
@@ -493,32 +493,26 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
     :type logger: logging.Logger
     :param debug: True to debug, useful for unit testing, defaults to False
     :type debug: Optional[bool], optional
-    :return: The DataFrame containing the forecast data results without and with backtest and the `mlforecaster` object
-    :rtype: Tuple[pd.DataFrame, pd.DataFrame, mlforecaster]
     """
     data = copy.deepcopy(input_data_dict['df_input_data'])
-    # csv_file = input_data_dict['params']['passed_data']['csv_file']
     model_type = input_data_dict['params']['passed_data']['model_type']
-    # sklearn_model = input_data_dict['params']['passed_data']['sklearn_model']
     independent_variables = input_data_dict['params']['passed_data']['independent_variables']
     dependent_variable = input_data_dict['params']['passed_data']['dependent_variable']
     timestamp = input_data_dict['params']['passed_data']['timestamp']
-    # perform_backtest = input_data_dict['params']['passed_data']['perform_backtest']
     date_features = input_data_dict['params']['passed_data']['date_features']
     root = input_data_dict['root']
-    # The ML forecaster object
+    # The CSV forecaster object
     csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger)
     # Fit the ML model
-    df_pred = csv.fit(date_features=date_features)
+    csv.fit(date_features=date_features)
     # Save model
     if not debug:
         filename = model_type+'_csv.pkl'
         with open(pathlib.Path(root) / filename, 'wb') as outp:
             pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL)
 
 def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
-                  debug: Optional[bool] = False) -> np.ndarray:
+                  debug: Optional[bool] = False) -> None:
     """Perform a prediction from a CSV file.
 
     :param input_data_dict: A dictionary with multiple data used by the action functions
@@ -527,8 +521,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
     :type logger: logging.Logger
     :param debug: True to debug, useful for unit testing, defaults to False
     :type debug: Optional[bool], optional
-    :return: The np.ndarray containing the predicted value.
-    :rtype: np.ndarray
     """
     model_type = input_data_dict['params']['passed_data']['model_type']
     root = input_data_dict['root']
@@ -555,7 +547,6 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
                                     csv_predict_unit_of_measurement,
                                     csv_predict_friendly_name,
                                     type_var = 'csv_predictor')
-    return prediction
 
 def publish_data(input_data_dict: dict, logger: logging.Logger,
     save_data_to_file: Optional[bool] = False,
diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py
index 1e46927d..57d61791 100644
--- a/src/emhass/csv_predictor.py
+++ b/src/emhass/csv_predictor.py
@@ -25,32 +25,30 @@ class CsvPredictor:
 
     This class uses the `sklearn` module and the machine learning models are from `scikit-learn`.
- It exposes one main method: + It exposes two main methods: - - `predict`: to obtain a forecast from a csv file. + - `fit`: to train a model with the passed data. + + - `predict`: to obtain a forecast from a pre-trained model. """ def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param csv_file: The name of the csv file to retrieve data from. \ - Example: `input_train_data.csv`. - :type csv_file: str + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. :type independent_variables: list :param dependent_variable: The dependent variable(to be predicted). \ Example: `hours`. :type dependent_variable: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ - Example: [2.24, 5.68]. - :type new_values: list - :param root: The parent folder of the path where the config.yaml file is located - :type root: str + :param timestamp: If defined, the column key that has to be used of timestamp. + :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ @@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.timestamp = timestamp self.model_type = model_type self.logger = logger - self.is_tuned = False self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame :type data: pd.DataFrame + :param timestamp: The column containing the timestamp + :type timestamp: str :return: The DataFrame with the added features :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df['timestamp']= pd.to_datetime(df['timestamp']) + df[timestamp]= pd.to_datetime(df['timestamp']) if 'year' in date_features: - df['year'] = [i.month for i in df['timestamp']] + df['year'] = [i.year for i in df['timestamp']] if 'month' in date_features: df['month'] = [i.month for i in df['timestamp']] if 'day_of_week' in date_features: @@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - :param data: Input Data - :type data: pd.DataFrame + :param date_features: A list of 'date_features' to take into account when fitting the model. 
+ :type data: list """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for "+self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.independent_variables] = self.data[self.independent_variables] self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] @@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") @@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None: def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. + + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ From 714f66bbd09f26d7f7ae7277e712c65ead0c51df Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 11 Mar 2024 09:59:27 +0100 Subject: [PATCH 068/111] add other regression methods --- src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 57d61791..2b6fb86a 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -9,9 +9,10 @@ import pandas as pd import numpy as np +from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor from sklearn.metrics import r2_score -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - # Define the model - self.model = Pipeline([ - ('scaler', StandardScaler()), - ('regressor', LinearRegression()) - ]) - # Define the parameters to tune - param_grid = { - 'regressor__fit_intercept': [True, False], - 'regressor__positive': [True, False], - } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # Fit the grid search object to the data - self.logger.info("Fitting the model...") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + regression_methods = [ + ('Linear Regression', LinearRegression(), {}), + ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 
1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid = { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 667611c4d779afe0f30d5b4bcd1a1821300d1c07 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:10:15 +0100 Subject: [PATCH 069/111] add --editable --- .vscode/tasks.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index ffe440eb..ee23d121 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,7 +9,11 @@ "isDefault": true }, "args": [ - "install", "--no-deps", "--force-reinstall", "." + "install", + "--no-deps", + "--force-reinstall", + "--editable", + "." 
], "presentation": { "echo": true, From a63546cba66cf7b2a546715d763881a2f25d347f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:17 +0100 Subject: [PATCH 070/111] Add sklearn model --- src/emhass/command_line.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 55d8f74e..f08f108c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -496,13 +496,14 @@ def csv_model_fit(input_data_dict: dict, logger: logging.Logger, """ data = copy.deepcopy(input_data_dict['df_input_data']) model_type = input_data_dict['params']['passed_data']['model_type'] + sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] independent_variables = input_data_dict['params']['passed_data']['independent_variables'] dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] timestamp = input_data_dict['params']['passed_data']['timestamp'] date_features = input_data_dict['params']['passed_data']['date_features'] root = input_data_dict['root'] # The CSV forecaster object - csv = CsvPredictor(data, model_type, independent_variables, dependent_variable, timestamp, logger) + csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) # Fit the ML model csv.fit(date_features=date_features) # Save model From 6f720eb92effbf3a9c630a3edfa030c431a957fe Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 071/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
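Patch 071 below wires every supported regressor into a scaled pipeline that is tuned per model with `GridSearchCV`. Reduced to a self-contained sketch on synthetic data (the parameter grid is the one the diff defines for `LinearRegression`; `make_pipeline` names each step after its lower-cased class, hence the `linearregression__` prefix):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(42)
X = rng.random((200, 2))                         # stand-in for the CSV features
y = 3 * X[:, 0] - 2 * X[:, 1] + 0.1 * rng.random(200)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = make_pipeline(StandardScaler(), LinearRegression())
param_grid = {
    "linearregression__fit_intercept": [True, False],
    "linearregression__positive": [True, False],
}
search = GridSearchCV(model, param_grid, cv=5, scoring="neg_mean_squared_error",
                      refit=True, n_jobs=-1)
search.fit(X_train, y_train)
best_model = search.best_estimator_              # kept as self.model in the class
print(best_model.predict(X_test[:3]))
```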
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}")
 
 
     def predict(self, new_values:list) -> np.ndarray:

From be01f8fa75d5359e5397d476d29be9570b1426b2 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Fri, 15 Mar 2024 12:42:27 +0100
Subject: [PATCH 072/111] change to MLRegressor

---
 src/emhass/command_line.py                    |  40 +++++++++----------
 ...ictor.py => machine_learning_regressor.py} |   4 +-
 src/emhass/utils.py                           |  28 ++++++-------
 src/emhass/web_server.py                      |  18 ++++-----
 4 files changed, 45 insertions(+), 45 deletions(-)
 rename src/emhass/{csv_predictor.py => machine_learning_regressor.py} (98%)

diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index f08f108c..d0a8f71a 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -20,7 +20,7 @@
 from emhass.forecast import Forecast
 from emhass.machine_learning_forecaster import MLForecaster
 from emhass.optimization import Optimization
-from emhass.csv_predictor import CsvPredictor
+from emhass.machine_learning_regressor import MLRegressor
 from emhass import utils
 
 
@@ -156,7 +156,7 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
             return False
         df_input_data = rh.df_final.copy()
 
-    elif set_type == "csv-model-fit":
+    elif set_type == "regressor-model-fit":
 
         df_input_data_dayahead = None
         P_PV_forecast, P_load_forecast = None, None
@@ -184,7 +184,7 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
             raise ValueError(
                 f"CSV file should contain the following columns: {', '.join(required_columns)}"
            )
-    elif set_type == "csv-model-predict":
+    elif set_type == "regressor-model-predict":
         df_input_data, df_input_data_dayahead = None, None
         P_PV_forecast, P_load_forecast = None, None
         days_list = None
@@ -483,7 +483,7 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger,
         pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL)
     return df_pred_optim, mlf
 
-def csv_model_fit(input_data_dict: dict, logger: logging.Logger,
+def regressor_model_fit(input_data_dict: dict, logger: logging.Logger,
                 debug: Optional[bool] = False) -> None:
     """Perform a forecast model fit from training data retrieved from a CSV file.
 
@@ -502,17 +502,17 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger,
     timestamp = input_data_dict['params']['passed_data']['timestamp']
     date_features = input_data_dict['params']['passed_data']['date_features']
     root = input_data_dict['root']
-    # The CSV forecaster object
-    csv = CsvPredictor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger)
+    # The MLRegressor object
+    mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger)
     # Fit the ML model
-    csv.fit(date_features=date_features)
+    mlr.fit(date_features=date_features)
     # Save model
     if not debug:
-        filename = model_type+'_csv.pkl'
+        filename = model_type+'_mlr.pkl'
         with open(pathlib.Path(root) / filename, 'wb') as outp:
-            pickle.dump(csv, outp, pickle.HIGHEST_PROTOCOL)
+            pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL)
 
-def csv_model_predict(input_data_dict: dict, logger: logging.Logger,
+def regressor_model_predict(input_data_dict: dict, logger: logging.Logger,
                   debug: Optional[bool] = False) -> None:
     """Perform a prediction from a CSV file.
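Fit and predict now share state only through the pickled regressor on disk; a minimal sketch of that round-trip outside the web server (the data folder is hypothetical, the `<model_type>_mlr.pkl` naming comes from the hunk above):

```python
import pathlib
import pickle

model_path = pathlib.Path("/share") / "heating_hours_mlr.pkl"  # <model_type>_mlr.pkl
with open(model_path, "rb") as inp:        # written earlier by regressor_model_fit
    mlr = pickle.load(inp)
print(mlr.predict([12.8, 4.8]))            # values in independent_variables order
```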
@@ -525,29 +525,29 @@ def csv_model_predict(input_data_dict: dict, logger: logging.Logger, """ model_type = input_data_dict['params']['passed_data']['model_type'] root = input_data_dict['root'] - filename = model_type+'_csv.pkl' + filename = model_type+'_mlr.pkl' filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): with open(filename_path, 'rb') as inp: - csv = pickle.load(inp) + mlr = pickle.load(inp) else: logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") return new_values = input_data_dict['params']['passed_data']['new_values'] # Predict from csv file - prediction = csv.predict(new_values) + prediction = mlr.predict(new_values) - csv_predict_entity_id = input_data_dict['params']['passed_data']['csv_predict_entity_id'] - csv_predict_unit_of_measurement = input_data_dict['params']['passed_data']['csv_predict_unit_of_measurement'] - csv_predict_friendly_name = input_data_dict['params']['passed_data']['csv_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] + mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] + mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] # Publish prediction idx = 0 input_data_dict['rh'].post_data(prediction, idx, - csv_predict_entity_id, - csv_predict_unit_of_measurement, - csv_predict_friendly_name, - type_var = 'csv_predictor') + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var = 'mlregressor') def publish_data(input_data_dict: dict, logger: logging.Logger, save_data_to_file: Optional[bool] = False, diff --git a/src/emhass/csv_predictor.py b/src/emhass/machine_learning_regressor.py similarity index 98% rename from src/emhass/csv_predictor.py rename to src/emhass/machine_learning_regressor.py index 3ffeba27..d70df3ec 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/machine_learning_regressor.py @@ -20,7 +20,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) -class CsvPredictor: +class MLRegressor: r""" A forecaster class using machine learning models. 
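The renamed predict action also publishes to a configurable sensor; an example payload exercising the `mlr_predict_*` runtime parameters, whose defaults are installed by the `utils.py` hunk further below (host, model name, and values are hypothetical):

```python
import requests

payload = {
    "model_type": "heating_hours",
    "new_values": [12.8, 4.8],
    "mlr_predict_entity_id": "sensor.mlr_predict",   # also the default if omitted
    "mlr_predict_unit_of_measurement": "h",
    "mlr_predict_friendly_name": "Heating hours prediction",
}
requests.post("http://localhost:5000/action/regressor-model-predict",
              json=payload, timeout=60)
```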
@@ -115,7 +115,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 4931d8c2..38a4e424 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -155,7 +155,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic freq = int(retrieve_hass_conf['freq'].seconds/60.0) delta_forecast = int(optim_conf['delta_forecast'].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "csv-model-fit": + if set_type == "regressor-model-fit": csv_file = runtimeparams['csv_file'] independent_variables = runtimeparams['independent_variables'] dependent_variable = runtimeparams['dependent_variable'] @@ -173,7 +173,7 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic date_features = runtimeparams['date_features'] params['passed_data']['date_features'] = date_features - if set_type == "csv-model-predict": + if set_type == "regressor-model-predict": new_values = runtimeparams['new_values'] params['passed_data']['new_values'] = new_values @@ -303,21 +303,21 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'csv_predict_entity_id' not in runtimeparams.keys(): - csv_predict_entity_id = "sensor.csv_predictor" + if 'mlr_predict_entity_id' not in runtimeparams.keys(): + mlr_predict_entity_id = "sensor.mlr_predict" else: - csv_predict_entity_id = runtimeparams['csv_predict_entity_id'] - params['passed_data']['csv_predict_entity_id'] = csv_predict_entity_id - if 'csv_predict_unit_of_measurement' not in runtimeparams.keys(): - csv_predict_unit_of_measurement = None + mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] + params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id + if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = None else: - csv_predict_unit_of_measurement = runtimeparams['csv_predict_unit_of_measurement'] - params['passed_data']['csv_predict_unit_of_measurement'] = csv_predict_unit_of_measurement - if 'csv_predict_friendly_name' not in runtimeparams.keys(): - csv_predict_friendly_name = "Csv predictor" + mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] + params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement + if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_friendly_name = "mlr predictor" else: - csv_predict_friendly_name = runtimeparams['csv_predict_friendly_name'] - params['passed_data']['csv_predict_friendly_name'] = csv_predict_friendly_name + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name # Treat optimization configuration parameters passed at runtime if 'num_def_loads' in runtimeparams.keys(): optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] diff 
--git a/src/emhass/web_server.py b/src/emhass/web_server.py index ad71bb1e..50241590 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,7 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune -from emhass.command_line import csv_model_fit, csv_model_predict +from emhass.command_line import regressor_model_fit, regressor_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -194,15 +194,15 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) - elif action_name == 'csv-model-fit': - app.logger.info(" >> Performing a csv fit...") - csv_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-fit executed... \n' + elif action_name == 'regressor-model-fit': + app.logger.info(" >> Performing a regressor fit...") + regressor_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-fit executed... \n' return make_response(msg, 201) - elif action_name == 'csv-model-predict': - app.logger.info(" >> Performing a csv predict...") - csv_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action csv-predict executed... \n' + elif action_name == 'regressor-model-predict': + app.logger.info(" >> Performing a regressor predict...") + regressor_model_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 7430bf0dc06f36e526909314af4847e3779e6380 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 073/111] change naming and some formatting --- src/emhass/command_line.py | 746 ++++++++++++------- src/emhass/machine_learning_regressor.py | 285 ++++---- src/emhass/retrieve_hass.py | 318 +++++--- src/emhass/utils.py | 887 +++++++++++++++-------- 4 files changed, 1397 insertions(+), 839 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index d0a8f71a..0c094c96 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -8,14 +8,15 @@ import json import copy import pickle -import time -import numpy as np -import pandas as pd from datetime import datetime, timezone from typing import Optional, Tuple +from importlib.metadata import version +import numpy as np +import pandas as pd + from distutils.util import strtobool -from importlib.metadata import version + from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster @@ -54,8 +55,14 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, emhass_conf, use_secrets=not(get_data_from_file), params=params) # Treat runtimeparams params, retrieve_hass_conf, optim_conf, plant_conf = utils.treat_runtimeparams( - runtimeparams, params, retrieve_hass_conf, - optim_conf, plant_conf, set_type, logger) + runtimeparams, + params, + retrieve_hass_conf, + optim_conf, + plant_conf, + set_type, + logger, + ) # Define main objects rh = RetrieveHass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'], retrieve_hass_conf['freq'], 
retrieve_hass_conf['time_zone'], @@ -72,35 +79,53 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, with open(emhass_conf['data_path'] / 'test_df_final.pkl', 'rb') as inp: rh.df_final, days_list, var_list = pickle.load(inp) else: - days_list = utils.get_days_list(retrieve_hass_conf['days_to_retrieve']) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): - return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + days_list = utils.get_days_list(retrieve_hass_conf["days_to_retrieve"]) + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): + return False + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # What we don't need for this type of action P_PV_forecast, P_load_forecast, df_input_data_dayahead = None, None, None elif set_type == "dayahead-optim": # Get PV and load forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) P_PV_forecast = fcst.get_power_from_weather(df_weather) P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method']) if isinstance(P_load_forecast,bool) and not P_load_forecast: logger.error("Unable to get sensor power photovoltaics, or sensor power load no var loads. 
Check HA sensors and their daily data") return False - df_input_data_dayahead = pd.DataFrame(np.transpose(np.vstack([P_PV_forecast.values,P_load_forecast.values])), - index=P_PV_forecast.index, - columns=['P_PV_forecast', 'P_load_forecast']) + df_input_data_dayahead = pd.DataFrame( + np.transpose(np.vstack([P_PV_forecast.values, P_load_forecast.values])), + index=P_PV_forecast.index, + columns=["P_PV_forecast", "P_load_forecast"], + ) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] # What we don't need for this type of action df_input_data, days_list = None, None elif set_type == "naive-mpc-optim": @@ -110,14 +135,21 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, rh.df_final, days_list, var_list = pickle.load(inp) else: days_list = utils.get_days_list(1) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # Get PV and load forecasts @@ -129,43 +161,56 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, return False df_input_data_dayahead = pd.concat([P_PV_forecast, P_load_forecast], axis=1) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) - df_input_data_dayahead.columns = ['P_PV_forecast', 'P_load_forecast'] + df_input_data_dayahead.columns = ["P_PV_forecast", "P_load_forecast"] params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] - elif set_type == "forecast-model-fit" or set_type == "forecast-model-predict" or set_type == "forecast-model-tune": + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = 
params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] + elif ( + set_type == "forecast-model-fit" + or set_type == "forecast-model-predict" + or set_type == "forecast-model-tune" + ): df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) # Retrieve data from hass - days_to_retrieve = params['passed_data']['days_to_retrieve'] - model_type = params['passed_data']['model_type'] - var_model = params['passed_data']['var_model'] + days_to_retrieve = params["passed_data"]["days_to_retrieve"] + model_type = params["passed_data"]["model_type"] + var_model = params["passed_data"]["var_model"] if get_data_from_file: days_list = None filename = 'data_train_'+model_type+'.pkl' filename_path = emhass_conf['data_path'] / filename with open(filename_path, 'rb') as inp: df_input_data, _ = pickle.load(inp) - df_input_data = df_input_data[df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve):] + df_input_data = df_input_data[ + df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve) : + ] else: days_list = utils.get_days_list(days_to_retrieve) var_list = [var_model] if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() - + elif set_type == "regressor-model-fit": - + df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None - csv_file = params['passed_data']['csv_file'] - independent_variables = params['passed_data']['independent_variables'] - dependent_variable = params['passed_data']['dependent_variable'] - timestamp = params['passed_data']['timestamp'] + csv_file = params["passed_data"]["csv_file"] + features = params["passed_data"]["features"] + target = params["passed_data"]["target"] + timestamp = params["passed_data"]["timestamp"] filename_path = pathlib.Path(base_path) / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -174,8 +219,8 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, logger.error("The cvs file was not found.") raise ValueError("The CSV file " + csv_file + " was not found.") required_columns = [] - required_columns.extend(independent_variables) - required_columns.append(dependent_variable) + required_columns.extend(features) + required_columns.append(target) if timestamp is not None: required_columns.append(timestamp) @@ -189,13 +234,15 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, P_PV_forecast, P_load_forecast = None, None days_list = None params = json.loads(params) - + elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None else: - logger.error("The passed action argument and hence the set_type parameter for setup is not valid") + logger.error( + "The passed action argument and hence the set_type parameter for setup is not valid" + ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -216,12 +263,17 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, 'days_list': days_list } return input_data_dict - -def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = True, debug: Optional[bool] = False) -> pd.DataFrame: + + +def perfect_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + 
save_data_to_file: Optional[bool] = True, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the perfect forecast optimization routine. - + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -250,18 +302,23 @@ def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, opt_res = input_data_dict['opt'].perform_perfect_forecast_optim(df_input_data, input_data_dict['days_list']) # Save CSV file for analysis if save_data_to_file: - filename = 'opt_res_perfect_optim_'+input_data_dict['costfun']+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res - -def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + + +def dayahead_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the day-ahead optimization routine. - + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -290,19 +347,26 @@ def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast']) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res_dayahead.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res_dayahead -def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + +def naive_mpc_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the naive Model Predictive Controller optimization routine. 
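As a hedged illustration only, the MPC-specific runtime parameters read further below from `input_data_dict['params']['passed_data']` could be passed like this (all values are examples, not defaults):

    # hypothetical runtime payload for a naive-mpc-optim call; values are illustrative
    runtimeparams = {
        "prediction_horizon": 10,      # number of timesteps in the receding horizon
        "soc_init": 0.4,               # battery state of charge at the first timestep
        "soc_final": 0.6,              # state of charge targeted at the end of the horizon
        "def_total_hours": [2, 1],     # operating hours per deferrable load
        "def_start_timestep": [0, 0],  # earliest start per deferrable load (assumed 0 = unconstrained)
        "def_end_timestep": [0, 0],    # latest end per deferrable load (assumed 0 = unconstrained)
    }
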
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -327,27 +391,39 @@ def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, if isinstance(df_input_data_dayahead,bool) and not df_input_data_dayahead: return False # The specifics params for the MPC at runtime - prediction_horizon = input_data_dict['params']['passed_data']['prediction_horizon'] - soc_init = input_data_dict['params']['passed_data']['soc_init'] - soc_final = input_data_dict['params']['passed_data']['soc_final'] - def_total_hours = input_data_dict['params']['passed_data']['def_total_hours'] - def_start_timestep = input_data_dict['params']['passed_data']['def_start_timestep'] - def_end_timestep = input_data_dict['params']['passed_data']['def_end_timestep'] - opt_res_naive_mpc = input_data_dict['opt'].perform_naive_mpc_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast'], - prediction_horizon, soc_init, soc_final, def_total_hours, def_start_timestep, def_end_timestep) + prediction_horizon = input_data_dict["params"]["passed_data"]["prediction_horizon"] + soc_init = input_data_dict["params"]["passed_data"]["soc_init"] + soc_final = input_data_dict["params"]["passed_data"]["soc_final"] + def_total_hours = input_data_dict["params"]["passed_data"]["def_total_hours"] + def_start_timestep = input_data_dict["params"]["passed_data"]["def_start_timestep"] + def_end_timestep = input_data_dict["params"]["passed_data"]["def_end_timestep"] + opt_res_naive_mpc = input_data_dict["opt"].perform_naive_mpc_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + prediction_horizon, + soc_init, + soc_final, + def_total_hours, + def_start_timestep, + def_end_timestep, + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_naive_mpc_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res_naive_mpc.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res_naive_mpc -def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: + +def forecast_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: """Perform a forecast model fit from training data retrieved from Home Assistant. 
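A hypothetical `passed_data` payload for this action, shown only as a sketch (the sensor name and every value are assumptions, not defaults):

    passed_data = {
        "days_to_retrieve": 9,                          # history window pulled from Home Assistant
        "model_type": "load_forecast",                  # tag used to name the saved <model_type>_mlf.pkl
        "var_model": "sensor.power_load_no_var_loads",  # the sensor to learn from
        "sklearn_model": "KNeighborsRegressor",         # scikit-learn regressor used by MLForecaster
        "num_lags": 48,                                 # autoregressive lags
        "split_date_delta": "48h",                      # train/test split offset
        "perform_backtest": False,
    }
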
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -369,8 +445,9 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, # The ML forecaster object mlf = MLForecaster(data, model_type, var_model, sklearn_model, num_lags, input_data_dict['emhass_conf'], logger) # Fit the ML model - df_pred, df_pred_backtest = mlf.fit(split_date_delta=split_date_delta, - perform_backtest=perform_backtest) + df_pred, df_pred_backtest = mlf.fit( + split_date_delta=split_date_delta, perform_backtest=perform_backtest + ) # Save model if not debug: filename = model_type+'_mlf.pkl' @@ -379,9 +456,14 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred, df_pred_backtest, mlf -def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, - use_last_window: Optional[bool] = True, debug: Optional[bool] = False, - mlf: Optional[MLForecaster] = None) -> pd.DataFrame: + +def forecast_model_predict( + input_data_dict: dict, + logger: logging.Logger, + use_last_window: Optional[bool] = True, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> pd.DataFrame: r"""Perform a forecast model predict using a previously trained skforecast model. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -408,46 +490,73 @@ def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, filename_path = input_data_dict['emhass_conf']['data_path'] / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return # Make predictions if use_last_window: - data_last_window = copy.deepcopy(input_data_dict['df_input_data']) + data_last_window = copy.deepcopy(input_data_dict["df_input_data"]) else: data_last_window = None predictions = mlf.predict(data_last_window) # Publish data to a Home Assistant sensor - model_predict_publish = input_data_dict['params']['passed_data']['model_predict_publish'] - model_predict_entity_id = input_data_dict['params']['passed_data']['model_predict_entity_id'] - model_predict_unit_of_measurement = input_data_dict['params']['passed_data']['model_predict_unit_of_measurement'] - model_predict_friendly_name = input_data_dict['params']['passed_data']['model_predict_friendly_name'] - publish_prefix = input_data_dict['params']['passed_data']['publish_prefix'] + model_predict_publish = input_data_dict["params"]["passed_data"][ + "model_predict_publish" + ] + model_predict_entity_id = input_data_dict["params"]["passed_data"][ + "model_predict_entity_id" + ] + model_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "model_predict_unit_of_measurement" + ] + model_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "model_predict_friendly_name" + ] + publish_prefix = input_data_dict["params"]["passed_data"]["publish_prefix"] if model_predict_publish is True: # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = predictions.index.get_indexer([now_precise], 
method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = predictions.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = predictions.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = predictions.index.get_indexer([now_precise], method="ffill")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = predictions.index.get_indexer([now_precise], method="bfill")[ + 0 + ] if idx_closest == -1: - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] # Publish Load forecast - input_data_dict['rh'].post_data(predictions, idx_closest, - model_predict_entity_id, - model_predict_unit_of_measurement, - model_predict_friendly_name, - type_var = 'mlforecaster', - publish_prefix=publish_prefix) + input_data_dict["rh"].post_data( + predictions, + idx_closest, + model_predict_entity_id, + model_predict_unit_of_measurement, + model_predict_friendly_name, + type_var="mlforecaster", + publish_prefix=publish_prefix, + ) return predictions -def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False, mlf: Optional[MLForecaster] = None - ) -> Tuple[pd.DataFrame, MLForecaster]: + +def forecast_model_tune( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> Tuple[pd.DataFrame, MLForecaster]: """Tune a forecast model hyperparameters using bayesian optimization. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -468,10 +577,12 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, filename_path = input_data_dict['emhass_conf']['data_path'] / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this tune method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this tune method" + ) return None, None # Tune the model df_pred_optim = mlf.tune(debug=debug) @@ -483,8 +594,10 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a forecast model fit from training data retrieved from Home Assistant. 
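In practice the training data for this action is the CSV loaded earlier by `set_input_data_dict`, not Home Assistant sensors. A hypothetical `passed_data` payload (the file name and column names are assumptions used only for illustration):

    passed_data = {
        "csv_file": "heating_prediction.csv",       # training data, read with pd.read_csv
        "features": ["degreeday", "solar"],         # feature columns of the CSV
        "target": "hours",                          # column to be predicted
        "timestamp": "timestamp",                   # optional, required to use date_features
        "date_features": ["month", "day_of_week"],
        "model_type": "heating_hours_degreeday",    # tag used to name the saved <model_type>_mlr.pkl
        "sklearn_model": "LinearRegression",        # one of the regression_methods known to MLRegressor
    }
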
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -494,26 +607,30 @@ def regressor_model_fit(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - data = copy.deepcopy(input_data_dict['df_input_data']) - model_type = input_data_dict['params']['passed_data']['model_type'] - sklearn_model = input_data_dict['params']['passed_data']['sklearn_model'] - independent_variables = input_data_dict['params']['passed_data']['independent_variables'] - dependent_variable = input_data_dict['params']['passed_data']['dependent_variable'] - timestamp = input_data_dict['params']['passed_data']['timestamp'] - date_features = input_data_dict['params']['passed_data']['date_features'] - root = input_data_dict['root'] + data = copy.deepcopy(input_data_dict["df_input_data"]) + model_type = input_data_dict["params"]["passed_data"]["model_type"] + sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + features = input_data_dict["params"]["passed_data"]["features"] + target = input_data_dict["params"]["passed_data"]["target"] + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + date_features = input_data_dict["params"]["passed_data"]["date_features"] + root = input_data_dict["root"] # The MLRegressor object - mlr = MLRegressor(data, model_type, sklearn_model, independent_variables, dependent_variable, timestamp, logger) + mlr = MLRegressor( + data, model_type, sklearn_model, features, target, timestamp, logger + ) # Fit the ML model mlr.fit(date_features=date_features) # Save model if not debug: - filename = model_type+'_mlr.pkl' - with open(pathlib.Path(root) / filename, 'wb') as outp: + filename = model_type + "_mlr.pkl" + with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) -def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> None: + +def regressor_model_predict( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> None: """Perform a prediction from csv file. 
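A hypothetical `passed_data` payload for this action; the entity id and friendly name shown are the defaults set in `utils.treat_runtimeparams`, the remaining values are illustrative:

    passed_data = {
        "model_type": "heating_hours_degreeday",   # selects the saved <model_type>_mlr.pkl
        "new_values": [8.2, 7.23],                 # one value per feature, same order as at fit time
        "mlr_predict_entity_id": "sensor.mlr_predict",
        "mlr_predict_unit_of_measurement": "h",    # an assumption for this example
        "mlr_predict_friendly_name": "mlr predictor",
    }
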
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -523,38 +640,53 @@ def regressor_model_predict(input_data_dict: dict, logger: logging.Logger, :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict['params']['passed_data']['model_type'] - root = input_data_dict['root'] - filename = model_type+'_mlr.pkl' + model_type = input_data_dict["params"]["passed_data"]["model_type"] + root = input_data_dict["root"] + filename = model_type + "_mlr.pkl" filename_path = pathlib.Path(root) / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlr = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method" + ) return - new_values = input_data_dict['params']['passed_data']['new_values'] + new_values = input_data_dict["params"]["passed_data"]["new_values"] # Predict from csv file prediction = mlr.predict(new_values) - mlr_predict_entity_id = input_data_dict['params']['passed_data']['mlr_predict_entity_id'] - mlr_predict_unit_of_measurement = input_data_dict['params']['passed_data']['mlr_predict_unit_of_measurement'] - mlr_predict_friendly_name = input_data_dict['params']['passed_data']['mlr_predict_friendly_name'] + mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ + "mlr_predict_entity_id" + ] + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "mlr_predict_unit_of_measurement" + ] + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "mlr_predict_friendly_name" + ] # Publish prediction idx = 0 - input_data_dict['rh'].post_data(prediction, idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var = 'mlregressor') - -def publish_data(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, - opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + + +def publish_data( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + opt_res_latest: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: """ Publish the data obtained from the optimization results. 
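Every `custom_*_id` entry read from `passed_data` below shares the same three-key shape; a sketch with illustrative values:

    custom_pv_forecast_id = {
        "entity_id": "sensor.p_pv_forecast",   # example entity id
        "unit_of_measurement": "W",
        "friendly_name": "PV Power Forecast",
    }
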
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -568,10 +700,12 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, logger.info("Publishing data to HASS instance") # Check if a day ahead optimization has been performed (read CSV file) if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: - filename = 'opt_res_latest.csv' + filename = "opt_res_latest.csv" if opt_res_latest is None: if not os.path.isfile(input_data_dict['emhass_conf']['data_path'] / filename): logger.error("File not found error, run an optimization task first.") @@ -579,144 +713,191 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, else: opt_res_latest = pd.read_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_col='timestamp') opt_res_latest.index = pd.to_datetime(opt_res_latest.index) - opt_res_latest.index.freq = input_data_dict['retrieve_hass_conf']['freq'] + opt_res_latest.index.freq = input_data_dict["retrieve_hass_conf"]["freq"] # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="ffill")[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="bfill")[0] if idx_closest == -1: - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] # Publish the data - params = json.loads(input_data_dict['params']) - publish_prefix = params['passed_data']['publish_prefix'] + params = json.loads(input_data_dict["params"]) + publish_prefix = params["passed_data"]["publish_prefix"] # Publish PV forecast - custom_pv_forecast_id = params['passed_data']['custom_pv_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_PV'], idx_closest, - custom_pv_forecast_id["entity_id"], - custom_pv_forecast_id["unit_of_measurement"], - custom_pv_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) + custom_pv_forecast_id = params["passed_data"]["custom_pv_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_PV"], + idx_closest, + 
custom_pv_forecast_id["entity_id"], + custom_pv_forecast_id["unit_of_measurement"], + custom_pv_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) # Publish Load forecast - custom_load_forecast_id = params['passed_data']['custom_load_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_Load'], idx_closest, - custom_load_forecast_id["entity_id"], - custom_load_forecast_id["unit_of_measurement"], - custom_load_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = ['P_PV', 'P_Load'] + custom_load_forecast_id = params["passed_data"]["custom_load_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_Load"], + idx_closest, + custom_load_forecast_id["entity_id"], + custom_load_forecast_id["unit_of_measurement"], + custom_load_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = ["P_PV", "P_Load"] # Publish deferrable loads - custom_deferrable_forecast_id = params['passed_data']['custom_deferrable_forecast_id'] - for k in range(input_data_dict['opt'].optim_conf['num_def_loads']): + custom_deferrable_forecast_id = params["passed_data"][ + "custom_deferrable_forecast_id" + ] + for k in range(input_data_dict["opt"].optim_conf["num_def_loads"]): if "P_deferrable{}".format(k) not in opt_res_latest.columns: - logger.error("P_deferrable{}".format(k)+" was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + logger.error( + "P_deferrable{}".format(k) + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + ) else: - input_data_dict['rh'].post_data(opt_res_latest["P_deferrable{}".format(k)], idx_closest, - custom_deferrable_forecast_id[k]["entity_id"], - custom_deferrable_forecast_id[k]["unit_of_measurement"], - custom_deferrable_forecast_id[k]["friendly_name"], - type_var = 'deferrable', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_deferrable{}".format(k)] + input_data_dict["rh"].post_data( + opt_res_latest["P_deferrable{}".format(k)], + idx_closest, + custom_deferrable_forecast_id[k]["entity_id"], + custom_deferrable_forecast_id[k]["unit_of_measurement"], + custom_deferrable_forecast_id[k]["friendly_name"], + type_var="deferrable", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_deferrable{}".format(k)] # Publish battery power - if input_data_dict['opt'].optim_conf['set_use_battery']: - if 'P_batt' not in opt_res_latest.columns: - logger.error("P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + if input_data_dict["opt"].optim_conf["set_use_battery"]: + if "P_batt" not in opt_res_latest.columns: + logger.error( + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." 
+ ) else: - custom_batt_forecast_id = params['passed_data']['custom_batt_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_batt'], idx_closest, - custom_batt_forecast_id["entity_id"], - custom_batt_forecast_id["unit_of_measurement"], - custom_batt_forecast_id["friendly_name"], - type_var = 'batt', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_batt"] - custom_batt_soc_forecast_id = params['passed_data']['custom_batt_soc_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['SOC_opt']*100, idx_closest, - custom_batt_soc_forecast_id["entity_id"], - custom_batt_soc_forecast_id["unit_of_measurement"], - custom_batt_soc_forecast_id["friendly_name"], - type_var = 'SOC', - publish_prefix = publish_prefix) - cols_published = cols_published+["SOC_opt"] + custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_batt"], + idx_closest, + custom_batt_forecast_id["entity_id"], + custom_batt_forecast_id["unit_of_measurement"], + custom_batt_forecast_id["friendly_name"], + type_var="batt", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_batt"] + custom_batt_soc_forecast_id = params["passed_data"][ + "custom_batt_soc_forecast_id" + ] + input_data_dict["rh"].post_data( + opt_res_latest["SOC_opt"] * 100, + idx_closest, + custom_batt_soc_forecast_id["entity_id"], + custom_batt_soc_forecast_id["unit_of_measurement"], + custom_batt_soc_forecast_id["friendly_name"], + type_var="SOC", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["SOC_opt"] # Publish grid power - custom_grid_forecast_id = params['passed_data']['custom_grid_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_grid'], idx_closest, - custom_grid_forecast_id["entity_id"], - custom_grid_forecast_id["unit_of_measurement"], - custom_grid_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_grid"] + custom_grid_forecast_id = params["passed_data"]["custom_grid_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_grid"], + idx_closest, + custom_grid_forecast_id["entity_id"], + custom_grid_forecast_id["unit_of_measurement"], + custom_grid_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_grid"] # Publish total value of cost function - custom_cost_fun_id = params['passed_data']['custom_cost_fun_id'] - col_cost_fun = [i for i in opt_res_latest.columns if 'cost_fun_' in i] - input_data_dict['rh'].post_data(opt_res_latest[col_cost_fun], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'cost_fun', - publish_prefix = publish_prefix) + custom_cost_fun_id = params["passed_data"]["custom_cost_fun_id"] + col_cost_fun = [i for i in opt_res_latest.columns if "cost_fun_" in i] + input_data_dict["rh"].post_data( + opt_res_latest[col_cost_fun], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="cost_fun", + publish_prefix=publish_prefix, + ) # Publish the optimization status - custom_cost_fun_id = params['passed_data']['custom_optim_status_id'] + custom_cost_fun_id = params["passed_data"]["custom_optim_status_id"] if "optim_status" not in opt_res_latest: - opt_res_latest["optim_status"] = 'Optimal' - logger.warning("no optim_status 
in opt_res_latest, run an optimization task first") - input_data_dict['rh'].post_data(opt_res_latest['optim_status'], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'optim_status', - publish_prefix = publish_prefix) - cols_published = cols_published+["optim_status"] + opt_res_latest["optim_status"] = "Optimal" + logger.warning( + "no optim_status in opt_res_latest, run an optimization task first" + ) + input_data_dict["rh"].post_data( + opt_res_latest["optim_status"], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="optim_status", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["optim_status"] # Publish unit_load_cost - custom_unit_load_cost_id = params['passed_data']['custom_unit_load_cost_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_load_cost'], idx_closest, - custom_unit_load_cost_id["entity_id"], - custom_unit_load_cost_id["unit_of_measurement"], - custom_unit_load_cost_id["friendly_name"], - type_var = 'unit_load_cost', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_load_cost"] + custom_unit_load_cost_id = params["passed_data"]["custom_unit_load_cost_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_load_cost"], + idx_closest, + custom_unit_load_cost_id["entity_id"], + custom_unit_load_cost_id["unit_of_measurement"], + custom_unit_load_cost_id["friendly_name"], + type_var="unit_load_cost", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_load_cost"] # Publish unit_prod_price - custom_unit_prod_price_id = params['passed_data']['custom_unit_prod_price_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_prod_price'], idx_closest, - custom_unit_prod_price_id["entity_id"], - custom_unit_prod_price_id["unit_of_measurement"], - custom_unit_prod_price_id["friendly_name"], - type_var = 'unit_prod_price', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_prod_price"] + custom_unit_prod_price_id = params["passed_data"]["custom_unit_prod_price_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_prod_price"], + idx_closest, + custom_unit_prod_price_id["entity_id"], + custom_unit_prod_price_id["unit_of_measurement"], + custom_unit_prod_price_id["friendly_name"], + type_var="unit_prod_price", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_prod_price"] # Create a DF resuming what has been published opt_res = opt_res_latest[cols_published].loc[[opt_res_latest.index[idx_closest]]] return opt_res - - + + def main(): r"""Define the main command line entry function. This function may take several arguments as inputs. 
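For instance, a hypothetical day-ahead run could look like `emhass --action dayahead-optim --config /app/config_emhass.yaml --costfun profit` (the config path here is only an example).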
You can type `emhass --help` to see the list of options: - + - action: Set the desired action, options are: perfect-optim, dayahead-optim, naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune - + - config: Define path to the config.yaml file - + - costfun: Define the type of cost function, options are: profit, cost, self-consumption - + - log2file: Define if we should log to a file or not - + - params: Configuration parameters passed from data/options.json if using the add-on - + - runtimeparams: Pass runtime optimization parameters as dictionnary - + - debug: Use True for testing purposes - + """ # Parsing arguments parser = argparse.ArgumentParser() @@ -777,39 +958,49 @@ def main(): # Additionnal argument try: - parser.add_argument('--version', action='version', version='%(prog)s '+version('emhass')) + parser.add_argument( + "--version", action="version", version="%(prog)s " + version("emhass") + ) args = parser.parse_args() except Exception: - logger.info("Version not found for emhass package. Or importlib exited with PackageNotFoundError.") + logger.info( + "Version not found for emhass package. Or importlib exited with PackageNotFoundError." + ) # Setup parameters input_data_dict = set_input_data_dict(emhass_conf, args.costfun, args.params, args.runtimeparams, args.action, logger, args.debug) # Perform selected action - if args.action == 'perfect-optim': + if args.action == "perfect-optim": opt_res = perfect_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'dayahead-optim': + elif args.action == "dayahead-optim": opt_res = dayahead_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'naive-mpc-optim': + elif args.action == "naive-mpc-optim": opt_res = naive_mpc_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'forecast-model-fit': - df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) + elif args.action == "forecast-model-fit": + df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit( + input_data_dict, logger, debug=args.debug + ) opt_res = None - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred = forecast_model_predict(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred = forecast_model_predict( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'publish-data': + elif args.action == "publish-data": opt_res = publish_data(input_data_dict, logger) else: logger.error("The passed action argument is not valid") @@ -819,17 +1010,22 @@ def main(): # Flush the logger ch.close() logger.removeHandler(ch) - if args.action == 'perfect-optim' or args.action == 'dayahead-optim' or \ - args.action == 'naive-mpc-optim' or args.action == 'publish-data': + if ( + args.action == "perfect-optim" + or args.action == "dayahead-optim" + or args.action == "naive-mpc-optim" + or args.action == "publish-data" + ): return 
opt_res - elif args.action == 'forecast-model-fit': + elif args.action == "forecast-model-fit": return df_fit_pred, df_fit_pred_backtest, mlf - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": return df_pred - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": return df_pred_optim, mlf else: return opt_res -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index d70df3ec..80ddd74f 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -9,8 +9,12 @@ import pandas as pd import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score +from sklearn.ensemble import ( + AdaBoostRegressor, + GradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.metrics import r2_score from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split @@ -20,21 +24,31 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) + class MLRegressor: r""" A forecaster class using machine learning models. - + This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - + It exposes two main methods: - + - `fit`: to train a model with the passed data. - + - `predict`: to obtain a forecast from a pre-trained model. - + """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: + + def __init__( + self, + data, + model_type: str, + sklearn_model: str, + features: list, + target: str, + timestamp: str, + logger: logging.Logger, + ) -> None: r"""Define constructor for the forecast class. :param data: The data that will be used for train/test @@ -42,33 +56,35 @@ def __init__(self, data, model_type: str, sklearn_model: str, independent_variab :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param independent_variables: A list of independent variables. \ + :param features: A list of features. \ Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ + :type features: list + :param target: The target(to be predicted). \ Example: `hours`. - :type dependent_variable: str + :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. 
:type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable + self.features = features + self.target = target self.timestamp = timestamp self.model_type = model_type self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None self.model = None - self.grid_search =None - + self.grid_search = None + @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: + def add_date_features( + data: pd.DataFrame, date_features: list, timestamp: str + ) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame @@ -79,179 +95,162 @@ def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) - :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] + df[timestamp] = pd.to_datetime(df[timestamp]) + if "year" in date_features: + df["year"] = [i.year for i in df[timestamp]] + if "month" in date_features: + df["month"] = [i.month for i in df[timestamp]] + if "day_of_week" in date_features: + df["day_of_week"] = [i.dayofweek for i in df[timestamp]] + if "day_of_year" in date_features: + df["day_of_year"] = [i.dayofyear for i in df[timestamp]] + if "day" in date_features: + df["day"] = [i.day for i in df[timestamp]] + if "hour" in date_features: + df["hour"] = [i.hour for i in df[timestamp]] return df def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. :param date_features: A list of 'date_features' to take into account when fitting the model. :type date_features: list """ - self.logger.info("Performing a csv model fit for "+self.model_type) + self.logger.info("Performing a MLRegressor fit for " + self.model_type) self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + self.data_exo[self.features] = self.data[self.features] + self.data_exo[self.target] = self.data[self.target] keep_columns = [] - keep_columns.extend(self.independent_variables) + keep_columns.extend(self.features) if self.timestamp is not None: keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) + keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = MLRegressor.add_date_features(self.data_exo, date_features, self.timestamp) + self.data_exo = MLRegressor.add_date_features( + self.data_exo, date_features, self.timestamp + ) else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") + self.logger.error( + "If no timestamp is provided, you can't use date_features, going further without date_features." + ) - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + y = self.data_exo[self.target] + self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + self.data_exo = self.data_exo.drop(self.timestamp, axis=1) X = self.data_exo - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 + ) self.steps = len(X_test) regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + 
"gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] + + if self.sklearn_model == "LinearRegression": + base_model = regression_methods["LinearRegression"]["model"] + param_grid = regression_methods["LinearRegression"]["param_grid"] + elif self.sklearn_model == "RidgeRegression": + base_model = regression_methods["RidgeRegression"]["model"] + param_grid = regression_methods["RidgeRegression"]["param_grid"] + elif self.sklearn_model == "LassoRegression": + base_model = regression_methods["LassoRegression"]["model"] + param_grid = regression_methods["LassoRegression"]["param_grid"] + elif self.sklearn_model == "RandomForestRegression": + base_model = regression_methods["RandomForestRegression"]["model"] + param_grid = regression_methods["RandomForestRegression"]["param_grid"] + elif self.sklearn_model == "GradientBoostingRegression": + base_model = regression_methods["GradientBoostingRegression"]["model"] + param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] + elif self.sklearn_model == "AdaBoostRegression": + base_model = regression_methods["AdaBoostRegression"]["model"] + param_grid = regression_methods["AdaBoostRegression"]["param_grid"] else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # 
StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } + self.logger.error( + "Passed sklearn model " + self.sklearn_model + " is not valid" + ) + + self.model = make_pipeline(StandardScaler(), base_model) # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) - + self.grid_search = GridSearchCV( + self.model, + param_grid, + cv=5, + scoring="neg_mean_squared_error", + refit=True, + verbose=0, + n_jobs=-1, + ) + # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") + self.logger.info("Training a " + self.sklearn_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) + print("Best value for lambda : ", self.grid_search.best_params_) print("Best score for cost function: ", self.grid_search.best_score_) self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") self.model = self.grid_search.best_estimator_ - # Make predictions predictions = self.model.predict(X_test.values) predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - + pred_metric = r2_score(y_test, predictions) + self.logger.info( + f"Prediction R2 score of fitted model on test data: {pred_metric}" + ) - def predict(self, new_values:list) -> np.ndarray: + def predict(self, new_values: list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + :param new_values: The new values for the features(in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. :rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.model_type) + self.logger.info("Performing a prediction for " + self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index 52397c87..4fb909b1 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -35,7 +35,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, get_data_from_file: Optional[bool] = False) -> None: """ Define constructor for RetrieveHass class. 
diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py
index 52397c87..4fb909b1 100644
--- a/src/emhass/retrieve_hass.py
+++ b/src/emhass/retrieve_hass.py
@@ -35,7 +35,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta,
                  get_data_from_file: Optional[bool] = False) -> None:
         """
         Define constructor for RetrieveHass class.
-
+
         :param hass_url: The URL of the Home Assistant instance
         :type hass_url: str
         :param long_lived_token: The long lived token retrieved from the configuration pane
@@ -50,7 +50,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta,
         :type emhass_conf: dict
         :param logger: The passed logger object
         :type logger: logging object
-        :param get_data_from_file: Select if data should be retrieved from a
+        :param get_data_from_file: Select if data should be retrieved from a
             previously saved pickle useful for testing or directly from connection to
             hass database
         :type get_data_from_file: bool, optional
@@ -65,9 +65,14 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta,
         self.logger = logger
         self.get_data_from_file = get_data_from_file

-    def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: Optional[bool] = False,
-                 significant_changes_only: Optional[bool] = False,
-                 test_url: Optional[str] = 'empty') -> None:
+    def get_data(
+        self,
+        days_list: pd.date_range,
+        var_list: list,
+        minimal_response: Optional[bool] = False,
+        significant_changes_only: Optional[bool] = False,
+        test_url: Optional[str] = "empty",
+    ) -> None:
         r"""
         Retrieve the actual data from hass.

@@ -92,20 +97,36 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O
         """
         self.logger.info("Retrieve hass get data method initiated...")
         self.df_final = pd.DataFrame()
-        x = 0 #iterate based on days
+        x = 0  # iterate based on days
         # Looping on each day from days list
         for day in days_list:
-
+
             for i, var in enumerate(var_list):
-
-                if test_url == 'empty':
-                    if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API
-                        url = self.hass_url+"/history/period/"+day.isoformat()+"?filter_entity_id="+var
-                    else: # Otherwise the Home Assistant Core API it is
-                        url = self.hass_url+"api/history/period/"+day.isoformat()+"?filter_entity_id="+var
-                    if minimal_response: # A support for minimal response
+
+                if test_url == "empty":
+                    if (
+                        self.hass_url == "http://supervisor/core/api"
+                    ):  # If we are using the supervisor API
+                        url = (
+                            self.hass_url
+                            + "/history/period/"
+                            + day.isoformat()
+                            + "?filter_entity_id="
+                            + var
+                        )
+                    else:  # Otherwise the Home Assistant Core API it is
+                        url = (
+                            self.hass_url
+                            + "api/history/period/"
+                            + day.isoformat()
+                            + "?filter_entity_id="
+                            + var
+                        )
+                    if minimal_response:  # A support for minimal response
                         url = url + "?minimal_response"
-                    if significant_changes_only: # And for signicant changes only (check the HASS restful API for more info)
+                    if (
+                        significant_changes_only
+                    ):  # And for significant changes only (check the HASS restful API for more info)
                         url = url + "?significant_changes_only"
                 else:
                     url = test_url
             try:
                 response = get(url, headers=headers)
             except Exception:
-                self.logger.error("Unable to access Home Assistance instance, check URL")
-                self.logger.error("If using addon, try setting url and token to 'empty'")
+                self.logger.error(
+                    "Unable to access Home Assistant instance, check URL"
+                )
+                self.logger.error(
+                    "If using addon, try setting url and token to 'empty'"
+                )
                 return False
             else:
                 if response.status_code == 401:
-                    self.logger.error("Unable to access Home Assistance instance, TOKEN/KEY")
-                    self.logger.error("If using addon, try setting url and token to 'empty'")
+                    self.logger.error(
+                        "Unable to access Home Assistant instance, TOKEN/KEY"
+                    )
+                    self.logger.error(
+                        "If using addon, try setting url and token to 'empty'"
+                    )
                     return False
                 if response.status_code > 299:
                     return f"Request Get Error: {response.status_code}"
-            '''import bz2 # Uncomment to save a serialized data for tests
+            """import bz2  # Uncomment to save a serialized data for tests
             import _pickle as cPickle
             with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f:
-                cPickle.dump(response, f)'''
-            try: # Sometimes when there are connection problems we need to catch empty retrieved json
+                cPickle.dump(response, f)"""
+            try:  # Sometimes when there are connection problems we need to catch empty retrieved json
                 data = response.json()[0]
             except IndexError:
                 if x == 0:
             df_raw = pd.DataFrame.from_dict(data)
             # self.logger.info(str(df_raw))
             if len(df_raw) == 0:
-                if x is 0:
-                    self.logger.error("The retrieved Dataframe is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct")
+                if x == 0:
+                    self.logger.error(
+                        "The retrieved Dataframe is empty, a sensor: "
+                        + var
+                        + " may have 0 days of history or the passed sensor may not be correct"
+                    )
                 else:
                     self.logger.error("Retrieved empty Dataframe for day:"+ str(day) +", days_to_retrieve may be larger than the recorded history of sensor:" + var + " (check your recorder settings)")
                 return False
                     format='%Y-%d-%m %H:%M').round(self.freq, ambiguous='infer', nonexistent='shift_forward')
                 df_day = pd.DataFrame(index = ts)
                 # Caution with undefined string data: unknown, unavailable, etc.
-                df_tp = df_raw.copy()[['state']].replace(
-                    ['unknown', 'unavailable', ''], np.nan).astype(float).rename(columns={'state': var})
+                df_tp = (
+                    df_raw.copy()[["state"]]
+                    .replace(["unknown", "unavailable", ""], np.nan)
+                    .astype(float)
+                    .rename(columns={"state": var})
+                )
                 # Setting index, resampling and concatenation
-                df_tp.set_index(pd.to_datetime(df_raw['last_changed'], format="ISO8601"), inplace=True)
+                df_tp.set_index(
+                    pd.to_datetime(df_raw["last_changed"], format="ISO8601"),
+                    inplace=True,
+                )
                 df_tp = df_tp.resample(self.freq).mean()
                 df_day = pd.concat([df_day, df_tp], axis=1)
             self.df_final = pd.concat([self.df_final, df_day], axis=0)
@@ -196,18 +236,24 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set
         """
         try:
-            if load_negative: # Apply the correct sign to load power
-                self.df_final[var_load+'_positive'] = -self.df_final[var_load]
+            if load_negative:  # Apply the correct sign to load power
+                self.df_final[var_load + "_positive"] = -self.df_final[var_load]
             else:
-                self.df_final[var_load+'_positive'] = self.df_final[var_load]
+                self.df_final[var_load + "_positive"] = self.df_final[var_load]
             self.df_final.drop([var_load], inplace=True, axis=1)
         except KeyError:
-            self.logger.error("Variable "+var_load+" was not found. This is typically because no data could be retrieved from Home Assistant")
+            self.logger.error(
+                "Variable "
+                + var_load
+                + " was not found. 
This is typically because no data could be retrieved from Home Assistant" + ) return False except ValueError: - self.logger.error("sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same") - return False - if set_zero_min: # Apply minimum values + self.logger.error( + "sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same" + ) + return False + if set_zero_min: # Apply minimum values self.df_final.clip(lower=0.0, inplace=True, axis=1) self.df_final.replace(to_replace=0.0, value=np.nan, inplace=True) new_var_replace_zero = [] @@ -215,59 +261,74 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set # Just changing the names of variables to contain the fact that they are considered positive if var_replace_zero is not None: for string in var_replace_zero: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_replace_zero.append(new_string) else: new_var_replace_zero = None if var_interp is not None: for string in var_interp: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_interp.append(new_string) else: new_var_interp = None # Treating NaN replacement: either by zeros or by linear interpolation if new_var_replace_zero is not None: - self.df_final[new_var_replace_zero] = self.df_final[new_var_replace_zero].fillna(0.0) + self.df_final[new_var_replace_zero] = self.df_final[ + new_var_replace_zero + ].fillna(0.0) if new_var_interp is not None: self.df_final[new_var_interp] = self.df_final[new_var_interp].interpolate( - method='linear', axis=0, limit=None) + method="linear", axis=0, limit=None + ) self.df_final[new_var_interp] = self.df_final[new_var_interp].fillna(0.0) # Setting the correct time zone on DF index if self.time_zone is not None: self.df_final.index = self.df_final.index.tz_convert(self.time_zone) # Drop datetimeindex duplicates on final DF - self.df_final = self.df_final[~self.df_final.index.duplicated(keep='first')] + self.df_final = self.df_final[~self.df_final.index.duplicated(keep="first")] return True - + @staticmethod - def get_attr_data_dict(data_df: pd.DataFrame, idx: int, entity_id: str, - unit_of_measurement: str, friendly_name: str, - list_name: str, state: float) -> dict: - list_df = copy.deepcopy(data_df).loc[data_df.index[idx]:].reset_index() - list_df.columns = ['timestamps', entity_id] - ts_list = [str(i) for i in list_df['timestamps'].tolist()] - vals_list = [str(np.round(i,2)) for i in list_df[entity_id].tolist()] + def get_attr_data_dict( + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + list_name: str, + state: float, + ) -> dict: + list_df = copy.deepcopy(data_df).loc[data_df.index[idx] :].reset_index() + list_df.columns = ["timestamps", entity_id] + ts_list = [str(i) for i in list_df["timestamps"].tolist()] + vals_list = [str(np.round(i, 2)) for i in list_df[entity_id].tolist()] forecast_list = [] for i, ts in enumerate(ts_list): datum = {} datum["date"] = ts - datum[entity_id.split('sensor.')[1]] = vals_list[i] + datum[entity_id.split("sensor.")[1]] = vals_list[i] forecast_list.append(datum) data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, "friendly_name": friendly_name, - list_name: forecast_list - } + list_name: forecast_list, + }, } return data - - def post_data(self, data_df: pd.DataFrame, idx: int, 
entity_id: str, - unit_of_measurement: str, friendly_name: str, - type_var: str, - from_mlforecaster: Optional[bool]=False, - publish_prefix: Optional[str]="") -> None: + + def post_data( + self, + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + type_var: str, + from_mlforecaster: Optional[bool] = False, + publish_prefix: Optional[str] = "", + ) -> None: r""" Post passed data to hass. @@ -290,82 +351,139 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, """ # Add a possible prefix to the entity ID - entity_id = entity_id.replace('sensor.', 'sensor.'+publish_prefix) + entity_id = entity_id.replace("sensor.", "sensor." + publish_prefix) # Set the URL - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/states/"+entity_id - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/states/"+entity_id + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = self.hass_url + "/states/" + entity_id + else: # Otherwise the Home Assistant Core API it is + url = self.hass_url + "api/states/" + entity_id headers = { "Authorization": "Bearer " + self.long_lived_token, "content-type": "application/json", } # Preparing the data dict to be published - if type_var == 'cost_fun': - state = np.round(data_df.sum()[0],2) - elif type_var == 'unit_load_cost' or type_var == 'unit_prod_price': - state = np.round(data_df.loc[data_df.index[idx]],4) - elif type_var == 'optim_status': + if type_var == "cost_fun": + state = np.round(data_df.sum()[0], 2) + elif type_var == "unit_load_cost" or type_var == "unit_prod_price": + state = np.round(data_df.loc[data_df.index[idx]], 4) + elif type_var == "optim_status": state = data_df.loc[data_df.index[idx]] - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": state = data_df[idx] else: - state = np.round(data_df.loc[data_df.index[idx]],2) - if type_var == 'power': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "forecasts", state) - elif type_var == 'deferrable': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "deferrables_schedule", state) - elif type_var == 'batt': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_power", state) - elif type_var == 'SOC': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_soc", state) - elif type_var == 'unit_load_cost': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_load_cost_forecasts", state) - elif type_var == 'unit_prod_price': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_prod_price_forecasts", state) - elif type_var == 'mlforecaster': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "scheduled_forecast", state) - elif type_var == 'optim_status': + state = np.round(data_df.loc[data_df.index[idx]], 2) + if type_var == "power": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "forecasts", + state, + ) + elif type_var == "deferrable": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + 
unit_of_measurement, + friendly_name, + "deferrables_schedule", + state, + ) + elif type_var == "batt": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_power", + state, + ) + elif type_var == "SOC": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_soc", + state, + ) + elif type_var == "unit_load_cost": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_load_cost_forecasts", + state, + ) + elif type_var == "unit_prod_price": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_prod_price_forecasts", + state, + ) + elif type_var == "mlforecaster": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "scheduled_forecast", + state, + ) + elif type_var == "optim_status": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } - elif type_var == 'csv_predictor': + elif type_var == "mlregressor": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } else: data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } # Actually post the data if self.get_data_from_file: - class response: pass + + class response: + pass + response.status_code = 200 response.ok = True else: response = post(url, headers=headers, data=json.dumps(data)) # Treating the response status and posting them on the logger if response.ok: - self.logger.info("Successfully posted to "+entity_id+" = "+str(state)) + self.logger.info("Successfully posted to " + entity_id + " = " + str(state)) else: - self.logger.info("The status code for received curl command response is: "+str(response.status_code)) + self.logger.info( + "The status code for received curl command response is: " + + str(response.status_code) + ) return response, data diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 38a4e424..6d953ae6 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -2,10 +2,19 @@ # -*- coding: utf-8 -*- from typing import Tuple, Optional -import numpy as np, pandas as pd -import yaml, pytz, logging, pathlib, json, copy from datetime import datetime, timedelta, timezone +import logging +import pathlib +import json +import copy +import numpy as np +import pandas as pd +import yaml +import pytz + + import plotly.express as px + pd.options.plotting.backend = "plotly" from emhass.machine_learning_forecaster import MLForecaster @@ -14,13 +23,13 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: """ Get the root absolute path of the working directory. 
- + :param file: The passed file path with __file__ :return: The root path :param num_parent: The number of parents levels up to desired root folder :type num_parent: int, optional :rtype: str - + """ if num_parent == 3: root = pathlib.Path(file).resolve().parent.parent.parent @@ -36,7 +45,7 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = logging_level: Optional[str] = "DEBUG") -> Tuple[logging.Logger, logging.StreamHandler]: """ Create a simple logger object. - + :param fun_name: The Python function object name where the logger will be used :type fun_name: str :param emhass_conf: Dictionary containing the needed emhass paths @@ -45,9 +54,9 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = :type save_to_file: bool, optional :return: The logger object and the handler :rtype: object - + """ - # create logger object + # create logger object logger = logging.getLogger(fun_name) logger.propagate = True logger.fileSetting = save_to_file @@ -70,14 +79,18 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = else: logger.setLevel(logging.DEBUG) ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) return logger, ch -def get_forecast_dates(freq: int, delta_forecast: int, - timedelta_days: Optional[int] = 0) -> pd.core.indexes.datetimes.DatetimeIndex: + +def get_forecast_dates( + freq: int, delta_forecast: int, timedelta_days: Optional[int] = 0 +) -> pd.core.indexes.datetimes.DatetimeIndex: """ Get the date_range list of the needed future dates using the delta_forecast parameter. @@ -89,7 +102,7 @@ def get_forecast_dates(freq: int, delta_forecast: int, :type timedelta_days: Optional[int], optional :return: A list of future forecast dates. :rtype: pd.core.indexes.datetimes.DatetimeIndex - + """ freq = pd.to_timedelta(freq, "minutes") start_forecast = pd.Timestamp(datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) @@ -99,11 +112,19 @@ def get_forecast_dates(freq: int, delta_forecast: int, freq=freq).round(freq, ambiguous='infer', nonexistent='shift_forward') return forecast_dates -def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict, - set_type: str, logger: logging.Logger) -> Tuple[str, dict]: + +def treat_runtimeparams( + runtimeparams: str, + params: str, + retrieve_hass_conf: dict, + optim_conf: dict, + plant_conf: dict, + set_type: str, + logger: logging.Logger, +) -> Tuple[str, dict]: """ - Treat the passed optimization runtime parameters. - + Treat the passed optimization runtime parameters. + :param runtimeparams: Json string containing the runtime parameters dict. :type runtimeparams: str :param params: Configuration parameters passed from data/options.json @@ -120,115 +141,155 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic :type logger: logging.Logger :return: Returning the params and optimization parameter container. 
:rtype: Tuple[str, dict] - + """ - if (params != None) and (params != 'null'): + if (params != None) and (params != "null"): params = json.loads(params) else: params = {} # Some default data needed custom_deferrable_forecast_id = [] - for k in range(optim_conf['num_def_loads']): - custom_deferrable_forecast_id.append({ - "entity_id": "sensor.p_deferrable{}".format(k), - "unit_of_measurement": "W", - "friendly_name": "Deferrable Load {}".format(k) - }) - default_passed_dict = {'custom_pv_forecast_id': {"entity_id": "sensor.p_pv_forecast", "unit_of_measurement": "W", "friendly_name": "PV Power Forecast"}, - 'custom_load_forecast_id': {"entity_id": "sensor.p_load_forecast", "unit_of_measurement": "W", "friendly_name": "Load Power Forecast"}, - 'custom_batt_forecast_id': {"entity_id": "sensor.p_batt_forecast", "unit_of_measurement": "W", "friendly_name": "Battery Power Forecast"}, - 'custom_batt_soc_forecast_id': {"entity_id": "sensor.soc_batt_forecast", "unit_of_measurement": "%", "friendly_name": "Battery SOC Forecast"}, - 'custom_grid_forecast_id': {"entity_id": "sensor.p_grid_forecast", "unit_of_measurement": "W", "friendly_name": "Grid Power Forecast"}, - 'custom_cost_fun_id': {"entity_id": "sensor.total_cost_fun_value", "unit_of_measurement": "", "friendly_name": "Total cost function value"}, - 'custom_optim_status_id': {"entity_id": "sensor.optim_status", "unit_of_measurement": "", "friendly_name": "EMHASS optimization status"}, - 'custom_unit_load_cost_id': {"entity_id": "sensor.unit_load_cost", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Load Cost"}, - 'custom_unit_prod_price_id': {"entity_id": "sensor.unit_prod_price", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Prod Price"}, - 'custom_deferrable_forecast_id': custom_deferrable_forecast_id, - 'publish_prefix': ""} - if 'passed_data' in params.keys(): + for k in range(optim_conf["num_def_loads"]): + custom_deferrable_forecast_id.append( + { + "entity_id": "sensor.p_deferrable{}".format(k), + "unit_of_measurement": "W", + "friendly_name": "Deferrable Load {}".format(k), + } + ) + default_passed_dict = { + "custom_pv_forecast_id": { + "entity_id": "sensor.p_pv_forecast", + "unit_of_measurement": "W", + "friendly_name": "PV Power Forecast", + }, + "custom_load_forecast_id": { + "entity_id": "sensor.p_load_forecast", + "unit_of_measurement": "W", + "friendly_name": "Load Power Forecast", + }, + "custom_batt_forecast_id": { + "entity_id": "sensor.p_batt_forecast", + "unit_of_measurement": "W", + "friendly_name": "Battery Power Forecast", + }, + "custom_batt_soc_forecast_id": { + "entity_id": "sensor.soc_batt_forecast", + "unit_of_measurement": "%", + "friendly_name": "Battery SOC Forecast", + }, + "custom_grid_forecast_id": { + "entity_id": "sensor.p_grid_forecast", + "unit_of_measurement": "W", + "friendly_name": "Grid Power Forecast", + }, + "custom_cost_fun_id": { + "entity_id": "sensor.total_cost_fun_value", + "unit_of_measurement": "", + "friendly_name": "Total cost function value", + }, + "custom_optim_status_id": { + "entity_id": "sensor.optim_status", + "unit_of_measurement": "", + "friendly_name": "EMHASS optimization status", + }, + "custom_unit_load_cost_id": { + "entity_id": "sensor.unit_load_cost", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Load Cost", + }, + "custom_unit_prod_price_id": { + "entity_id": "sensor.unit_prod_price", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Prod Price", + }, + "custom_deferrable_forecast_id": custom_deferrable_forecast_id, + "publish_prefix": 
"", + } + if "passed_data" in params.keys(): for key, value in default_passed_dict.items(): - params['passed_data'][key] = value + params["passed_data"][key] = value else: - params['passed_data'] = default_passed_dict + params["passed_data"] = default_passed_dict if runtimeparams is not None: runtimeparams = json.loads(runtimeparams) - freq = int(retrieve_hass_conf['freq'].seconds/60.0) - delta_forecast = int(optim_conf['delta_forecast'].days) + freq = int(retrieve_hass_conf["freq"].seconds / 60.0) + delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) if set_type == "regressor-model-fit": - csv_file = runtimeparams['csv_file'] - independent_variables = runtimeparams['independent_variables'] - dependent_variable = runtimeparams['dependent_variable'] - params['passed_data']['csv_file'] = csv_file - params['passed_data']['independent_variables'] = independent_variables - params['passed_data']['dependent_variable'] = dependent_variable - if 'timestamp' not in runtimeparams.keys(): - params['passed_data']['timestamp'] = None + csv_file = runtimeparams["csv_file"] + features = runtimeparams["features"] + target = runtimeparams["target"] + params["passed_data"]["csv_file"] = csv_file + params["passed_data"]["features"] = features + params["passed_data"]["target"] = target + if "timestamp" not in runtimeparams.keys(): + params["passed_data"]["timestamp"] = None else: - timestamp = runtimeparams['timestamp'] - params['passed_data']['timestamp'] = timestamp - if 'date_features' not in runtimeparams.keys(): - params['passed_data']['date_features'] = [] + timestamp = runtimeparams["timestamp"] + params["passed_data"]["timestamp"] = timestamp + if "date_features" not in runtimeparams.keys(): + params["passed_data"]["date_features"] = [] else: - date_features = runtimeparams['date_features'] - params['passed_data']['date_features'] = date_features - + date_features = runtimeparams["date_features"] + params["passed_data"]["date_features"] = date_features + if set_type == "regressor-model-predict": - new_values = runtimeparams['new_values'] - params['passed_data']['new_values'] = new_values + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values # Treating special data passed for MPC control case - if set_type == 'naive-mpc-optim': - if 'prediction_horizon' not in runtimeparams.keys(): - prediction_horizon = 10 # 10 time steps by default + if set_type == "naive-mpc-optim": + if "prediction_horizon" not in runtimeparams.keys(): + prediction_horizon = 10 # 10 time steps by default else: - prediction_horizon = runtimeparams['prediction_horizon'] - params['passed_data']['prediction_horizon'] = prediction_horizon - if 'soc_init' not in runtimeparams.keys(): - soc_init = plant_conf['SOCtarget'] + prediction_horizon = runtimeparams["prediction_horizon"] + params["passed_data"]["prediction_horizon"] = prediction_horizon + if "soc_init" not in runtimeparams.keys(): + soc_init = plant_conf["SOCtarget"] else: - soc_init = runtimeparams['soc_init'] - params['passed_data']['soc_init'] = soc_init - if 'soc_final' not in runtimeparams.keys(): - soc_final = plant_conf['SOCtarget'] + soc_init = runtimeparams["soc_init"] + params["passed_data"]["soc_init"] = soc_init + if "soc_final" not in runtimeparams.keys(): + soc_final = plant_conf["SOCtarget"] else: - soc_final = runtimeparams['soc_final'] - params['passed_data']['soc_final'] = soc_final - if 'def_total_hours' not in runtimeparams.keys(): - def_total_hours = 
optim_conf['def_total_hours'] + soc_final = runtimeparams["soc_final"] + params["passed_data"]["soc_final"] = soc_final + if "def_total_hours" not in runtimeparams.keys(): + def_total_hours = optim_conf["def_total_hours"] else: - def_total_hours = runtimeparams['def_total_hours'] - params['passed_data']['def_total_hours'] = def_total_hours - if 'def_start_timestep' not in runtimeparams.keys(): - def_start_timestep = optim_conf['def_start_timestep'] + def_total_hours = runtimeparams["def_total_hours"] + params["passed_data"]["def_total_hours"] = def_total_hours + if "def_start_timestep" not in runtimeparams.keys(): + def_start_timestep = optim_conf["def_start_timestep"] else: - def_start_timestep = runtimeparams['def_start_timestep'] - params['passed_data']['def_start_timestep'] = def_start_timestep - if 'def_end_timestep' not in runtimeparams.keys(): - def_end_timestep = optim_conf['def_end_timestep'] + def_start_timestep = runtimeparams["def_start_timestep"] + params["passed_data"]["def_start_timestep"] = def_start_timestep + if "def_end_timestep" not in runtimeparams.keys(): + def_end_timestep = optim_conf["def_end_timestep"] else: - def_end_timestep = runtimeparams['def_end_timestep'] - params['passed_data']['def_end_timestep'] = def_end_timestep - if 'alpha' not in runtimeparams.keys(): + def_end_timestep = runtimeparams["def_end_timestep"] + params["passed_data"]["def_end_timestep"] = def_end_timestep + if "alpha" not in runtimeparams.keys(): alpha = 0.5 else: - alpha = runtimeparams['alpha'] - params['passed_data']['alpha'] = alpha - if 'beta' not in runtimeparams.keys(): + alpha = runtimeparams["alpha"] + params["passed_data"]["alpha"] = alpha + if "beta" not in runtimeparams.keys(): beta = 0.5 else: - beta = runtimeparams['beta'] - params['passed_data']['beta'] = beta + beta = runtimeparams["beta"] + params["passed_data"]["beta"] = beta forecast_dates = copy.deepcopy(forecast_dates)[0:prediction_horizon] else: - params['passed_data']['prediction_horizon'] = None - params['passed_data']['soc_init'] = None - params['passed_data']['soc_final'] = None - params['passed_data']['def_total_hours'] = None - params['passed_data']['def_start_timestep'] = None - params['passed_data']['def_end_timestep'] = None - params['passed_data']['alpha'] = None - params['passed_data']['beta'] = None + params["passed_data"]["prediction_horizon"] = None + params["passed_data"]["soc_init"] = None + params["passed_data"]["soc_final"] = None + params["passed_data"]["def_total_hours"] = None + params["passed_data"]["def_start_timestep"] = None + params["passed_data"]["def_end_timestep"] = None + params["passed_data"]["alpha"] = None + params["passed_data"]["beta"] = None # Treat passed forecast data lists list_forecast_key = ['pv_power_forecast', 'load_power_forecast', 'load_cost_forecast', 'prod_price_forecast'] forecast_methods = ['weather_forecast_method', 'load_forecast_method', 'load_cost_forecast_method', 'prod_price_forecast_method'] @@ -248,134 +309,177 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: params['passed_data'][forecast_key] = None # Treat passed data for forecast model fit/predict/tune at runtime - if 'days_to_retrieve' not in runtimeparams.keys(): + if "days_to_retrieve" not in runtimeparams.keys(): days_to_retrieve = 9 else: - days_to_retrieve = runtimeparams['days_to_retrieve'] - params['passed_data']['days_to_retrieve'] = days_to_retrieve - if 'model_type' not in runtimeparams.keys(): + days_to_retrieve = runtimeparams["days_to_retrieve"] + 
params["passed_data"]["days_to_retrieve"] = days_to_retrieve + if "model_type" not in runtimeparams.keys(): model_type = "load_forecast" else: - model_type = runtimeparams['model_type'] - params['passed_data']['model_type'] = model_type - if 'var_model' not in runtimeparams.keys(): + model_type = runtimeparams["model_type"] + params["passed_data"]["model_type"] = model_type + if "var_model" not in runtimeparams.keys(): var_model = "sensor.power_load_no_var_loads" else: - var_model = runtimeparams['var_model'] - params['passed_data']['var_model'] = var_model - if 'sklearn_model' not in runtimeparams.keys(): + var_model = runtimeparams["var_model"] + params["passed_data"]["var_model"] = var_model + if "sklearn_model" not in runtimeparams.keys(): sklearn_model = "KNeighborsRegressor" else: - sklearn_model = runtimeparams['sklearn_model'] - params['passed_data']['sklearn_model'] = sklearn_model - if 'num_lags' not in runtimeparams.keys(): + sklearn_model = runtimeparams["sklearn_model"] + params["passed_data"]["sklearn_model"] = sklearn_model + if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: - num_lags = runtimeparams['num_lags'] - params['passed_data']['num_lags'] = num_lags - if 'split_date_delta' not in runtimeparams.keys(): - split_date_delta = '48h' + num_lags = runtimeparams["num_lags"] + params["passed_data"]["num_lags"] = num_lags + if "split_date_delta" not in runtimeparams.keys(): + split_date_delta = "48h" else: - split_date_delta = runtimeparams['split_date_delta'] - params['passed_data']['split_date_delta'] = split_date_delta - if 'perform_backtest' not in runtimeparams.keys(): + split_date_delta = runtimeparams["split_date_delta"] + params["passed_data"]["split_date_delta"] = split_date_delta + if "perform_backtest" not in runtimeparams.keys(): perform_backtest = False else: - perform_backtest = eval(str(runtimeparams['perform_backtest']).capitalize()) - params['passed_data']['perform_backtest'] = perform_backtest - if 'model_predict_publish' not in runtimeparams.keys(): + perform_backtest = eval(str(runtimeparams["perform_backtest"]).capitalize()) + params["passed_data"]["perform_backtest"] = perform_backtest + if "model_predict_publish" not in runtimeparams.keys(): model_predict_publish = False else: - model_predict_publish = eval(str(runtimeparams['model_predict_publish']).capitalize()) - params['passed_data']['model_predict_publish'] = model_predict_publish - if 'model_predict_entity_id' not in runtimeparams.keys(): + model_predict_publish = eval( + str(runtimeparams["model_predict_publish"]).capitalize() + ) + params["passed_data"]["model_predict_publish"] = model_predict_publish + if "model_predict_entity_id" not in runtimeparams.keys(): model_predict_entity_id = "sensor.p_load_forecast_custom_model" else: - model_predict_entity_id = runtimeparams['model_predict_entity_id'] - params['passed_data']['model_predict_entity_id'] = model_predict_entity_id - if 'model_predict_unit_of_measurement' not in runtimeparams.keys(): + model_predict_entity_id = runtimeparams["model_predict_entity_id"] + params["passed_data"]["model_predict_entity_id"] = model_predict_entity_id + if "model_predict_unit_of_measurement" not in runtimeparams.keys(): model_predict_unit_of_measurement = "W" else: - model_predict_unit_of_measurement = runtimeparams['model_predict_unit_of_measurement'] - params['passed_data']['model_predict_unit_of_measurement'] = model_predict_unit_of_measurement - if 'model_predict_friendly_name' not in runtimeparams.keys(): + model_predict_unit_of_measurement = 
runtimeparams[ + "model_predict_unit_of_measurement" + ] + params["passed_data"][ + "model_predict_unit_of_measurement" + ] = model_predict_unit_of_measurement + if "model_predict_friendly_name" not in runtimeparams.keys(): model_predict_friendly_name = "Load Power Forecast custom ML model" else: - model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] - params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - if 'mlr_predict_entity_id' not in runtimeparams.keys(): + model_predict_friendly_name = runtimeparams["model_predict_friendly_name"] + params["passed_data"][ + "model_predict_friendly_name" + ] = model_predict_friendly_name + if "mlr_predict_entity_id" not in runtimeparams.keys(): mlr_predict_entity_id = "sensor.mlr_predict" else: - mlr_predict_entity_id = runtimeparams['mlr_predict_entity_id'] - params['passed_data']['mlr_predict_entity_id'] = mlr_predict_entity_id - if 'mlr_predict_unit_of_measurement' not in runtimeparams.keys(): + mlr_predict_entity_id = runtimeparams["mlr_predict_entity_id"] + params["passed_data"]["mlr_predict_entity_id"] = mlr_predict_entity_id + if "mlr_predict_unit_of_measurement" not in runtimeparams.keys(): mlr_predict_unit_of_measurement = None else: - mlr_predict_unit_of_measurement = runtimeparams['mlr_predict_unit_of_measurement'] - params['passed_data']['mlr_predict_unit_of_measurement'] = mlr_predict_unit_of_measurement - if 'mlr_predict_friendly_name' not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = runtimeparams[ + "mlr_predict_unit_of_measurement" + ] + params["passed_data"][ + "mlr_predict_unit_of_measurement" + ] = mlr_predict_unit_of_measurement + if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - 
optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant configuration parameters passed at runtime - if 'SOCtarget' in runtimeparams.keys(): - plant_conf['SOCtarget'] = runtimeparams['SOCtarget'] + if "SOCtarget" in runtimeparams.keys(): + plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] # Treat custom entities id's and friendly names for variables - if 'custom_pv_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_pv_forecast_id'] = runtimeparams['custom_pv_forecast_id'] - if 'custom_load_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_load_forecast_id'] = runtimeparams['custom_load_forecast_id'] - if 'custom_batt_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_forecast_id'] = runtimeparams['custom_batt_forecast_id'] - if 'custom_batt_soc_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_soc_forecast_id'] = runtimeparams['custom_batt_soc_forecast_id'] - if 'custom_grid_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_grid_forecast_id'] = runtimeparams['custom_grid_forecast_id'] - if 'custom_cost_fun_id' in runtimeparams.keys(): - params['passed_data']['custom_cost_fun_id'] = runtimeparams['custom_cost_fun_id'] - if 'custom_optim_status_id' in runtimeparams.keys(): - params['passed_data']['custom_optim_status_id'] = 
runtimeparams['custom_optim_status_id'] - if 'custom_unit_load_cost_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_load_cost_id'] = runtimeparams['custom_unit_load_cost_id'] - if 'custom_unit_prod_price_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_prod_price_id'] = runtimeparams['custom_unit_prod_price_id'] - if 'custom_deferrable_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_deferrable_forecast_id'] = runtimeparams['custom_deferrable_forecast_id'] + if "custom_pv_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_pv_forecast_id"] = runtimeparams[ + "custom_pv_forecast_id" + ] + if "custom_load_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_load_forecast_id"] = runtimeparams[ + "custom_load_forecast_id" + ] + if "custom_batt_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_forecast_id"] = runtimeparams[ + "custom_batt_forecast_id" + ] + if "custom_batt_soc_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_soc_forecast_id"] = runtimeparams[ + "custom_batt_soc_forecast_id" + ] + if "custom_grid_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_grid_forecast_id"] = runtimeparams[ + "custom_grid_forecast_id" + ] + if "custom_cost_fun_id" in runtimeparams.keys(): + params["passed_data"]["custom_cost_fun_id"] = runtimeparams[ + "custom_cost_fun_id" + ] + if "custom_optim_status_id" in runtimeparams.keys(): + params["passed_data"]["custom_optim_status_id"] = runtimeparams[ + "custom_optim_status_id" + ] + if "custom_unit_load_cost_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_load_cost_id"] = runtimeparams[ + "custom_unit_load_cost_id" + ] + if "custom_unit_prod_price_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_prod_price_id"] = runtimeparams[ + "custom_unit_prod_price_id" + ] + if "custom_deferrable_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_deferrable_forecast_id"] = runtimeparams[ + "custom_deferrable_forecast_id" + ] # A condition to put a prefix on all published data - if 'publish_prefix' not in runtimeparams.keys(): + if "publish_prefix" not in runtimeparams.keys(): publish_prefix = "" else: - publish_prefix = runtimeparams['publish_prefix'] - params['passed_data']['publish_prefix'] = publish_prefix + publish_prefix = runtimeparams["publish_prefix"] + params["passed_data"]["publish_prefix"] = publish_prefix # Serialize the final params params = json.dumps(params) return params, retrieve_hass_conf, optim_conf, plant_conf @@ -406,40 +510,45 @@ def get_yaml_parse(emhass_conf: dict, use_secrets: Optional[bool] = True, with open(emhass_conf["root_path"] / 'secrets_emhass.yaml', 'r') as file: #assume secrets file is in root path input_secrets = yaml.load(file, Loader=yaml.FullLoader) else: - input_secrets = input_conf.pop('params_secrets', None) - - if (type(input_conf['retrieve_hass_conf']) == list): #if using old config version - retrieve_hass_conf = dict({key:d[key] for d in input_conf['retrieve_hass_conf'] for key in d}) + input_secrets = input_conf.pop("params_secrets", None) + + if type(input_conf["retrieve_hass_conf"]) == list: # if using old config version + retrieve_hass_conf = dict( + {key: d[key] for d in input_conf["retrieve_hass_conf"] for key in d} + ) else: - retrieve_hass_conf = input_conf.get('retrieve_hass_conf', {}) - + retrieve_hass_conf = input_conf.get("retrieve_hass_conf", {}) + if use_secrets: 
retrieve_hass_conf.update(input_secrets) else: - retrieve_hass_conf['hass_url'] = 'http://supervisor/core/api' - retrieve_hass_conf['long_lived_token'] = '${SUPERVISOR_TOKEN}' - retrieve_hass_conf['time_zone'] = 'Europe/Paris' - retrieve_hass_conf['lat'] = 45.83 - retrieve_hass_conf['lon'] = 6.86 - retrieve_hass_conf['alt'] = 4807.8 - retrieve_hass_conf['freq'] = pd.to_timedelta(retrieve_hass_conf['freq'], "minutes") - retrieve_hass_conf['time_zone'] = pytz.timezone(retrieve_hass_conf['time_zone']) - - if (type(input_conf['optim_conf']) == list): - optim_conf = dict({key:d[key] for d in input_conf['optim_conf'] for key in d}) + retrieve_hass_conf["hass_url"] = "http://supervisor/core/api" + retrieve_hass_conf["long_lived_token"] = "${SUPERVISOR_TOKEN}" + retrieve_hass_conf["time_zone"] = "Europe/Paris" + retrieve_hass_conf["lat"] = 45.83 + retrieve_hass_conf["lon"] = 6.86 + retrieve_hass_conf["alt"] = 4807.8 + retrieve_hass_conf["freq"] = pd.to_timedelta(retrieve_hass_conf["freq"], "minutes") + retrieve_hass_conf["time_zone"] = pytz.timezone(retrieve_hass_conf["time_zone"]) + + if type(input_conf["optim_conf"]) == list: + optim_conf = dict({key: d[key] for d in input_conf["optim_conf"] for key in d}) else: - optim_conf = input_conf.get('optim_conf', {}) + optim_conf = input_conf.get("optim_conf", {}) - optim_conf['list_hp_periods'] = dict((key,d[key]) for d in optim_conf['list_hp_periods'] for key in d) - optim_conf['delta_forecast'] = pd.Timedelta(days=optim_conf['delta_forecast']) - - if (type(input_conf['plant_conf']) == list): - plant_conf = dict({key:d[key] for d in input_conf['plant_conf'] for key in d}) + optim_conf["list_hp_periods"] = dict( + (key, d[key]) for d in optim_conf["list_hp_periods"] for key in d + ) + optim_conf["delta_forecast"] = pd.Timedelta(days=optim_conf["delta_forecast"]) + + if type(input_conf["plant_conf"]) == list: + plant_conf = dict({key: d[key] for d in input_conf["plant_conf"] for key in d}) else: - plant_conf = input_conf.get('plant_conf', {}) - + plant_conf = input_conf.get("plant_conf", {}) + return retrieve_hass_conf, optim_conf, plant_conf + def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dict: """ Build a dictionary with graphs and tables for the webui. 
@@ -450,61 +559,86 @@ def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dic :type plot_size: Optional[int], optional :return: A dictionary containing the graphs and tables in html format :rtype: dict - + """ - cols_p = [i for i in df.columns.to_list() if 'P_' in i] + cols_p = [i for i in df.columns.to_list() if "P_" in i] # Let's round the data in the DF - optim_status = df['optim_status'].unique().item() - df.drop('optim_status', axis=1, inplace=True) - cols_else = [i for i in df.columns.to_list() if 'P_' not in i] + optim_status = df["optim_status"].unique().item() + df.drop("optim_status", axis=1, inplace=True) + cols_else = [i for i in df.columns.to_list() if "P_" not in i] df = df.apply(pd.to_numeric) df[cols_p] = df[cols_p].astype(int) df[cols_else] = df[cols_else].round(3) # Create plots n_colors = len(cols_p) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_0 = px.line(df[cols_p], title='Systems powers schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_0.update_layout(xaxis_title='Timestamp', yaxis_title='System powers (W)') - if 'SOC_opt' in df.columns.to_list(): - fig_1 = px.line(df['SOC_opt'], title='Battery state of charge schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_1.update_layout(xaxis_title='Timestamp', yaxis_title='Battery SOC (%)') - cols_cost = [i for i in df.columns.to_list() if 'cost_' in i or 'unit_' in i] + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_0 = px.line( + df[cols_p], + title="Systems powers schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_0.update_layout(xaxis_title="Timestamp", yaxis_title="System powers (W)") + if "SOC_opt" in df.columns.to_list(): + fig_1 = px.line( + df["SOC_opt"], + title="Battery state of charge schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_1.update_layout(xaxis_title="Timestamp", yaxis_title="Battery SOC (%)") + cols_cost = [i for i in df.columns.to_list() if "cost_" in i or "unit_" in i] n_colors = len(cols_cost) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_2 = px.line(df[cols_cost], title='Systems costs obtained from optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_2.update_layout(xaxis_title='Timestamp', yaxis_title='System costs (currency)') + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_2 = px.line( + df[cols_cost], + title="Systems costs obtained from optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_2.update_layout(xaxis_title="Timestamp", yaxis_title="System costs (currency)") # Get full path to image - image_path_0 = fig_0.to_html(full_html=False, default_width='75%') - if 'SOC_opt' in df.columns.to_list(): - image_path_1 = fig_1.to_html(full_html=False, default_width='75%') - image_path_2 = fig_2.to_html(full_html=False, default_width='75%') + image_path_0 = fig_0.to_html(full_html=False, default_width="75%") + if "SOC_opt" in df.columns.to_list(): + image_path_1 = fig_1.to_html(full_html=False, default_width="75%") + image_path_2 = fig_2.to_html(full_html=False, 
default_width="75%")
     # The tables
-    table1 = df.reset_index().to_html(classes='mystyle', index=False)
-    cost_cols = [i for i in df.columns if 'cost_' in i]
+    table1 = df.reset_index().to_html(classes="mystyle", index=False)
+    cost_cols = [i for i in df.columns if "cost_" in i]
     table2 = df[cost_cols].reset_index().sum(numeric_only=True)
-    table2['optim_status'] = optim_status
-    table2 = table2.to_frame(name='Value').reset_index(names='Variable').to_html(classes='mystyle', index=False)
+    table2["optim_status"] = optim_status
+    table2 = (
+        table2.to_frame(name="Value")
+        .reset_index(names="Variable")
+        .to_html(classes="mystyle", index=False)
+    )
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>EMHASS optimization results</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting latest optimization results</h4>'
-    injection_dict['figure_0'] = image_path_0
-    if 'SOC_opt' in df.columns.to_list():
-        injection_dict['figure_1'] = image_path_1
-    injection_dict['figure_2'] = image_path_2
-    injection_dict['subsubtitle1'] = '<h4>Last run optimization results table</h4>'
-    injection_dict['table1'] = table1
-    injection_dict['subsubtitle2'] = '<h4>Summary table for latest optimization results</h4>'
-    injection_dict['table2'] = table2
+    injection_dict["title"] = "<h2>EMHASS optimization results</h2>"
+    injection_dict["subsubtitle0"] = "<h4>Plotting latest optimization results</h4>"
+    injection_dict["figure_0"] = image_path_0
+    if "SOC_opt" in df.columns.to_list():
+        injection_dict["figure_1"] = image_path_1
+    injection_dict["figure_2"] = image_path_2
+    injection_dict["subsubtitle1"] = "<h4>Last run optimization results table</h4>"
+    injection_dict["table1"] = table1
+    injection_dict["subsubtitle2"] = (
+        "<h4>Summary table for latest optimization results</h4>"
+    )
+    injection_dict["table2"] = table2
     return injection_dict
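For reference, the dictionary built by get_injection_dict is nothing more than plotly HTML fragments plus table HTML keyed for the web UI template. A minimal standalone sketch of that flow, using toy data rather than a real optimization result (the column names here are made up):

import pandas as pd
import plotly.express as px

# Toy stand-in for the optimization results DataFrame
df = pd.DataFrame(
    {"P_PV": [0, 1200, 800], "P_Load": [500, 700, 650]},
    index=pd.date_range("2024-01-07", periods=3, freq="30min"),
)
fig = px.line(df, template="presentation", line_shape="hv")
injection_dict = {
    "title": "<h2>EMHASS optimization results</h2>",
    "figure_0": fig.to_html(full_html=False, default_width="75%"),
    "table1": df.reset_index().to_html(classes="mystyle", index=False),
}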

-def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_fit(
+    df_fit_pred: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF fit case.

@@ -516,19 +650,26 @@ def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLFore
     :rtype: dict
     """
     fig = df_fit_pred.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model fit</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Plotting train/test forecast model results for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model fit</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Plotting train/test forecast model results for " + mlf.model_type + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict
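Both injection helpers rely on the plotly plotting backend that utils.py enables at module level, so DataFrame.plot() returns a plotly figure whose HTML fragment can be injected directly into the page. A short illustrative sketch with made-up fit results (the column and axis names are assumptions, not values from this patch):

import pandas as pd

pd.options.plotting.backend = "plotly"  # as set at module level in utils.py

df_fit_pred = pd.DataFrame(
    {"train": [1.0, 1.1, None], "pred": [None, 1.05, 0.98]},
    index=pd.date_range("2024-01-07", periods=3, freq="30min"),
)
fig = df_fit_pred.plot()  # a plotly Figure, not a matplotlib Axes
fig.layout.template = "presentation"
fig.update_yaxes(title_text="load_forecast")
fig.update_xaxes(title_text="Time")
html_fragment = fig.to_html(full_html=False, default_width="75%")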

-def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLForecaster) -> dict:
+
+def get_injection_dict_forecast_model_tune(
+    df_pred_optim: pd.DataFrame, mlf: MLForecaster
+) -> dict:
     """
     Build a dictionary with graphs and tables for the webui for special MLF tune case.

@@ -540,19 +681,32 @@ def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLF
     :rtype: dict
     """
     fig = df_pred_optim.plot()
-    fig.layout.template = 'presentation'
-    fig.update_yaxes(title_text = mlf.model_type)
-    fig.update_xaxes(title_text = "Time")
-    image_path_0 = fig.to_html(full_html=False, default_width='75%')
+    fig.layout.template = "presentation"
+    fig.update_yaxes(title_text=mlf.model_type)
+    fig.update_xaxes(title_text="Time")
+    image_path_0 = fig.to_html(full_html=False, default_width="75%")
     # The dict of plots
     injection_dict = {}
-    injection_dict['title'] = '<h2>Custom machine learning forecast model tune</h2>'
-    injection_dict['subsubtitle0'] = '<h4>Performed a tuning routine using bayesian optimization for '+mlf.model_type+'</h4>'
-    injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>'
-    injection_dict['figure_0'] = image_path_0
+    injection_dict["title"] = "<h2>Custom machine learning forecast model tune</h2>"
+    injection_dict["subsubtitle0"] = (
+        "<h4>Performed a tuning routine using bayesian optimization for "
+        + mlf.model_type
+        + "</h4>"
+    )
+    injection_dict["subsubtitle0"] = (
+        "<h4>Forecasting variable " + mlf.var_model + "</h4>"
+    )
+    injection_dict["figure_0"] = image_path_0
     return injection_dict

-def build_params(params: dict, params_secrets: dict, options: dict, addon: int, logger: logging.Logger) -> dict:
+
+def build_params(
+    params: dict,
+    params_secrets: dict,
+    options: dict,
+    addon: int,
+    logger: logging.Logger,
+) -> dict:
     """
     Build the main params dictionary from the loaded options.json when using the add-on.

@@ -571,45 +725,120 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int,
     """
     if addon == 1:
         # Updating variables in retrieve_hass_conf
-        params['retrieve_hass_conf']['freq'] = options.get('optimization_time_step',params['retrieve_hass_conf']['freq'])
-        params['retrieve_hass_conf']['days_to_retrieve'] = options.get('historic_days_to_retrieve',params['retrieve_hass_conf']['days_to_retrieve'])
-        params['retrieve_hass_conf']['var_PV'] = options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV'])
-        params['retrieve_hass_conf']['var_load'] = options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])
-        params['retrieve_hass_conf']['load_negative'] = options.get('load_negative',params['retrieve_hass_conf']['load_negative'])
-        params['retrieve_hass_conf']['set_zero_min'] = options.get('set_zero_min',params['retrieve_hass_conf']['set_zero_min'])
-        params['retrieve_hass_conf']['var_replace_zero'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_replace_zero'])]
-        params['retrieve_hass_conf']['var_interp'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']), options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])]
-        params['retrieve_hass_conf']['method_ts_round'] = options.get('method_ts_round',params['retrieve_hass_conf']['method_ts_round'])
+        params["retrieve_hass_conf"]["freq"] = options.get(
+            "optimization_time_step", params["retrieve_hass_conf"]["freq"]
+        )
+        params["retrieve_hass_conf"]["days_to_retrieve"] = options.get(
+            "historic_days_to_retrieve",
+            params["retrieve_hass_conf"]["days_to_retrieve"],
+        )
+        params["retrieve_hass_conf"]["var_PV"] = options.get(
+            "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+        )
+        params["retrieve_hass_conf"]["var_load"] = options.get(
+            "sensor_power_load_no_var_loads", params["retrieve_hass_conf"]["var_load"]
+        )
+        params["retrieve_hass_conf"]["load_negative"] = options.get(
+            "load_negative", params["retrieve_hass_conf"]["load_negative"]
+        )
+        params["retrieve_hass_conf"]["set_zero_min"] = options.get(
+            "set_zero_min", params["retrieve_hass_conf"]["set_zero_min"]
+        )
+        params["retrieve_hass_conf"]["var_replace_zero"] = [
+            options.get(
+                "sensor_power_photovoltaics",
+                params["retrieve_hass_conf"]["var_replace_zero"],
+            )
+        ]
+        params["retrieve_hass_conf"]["var_interp"] = [
+            options.get(
+                "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"]
+            ),
+            options.get(
+                "sensor_power_load_no_var_loads",
+                params["retrieve_hass_conf"]["var_load"],
+            ),
+        ]
+        params["retrieve_hass_conf"]["method_ts_round"] = options.get(
+            "method_ts_round", params["retrieve_hass_conf"]["method_ts_round"]
+        )
         # Update params Secrets if specified
-        params['params_secrets'] = params_secrets
-        params['params_secrets']['time_zone'] = options.get('time_zone',params_secrets['time_zone'])
-        params['params_secrets']['lat'] = options.get('Latitude',params_secrets['lat'])
-        params['params_secrets']['lon'] = options.get('Longitude',params_secrets['lon'])
-        params['params_secrets']['alt'] = 
options.get('Altitude',params_secrets['alt']) + params["params_secrets"] = params_secrets + params["params_secrets"]["time_zone"] = options.get( + "time_zone", params_secrets["time_zone"] + ) + params["params_secrets"]["lat"] = options.get("Latitude", params_secrets["lat"]) + params["params_secrets"]["lon"] = options.get( + "Longitude", params_secrets["lon"] + ) + params["params_secrets"]["alt"] = options.get("Altitude", params_secrets["alt"]) # Updating variables in optim_conf - params['optim_conf']['set_use_battery'] = options.get('set_use_battery',params['optim_conf']['set_use_battery']) - params['optim_conf']['num_def_loads'] = options.get('number_of_deferrable_loads',params['optim_conf']['num_def_loads']) - if options.get('list_nominal_power_of_deferrable_loads',None) != None: - params['optim_conf']['P_deferrable_nom'] = [i['nominal_power_of_deferrable_loads'] for i in options.get('list_nominal_power_of_deferrable_loads')] - if options.get('list_operating_hours_of_each_deferrable_load',None) != None: - params['optim_conf']['def_total_hours'] = [i['operating_hours_of_each_deferrable_load'] for i in options.get('list_operating_hours_of_each_deferrable_load')] - if options.get('list_treat_deferrable_load_as_semi_cont',None) != None: - params['optim_conf']['treat_def_as_semi_cont'] = [i['treat_deferrable_load_as_semi_cont'] for i in options.get('list_treat_deferrable_load_as_semi_cont')] - params['optim_conf']['weather_forecast_method'] = options.get('weather_forecast_method',params['optim_conf']['weather_forecast_method']) + params["optim_conf"]["set_use_battery"] = options.get( + "set_use_battery", params["optim_conf"]["set_use_battery"] + ) + params["optim_conf"]["num_def_loads"] = options.get( + "number_of_deferrable_loads", params["optim_conf"]["num_def_loads"] + ) + if options.get("list_nominal_power_of_deferrable_loads", None) != None: + params["optim_conf"]["P_deferrable_nom"] = [ + i["nominal_power_of_deferrable_loads"] + for i in options.get("list_nominal_power_of_deferrable_loads") + ] + if options.get("list_operating_hours_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_total_hours"] = [ + i["operating_hours_of_each_deferrable_load"] + for i in options.get("list_operating_hours_of_each_deferrable_load") + ] + if options.get("list_treat_deferrable_load_as_semi_cont", None) != None: + params["optim_conf"]["treat_def_as_semi_cont"] = [ + i["treat_deferrable_load_as_semi_cont"] + for i in options.get("list_treat_deferrable_load_as_semi_cont") + ] + params["optim_conf"]["weather_forecast_method"] = options.get( + "weather_forecast_method", params["optim_conf"]["weather_forecast_method"] + ) # Update optional param secrets - if params['optim_conf']['weather_forecast_method'] == "solcast": - params['params_secrets']['solcast_api_key'] = options.get('optional_solcast_api_key',params_secrets.get('solcast_api_key',"123456")) - params['params_secrets']['solcast_rooftop_id'] = options.get('optional_solcast_rooftop_id',params_secrets.get('solcast_rooftop_id',"123456")) - elif params['optim_conf']['weather_forecast_method'] == "solar.forecast": - params['params_secrets']['solar_forecast_kwp'] = options.get('optional_solar_forecast_kwp',params_secrets.get('solar_forecast_kwp',5)) - params['optim_conf']['load_forecast_method'] = options.get('load_forecast_method',params['optim_conf']['load_forecast_method']) - params['optim_conf']['delta_forecast'] = options.get('delta_forecast_daily',params['optim_conf']['delta_forecast']) - 
params['optim_conf']['load_cost_forecast_method'] = options.get('load_cost_forecast_method',params['optim_conf']['load_cost_forecast_method'])
-        if options.get('list_set_deferrable_load_single_constant',None) != None:
-            params['optim_conf']['set_def_constant'] = [i['set_deferrable_load_single_constant'] for i in options.get('list_set_deferrable_load_single_constant')]
-        if options.get('list_peak_hours_periods_start_hours',None) != None and options.get('list_peak_hours_periods_end_hours',None) != None:
-            start_hours_list = [i['peak_hours_periods_start_hours'] for i in options['list_peak_hours_periods_start_hours']]
-            end_hours_list = [i['peak_hours_periods_end_hours'] for i in options['list_peak_hours_periods_end_hours']]
+        if params["optim_conf"]["weather_forecast_method"] == "solcast":
+            params["params_secrets"]["solcast_api_key"] = options.get(
+                "optional_solcast_api_key",
+                params_secrets.get("solcast_api_key", "123456"),
+            )
+            params["params_secrets"]["solcast_rooftop_id"] = options.get(
+                "optional_solcast_rooftop_id",
+                params_secrets.get("solcast_rooftop_id", "123456"),
+            )
+        elif params["optim_conf"]["weather_forecast_method"] == "solar.forecast":
+            params["params_secrets"]["solar_forecast_kwp"] = options.get(
+                "optional_solar_forecast_kwp",
+                params_secrets.get("solar_forecast_kwp", 5),
+            )
+        params["optim_conf"]["load_forecast_method"] = options.get(
+            "load_forecast_method", params["optim_conf"]["load_forecast_method"]
+        )
+        params["optim_conf"]["delta_forecast"] = options.get(
+            "delta_forecast_daily", params["optim_conf"]["delta_forecast"]
+        )
+        params["optim_conf"]["load_cost_forecast_method"] = options.get(
+            "load_cost_forecast_method",
+            params["optim_conf"]["load_cost_forecast_method"],
+        )
+        if options.get("list_set_deferrable_load_single_constant", None) != None:
+            params["optim_conf"]["set_def_constant"] = [
+                i["set_deferrable_load_single_constant"]
+                for i in options.get("list_set_deferrable_load_single_constant")
+            ]
+        if (
+            options.get("list_peak_hours_periods_start_hours", None) != None
+            and options.get("list_peak_hours_periods_end_hours", None) != None
+        ):
+            start_hours_list = [
+                i["peak_hours_periods_start_hours"]
+                for i in options["list_peak_hours_periods_start_hours"]
+            ]
+            end_hours_list = [
+                i["peak_hours_periods_end_hours"]
+                for i in options["list_peak_hours_periods_end_hours"]
+            ]
             num_peak_hours = len(start_hours_list)
             list_hp_periods_list = [{'period_hp_'+str(i+1):[{'start':start_hours_list[i]},{'end':end_hours_list[i]}]} for i in range(num_peak_hours)]
             params['optim_conf']['list_hp_periods'] = list_hp_periods_list
@@ -681,20 +910,35 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int,
         for x in range(len(params['optim_conf']['P_deferrable_nom']), params['optim_conf']['num_def_loads']):
             params['optim_conf']['P_deferrable_nom'].append(0)
     # days_to_retrieve should be no less then 2
-    if params['retrieve_hass_conf']['days_to_retrieve'] < 2:
-        params['retrieve_hass_conf']['days_to_retrieve'] = 2
-        logger.warning("days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history")
+    if params["retrieve_hass_conf"]["days_to_retrieve"] < 2:
+        params["retrieve_hass_conf"]["days_to_retrieve"] = 2
+        logger.warning(
+            "days_to_retrieve should not be lower than 2, setting days_to_retrieve to 2. 
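For readers following the diff, the nested structure that ends up in list_hp_periods is easier to see outside the comprehension. A minimal sketch with hypothetical start and end hours (the time format is assumed from the options file):

    # Illustration only; mirrors the comprehension in build_params above.
    start_hours_list = ["02:54", "17:24"]   # hypothetical option values
    end_hours_list = ["15:24", "20:24"]
    list_hp_periods_list = [
        {"period_hp_" + str(i + 1): [{"start": start_hours_list[i]},
                                     {"end": end_hours_list[i]}]}
        for i in range(len(start_hours_list))
    ]
    # -> [{'period_hp_1': [{'start': '02:54'}, {'end': '15:24'}]},
    #     {'period_hp_2': [{'start': '17:24'}, {'end': '20:24'}]}]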
Make sure your sensors also have at least 2 days of history" + ) else: - params['params_secrets'] = params_secrets + params["params_secrets"] = params_secrets # The params dict - params['passed_data'] = {'pv_power_forecast':None,'load_power_forecast':None,'load_cost_forecast':None,'prod_price_forecast':None, - 'prediction_horizon':None,'soc_init':None,'soc_final':None,'def_total_hours':None,'def_start_timestep':None,'def_end_timestep':None,'alpha':None,'beta':None} + params["passed_data"] = { + "pv_power_forecast": None, + "load_power_forecast": None, + "load_cost_forecast": None, + "prod_price_forecast": None, + "prediction_horizon": None, + "soc_init": None, + "soc_final": None, + "def_total_hours": None, + "def_start_timestep": None, + "def_end_timestep": None, + "alpha": None, + "beta": None, + } return params + def get_days_list(days_to_retrieve: int) -> pd.date_range: """ Get list of past days from today to days_to_retrieve. - + :param days_to_retrieve: Total number of days to retrieve from the past :type days_to_retrieve: int :return: The list of days @@ -703,19 +947,20 @@ def get_days_list(days_to_retrieve: int) -> pd.date_range: """ today = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0) d = (today - timedelta(days=days_to_retrieve)).isoformat() - days_list = pd.date_range(start=d, end=today.isoformat(), freq='D') - + days_list = pd.date_range(start=d, end=today.isoformat(), freq="D") + return days_list + def set_df_index_freq(df: pd.DataFrame) -> pd.DataFrame: """ Set the freq of a DataFrame DateTimeIndex. - + :param df: Input DataFrame :type df: pd.DataFrame :return: Input DataFrame with freq defined :rtype: pd.DataFrame - + """ idx_diff = np.diff(df.index) sampling = pd.to_timedelta(np.median(idx_diff)) From 9ed8798032f6099a74da89e030fd49810eebd0e0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 14:55:30 +0100 Subject: [PATCH 074/111] sklearn-model -> regression-model --- src/emhass/command_line.py | 4 ++-- src/emhass/utils.py | 5 +++++ src/emhass/web_server.py | 8 ++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 0c094c96..9d25d518 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -609,7 +609,7 @@ def regressor_model_fit( """ data = copy.deepcopy(input_data_dict["df_input_data"]) model_type = input_data_dict["params"]["passed_data"]["model_type"] - sklearn_model = input_data_dict["params"]["passed_data"]["sklearn_model"] + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] features = input_data_dict["params"]["passed_data"]["features"] target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] @@ -617,7 +617,7 @@ def regressor_model_fit( root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( - data, model_type, sklearn_model, features, target, timestamp, logger + data, model_type, regression_model, features, target, timestamp, logger ) # Fit the ML model mlr.fit(date_features=date_features) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 6d953ae6..5e40160a 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -329,6 +329,11 @@ def treat_runtimeparams( else: sklearn_model = runtimeparams["sklearn_model"] params["passed_data"]["sklearn_model"] = sklearn_model + if "regression_model" not in runtimeparams.keys(): + regression_model = "LinearRegression" + else: + regression_model = 
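The fallback pattern introduced here for regression_model (use the runtime value if present, otherwise a default) is equivalent to a dict.get lookup; a compact sketch of the same semantics, for illustration only:

    runtimeparams = {}  # hypothetical: no regression_model passed at runtime
    params = {"passed_data": {}}
    regression_model = runtimeparams.get("regression_model", "LinearRegression")
    params["passed_data"]["regression_model"] = regression_model  # -> "LinearRegression"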
runtimeparams["regression_model"] + params["passed_data"]["regression_model"] = regression_model if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 50241590..9a100870 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -195,14 +195,14 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a regressor fit...") + app.logger.info(" >> Performing a machine learning regressor fit...") regressor_model_fit(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-fit executed... \n' + msg = f'EMHASS >> Action regressor-model-fit executed... \n' return make_response(msg, 201) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a regressor predict...") + app.logger.info(" >> Performing a machine learning regressor predict...") regressor_model_predict(input_data_dict, app.logger) - msg = f'EMHASS >> Action regressor-predict executed... \n' + msg = f'EMHASS >> Action regressor-model-predict executed... \n' return make_response(msg, 201) else: app.logger.error("ERROR: passed action is not valid") From 7b5e2ba0675c639f8d9a82f5bd7dad1c6c04e4fd Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 15:18:57 +0100 Subject: [PATCH 075/111] REGRESSION_METHODS const --- src/emhass/machine_learning_regressor.py | 127 ++++++++++++----------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 80ddd74f..9e7795d0 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -24,6 +24,41 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) +REGRESSION_METHODS = { + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + } class MLRegressor: r""" @@ -43,7 +78,7 @@ def __init__( self, data, model_type: str, - sklearn_model: str, + regression_model: str, features: list, target: str, timestamp: str, @@ -56,11 +91,15 @@ def __init__( :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str + :param regression_model: The model that will be used. For now only \ + this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ + `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + :type regression_model: str :param features: A list of features. 
\ - Example: [`solar`, `degree_days`]. + Example: [`solar_production`, `degree_days`]. :type features: list :param target: The target(to be predicted). \ - Example: `hours`. + Example: `heating_hours`. :type target: str :param timestamp: If defined, the column key that has to be used of timestamp. :type timestamp: str @@ -72,7 +111,7 @@ def __init__( self.target = target self.timestamp = timestamp self.model_type = model_type - self.sklearn_model = sklearn_model + self.regression_model = regression_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep="first")] @@ -111,7 +150,7 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = []) -> None: + def fit(self, date_features: Optional[list] = None) -> None: """ Fit the model using the provided data. @@ -129,7 +168,7 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: + if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( self.data_exo, date_features, self.timestamp @@ -150,63 +189,27 @@ def fit(self, date_features: Optional[list] = []) -> None: ) self.steps = len(X_test) - regression_methods = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } - - if self.sklearn_model == "LinearRegression": - base_model = regression_methods["LinearRegression"]["model"] - param_grid = regression_methods["LinearRegression"]["param_grid"] - elif self.sklearn_model == "RidgeRegression": - base_model = regression_methods["RidgeRegression"]["model"] - param_grid = regression_methods["RidgeRegression"]["param_grid"] - elif self.sklearn_model == "LassoRegression": - base_model = regression_methods["LassoRegression"]["model"] - param_grid = regression_methods["LassoRegression"]["param_grid"] - elif self.sklearn_model == "RandomForestRegression": - base_model = regression_methods["RandomForestRegression"]["model"] - param_grid = regression_methods["RandomForestRegression"]["param_grid"] - elif self.sklearn_model == "GradientBoostingRegression": - base_model = regression_methods["GradientBoostingRegression"]["model"] - param_grid = regression_methods["GradientBoostingRegression"]["param_grid"] - elif self.sklearn_model == "AdaBoostRegression": - base_model = regression_methods["AdaBoostRegression"]["model"] - param_grid = regression_methods["AdaBoostRegression"]["param_grid"] + if self.regression_model == "LinearRegression": + base_model 
= REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model " + self.sklearn_model + " is not valid" + "Passed sklearn model " + self.regression_model + " is not valid" ) self.model = make_pipeline(StandardScaler(), base_model) @@ -223,7 +226,7 @@ def fit(self, date_features: Optional[list] = []) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.sklearn_model + " model") + self.logger.info("Training a " + self.regression_model + " model") start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) print("Best value for lambda : ", self.grid_search.best_params_) From 6bf36a358321512258dd0294706383a59ca3652b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:12:28 +0100 Subject: [PATCH 076/111] Some cleanup --- src/emhass/machine_learning_regressor.py | 220 +++++++++++++---------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 9e7795d0..95f624b3 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -1,70 +1,72 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +"""Machine learning regressor module.""" + +from __future__ import annotations import copy -import logging import time -from typing import Optional import warnings +from typing import TYPE_CHECKING -import pandas as pd import numpy as np +import pandas as pd from sklearn.ensemble import ( AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, ) -from sklearn.metrics import r2_score - from sklearn.linear_model import Lasso, LinearRegression, Ridge +from sklearn.metrics import r2_score from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +if TYPE_CHECKING: + import logging warnings.filterwarnings("ignore", category=DeprecationWarning) REGRESSION_METHODS = { - "LinearRegression": { - "model": LinearRegression(), - "param_grid": { - "linearregression__fit_intercept": [True, False], - "linearregression__positive": [True, False], - }, - }, - "RidgeRegression": { - "model": Ridge(), - "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, - }, - "LassoRegression": { - "model": Lasso(), - "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, - }, - "RandomForestRegression": { - "model": RandomForestRegressor(), - "param_grid": 
{"randomforestregressor__n_estimators": [50, 100, 200]}, - }, - "GradientBoostingRegression": { - "model": GradientBoostingRegressor(), - "param_grid": { - "gradientboostingregressor__n_estimators": [50, 100, 200], - "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - "AdaBoostRegression": { - "model": AdaBoostRegressor(), - "param_grid": { - "adaboostregressor__n_estimators": [50, 100, 200], - "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], - }, - }, - } + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, +} + class MLRegressor: - r""" - A forecaster class using machine learning models. + r"""A forecaster class using machine learning models. - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. + This class uses the `sklearn` module and the machine learning models are \ + from `scikit-learn`. It exposes two main methods: @@ -74,9 +76,9 @@ class MLRegressor: """ - def __init__( - self, - data, + def __init__( # noqa: PLR0913 + self: MLRegressor, + data: pd.DataFrame, model_type: str, regression_model: str, features: list, @@ -92,8 +94,9 @@ def __init__( for what it will be used for. :type model_type: str :param regression_model: The model that will be used. For now only \ - this options are possible: `LinearRegression`, `RidgeRegression`, `KNeighborsRegressor`, \ - `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + this options are possible: `LinearRegression`, `RidgeRegression`, \ + `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. \ Example: [`solar_production`, `degree_days`]. @@ -113,7 +116,7 @@ def __init__( self.model_type = model_type self.regression_model = regression_model self.logger = logger - self.data.sort_index(inplace=True) + self.data = self.data.sort_index() self.data = self.data[~self.data.index.duplicated(keep="first")] self.data_exo = None self.steps = None @@ -122,9 +125,11 @@ def __init__( @staticmethod def add_date_features( - data: pd.DataFrame, date_features: list, timestamp: str + data: pd.DataFrame, + date_features: list, + timestamp: str, ) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp + """Add date features from the input DataFrame timestamp. 
:param data: The input DataFrame :type data: pd.DataFrame @@ -133,7 +138,7 @@ def add_date_features( :return: The DataFrame with the added features :rtype: pd.DataFrame """ - df = copy.deepcopy(data) + df = copy.deepcopy(data) # noqa: PD901 df[timestamp] = pd.to_datetime(df["timestamp"]) if "year" in date_features: df["year"] = [i.year for i in df["timestamp"]] @@ -150,14 +155,54 @@ def add_date_features( return df - def fit(self, date_features: Optional[list] = None) -> None: + def get_regression_model(self: MLRegressor) -> tuple[str, str]: + """Get the base model and parameter grid for the specified regression model. + + Returns a tuple containing the base model and parameter grid corresponding to \ + the specified regression model. + + Args: + ---- + self: The instance of the MLRegressor class. + + Returns: + ------- + A tuple containing the base model and parameter grid. + """ - Fit the model using the provided data. + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] + else: + self.logger.error( + "Passed sklearn model %s is not valid", + self.regression_model, + ) + return base_model, param_grid - :param date_features: A list of 'date_features' to take into account when fitting the model. + def fit(self: MLRegressor, date_features: list | None = None) -> None: + """Fit the model using the provided data. + + :param date_features: A list of 'date_features' to take into account when \ + fitting the model. :type data: list """ - self.logger.info("Performing a csv model fit for " + self.model_type) + self.logger.info("Performing a MLRegressor fit for %s", self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.features] = self.data[self.features] self.data_exo[self.target] = self.data[self.target] @@ -167,50 +212,36 @@ def fit(self, date_features: Optional[list] = None) -> None: keep_columns.append(self.timestamp) keep_columns.append(self.target) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) + self.data_exo = self.data_exo.reset_index(drop=True) if date_features is not None: if self.timestamp is not None: self.data_exo = MLRegressor.add_date_features( - self.data_exo, date_features, self.timestamp + self.data_exo, + date_features, + self.timestamp, ) else: self.logger.error( - "If no timestamp provided, you can't use date_features, going further without date_features." 
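A usage sketch for add_date_features with a hypothetical two-row frame; the method converts the timestamp column with pd.to_datetime before deriving integer features, and the feature names below are the ones the unit tests pass:

    import pandas as pd

    from emhass.machine_learning_regressor import MLRegressor

    df = pd.DataFrame(
        {"timestamp": ["2024-03-01 08:00", "2024-03-02 09:30"],
         "degree_days": [12.7, 9.3]},
    )
    out = MLRegressor.add_date_features(df, ["month", "day_of_week"], "timestamp")
    # out gains integer "month" and "day_of_week" columns derived from "timestamp".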
+ "If no timestamp provided, you can't use date_features, going \ + further without date_features.", ) y = self.data_exo[self.target] self.data_exo = self.data_exo.drop(self.target, axis=1) if self.timestamp is not None: self.data_exo = self.data_exo.drop(self.timestamp, axis=1) - X = self.data_exo + X = self.data_exo # noqa: N806 - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 + X_train, X_test, y_train, y_test = train_test_split( # noqa: N806 + X, + y, + test_size=0.2, + random_state=42, ) + self.steps = len(X_test) - if self.regression_model == "LinearRegression": - base_model = REGRESSION_METHODS["LinearRegression"]["model"] - param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] - elif self.regression_model == "RidgeRegression": - base_model = REGRESSION_METHODS["RidgeRegression"]["model"] - param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] - elif self.regression_model == "LassoRegression": - base_model = REGRESSION_METHODS["LassoRegression"]["model"] - param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] - elif self.regression_model == "RandomForestRegression": - base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] - param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] - elif self.regression_model == "GradientBoostingRegression": - base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] - param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] - elif self.regression_model == "AdaBoostRegression": - base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] - param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] - else: - self.logger.error( - "Passed sklearn model " + self.regression_model + " is not valid" - ) + base_model, param_grid = self.get_regression_model() self.model = make_pipeline(StandardScaler(), base_model) @@ -226,12 +257,10 @@ def fit(self, date_features: Optional[list] = None) -> None: ) # Fit the grid search object to the data - self.logger.info("Training a " + self.regression_model + " model") + self.logger.info("Training a %s model", self.regression_model) start_time = time.time() self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ", self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + self.logger.info("Elapsed time for model fit: %s", time.time() - start_time) self.model = self.grid_search.best_estimator_ @@ -240,20 +269,21 @@ def fit(self, date_features: Optional[list] = None) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test, predictions) self.logger.info( - f"Prediction R2 score of fitted model on test data: {pred_metric}" + "Prediction R2 score of fitted model on test data: %s", + pred_metric, ) - def predict(self, new_values: list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - + def predict(self: MLRegressor, new_values: list) -> np.ndarray: + """Predict a new value. - :param new_values: The new values for the features(in the same order as the features list). \ + :param new_values: The new values for the features \ + (in the same order as the features list). \ Example: [2.24, 5.68]. :type new_values: list :return: The np.ndarray containing the predicted value. 
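Condensed, the training path in fit() amounts to split, scale, grid-search, refit and score. A self-contained sketch on synthetic data (feature meanings and sizes are made up):

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    rng = np.random.default_rng(42)
    X = rng.normal(size=(200, 2))  # e.g. [degree_days, solar]
    y = 3.0 * X[:, 0] - 1.5 * X[:, 1] + rng.normal(scale=0.1, size=200)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42,
    )
    pipe = make_pipeline(StandardScaler(), LinearRegression())
    grid = GridSearchCV(pipe, {"linearregression__fit_intercept": [True, False]}, cv=5)
    grid.fit(X_train, y_train)
    print(r2_score(y_test, grid.best_estimator_.predict(X_test)))  # close to 1.0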
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for " + self.model_type) + self.logger.info("Performing a prediction for %s", self.model_type) new_values = np.array([new_values]) return self.model.predict(new_values) From 43b492763702cb64a81e77129e0fc34746ac3870 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 077/111] Add csv-prediction --- src/emhass/command_line.py | 1 - src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 9d25d518..35d8b10c 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -16,7 +16,6 @@ from distutils.util import strtobool - from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
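One detail worth noting in predict() above: scikit-learn estimators expect a 2-D array of shape (n_samples, n_features), which is why the single observation is wrapped in an extra list before the call. A small sketch with the feature values the tests use:

    import numpy as np

    new_values = [12.79, 4.766]     # hypothetical feature values
    batch = np.array([new_values])  # shape (1, 2): one sample, two features
    # mlr.model.predict(batch) would return a length-1 ndarray for this sample.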
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From 173f05e5f01c99abacbe9248b79c5b94279adc86 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 078/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From 19da6f8aede912179696fc4ba38c1becd2e397fc Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 079/111] gitignore fun --- .vscode/launch.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 10313c97..b953c7d3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,10 +10,11 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "purpose":["debug-in-terminal"], "justMyCode": true, "env": { @@ -21,15 +22,15 @@ "OPTIONS_PATH": "/workspaces/emhass/options.json", "SECRETS_PATH": "/workspaces/emhass/secrets_emhass.yaml", "DATA_PATH": "/workspaces/emhass/data/", - "LOGGING_LEVEL": "DEBUG" } }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src/emhass/", "args": ["--addon", "true", "--no_response", "true"], "purpose":["debug-in-terminal"], "justMyCode": true, @@ -44,7 +45,6 @@ "LAT": "45.83", //optional change "LON": "6.86", //optional change "ALT": "4807.8", //optional change - "LOGGING_LEVEL": "DEBUG" //optional change }, } From 1f531ec4e690e4942158d1a0c8ed6042520c69df Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 080/111] python -> debugpy --- .vscode/launch.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b953c7d3..ec6c6987 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,12 +10,14 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "purpose":["debug-in-terminal"], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -26,13 +28,20 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": 
"debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", "cwd": "${workspaceFolder}/src/emhass/", - "args": ["--addon", "true", "--no_response", "true"], - "purpose":["debug-in-terminal"], + "args": [ + "--addon", + "true", + "--no_response", + "true" + ], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -46,7 +55,6 @@ "LON": "6.86", //optional change "ALT": "4807.8", //optional change }, - - } + } ] } \ No newline at end of file From b4984d01b96d8b1593b28185f85c4d9e834dbfc4 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 081/111] launch.json --- .vscode/launch.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ec6c6987..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,9 +12,8 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "purpose": [ "debug-in-terminal" ], @@ -30,9 +29,8 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", - "cwd": "${workspaceFolder}/src/emhass/", "args": [ "--addon", "true", From c1344b18e02681131174312d268d921b2f216a57 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 082/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. 
- - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. 
- - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From 723ed3f7f10c2dcbc43d1f771c44bec7ed9d87ff Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 18 Mar 2024 09:33:20 +0100 Subject: [PATCH 083/111] remove KNeighborsRegressor --- src/emhass/machine_learning_regressor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 95f624b3..732b4266 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -95,7 +95,7 @@ def __init__( # noqa: PLR0913 :type model_type: str :param regression_model: The model that will be used. For now only \ this options are possible: `LinearRegression`, `RidgeRegression`, \ - `KNeighborsRegressor`, `LassoRegression`, `RandomForestRegression`, \ + `LassoRegression`, `RandomForestRegression`, \ `GradientBoostingRegression` and `AdaBoostRegression`. :type regression_model: str :param features: A list of features. 
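With KNeighborsRegressor gone from the supported list, a caller that still passes it will reach the error branch in get_regression_model. A defensive sketch on the caller side, assuming the REGRESSION_METHODS dict defined earlier in this module:

    requested = "KNeighborsRegressor"          # no longer supported
    if requested not in REGRESSION_METHODS:    # registry from the module above
        requested = "LinearRegression"         # same default treat_runtimeparams uses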
\ From 93380f3e79fd5ea9bd6127de286b518730ca3b98 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 14:07:21 +0200 Subject: [PATCH 084/111] add tests --- src/emhass/command_line.py | 100 +++++++---- tests/test_command_line_utils.py | 215 ++++++++++++++++++++--- tests/test_machine_learning_regressor.py | 113 ++++++++++++ 3 files changed, 376 insertions(+), 52 deletions(-) create mode 100644 tests/test_machine_learning_regressor.py diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 35d8b10c..06280ff4 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -200,9 +200,9 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, return False df_input_data = rh.df_final.copy() - elif set_type == "regressor-model-fit": + elif set_type == "regressor-model-fit" or set_type == "regressor-model-predict": - df_input_data_dayahead = None + df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) days_list = None @@ -210,7 +210,13 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, features = params["passed_data"]["features"] target = params["passed_data"]["target"] timestamp = params["passed_data"]["timestamp"] - filename_path = pathlib.Path(base_path) / csv_file + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file + + else: + filename_path = pathlib.Path(base_path) / csv_file + if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -226,13 +232,8 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, if not set(required_columns).issubset(df_input_data.columns): logger.error("The cvs file does not contain the required columns.") raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" + f"CSV file should contain the following columns: {', '.join(required_columns)}", ) - elif set_type == "regressor-model-predict": - df_input_data, df_input_data_dayahead = None, None - P_PV_forecast, P_load_forecast = None, None - days_list = None - params = json.loads(params) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None @@ -240,7 +241,7 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, days_list = None else: logger.error( - "The passed action argument and hence the set_type parameter for setup is not valid" + "The passed action argument and hence the set_type parameter for setup is not valid", ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None @@ -493,7 +494,7 @@ def forecast_model_predict( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return # Make predictions @@ -580,7 +581,7 @@ def forecast_model_tune( mlf = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this tune method" + "The ML forecaster file was not found, please run a model fit method before this tune method", ) return None, None # Tune the model @@ -595,7 +596,9 @@ def forecast_model_tune( def regressor_model_fit( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, ) -> None: """Perform a forecast model fit from training 
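The CSV branch above boils down to reading the file and checking its schema before any fitting happens. A hedged standalone sketch (the file name and column names mirror the unit tests added below):

    import pandas as pd

    df_input_data = pd.read_csv("prediction.csv", parse_dates=True)
    required_columns = ["dd", "solar", "hour"]  # features plus target
    if not set(required_columns).issubset(df_input_data.columns):
        raise ValueError(
            f"CSV file should contain the following columns: {', '.join(required_columns)}",
        )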
data retrieved from Home Assistant. @@ -614,9 +617,16 @@ def regressor_model_fit( timestamp = input_data_dict["params"]["passed_data"]["timestamp"] date_features = input_data_dict["params"]["passed_data"]["date_features"] root = input_data_dict["root"] + # The MLRegressor object mlr = MLRegressor( - data, model_type, regression_model, features, target, timestamp, logger + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, ) # Fit the ML model mlr.fit(date_features=date_features) @@ -625,10 +635,14 @@ def regressor_model_fit( filename = model_type + "_mlr.pkl" with open(pathlib.Path(root) / filename, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) + return mlr def regressor_model_predict( - input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlr: Optional[MLRegressor] = None, ) -> None: """Perform a prediction from csv file. @@ -649,7 +663,7 @@ def regressor_model_predict( mlr = pickle.load(inp) else: logger.error( - "The ML forecaster file was not found, please run a model fit method before this predict method" + "The ML forecaster file was not found, please run a model fit method before this predict method", ) return new_values = input_data_dict["params"]["passed_data"]["new_values"] @@ -667,14 +681,16 @@ def regressor_model_predict( ] # Publish prediction idx = 0 - input_data_dict["rh"].post_data( - prediction, - idx, - mlr_predict_entity_id, - mlr_predict_unit_of_measurement, - mlr_predict_friendly_name, - type_var="mlregressor", - ) + if not debug: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + return prediction def publish_data( @@ -763,7 +779,7 @@ def publish_data( if "P_deferrable{}".format(k) not in opt_res_latest.columns: logger.error( "P_deferrable{}".format(k) - + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.", ) else: input_data_dict["rh"].post_data( @@ -780,7 +796,7 @@ def publish_data( if input_data_dict["opt"].optim_conf["set_use_battery"]: if "P_batt" not in opt_res_latest.columns: logger.error( - "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution." + "P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.", ) else: custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] @@ -836,7 +852,7 @@ def publish_data( if "optim_status" not in opt_res_latest: opt_res_latest["optim_status"] = "Optimal" logger.warning( - "no optim_status in opt_res_latest, run an optimization task first" + "no optim_status in opt_res_latest, run an optimization task first", ) input_data_dict["rh"].post_data( opt_res_latest["optim_status"], @@ -958,12 +974,14 @@ def main(): # Additionnal argument try: parser.add_argument( - "--version", action="version", version="%(prog)s " + version("emhass") + "--version", + action="version", + version="%(prog)s " + version("emhass"), ) args = parser.parse_args() except Exception: logger.info( - "Version not found for emhass package. Or importlib exited with PackageNotFoundError." + "Version not found for emhass package. 
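Taken together, regressor_model_fit and regressor_model_predict form a pickle round trip keyed on model_type. A hedged sketch of that round trip (the path and values are illustrative; emhass itself drives both halves through input_data_dict):

    import pickle
    from pathlib import Path

    filename = Path("/tmp") / ("heating_dd" + "_mlr.pkl")  # model_type + "_mlr.pkl"
    with open(filename, "wb") as outp:
        pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL)    # mlr: a fitted MLRegressor
    with open(filename, "rb") as inp:
        mlr_loaded = pickle.load(inp)
    prediction = mlr_loaded.predict([12.79, 4.766, 1, 2])  # new_values as in the tests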
Or importlib exited with PackageNotFoundError.", ) # Setup parameters input_data_dict = set_input_data_dict(emhass_conf, @@ -996,7 +1014,25 @@ def main(): else: mlf = None df_pred_optim, mlf = forecast_model_tune( - input_data_dict, logger, debug=args.debug, mlf=mlf + input_data_dict, + logger, + debug=args.debug, + mlf=mlf, + ) + opt_res = None + elif args.action == "regressor-model-fit": + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + opt_res = None + elif args.action == "regressor-model-predict": + if args.debug: + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + else: + mlr = None + prediction = regressor_model_predict( + input_data_dict, + logger, + debug=args.debug, + mlr=mlr, ) opt_res = None elif args.action == "publish-data": @@ -1020,6 +1056,10 @@ def main(): return df_fit_pred, df_fit_pred_backtest, mlf elif args.action == "forecast-model-predict": return df_pred + elif args.action == "regressor-model-fit": + return mlr + elif args.action == "regressor-model-predict": + return prediction elif args.action == "forecast-model-tune": return df_pred_optim, mlf else: diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index 4605db81..293733f4 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -5,10 +5,21 @@ from unittest.mock import patch import pandas as pd import pathlib, json, yaml, copy +import numpy as np from emhass.command_line import set_input_data_dict -from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim -from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import ( + perfect_forecast_optim, + dayahead_forecast_optim, + naive_mpc_optim, +) +from emhass.command_line import ( + forecast_model_fit, + forecast_model_predict, + forecast_model_tune, + regressor_model_fit, + regressor_model_predict, +) from emhass.command_line import publish_data from emhass.command_line import main from emhass import utils @@ -307,13 +318,102 @@ def test_forecast_model_fit_predict_tune(self): self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) # Test the tune method - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=True, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=True, mlf=mlf + ) self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) - # Test ijection_dict for tune method on webui + # Test injection_dict for tune method on webui injection_dict = utils.get_injection_dict_forecast_model_tune(df_fit_pred, mlf) self.assertIsInstance(injection_dict, dict) - self.assertIsInstance(injection_dict['figure_0'], str) + self.assertIsInstance(injection_dict["figure_0"], str) + + def test_regressor_model_fit_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + 
"new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params_json = json.dumps(params) + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["regression_model"] + == "AdaBoostRegression", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + ) + mlr = regressor_model_fit(input_data_dict, logger, debug=True) + + # def test_regressor_model_predict(self): + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-predict" # predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + + input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] + == "Predicted hours", + ) + + regressor_model_predict(input_data_dict, logger, debug=True, mlr=mlr) + @patch('sys.argv', ['main', '--action', 'test', '--config', str(emhass_conf['config_path']), '--debug', 'True']) @@ -326,27 +426,30 @@ def test_main_wrong_action(self): def test_main_perfect_forecast_optim(self): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) + self.assertTrue(opt_res.isnull().sum().sum() == 0) self.assertIsInstance(opt_res.index, pd.core.indexes.datetimes.DatetimeIndex) - self.assertIsInstance(opt_res.index.dtype, pd.core.dtypes.dtypes.DatetimeTZDtype) - + self.assertIsInstance( + opt_res.index.dtype, + pd.core.dtypes.dtypes.DatetimeTZDtype, + ) + def test_main_dayahead_forecast_optim(self): with patch('sys.argv', ['main', '--action', 'dayahead-optim', '--config', str(emhass_conf['config_path']), '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, '--debug', 'True']): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + def test_main_naive_mpc_optim(self): with patch('sys.argv', ['main', '--action', 'naive-mpc-optim', '--config', str(emhass_conf['config_path']), '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, '--debug', 'True']): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - self.assertTrue(len(opt_res)==10) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + self.assertTrue(len(opt_res) == 10) + def test_main_forecast_model_fit(self): 
params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -377,12 +480,12 @@ def test_main_forecast_model_predict(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) with patch('sys.argv', ['main', '--action', 'forecast-model-predict', '--config', str(emhass_conf['config_path']), '--params', params_json, '--runtimeparams', runtimeparams_json, @@ -390,7 +493,7 @@ def test_main_forecast_model_predict(self): df_pred = main() self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) - + def test_main_forecast_model_tune(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -399,12 +502,12 @@ def test_main_forecast_model_tune(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) with patch('sys.argv', ['main', '--action', 'forecast-model-tune', '--config', str(emhass_conf['config_path']), '--params', params_json, '--runtimeparams', runtimeparams_json, @@ -412,6 +515,74 @@ def test_main_forecast_model_tune(self): df_pred_optim, mlf = main() self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) + + def test_main_regressor_model_fit(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-fit", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + mlr = main() + + def test_main_regressor_model_predict(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-predict", + "--config", + str(pathlib.Path(root + "/config_emhass.yaml")), + "--params", + params_json, + 
"--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + prediction = main() + self.assertIsInstance(prediction, np.ndarray) + @patch('sys.argv', ['main', '--action', 'publish-data', '--config', str(emhass_conf['config_path']), '--debug', 'True']) diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py new file mode 100644 index 00000000..88137b0d --- /dev/null +++ b/tests/test_machine_learning_regressor.py @@ -0,0 +1,113 @@ +"""Machine learning regressor test module.""" + +import copy +import json +import pathlib +import unittest + +import numpy as np +import pandas as pd +from sklearn.pipeline import Pipeline +import yaml +from emhass import utils +from emhass.command_line import set_input_data_dict +from emhass.machine_learning_regressor import MLRegressor +from sklearn.ensemble import ( + AdaBoostRegressor, +) + +# the root folder +root = str(utils.get_root(__file__, num_parent=2)) +# create logger +logger, ch = utils.get_logger(__name__, root, save_to_file=False) + + +class TestMLRegressor(unittest.TestCase): + @staticmethod + def get_test_params(): + with open(root + "/config_emhass.yaml", "r") as file: + params = yaml.load(file, Loader=yaml.FullLoader) + params.update( + { + "params_secrets": { + "hass_url": "http://supervisor/core/api", + "long_lived_token": "${SUPERVISOR_TOKEN}", + "time_zone": "Europe/Paris", + "lat": 45.83, + "lon": 6.86, + "alt": 8000.0, + }, + }, + ) + return params + + def setUp(self): + params = TestMLRegressor.get_test_params() + params_json = json.dumps(params) + config_path = pathlib.Path(root + "/config_emhass.yaml") + base_path = str(config_path.parent) # + "/data" + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = copy.deepcopy(json.loads(params_json)) + runtimeparams = { + "csv_file": "prediction.csv", + "features": ["dd", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_dd", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + self.input_data_dict = set_input_data_dict( + config_path, + base_path, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + data = copy.deepcopy(self.input_data_dict["df_input_data"]) + self.assertIsInstance(data, pd.DataFrame) + self.csv_file = self.input_data_dict["params"]["passed_data"]["csv_file"] + features = self.input_data_dict["params"]["passed_data"]["features"] + target = self.input_data_dict["params"]["passed_data"]["target"] + regression_model = self.input_data_dict["params"]["passed_data"][ + "regression_model" + ] + model_type = self.input_data_dict["params"]["passed_data"]["model_type"] + timestamp = self.input_data_dict["params"]["passed_data"]["timestamp"] + self.date_features = self.input_data_dict["params"]["passed_data"][ + "date_features" + ] + self.new_values = self.input_data_dict["params"]["passed_data"]["new_values"] + self.mlr = MLRegressor( + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, + ) + + def test_fit(self): + self.mlr.fit(self.date_features) + self.assertIsInstance(self.mlr.model, Pipeline) + + def test_predict(self): + self.mlr.fit(self.date_features) + predictions = self.mlr.predict(self.new_values) + 
self.assertIsInstance(predictions, np.ndarray)
+
+
+if __name__ == "__main__":
+    unittest.main()
+    ch.close()
+    logger.removeHandler(ch)

From b614f81fd3a54787f251939a02f5e6611535c1e9 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Sun, 7 Jan 2024 08:13:47 +0100
Subject: [PATCH 085/111] remove *.csv from gitignore

---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 581080c8..fa1f0d74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,7 +7,6 @@ secrets_emhass.yaml
 .vscode/launch.json
 .vscode/settings.json
 .vscode/tasks.json
-*.csv
 *.html
 *.pkl
 data/actionLogs.txt

From 86b160461964c3b5ae5e50934681d9b099771058 Mon Sep 17 00:00:00 2001
From: Giel Janssens
Date: Sun, 7 Jan 2024 08:24:21 +0100
Subject: [PATCH 086/111] Add csv-prediction

---
 src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 src/emhass/csv_predictor.py

diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py
new file mode 100644
index 00000000..a1c5576b
--- /dev/null
+++ b/src/emhass/csv_predictor.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import logging
+import copy
+import pathlib
+import time
+from typing import Optional
+# from typing import Optional, Tuple
+import pandas as pd
+import numpy as np
+
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import ElasticNet
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsRegressor
+# from sklearn.metrics import r2_score
+
+# from skforecast.ForecasterAutoreg import ForecasterAutoreg
+# from skforecast.model_selection import bayesian_search_forecaster
+# from skforecast.model_selection import backtesting_forecaster
+
+import warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+class CsvPredictor:
+    r"""
+    A forecaster class using machine learning models.
+
+    This class uses the `skforecast` module and the machine learning models are from `scikit-learn`.
+
+    It exposes one main method:
+
+    - `predict`: to obtain a forecast from a pre-trained model.
+
+    """
+
+    def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str,
+                 logger: logging.Logger) -> None:
+        r"""Define constructor for the forecast class.
+
+        :param data: The data that will be used for train/test
+        :type data: pd.DataFrame
+        :param model_type: A unique name defining this model and useful to identify \
+            for what it will be used for.
+        :type model_type: str
+        :param var_model: The name of the sensor to retrieve data from Home Assistant. \
+            Example: `sensor.power_load_no_var_loads`.
+        :type var_model: str
+        :param sklearn_model: The `scikit-learn` model that will be used. For now only \
+            this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`.
+        :type sklearn_model: str
+        :param num_lags: The number of auto-regression lags to consider. A good starting point \
+            is to fix this as one day. For example if your time step is 30 minutes, then fix this \
+            to 48, if the time step is 1 hour the fix this to 24 and so on.
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From cd59928ed973afa198b4aebdf92f5357a493f712 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 9 Jan 2024 21:11:13 +0100 Subject: [PATCH 087/111] cleanup --- src/emhass/csv_predictor.py | 48 ++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..9f012f8d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -31,11 +31,13 @@ class CsvPredictor: It exposes one main method: - - `predict`: to obtain a forecast from a pre-trained model. + - `predict`: to obtain a forecast from a csv file. 
""" - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + # logger: logging.Logger) -> None: + def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. @@ -44,23 +46,28 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe :param model_type: A unique name defining this model and useful to identify \ for what it will be used for. :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str + :param csv_file: The name of the csv file to retrieve data from. \ + Example: `prediction.csv`. + :type csv_file: str + :param independent_variables: A list of independent variables. \ + Example: [`solar`, `degree_days`]. + :type independent_variables: list + :param dependent_variable: The dependent variable(to be predicted). \ + Example: `hours`. + :type dependent_variable: str :param sklearn_model: The `scikit-learn` model that will be used. For now only \ this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. - :type num_lags: int + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :param root: The parent folder of the path where the config.yaml file is located :type root: str :param logger: The passed logger object :type logger: logging.Logger """ - self.data = data - self.model_type = model_type + # self.data = data + # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -86,18 +93,30 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) + print(type(data)) return data def prepare_data(self, data): + """ + Prepare the data. + + :param data: Input Data + :return: Input DataFrame with freq defined + :rtype: pd.DataFrame + + """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + print(type(X_train)) + print(type(y_train)) return X_train, y_train - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: + # def predict(self, perform_backtest: Optional[bool] = False + # ) -> pd.Series: + def predict(self): r"""The fit method to train the ML model. 
:param split_date_delta: The delta from now to `split_date_delta` that will be used \ @@ -109,7 +128,7 @@ def predict(self, perform_backtest: Optional[bool] = False :return: The DataFrame containing the forecast data results without and with backtest :rtype: Tuple[pd.DataFrame, pd.DataFrame] """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() X, y = self.prepare_data(data) @@ -131,6 +150,7 @@ def predict(self, perform_backtest: Optional[bool] = False self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) + print(type(prediction)) return prediction From d4a3c677b2ad53f6b0c9c2522bee593cfc67a30d Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Jan 2024 10:46:38 +0100 Subject: [PATCH 088/111] more cleanup --- src/emhass/csv_predictor.py | 92 ++++++++++++++----------------------- 1 file changed, 34 insertions(+), 58 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9f012f8d..9550c157 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,11 +2,9 @@ # -*- coding: utf-8 -*- import logging -import copy import pathlib import time -from typing import Optional -# from typing import Optional, Tuple +from typing import Tuple import pandas as pd import numpy as np @@ -14,11 +12,6 @@ from sklearn.linear_model import ElasticNet from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -34,18 +27,10 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - - # def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - # logger: logging.Logger) -> None: def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str :param csv_file: The name of the csv file to retrieve data from. \ Example: `prediction.csv`. 
:type csv_file: str @@ -66,8 +51,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - # self.data = data - # self.model_type = model_type self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable @@ -78,14 +61,17 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.is_tuned = False - def load_data(self): + def load_data(self) -> pd.DataFrame: + """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: data = pd.read_csv(filename_path) else: self.logger.error("The cvs file was not found.") - return + raise ValueError( + f"The CSV file "+ self.csv_file +" was not found." + ) required_columns = self.independent_variables @@ -93,66 +79,56 @@ def load_data(self): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) - print(type(data)) return data - def prepare_data(self, data): + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. :param data: Input Data - :return: Input DataFrame with freq defined - :rtype: pd.DataFrame + :type data: pd.DataFrame + :return: A tuple containing the train data. + :rtype: Tuple[np.ndarray, np.ndarray] """ X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - print(type(X_train)) - print(type(y_train)) return X_train, y_train - # def predict(self, perform_backtest: Optional[bool] = False - # ) -> pd.Series: - def predict(self): - r"""The fit method to train the ML model. + def predict(self) -> np.ndarray: + r"""The predict method to generate a forecast from a csv file. - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] + :return: The np.ndarray containing the predicted value. 
+ :rtype: np.ndarray """ self.logger.info("Performing a prediction for "+self.csv_file) # Preparing the data: adding exogenous features data = self.load_data() - X, y = self.prepare_data(data) + if data is not None: + X, y = self.prepare_data(data) - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - print(type(prediction)) + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Predict through a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) - return prediction + return prediction From 87c54d83bc5fe6810b770e299a6399c90f1a25c0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Jan 2024 11:34:33 +0100 Subject: [PATCH 089/111] filename_path -> inp --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 9550c157..499903d0 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -66,7 +66,7 @@ def load_data(self) -> pd.DataFrame: filename_path = pathlib.Path(self.root) / self.csv_file if filename_path.is_file(): with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) + data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") raise ValueError( From a9cd098939cbc0a720ea5885873096bdb323829f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 29 Jan 2024 11:24:45 +0100 Subject: [PATCH 090/111] resolve some comments --- src/emhass/csv_predictor.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 499903d0..1f478c01 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -5,6 +5,8 @@ import pathlib import time from typing import Tuple +import warnings + import pandas as pd import numpy as np @@ -13,14 +15,14 @@ from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsRegressor -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) class CsvPredictor: r""" A forecaster class using machine learning models. - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. 
+ This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. It exposes one main method: @@ -28,11 +30,11 @@ class CsvPredictor: """ def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: + logger: logging.Logger) -> None: r"""Define constructor for the forecast class. :param csv_file: The name of the csv file to retrieve data from. \ - Example: `prediction.csv`. + Example: `input_train_data.csv`. :type csv_file: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. @@ -60,7 +62,6 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl self.logger = logger self.is_tuned = False - def load_data(self) -> pd.DataFrame: """Load the data.""" filename_path = pathlib.Path(self.root) / self.csv_file @@ -69,18 +70,16 @@ def load_data(self) -> pd.DataFrame: data = pd.read_csv(inp) else: self.logger.error("The cvs file was not found.") - raise ValueError( - f"The CSV file "+ self.csv_file +" was not found." - ) + raise ValueError("The CSV file " + self.csv_file + " was not found.") required_columns = self.independent_variables - + if not set(required_columns).issubset(data.columns): raise ValueError( f"CSV file should contain the following columns: {', '.join(required_columns)}" ) return data - + def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: """ Prepare the data. @@ -94,10 +93,10 @@ def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: X = data[self.independent_variables].values y = data[self.dependent_variable].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - + return X_train, y_train - - + + def predict(self) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. 
@@ -109,7 +108,7 @@ def predict(self) -> np.ndarray: data = self.load_data() if data is not None: X, y = self.prepare_data(data) - + if self.sklearn_model == 'LinearRegression': base_model = LinearRegression() elif self.sklearn_model == 'ElasticNet': @@ -127,9 +126,5 @@ def predict(self) -> np.ndarray: self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") new_values = np.array([self.new_values]) prediction = self.forecaster.predict(new_values) - + return prediction - - - - \ No newline at end of file From 9a86046333ffb7a42386fc8b49422a4a5aeff476 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 091/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 173 +++++++++++++++++++++++------------- 1 file changed, 111 insertions(+), 62 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1f478c01..636d5835 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import pathlib import time -from typing import Tuple +from typing import Optional, Tuple import warnings import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler warnings.filterwarnings("ignore", category=DeprecationWarning) @@ -29,7 +32,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a csv file. """ - def __init__(self, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -53,78 +56,124 @@ def __init__(self, csv_file: str, independent_variables: list, dependent_variabl :param logger: The passed logger object :type logger: logging.Logger """ - self.csv_file = csv_file + self.data = data self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp - def load_data(self) -> pd.DataFrame: - """Load the data.""" - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(inp) - else: - self.logger.error("The cvs file was not found.") - raise ValueError("The CSV file " + self.csv_file + " was not found.") - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data) -> Tuple[np.ndarray, np.ndarray]: + :param data: The input DataFrame + :type data: pd.DataFrame + :return: The DataFrame with the added features + :rtype: pd.DataFrame + """ + df = copy.deepcopy(data) + df['timestamp']= pd.to_datetime(df['timestamp']) + if 'year' in date_features: + df['year'] = [i.month for i in df['timestamp']] + if 'month' in date_features: + df['month'] = [i.month for i in df['timestamp']] + if 'day_of_week' in date_features: + df['day_of_week'] = [i.dayofweek for i in df['timestamp']] + if 'day_of_year' in date_features: + df['day_of_year'] = [i.dayofyear for i in df['timestamp']] + if 'day' in date_features: + df['day'] = [i.day for i in df['timestamp']] + if 'hour' in date_features: + df['hour'] = [i.day for i in df['timestamp']] + + return df + + def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Prepare the data. + Fit the model using the provided data. :param data: Input Data :type data: pd.DataFrame - :return: A tuple containing the train data. 
- :rtype: Tuple[np.ndarray, np.ndarray] - """ - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.logger.info("Performing a forecast model fit for "+self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.independent_variables] = self.data[self.independent_variables] + self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] + keep_columns = [] + keep_columns.extend(self.independent_variables) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.dependent_variable) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo.reset_index(drop=True, inplace=True) + # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') + if len(date_features) > 0: + if self.timestamp is not None: + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + else: + self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - return X_train, y_train + y = self.data_exo[self.dependent_variable] + self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp,axis=1) + X = self.data_exo + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + self.steps = len(X_test) + + # Define the model + self.model = Pipeline([ + ('scaler', StandardScaler()), + ('regressor', LinearRegression()) + ]) + # Define the parameters to tune + param_grid = { + 'regressor__fit_intercept': [True, False], + 'regressor__positive': [True, False], + } + + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # Fit the grid search object to the data + self.logger.info("Fitting the model...") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + self.model = self.grid_search.best_estimator_ + + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + + # Prepare forecast DataFrame + df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) + df_pred['train'] = y_train + df_pred['test'] = y_test + df_pred['pred'] = predictions + print(df_pred) + # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') + + + + # return df_pred + - def predict(self) -> np.ndarray: + def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ - self.logger.info("Performing a prediction for "+self.csv_file) - # Preparing the data: adding exogenous features - data = self.load_data() - if data is not None: - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Predict through a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction + self.logger.info("Performing a prediction for "+self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) From cfb248b13eeb2ed706d6eb61e74c1d1693636239 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:56:19 +0100 Subject: [PATCH 092/111] remove backtest --- src/emhass/csv_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 636d5835..1b2396b5 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -92,7 +92,7 @@ def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: return df - def fit(self, perform_backtest: Optional[bool] = False, date_features: Optional[list] = []) -> Tuple[pd.DataFrame, pd.DataFrame]: + def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. 
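
As of PATCH 092 the class has converged on its final call sequence: it is constructed around an in-memory DataFrame, `fit()` grid-searches a StandardScaler + LinearRegression pipeline and keeps the best estimator, and `predict()` takes the new regressor values directly. The following is a minimal driver sketch of that sequence; the DataFrame source, column names and logger setup are illustrative assumptions (the values mirror the test suite from PATCH 084), not part of the patches themselves.

import logging

import pandas as pd

from emhass.csv_predictor import CsvPredictor

logger = logging.getLogger(__name__)

# Assumed training data: a timestamp column, the regressors ("dd", "solar")
# and the target column ("hour"), as in the tests' prediction.csv.
df = pd.read_csv("prediction.csv", parse_dates=True)

predictor = CsvPredictor(
    data=df,
    model_type="heating_dd",
    independent_variables=["dd", "solar"],
    dependent_variable="hour",
    timestamp="timestamp",
    logger=logger,
)
# fit() does the train/test split, tunes the scaler + LinearRegression
# pipeline with GridSearchCV and stores the best estimator on self.model.
predictor.fit(date_features=["month", "day_of_week"])
# Pass one value per independent variable, then one per date feature, in the
# same column order used for fitting; the result is a 1-element ndarray.
prediction = predictor.predict([12.79, 4.766, 1, 2])
print(prediction)
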
From 4af34ad04486bda05ac5045233bf6b44389889db Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 12:11:41 +0100 Subject: [PATCH 093/111] cleanup --- src/emhass/csv_predictor.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1b2396b5..1e46927d 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -2,16 +2,14 @@ # -*- coding: utf-8 -*- import copy -from datetime import datetime import logging -import pathlib import time -from typing import Optional, Tuple +from typing import Optional import warnings import pandas as pd import numpy as np -from sklearn.metrics import classification_report, r2_score +from sklearn.metrics import r2_score from sklearn.linear_model import LinearRegression from sklearn.model_selection import GridSearchCV, train_test_split @@ -110,7 +108,6 @@ def fit(self, date_features: Optional[list] = []) -> None: keep_columns.append(self.dependent_variable) self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] self.data_exo.reset_index(drop=True, inplace=True) - # self.data_exo.to_csv(pathlib.Path(self.root) / "csv-data_exo.csv", index_label='timestamp') if len(date_features) > 0: if self.timestamp is not None: self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) @@ -153,18 +150,6 @@ def fit(self, date_features: Optional[list] = []) -> None: predictions = pd.Series(predictions, index=X_test.index) pred_metric = r2_score(y_test,predictions) self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - # Prepare forecast DataFrame - df_pred = pd.DataFrame(index=self.data.index, columns=['train','test','pred']) - df_pred['train'] = y_train - df_pred['test'] = y_test - df_pred['pred'] = predictions - print(df_pred) - # df_pred.to_csv(pathlib.Path(self.root) / "csv-df_pred.csv", index_label='timestamp') - - - - # return df_pred def predict(self, new_values:list) -> np.ndarray: From c8e02a70a4c6728813d326d3486860ff3662edc0 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 13:41:06 +0100 Subject: [PATCH 094/111] cleanup + docstrings --- src/emhass/csv_predictor.py | 45 ++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 1e46927d..57d61791 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -25,32 +25,30 @@ class CsvPredictor: This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - It exposes one main method: + It exposes two main methods: - - `predict`: to obtain a forecast from a csv file. + - `fit`: to train a model with the passed data. + + - `predict`: to obtain a forecast from a pre-trained model. """ def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. - :param csv_file: The name of the csv file to retrieve data from. \ - Example: `input_train_data.csv`. - :type csv_file: str + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str :param independent_variables: A list of independent variables. \ Example: [`solar`, `degree_days`]. 
:type independent_variables: list :param dependent_variable: The dependent variable(to be predicted). \ Example: `hours`. :type dependent_variable: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ - Example: [2.24, 5.68]. - :type new_values: list - :param root: The parent folder of the path where the config.yaml file is located - :type root: str + :param timestamp: If defined, the column key that has to be used of timestamp. + :type timestamp: str :param logger: The passed logger object :type logger: logging.Logger """ @@ -60,23 +58,24 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.timestamp = timestamp self.model_type = model_type self.logger = logger - self.is_tuned = False self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: """Add date features from the input DataFrame timestamp :param data: The input DataFrame :type data: pd.DataFrame + :param timestamp: The column containing the timestamp + :type timestamp: str :return: The DataFrame with the added features :rtype: pd.DataFrame """ df = copy.deepcopy(data) - df['timestamp']= pd.to_datetime(df['timestamp']) + df[timestamp]= pd.to_datetime(df['timestamp']) if 'year' in date_features: - df['year'] = [i.month for i in df['timestamp']] + df['year'] = [i.year for i in df['timestamp']] if 'month' in date_features: df['month'] = [i.month for i in df['timestamp']] if 'day_of_week' in date_features: @@ -94,10 +93,10 @@ def fit(self, date_features: Optional[list] = []) -> None: """ Fit the model using the provided data. - :param data: Input Data - :type data: pd.DataFrame + :param date_features: A list of 'date_features' to take into account when fitting the model. + :type data: list """ - self.logger.info("Performing a forecast model fit for "+self.model_type) + self.logger.info("Performing a csv model fit for "+self.model_type) self.data_exo = pd.DataFrame(self.data) self.data_exo[self.independent_variables] = self.data[self.independent_variables] self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] @@ -110,7 +109,7 @@ def fit(self, date_features: Optional[list] = []) -> None: self.data_exo.reset_index(drop=True, inplace=True) if len(date_features) > 0: if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features) + self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) else: self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") @@ -155,6 +154,10 @@ def fit(self, date_features: Optional[list] = []) -> None: def predict(self, new_values:list) -> np.ndarray: r"""The predict method to generate a forecast from a csv file. + + :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ + Example: [2.24, 5.68]. + :type new_values: list :return: The np.ndarray containing the predicted value. 
:rtype: np.ndarray """ From 775d61f57a6772d5e59b0511b67f2f9d4ed6496b Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 11 Mar 2024 09:59:27 +0100 Subject: [PATCH 095/111] add other regression methods --- src/emhass/csv_predictor.py | 87 +++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 57d61791..2b6fb86a 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -9,9 +9,10 @@ import pandas as pd import numpy as np +from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor from sklearn.metrics import r2_score -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler @@ -122,33 +123,63 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - # Define the model - self.model = Pipeline([ - ('scaler', StandardScaler()), - ('regressor', LinearRegression()) - ]) - # Define the parameters to tune - param_grid = { - 'regressor__fit_intercept': [True, False], - 'regressor__positive': [True, False], - } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # Fit the grid search object to the data - self.logger.info("Fitting the model...") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") + regression_methods = [ + ('Linear Regression', LinearRegression(), {}), + ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + ('Gradient Boosting Regression', GradientBoostingRegressor(), { + 'gradientboostingregressor__n_estimators': [50, 100, 200], + 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + }), + ('AdaBoost Regression', AdaBoostRegressor(), { + 'adaboostregressor__n_estimators': [50, 100, 200], + 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + }) + ] + + # Define the models + for name, model, param_grid in regression_methods: + pipeline = Pipeline([ + ('scaler', StandardScaler()), + (name, model) + ]) + + # Use GridSearchCV to find the best hyperparameters for each model + grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) + grid_search.fit(X_train, y_train) + + # Get the best model and print its mean squared error on the test set + best_model = grid_search.best_estimator_ + print(best_model) + predictions = best_model.predict(X_test) + print(predictions) + # self.model = Pipeline([ + # ('scaler', StandardScaler()), + # ('regressor', LinearRegression()) + # ]) + # # Define the parameters to tune + # param_grid 
= { + # 'regressor__fit_intercept': [True, False], + # 'regressor__positive': [True, False], + # } + + # # Create a grid search object + # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # # Fit the grid search object to the data + # self.logger.info("Fitting the model...") + # start_time = time.time() + # self.grid_search.fit(X_train.values, y_train.values) + # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + + # self.model = self.grid_search.best_estimator_ + + + # # Make predictions + # predictions = self.model.predict(X_test.values) + # predictions = pd.Series(predictions, index=X_test.index) + # pred_metric = r2_score(y_test,predictions) + # self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From 904a36a69b93fbd215a06a976803aea58e26fd1f Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:16:44 +0100 Subject: [PATCH 096/111] multiple regression methods --- src/emhass/csv_predictor.py | 141 +++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 41 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index 2b6fb86a..3ffeba27 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -14,7 +14,7 @@ from sklearn.linear_model import Lasso, LinearRegression, Ridge from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler @@ -33,7 +33,7 @@ class CsvPredictor: - `predict`: to obtain a forecast from a pre-trained model. """ - def __init__(self, data, model_type: str, independent_variables: list, dependent_variable: str, timestamp: str, + def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, logger: logging.Logger) -> None: r"""Define constructor for the forecast class. 
@@ -58,9 +58,14 @@ def __init__(self, data, model_type: str, independent_variables: list, dependent self.dependent_variable = dependent_variable self.timestamp = timestamp self.model_type = model_type + self.sklearn_model = sklearn_model self.logger = logger self.data.sort_index(inplace=True) self.data = self.data[~self.data.index.duplicated(keep='first')] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search =None @staticmethod def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: @@ -123,63 +128,117 @@ def fit(self, date_features: Optional[list] = []) -> None: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) self.steps = len(X_test) - regression_methods = [ - ('Linear Regression', LinearRegression(), {}), - ('Ridge Regression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - ('Lasso Regression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - ('Random Forest Regression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - ('Gradient Boosting Regression', GradientBoostingRegressor(), { + regression_methods = { + 'LinearRegression': {"model": LinearRegression(), "param_grid": { + 'linearregression__fit_intercept': [True, False], + 'linearregression__positive': [True, False], + }}, + 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, + 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, + 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, + 'GradientBoostingRegression': {"model": GradientBoostingRegressor(), "param_grid": { 'gradientboostingregressor__n_estimators': [50, 100, 200], 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }), - ('AdaBoost Regression', AdaBoostRegressor(), { + }}, + 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { 'adaboostregressor__n_estimators': [50, 100, 200], 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }) - ] + }} + } + # regression_methods = [ + # ('LinearRegression', LinearRegression(), { + # 'linearregression__fit_intercept': [True, False], + # 'linearregression__positive': [True, False], + # }), + # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), + # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), + # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), + # ('GradientBoostingRegression', GradientBoostingRegressor(), { + # 'gradientboostingregressor__n_estimators': [50, 100, 200], + # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] + # }), + # ('AdaBoostRegression', AdaBoostRegressor(), { + # 'adaboostregressor__n_estimators': [50, 100, 200], + # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] + # }) + # ] + + if self.sklearn_model == 'LinearRegression': + base_model = regression_methods['LinearRegression']['model'] + param_grid = regression_methods['LinearRegression']['param_grid'] + elif self.sklearn_model == 'RidgeRegression': + base_model = regression_methods['RidgeRegression']['model'] + param_grid = regression_methods['RidgeRegression']['param_grid'] + elif self.sklearn_model == 'LassoRegression': + base_model = regression_methods['LassoRegression']['model'] + param_grid = regression_methods['LassoRegression']['param_grid'] + elif self.sklearn_model == 'RandomForestRegression': + base_model = 
regression_methods['RandomForestRegression']['model'] + param_grid = regression_methods['RandomForestRegression']['param_grid'] + elif self.sklearn_model == 'GradientBoostingRegression': + base_model = regression_methods['GradientBoostingRegression']['model'] + param_grid = regression_methods['GradientBoostingRegression']['param_grid'] + elif self.sklearn_model == 'AdaBoostRegression': + base_model = regression_methods['AdaBoostRegression']['model'] + param_grid = regression_methods['AdaBoostRegression']['param_grid'] + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the models - for name, model, param_grid in regression_methods: - pipeline = Pipeline([ - ('scaler', StandardScaler()), - (name, model) - ]) + # for name, model, param_grid in regression_methods: + # self.model = make_pipeline( + # StandardScaler(), + # model + # ) + # # self.model = Pipeline([ + # # ('scaler', StandardScaler()), + # # (name, model) + # # ]) - # Use GridSearchCV to find the best hyperparameters for each model - grid_search = GridSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5) - grid_search.fit(X_train, y_train) - - # Get the best model and print its mean squared error on the test set - best_model = grid_search.best_estimator_ - print(best_model) - predictions = best_model.predict(X_test) - print(predictions) + # # Use GridSearchCV to find the best hyperparameters for each model + # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) + # grid_search.fit(X_train, y_train) + + # # Get the best model and print its mean squared error on the test set + # best_model = grid_search.best_estimator_ + # print(best_model) + # predictions = best_model.predict(X_test) + # print(predictions) + + self.model = make_pipeline( + StandardScaler(), + base_model + ) # self.model = Pipeline([ # ('scaler', StandardScaler()), - # ('regressor', LinearRegression()) + # ('regressor', base_model) # ]) - # # Define the parameters to tune + # Define the parameters to tune # param_grid = { # 'regressor__fit_intercept': [True, False], # 'regressor__positive': [True, False], # } - # # Create a grid search object - # self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # # Fit the grid search object to the data - # self.logger.info("Fitting the model...") - # start_time = time.time() - # self.grid_search.fit(X_train.values, y_train.values) - # self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + # Create a grid search object + self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', refit=True, verbose=0, n_jobs=-1) + + # Fit the grid search object to the data + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + print("Best value for lambda : ",self.grid_search.best_params_) + print("Best score for cost function: ", self.grid_search.best_score_) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - # self.model = self.grid_search.best_estimator_ + self.model = self.grid_search.best_estimator_ - # # Make predictions - # predictions = self.model.predict(X_test.values) - # predictions = pd.Series(predictions, index=X_test.index) - # pred_metric = r2_score(y_test,predictions) - # self.logger.info(f"Prediction R2 score of fitted model 
on test data: {pred_metric}") + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test,predictions) + self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") def predict(self, new_values:list) -> np.ndarray: From ce407cfa711cb7c335ec6d031784b55a2b5408aa Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 12:42:27 +0100 Subject: [PATCH 097/111] change to MLRegressor --- src/emhass/csv_predictor.py | 257 ------------------------------------ src/emhass/utils.py | 71 +++++----- 2 files changed, 30 insertions(+), 298 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 3ffeba27..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,257 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -import logging -import time -from typing import Optional -import warnings - -import pandas as pd -import numpy as np -from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor -from sklearn.metrics import r2_score - -from sklearn.linear_model import Lasso, LinearRegression, Ridge -from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler - - -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `sklearn` module and the machine learning models are from `scikit-learn`. - - It exposes two main methods: - - - `fit`: to train a model with the passed data. - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - def __init__(self, data, model_type: str, sklearn_model: str, independent_variables: list, dependent_variable: str, timestamp: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. - - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param independent_variables: A list of independent variables. \ - Example: [`solar`, `degree_days`]. - :type independent_variables: list - :param dependent_variable: The dependent variable(to be predicted). \ - Example: `hours`. - :type dependent_variable: str - :param timestamp: If defined, the column key that has to be used of timestamp. 
- :type timestamp: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.sklearn_model = sklearn_model - self.logger = logger - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - self.data_exo = None - self.steps = None - self.model = None - self.grid_search =None - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list, timestamp: str) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - :param data: The input DataFrame - :type data: pd.DataFrame - :param timestamp: The column containing the timestamp - :type timestamp: str - :return: The DataFrame with the added features - :rtype: pd.DataFrame - """ - df = copy.deepcopy(data) - df[timestamp]= pd.to_datetime(df['timestamp']) - if 'year' in date_features: - df['year'] = [i.year for i in df['timestamp']] - if 'month' in date_features: - df['month'] = [i.month for i in df['timestamp']] - if 'day_of_week' in date_features: - df['day_of_week'] = [i.dayofweek for i in df['timestamp']] - if 'day_of_year' in date_features: - df['day_of_year'] = [i.dayofyear for i in df['timestamp']] - if 'day' in date_features: - df['day'] = [i.day for i in df['timestamp']] - if 'hour' in date_features: - df['hour'] = [i.day for i in df['timestamp']] - - return df - - def fit(self, date_features: Optional[list] = []) -> None: - """ - Fit the model using the provided data. - - :param date_features: A list of 'date_features' to take into account when fitting the model. - :type data: list - """ - self.logger.info("Performing a csv model fit for "+self.model_type) - self.data_exo = pd.DataFrame(self.data) - self.data_exo[self.independent_variables] = self.data[self.independent_variables] - self.data_exo[self.dependent_variable] = self.data[self.dependent_variable] - keep_columns = [] - keep_columns.extend(self.independent_variables) - if self.timestamp is not None: - keep_columns.append(self.timestamp) - keep_columns.append(self.dependent_variable) - self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] - self.data_exo.reset_index(drop=True, inplace=True) - if len(date_features) > 0: - if self.timestamp is not None: - self.data_exo = CsvPredictor.add_date_features(self.data_exo, date_features, self.timestamp) - else: - self.logger.error("If no timestamp provided, you can't use date_features, going further without date_features.") - - y = self.data_exo[self.dependent_variable] - self.data_exo = self.data_exo.drop(self.dependent_variable,axis=1) - if self.timestamp is not None: - self.data_exo = self.data_exo.drop(self.timestamp,axis=1) - X = self.data_exo - - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - self.steps = len(X_test) - - regression_methods = { - 'LinearRegression': {"model": LinearRegression(), "param_grid": { - 'linearregression__fit_intercept': [True, False], - 'linearregression__positive': [True, False], - }}, - 'RidgeRegression': {"model": Ridge(), "param_grid": {'ridge__alpha': [0.1, 1.0, 10.0]}}, - 'LassoRegression': {"model": Lasso(), "param_grid": {'lasso__alpha': [0.1, 1.0, 10.0]}}, - 'RandomForestRegression': {"model": RandomForestRegressor(), "param_grid": {'randomforestregressor__n_estimators': [50, 100, 200]}}, - 'GradientBoostingRegression': {"model": 
GradientBoostingRegressor(), "param_grid": { - 'gradientboostingregressor__n_estimators': [50, 100, 200], - 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - }}, - 'AdaBoostRegression': {"model": AdaBoostRegressor(), "param_grid": { - 'adaboostregressor__n_estimators': [50, 100, 200], - 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - }} - } - # regression_methods = [ - # ('LinearRegression', LinearRegression(), { - # 'linearregression__fit_intercept': [True, False], - # 'linearregression__positive': [True, False], - # }), - # ('RidgeRegression', Ridge(), {'ridge__alpha': [0.1, 1.0, 10.0]}), - # ('LassoRegression', Lasso(), {'lasso__alpha': [0.1, 1.0, 10.0]}), - # ('RandomForestRegression', RandomForestRegressor(), {'randomforestregressor__n_estimators': [50, 100, 200]}), - # ('GradientBoostingRegression', GradientBoostingRegressor(), { - # 'gradientboostingregressor__n_estimators': [50, 100, 200], - # 'gradientboostingregressor__learning_rate': [0.01, 0.1, 0.2] - # }), - # ('AdaBoostRegression', AdaBoostRegressor(), { - # 'adaboostregressor__n_estimators': [50, 100, 200], - # 'adaboostregressor__learning_rate': [0.01, 0.1, 0.2] - # }) - # ] - - if self.sklearn_model == 'LinearRegression': - base_model = regression_methods['LinearRegression']['model'] - param_grid = regression_methods['LinearRegression']['param_grid'] - elif self.sklearn_model == 'RidgeRegression': - base_model = regression_methods['RidgeRegression']['model'] - param_grid = regression_methods['RidgeRegression']['param_grid'] - elif self.sklearn_model == 'LassoRegression': - base_model = regression_methods['LassoRegression']['model'] - param_grid = regression_methods['LassoRegression']['param_grid'] - elif self.sklearn_model == 'RandomForestRegression': - base_model = regression_methods['RandomForestRegression']['model'] - param_grid = regression_methods['RandomForestRegression']['param_grid'] - elif self.sklearn_model == 'GradientBoostingRegression': - base_model = regression_methods['GradientBoostingRegression']['model'] - param_grid = regression_methods['GradientBoostingRegression']['param_grid'] - elif self.sklearn_model == 'AdaBoostRegression': - base_model = regression_methods['AdaBoostRegression']['model'] - param_grid = regression_methods['AdaBoostRegression']['param_grid'] - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - - - # Define the models - # for name, model, param_grid in regression_methods: - # self.model = make_pipeline( - # StandardScaler(), - # model - # ) - # # self.model = Pipeline([ - # # ('scaler', StandardScaler()), - # # (name, model) - # # ]) - - # # Use GridSearchCV to find the best hyperparameters for each model - # grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring=['r2', 'neg_mean_squared_error'], refit='r2', verbose=0, n_jobs=-1) - # grid_search.fit(X_train, y_train) - - # # Get the best model and print its mean squared error on the test set - # best_model = grid_search.best_estimator_ - # print(best_model) - # predictions = best_model.predict(X_test) - # print(predictions) - - self.model = make_pipeline( - StandardScaler(), - base_model - ) - # self.model = Pipeline([ - # ('scaler', StandardScaler()), - # ('regressor', base_model) - # ]) - # Define the parameters to tune - # param_grid = { - # 'regressor__fit_intercept': [True, False], - # 'regressor__positive': [True, False], - # } - - # Create a grid search object - self.grid_search = GridSearchCV(self.model, param_grid, cv=5, scoring='neg_mean_squared_error', 
refit=True, verbose=0, n_jobs=-1) - - # Fit the grid search object to the data - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.grid_search.fit(X_train.values, y_train.values) - print("Best value for lambda : ",self.grid_search.best_params_) - print("Best score for cost function: ", self.grid_search.best_score_) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - - self.model = self.grid_search.best_estimator_ - - - # Make predictions - predictions = self.model.predict(X_test.values) - predictions = pd.Series(predictions, index=X_test.index) - pred_metric = r2_score(y_test,predictions) - self.logger.info(f"Prediction R2 score of fitted model on test data: {pred_metric}") - - - def predict(self, new_values:list) -> np.ndarray: - r"""The predict method to generate a forecast from a csv file. - - - :param new_values: The new values for the independent variables(in the same order as the independent variables list). \ - Example: [2.24, 5.68]. - :type new_values: list - :return: The np.ndarray containing the predicted value. - :rtype: np.ndarray - """ - self.logger.info("Performing a prediction for "+self.model_type) - new_values = np.array([new_values]) - - return self.model.predict(new_values) diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 5e40160a..1d98ebb1 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -394,47 +394,36 @@ def treat_runtimeparams( if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] - params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if "num_def_loads" in runtimeparams.keys(): - optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] - if "P_deferrable_nom" in runtimeparams.keys(): - optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] - if "def_total_hours" in runtimeparams.keys(): - optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] - if "def_start_timestep" in runtimeparams.keys(): - optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] - if "def_end_timestep" in runtimeparams.keys(): - optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] - if "treat_def_as_semi_cont" in runtimeparams.keys(): - optim_conf["treat_def_as_semi_cont"] = [ - eval(str(k).capitalize()) - for k in runtimeparams["treat_def_as_semi_cont"] - ] - if "set_def_constant" in runtimeparams.keys(): - optim_conf["set_def_constant"] = [ - eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] - ] - if "solcast_api_key" in runtimeparams.keys(): - retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] - optim_conf["weather_forecast_method"] = "solcast" - if "solcast_rooftop_id" in runtimeparams.keys(): - retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ - "solcast_rooftop_id" - ] - optim_conf["weather_forecast_method"] = "solcast" - if "solar_forecast_kwp" in runtimeparams.keys(): - retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ - "solar_forecast_kwp" - ] - optim_conf["weather_forecast_method"] = "solar.forecast" - if "weight_battery_discharge" in runtimeparams.keys(): - optim_conf["weight_battery_discharge"] = runtimeparams[ - "weight_battery_discharge" - ] - if "weight_battery_charge" in runtimeparams.keys(): - optim_conf["weight_battery_charge"] = 
runtimeparams["weight_battery_charge"] + mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] + params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if 'num_def_loads' in runtimeparams.keys(): + optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] + if 'P_deferrable_nom' in runtimeparams.keys(): + optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] + if 'def_total_hours' in runtimeparams.keys(): + optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] + if 'def_start_timestep' in runtimeparams.keys(): + optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] + if 'def_end_timestep' in runtimeparams.keys(): + optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] + if 'treat_def_as_semi_cont' in runtimeparams.keys(): + optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] + if 'set_def_constant' in runtimeparams.keys(): + optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] + if 'solcast_api_key' in runtimeparams.keys(): + retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] + optim_conf['weather_forecast_method'] = 'solcast' + if 'solcast_rooftop_id' in runtimeparams.keys(): + retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] + optim_conf['weather_forecast_method'] = 'solcast' + if 'solar_forecast_kwp' in runtimeparams.keys(): + retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] + optim_conf['weather_forecast_method'] = 'solar.forecast' + if 'weight_battery_discharge' in runtimeparams.keys(): + optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] + if 'weight_battery_charge' in runtimeparams.keys(): + optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] # Treat plant configuration parameters passed at runtime if "SOCtarget" in runtimeparams.keys(): plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] From 9d45cb125fa61a5cd68d4021124de55eea9c3daa Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 15 Mar 2024 13:13:51 +0100 Subject: [PATCH 098/111] change naming and some formatting --- src/emhass/command_line.py | 5 +-- src/emhass/utils.py | 71 ++++++++++++++++++++++---------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 06280ff4..ee27be0f 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -1014,10 +1014,7 @@ def main(): else: mlf = None df_pred_optim, mlf = forecast_model_tune( - input_data_dict, - logger, - debug=args.debug, - mlf=mlf, + input_data_dict, logger, debug=args.debug, mlf=mlf ) opt_res = None elif args.action == "regressor-model-fit": diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 1d98ebb1..5e40160a 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -394,36 +394,47 @@ def treat_runtimeparams( if "mlr_predict_friendly_name" not in runtimeparams.keys(): mlr_predict_friendly_name = "mlr predictor" else: - mlr_predict_friendly_name = runtimeparams['mlr_predict_friendly_name'] - params['passed_data']['mlr_predict_friendly_name'] = mlr_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 
'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' - if 'weight_battery_discharge' in runtimeparams.keys(): - optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + 
optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] # Treat plant configuration parameters passed at runtime if "SOCtarget" in runtimeparams.keys(): plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] From af8b9f6be5aa19dbebcf3b429155a5e3bd8f42cd Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 7 Jan 2024 08:24:21 +0100 Subject: [PATCH 099/111] Add csv-prediction --- src/emhass/csv_predictor.py | 139 ++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py new file mode 100644 index 00000000..a1c5576b --- /dev/null +++ b/src/emhass/csv_predictor.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import copy +import pathlib +import time +from typing import Optional +# from typing import Optional, Tuple +import pandas as pd +import numpy as np + +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import ElasticNet +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsRegressor +# from sklearn.metrics import r2_score + +# from skforecast.ForecasterAutoreg import ForecasterAutoreg +# from skforecast.model_selection import bayesian_search_forecaster +# from skforecast.model_selection import backtesting_forecaster + +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +class CsvPredictor: + r""" + A forecaster class using machine learning models. + + This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. + + It exposes one main method: + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, + logger: logging.Logger) -> None: + r"""Define constructor for the forecast class. + + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param var_model: The name of the sensor to retrieve data from Home Assistant. \ + Example: `sensor.power_load_no_var_loads`. + :type var_model: str + :param sklearn_model: The `scikit-learn` model that will be used. For now only \ + this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. + :type sklearn_model: str + :param num_lags: The number of auto-regression lags to consider. A good starting point \ + is to fix this as one day. For example if your time step is 30 minutes, then fix this \ + to 48, if the time step is 1 hour the fix this to 24 and so on. 
+ :type num_lags: int + :param root: The parent folder of the path where the config.yaml file is located + :type root: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.model_type = model_type + self.csv_file = csv_file + self.independent_variables = independent_variables + self.dependent_variable = dependent_variable + self.sklearn_model = sklearn_model + self.new_values = new_values + self.root = root + self.logger = logger + self.is_tuned = False + + + def load_data(self): + filename_path = pathlib.Path(self.root) / self.csv_file + if filename_path.is_file(): + with open(filename_path, 'rb') as inp: + data = pd.read_csv(filename_path) + else: + self.logger.error("The cvs file was not found.") + return + + required_columns = self.independent_variables + + if not set(required_columns).issubset(data.columns): + raise ValueError( + f"CSV file should contain the following columns: {', '.join(required_columns)}" + ) + return data + + def prepare_data(self, data): + X = data[self.independent_variables].values + y = data[self.dependent_variable].values + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + return X_train, y_train + + + def predict(self, perform_backtest: Optional[bool] = False + ) -> pd.Series: + r"""The fit method to train the ML model. + + :param split_date_delta: The delta from now to `split_date_delta` that will be used \ + as the test period to evaluate the model, defaults to '48h' + :type split_date_delta: Optional[str], optional + :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ + the performance of the model on the complete train set, defaults to False + :type perform_backtest: Optional[bool], optional + :return: The DataFrame containing the forecast data results without and with backtest + :rtype: Tuple[pd.DataFrame, pd.DataFrame] + """ + self.logger.info("Performing a forecast model fit for "+self.model_type) + # Preparing the data: adding exogenous features + data = self.load_data() + X, y = self.prepare_data(data) + + if self.sklearn_model == 'LinearRegression': + base_model = LinearRegression() + elif self.sklearn_model == 'ElasticNet': + base_model = ElasticNet() + elif self.sklearn_model == 'KNeighborsRegressor': + base_model = KNeighborsRegressor() + else: + self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") + # Define the forecaster object + self.forecaster = base_model + # Fit and time it + self.logger.info("Training a "+self.sklearn_model+" model") + start_time = time.time() + self.forecaster.fit(X, y) + self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") + new_values = np.array([self.new_values]) + prediction = self.forecaster.predict(new_values) + + return prediction + + + + \ No newline at end of file From b035ccdbcf61ac718cbc46fc4890c63ec75e3afd Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 11:45:02 +0100 Subject: [PATCH 100/111] Use gridsearchcv and split up fit and predict --- src/emhass/csv_predictor.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py index a1c5576b..4e4ca37e 100644 --- a/src/emhass/csv_predictor.py +++ b/src/emhass/csv_predictor.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import copy +from datetime import datetime import logging import copy import pathlib @@ -9,6 +11,7 @@ # from typing import Optional, Tuple 
import pandas as pd import numpy as np +from sklearn.metrics import classification_report, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import ElasticNet @@ -64,11 +67,16 @@ def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independe self.csv_file = csv_file self.independent_variables = independent_variables self.dependent_variable = dependent_variable - self.sklearn_model = sklearn_model - self.new_values = new_values - self.root = root + self.timestamp = timestamp + self.model_type = model_type self.logger = logger self.is_tuned = False + self.data.sort_index(inplace=True) + self.data = self.data[~self.data.index.duplicated(keep='first')] + + @staticmethod + def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp def load_data(self): From 879186fae968c0dbde8b46cc9d0c3419bab361b8 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Tue, 13 Feb 2024 15:48:12 +0100 Subject: [PATCH 101/111] gitignore fun --- .vscode/launch.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index f0ceae3a..94690663 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "debugpy", + "type": "python", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,9 +10,9 @@ }, { "name": "EMHASS run", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", "purpose": [ "debug-in-terminal" @@ -27,9 +27,9 @@ }, { "name": "EMHASS run ADDON", - "type": "debugpy", + "type": "python", "request": "launch", - "module": "emhass.web_server", + "program": "web_server.py", "console": "integratedTerminal", "args": [ "--addon", From 36e17350cf266a8a866c9b5cee31bfa79448537d Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 11:39:39 +0100 Subject: [PATCH 102/111] python -> debugpy --- .vscode/launch.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 94690663..e8e023c8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,7 +2,7 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -10,7 +10,7 @@ }, { "name": "EMHASS run", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", @@ -27,7 +27,7 @@ }, { "name": "EMHASS run ADDON", - "type": "python", + "type": "debugpy", "request": "launch", "program": "web_server.py", "console": "integratedTerminal", From 3e82c8772345e3015b136f020ce5f3eeedcaabf4 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:38:07 +0100 Subject: [PATCH 103/111] launch.json --- .vscode/launch.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index e8e023c8..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,7 +12,7 @@ "name": "EMHASS run", "type": "debugpy", "request": "launch", - "program": "web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", "purpose": [ "debug-in-terminal" @@ -29,7 +29,7 @@ "name": "EMHASS run ADDON", "type": "debugpy", "request": "launch", - "program": 
"web_server.py", + "module": "emhass.web_server", "console": "integratedTerminal", "args": [ "--addon", From 730a2df77993b94df7ae616a6f3a5f280334490a Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Sun, 17 Mar 2024 12:40:45 +0100 Subject: [PATCH 104/111] delete csv-predictor --- src/emhass/csv_predictor.py | 147 ------------------------------------ 1 file changed, 147 deletions(-) delete mode 100644 src/emhass/csv_predictor.py diff --git a/src/emhass/csv_predictor.py b/src/emhass/csv_predictor.py deleted file mode 100644 index 4e4ca37e..00000000 --- a/src/emhass/csv_predictor.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -from datetime import datetime -import logging -import copy -import pathlib -import time -from typing import Optional -# from typing import Optional, Tuple -import pandas as pd -import numpy as np -from sklearn.metrics import classification_report, r2_score - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ElasticNet -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsRegressor -# from sklearn.metrics import r2_score - -# from skforecast.ForecasterAutoreg import ForecasterAutoreg -# from skforecast.model_selection import bayesian_search_forecaster -# from skforecast.model_selection import backtesting_forecaster - -import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) - -class CsvPredictor: - r""" - A forecaster class using machine learning models. - - This class uses the `skforecast` module and the machine learning models are from `scikit-learn`. - - It exposes one main method: - - - `predict`: to obtain a forecast from a pre-trained model. - - """ - - def __init__(self, data: pd.DataFrame, model_type: str, csv_file: str, independent_variables: list, dependent_variable: str, sklearn_model: str, new_values:list, root: str, - logger: logging.Logger) -> None: - r"""Define constructor for the forecast class. - - :param data: The data that will be used for train/test - :type data: pd.DataFrame - :param model_type: A unique name defining this model and useful to identify \ - for what it will be used for. - :type model_type: str - :param var_model: The name of the sensor to retrieve data from Home Assistant. \ - Example: `sensor.power_load_no_var_loads`. - :type var_model: str - :param sklearn_model: The `scikit-learn` model that will be used. For now only \ - this options are possible: `LinearRegression`, `ElasticNet` and `KNeighborsRegressor`. - :type sklearn_model: str - :param num_lags: The number of auto-regression lags to consider. A good starting point \ - is to fix this as one day. For example if your time step is 30 minutes, then fix this \ - to 48, if the time step is 1 hour the fix this to 24 and so on. 
- :type num_lags: int - :param root: The parent folder of the path where the config.yaml file is located - :type root: str - :param logger: The passed logger object - :type logger: logging.Logger - """ - self.data = data - self.model_type = model_type - self.csv_file = csv_file - self.independent_variables = independent_variables - self.dependent_variable = dependent_variable - self.timestamp = timestamp - self.model_type = model_type - self.logger = logger - self.is_tuned = False - self.data.sort_index(inplace=True) - self.data = self.data[~self.data.index.duplicated(keep='first')] - - @staticmethod - def add_date_features(data: pd.DataFrame, date_features: list) -> pd.DataFrame: - """Add date features from the input DataFrame timestamp - - - def load_data(self): - filename_path = pathlib.Path(self.root) / self.csv_file - if filename_path.is_file(): - with open(filename_path, 'rb') as inp: - data = pd.read_csv(filename_path) - else: - self.logger.error("The cvs file was not found.") - return - - required_columns = self.independent_variables - - if not set(required_columns).issubset(data.columns): - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}" - ) - return data - - def prepare_data(self, data): - X = data[self.independent_variables].values - y = data[self.dependent_variable].values - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - return X_train, y_train - - - def predict(self, perform_backtest: Optional[bool] = False - ) -> pd.Series: - r"""The fit method to train the ML model. - - :param split_date_delta: The delta from now to `split_date_delta` that will be used \ - as the test period to evaluate the model, defaults to '48h' - :type split_date_delta: Optional[str], optional - :param perform_backtest: If `True` then a back testing routine is performed to evaluate \ - the performance of the model on the complete train set, defaults to False - :type perform_backtest: Optional[bool], optional - :return: The DataFrame containing the forecast data results without and with backtest - :rtype: Tuple[pd.DataFrame, pd.DataFrame] - """ - self.logger.info("Performing a forecast model fit for "+self.model_type) - # Preparing the data: adding exogenous features - data = self.load_data() - X, y = self.prepare_data(data) - - if self.sklearn_model == 'LinearRegression': - base_model = LinearRegression() - elif self.sklearn_model == 'ElasticNet': - base_model = ElasticNet() - elif self.sklearn_model == 'KNeighborsRegressor': - base_model = KNeighborsRegressor() - else: - self.logger.error("Passed sklearn model "+self.sklearn_model+" is not valid") - # Define the forecaster object - self.forecaster = base_model - # Fit and time it - self.logger.info("Training a "+self.sklearn_model+" model") - start_time = time.time() - self.forecaster.fit(X, y) - self.logger.info(f"Elapsed time for model fit: {time.time() - start_time}") - new_values = np.array([self.new_values]) - prediction = self.forecaster.predict(new_values) - - return prediction - - - - \ No newline at end of file From a23b693f3b1ed6babc780c590fcf250844708eae Mon Sep 17 00:00:00 2001 From: gieljnssns Date: Tue, 19 Mar 2024 04:40:02 +0100 Subject: [PATCH 105/111] first documentation for mlregressor --- docs/index.md | 4 +- docs/mlregressor.md | 91 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 docs/mlregressor.md diff --git a/docs/index.md b/docs/index.md index cf015a3f..cc9f33a8 
100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,6 +6,7 @@
 # EMHASS: Energy Management for Home Assistant
 
 ```{image} images/emhass_logo.png
+
 ```
 
 Welcome to the documentation of EMHASS. With this package written in Python you will be able to implement a real Energy Management System for your household. This software was designed to be easy configurable and with a fast integration with Home Assistant:
@@ -21,6 +22,7 @@ differences.md
 lpems.md
 forecasts.md
 mlforecaster.md
+mlregressor.md
 study_case.md
 config.md
 emhass.md
@@ -32,5 +34,3 @@ develop.md
 - {ref}`genindex`
 - {ref}`modindex`
 - {ref}`search`
-
-
diff --git a/docs/mlregressor.md b/docs/mlregressor.md
new file mode 100644
index 00000000..7206af99
--- /dev/null
+++ b/docs/mlregressor.md
@@ -0,0 +1,91 @@
+# The machine learning regressor
+
+Starting with v0.9.0, a new framework is proposed within EMHASS. It provides a machine learning module to predict values from a csv file using different regression models.
+
+This API provides two main methods:
+
+- fit: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point.
+
+- predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point.
+
+## A basic model fit
+
+To train a model use the `regressor-model-fit` end point.
+
+Some parameters can be optionally defined at runtime:
+
+- `csv_file`: The name of the csv file containing your data.
+
+- `features`: A list of the feature column names to use as model inputs. Example: `["degreeday", "solar"]`.
+
+- `target`: The name of the column holding the value to be predicted. Example: `heating_hours`.
+
+- `model_type`: A unique name defining this model and identifying what it will be used for. For example: `heating_hours_degreeday`. This should be a unique name if you are using multiple custom regressor models.
+
+- `regression_model`: The regression model that will be used. For now only these options are possible: `LinearRegression`, `RidgeRegression`, `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`.
+
+- `timestamp`: If defined, the column key to use as the timestamp.
+
+- `date_features`: A list of date features to take into account when fitting the model. Possibilities are `year`, `month`, `day_of_week` (monday=0, sunday=6), `day_of_year`, `day` (day of month) and `hour`.
+
+```
+runtimeparams = {
+    "csv_file": "heating_prediction.csv",
+    "features":["degreeday", "solar"],
+    "target": "heating_hours",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"]
+    }
+```
+
+A correct `curl` call to launch a model fit can look like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit
+```
+
+After applying the `curl` command to fit the model the following information is logged by EMHASS:
+
+    2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model
+    2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176
+    2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957
+
+## The predict method
+
+To obtain a prediction using a previously trained model use the `regressor-model-predict` end point.
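+
+The endpoints can also be called from Python instead of `curl`; a minimal sketch using the third-party `requests` package, assuming EMHASS is reachable on `localhost:5000` and that a model named `heating_hours_degreeday` was fitted beforehand:
+
+```
+import requests
+
+# Ask the pre-trained model for a prediction.
+response = requests.post(
+    "http://localhost:5000/action/regressor-model-predict",
+    json={"model_type": "heating_hours_degreeday"},
+)
+print(response.status_code)
+```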
+ +``` +curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-predict +``` + +If needed pass the correct `model_type` like this: + +``` +curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "load_forecast"}' http://localhost:5000/action/regressor-model-predict +``` + +It is possible to publish the predict method results to a Home Assistant sensor. + +The list of parameters needed to set the data publish task is: + +- `mlr_predict_entity_id`: The unique `entity_id` to be used. + +- `mlr_predict_unit_of_measurement`: The `unit_of_measurement` to be used. + +- `mlr_predict_friendly_name`: The `friendly_name` to be used. + +- `new_values`: The new values for the features (in the same order as the features list). Also when using date_features, add these to the new values. + +- `model_type`: The model type that has to be predicted + +``` +runtimeparams = { + "mlr_predict_entity_id": "sensor.mlr_predict", + "mlr_predict_unit_of_measurement": None, + "mlr_predict_friendly_name": "mlr predictor", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" +} +``` From 058c732983dd7e76ef9d1c4663fd23b626f13867 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Wed, 17 Apr 2024 15:45:58 +0200 Subject: [PATCH 106/111] Rename paragrams --- tests/test_command_line_utils.py | 33 +++++++++++++----------- tests/test_machine_learning_regressor.py | 11 +++----- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index 293733f4..c4482c07 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -335,11 +335,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-fit" # fit and predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -360,14 +360,16 @@ def test_regressor_model_fit_predict(self): get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["regression_model"] == "AdaBoostRegression", ) self.assertTrue( - input_data_dict["params"]["passed_data"]["csv_file"] == "prediction.csv", + input_data_dict["params"]["passed_data"]["csv_file"] + == "heating_prediction.csv", ) mlr = regressor_model_fit(input_data_dict, logger, debug=True) @@ -378,11 +380,11 @@ def test_regressor_model_fit_predict(self): action = "regressor-model-predict" # predict methods params = TestCommandLineUtils.get_test_params() runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "mlr_predict_entity_id": "sensor.predicted_hours_test", @@ -405,7 +407,8 @@ def test_regressor_model_fit_predict(self): 
get_data_from_file=True, ) self.assertTrue( - input_data_dict["params"]["passed_data"]["model_type"] == "heating_dd", + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", ) self.assertTrue( input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] @@ -519,11 +522,11 @@ def test_main_forecast_model_tune(self): def test_main_regressor_model_fit(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], } @@ -551,11 +554,11 @@ def test_main_regressor_model_fit(self): def test_main_regressor_model_predict(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 88137b0d..74702b6f 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -7,14 +7,11 @@ import numpy as np import pandas as pd -from sklearn.pipeline import Pipeline import yaml from emhass import utils from emhass.command_line import set_input_data_dict from emhass.machine_learning_regressor import MLRegressor -from sklearn.ensemble import ( - AdaBoostRegressor, -) +from sklearn.pipeline import Pipeline # the root folder root = str(utils.get_root(__file__, num_parent=2)) @@ -50,11 +47,11 @@ def setUp(self): action = "regressor-model-fit" # fit and predict methods params = copy.deepcopy(json.loads(params_json)) runtimeparams = { - "csv_file": "prediction.csv", - "features": ["dd", "solar"], + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], "target": "hour", "regression_model": "AdaBoostRegression", - "model_type": "heating_dd", + "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2], From 6c2162908ef0d87be65fa789ce21fb30843380c5 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 11:40:54 +0200 Subject: [PATCH 107/111] Ready for review (I think) --- docs/mlregressor.md | 106 +++++++++++++++++++---- src/emhass/command_line.py | 61 +++++++------ src/emhass/machine_learning_regressor.py | 3 +- src/emhass/utils.py | 15 +++- 4 files changed, 137 insertions(+), 48 deletions(-) diff --git a/docs/mlregressor.md b/docs/mlregressor.md index 7206af99..dee5fccd 100644 --- a/docs/mlregressor.md +++ b/docs/mlregressor.md @@ -8,6 +8,7 @@ This API provides two main methods: - predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. + ## A basic model fit To train a model use the `regressor-model-fit` end point. 
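+The csv file is expected to have a header row naming the timestamp, feature and target columns; `data/heating_prediction.csv`, added later in this patch series, follows that layout:
+
+```
+timestamp,degreeday,solar,hour
+2023-11-10 23:59:32.458039+01:00,12.23,3.982,2.87
+2023-11-11 23:59:32.459778+01:00,12.94,13.723,3.14
+```
+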
@@ -45,28 +46,38 @@ A correct `curl` call to launch a model fit can look like this:
 ```
 curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit
 ```
-
-After applying the `curl` command to fit the model the following information is logged by EMHASS:
-
-    2023-02-20 22:05:22,658 - __main__ - INFO - Training a LinearRegression model
-    2023-02-20 22:05:23,882 - __main__ - INFO - Elapsed time: 1.2236599922180176
-    2023-02-20 22:05:24,612 - __main__ - INFO - Prediction R2 score: 0.2654560762747957
-
-## The predict method
-
-To obtain a prediction using a previously trained model use the `regressor-model-predict` end point.
+A Home Assistant `rest_command` can look like this:
+
+```
+fit_heating_hours:
+  url: http://127.0.0.1:5000/action/regressor-model-fit
+  method: POST
+  content_type: "application/json"
+  payload: >-
+    {
+    "csv_file": "heating_prediction.csv",
+    "features":["degreeday", "solar"],
+    "target": "heating_hours",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"]
+    }
+```
+After fitting the model the following information is logged by EMHASS:
+
+    2024-04-17 12:41:50,019 - web_server - INFO - Passed runtime parameters: {'csv_file': 'heating_prediction.csv', 'features': ['degreeday', 'solar'], 'target': 'heating_hours', 'regression_model': 'RandomForestRegression', 'model_type': 'heating_hours_degreeday', 'timestamp': 'timestamp', 'date_features': ['month', 'day_of_week']}
+    2024-04-17 12:41:50,020 - web_server - INFO - >> Setting input data dict
+    2024-04-17 12:41:50,021 - web_server - INFO - Setting up needed data
+    2024-04-17 12:41:50,048 - web_server - INFO - >> Performing a machine learning regressor fit...
+    2024-04-17 12:41:50,049 - web_server - INFO - Performing a MLRegressor fit for heating_hours_degreeday
+    2024-04-17 12:41:50,064 - web_server - INFO - Training a RandomForestRegression model
+    2024-04-17 12:41:57,852 - web_server - INFO - Elapsed time for model fit: 7.78800106048584
+    2024-04-17 12:41:57,862 - web_server - INFO - Prediction R2 score of fitted model on test data: -0.5667567505914477
+
+## The predict method
+
+To obtain a prediction using a previously trained model use the `regressor-model-predict` end point.
 The list of parameters needed to set the data publish task is:
 
 - `mlr_predict_entity_id`: The unique `entity_id` to be used.
 
 - `mlr_predict_unit_of_measurement`: The `unit_of_measurement` to be used.
 
 - `mlr_predict_friendly_name`: The `friendly_name` to be used.
 
 - `new_values`: The new values for the features (in the same order as the features list). Also when using date_features, add these to the new values.
 
 - `model_type`: The model type that has to be predicted
 
@@ -89,3 +100,66 @@ runtimeparams = {
     "model_type": "heating_hours_degreeday"
 }
 ```
+
+Pass the correct `model_type` like this:
+
+```
+curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict
+```
+
+A Home Assistant `rest_command` can look like this:
+
+```
+predict_heating_hours:
+  url: http://localhost:5001/action/regressor-model-predict
+  method: POST
+  content_type: "application/json"
+  payload: >-
+    {
+    "mlr_predict_entity_id": "sensor.predicted_hours",
+    "mlr_predict_unit_of_measurement": "h",
+    "mlr_predict_friendly_name": "Predicted hours",
+    "new_values": [8.2, 7.23, 2, 6],
+    "model_type": "heating_hours_degreeday"
+    }
+```
+After the prediction, the following information is logged by EMHASS:
+
+```
+2024-04-17 14:25:40,695 - web_server - INFO - Passed runtime parameters: {'mlr_predict_entity_id': 'sensor.predicted_hours', 'mlr_predict_unit_of_measurement': 'h', 'mlr_predict_friendly_name': 'Predicted hours', 'new_values': [8.2, 7.23, 2, 6], 'model_type': 'heating_hours_degreeday'}
+2024-04-17 14:25:40,696 - web_server - INFO - >> Setting input data dict
+2024-04-17 14:25:40,696 - web_server - INFO - Setting up needed data
+2024-04-17 14:25:40,700 - web_server - INFO - >> Performing a machine learning regressor predict...
+2024-04-17 14:25:40,715 - web_server - INFO - Performing a prediction for heating_hours_degreeday
+2024-04-17 14:25:40,750 - web_server - INFO - Successfully posted to sensor.predicted_hours = 3.716600000000001
+```
+The predict method will publish the result to a Home Assistant sensor.
+
+
+## How to store data in a csv file from Home Assistant
+First configure a file notification service:
+```
+notify:
+  - platform: file
+    name: heating_hours_prediction
+    timestamp: false
+    filename: /share/heating_prediction.csv
+```
+Then you need an automation that appends a new line to this file once a day:
+```
+alias: "Heating csv"
+id: 157b1d57-73d9-4f39-82c6-13ce0cf42
+trigger:
+  - platform: time
+    at: "23:59:32"
+action:
+  - service: notify.heating_hours_prediction
+    data:
+      message: >
+        {% set degreeday = states('sensor.degree_day_daily') |float %}
+        {% set heating_hours = states('sensor.heating_hours_today') |float | round(2) %}
+        {% set solar = states('sensor.solar_daily') |float | round(3) %}
+        {% set time = now() %}
+
+        {{time}},{{degreeday}},{{solar}},{{heating_hours}}
+```
\ No newline at end of file
diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index ee27be0f..2190aadf 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -206,34 +206,39 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
         P_PV_forecast, P_load_forecast = None, None
         params = json.loads(params)
         days_list = None
-        csv_file = params["passed_data"]["csv_file"]
-        features = params["passed_data"]["features"]
-        target = params["passed_data"]["target"]
-        timestamp = params["passed_data"]["timestamp"]
-        if get_data_from_file:
-            base_path = base_path + "/data"
-            filename_path = pathlib.Path(base_path) / csv_file
-
-        else:
-            filename_path = pathlib.Path(base_path) / csv_file
-
-        if filename_path.is_file():
-            df_input_data = pd.read_csv(filename_path, parse_dates=True)
-
-        else:
-            logger.error("The cvs file was not found.")
-            raise ValueError("The CSV file " + csv_file + " was not found.")
-        required_columns = []
-        required_columns.extend(features)
-        required_columns.append(target)
-        if timestamp is not None:
-            required_columns.append(timestamp)
- - if not set(required_columns).issubset(df_input_data.columns): - logger.error("The cvs file does not contain the required columns.") - raise ValueError( - f"CSV file should contain the following columns: {', '.join(required_columns)}", - ) + csv_file = params["passed_data"].get("csv_file", None) + if "features" in params["passed_data"]: + features = params["passed_data"]["features"] + if "target" in params["passed_data"]: + target = params["passed_data"]["target"] + if "timestamp" in params["passed_data"]: + timestamp = params["passed_data"]["timestamp"] + if csv_file: + if get_data_from_file: + base_path = base_path + "/data" + filename_path = pathlib.Path(base_path) / csv_file + + else: + filename_path = pathlib.Path(base_path) / csv_file + + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) + + else: + logger.error("The cvs file was not found.") + raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(features) + required_columns.append(target) + if timestamp is not None: + required_columns.append(timestamp) + + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + msg = f"CSV file should contain the following columns: {', '.join(required_columns)}" + raise ValueError( + msg, + ) elif set_type == "publish-data": df_input_data, df_input_data_dayahead = None, None diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py index 732b4266..f0d3c532 100644 --- a/src/emhass/machine_learning_regressor.py +++ b/src/emhass/machine_learning_regressor.py @@ -190,9 +190,10 @@ def get_regression_model(self: MLRegressor) -> tuple[str, str]: param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] else: self.logger.error( - "Passed sklearn model %s is not valid", + "Passed model %s is not valid", self.regression_model, ) + return None return base_model, param_grid def fit(self: MLRegressor, date_features: list | None = None) -> None: diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 5e40160a..8bb6f101 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -223,12 +223,12 @@ def treat_runtimeparams( params["passed_data"]["csv_file"] = csv_file params["passed_data"]["features"] = features params["passed_data"]["target"] = target - if "timestamp" not in runtimeparams.keys(): + if "timestamp" not in runtimeparams: params["passed_data"]["timestamp"] = None else: timestamp = runtimeparams["timestamp"] params["passed_data"]["timestamp"] = timestamp - if "date_features" not in runtimeparams.keys(): + if "date_features" not in runtimeparams: params["passed_data"]["date_features"] = [] else: date_features = runtimeparams["date_features"] @@ -237,6 +237,15 @@ def treat_runtimeparams( if set_type == "regressor-model-predict": new_values = runtimeparams["new_values"] params["passed_data"]["new_values"] = new_values + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target # Treating special data passed for MPC control case if set_type == "naive-mpc-optim": @@ -330,7 +339,7 @@ def treat_runtimeparams( sklearn_model = runtimeparams["sklearn_model"] 
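+        # If no "regression_model" is passed at runtime, the default set below is used;
+        # any model name defined in REGRESSION_METHODS can be supplied instead.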
params["passed_data"]["sklearn_model"] = sklearn_model if "regression_model" not in runtimeparams.keys(): - regression_model = "LinearRegression" + regression_model = "AdaBoostRegression" else: regression_model = runtimeparams["regression_model"] params["passed_data"]["regression_model"] = regression_model From fec455841b4f9f34ec816fec9ff245ee9720bc83 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Thu, 18 Apr 2024 14:11:23 +0200 Subject: [PATCH 108/111] remove *.csv from .gitignore to upload heating_prediction.csv --- data/heating_prediction.csv | 130 ++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 data/heating_prediction.csv diff --git a/data/heating_prediction.csv b/data/heating_prediction.csv new file mode 100644 index 00000000..f50a8a49 --- /dev/null +++ b/data/heating_prediction.csv @@ -0,0 +1,130 @@ +timestamp,degreeday,solar,hour +2023-11-10 23:59:32.458039+01:00,12.23,3.982,2.87 +2023-11-11 23:59:32.459778+01:00,12.94,13.723,3.14 +2023-11-12 23:59:32.462220+01:00,14.45,4.925,3.5 +2023-11-13 23:59:32.462167+01:00,8.49,2.138,3.19 +2023-11-14 23:59:32.338942+01:00,8.61,2.444,2.91 +2023-11-15 23:59:32.195198+01:00,9.83,10.685,2.88 +2023-11-16 23:59:32.501044+01:00,12.8,1.955,1.28 +2023-11-17 23:59:32.316366+01:00,13.35,8.742,2.97 +2023-11-18 23:59:32.082785+01:00,11.84,0.849,3.42 +2023-11-19 23:59:32.077198+01:00,7.3,10.85,1.9 +2023-11-20 23:59:32.431964+01:00,9.91,6.395,2.48 +2023-11-21 23:59:32.295705+01:00,11.44,2.678,2.91 +2023-11-22 23:59:32.377740+01:00,16.14,2.994,2.96 +2023-11-23 23:59:32.385890+01:00,9.31,5.346,2.91 +2023-11-24 23:59:32.376194+01:00,12.96,8.61,2.9 +2023-11-25 23:59:32.373666+01:00,14.91,12.31,3.47 +2023-11-26 23:59:32.373647+01:00,14.79,2.589,3.69 +2023-11-27 23:59:32.379920+01:00,14.92,0.322,6.05 +2023-11-28 23:59:32.213947+01:00,18.59,20.342,2.94 +2023-11-29 23:59:32.217384+01:00,19.05,5.393,5.41 +2023-11-30 23:59:32.222641+01:00,21.27,1.899,6.77 +2023-12-01 23:59:32.224533+01:00,21.3,1.233,5.75 +2023-12-02 23:59:32.107119+01:00,21.97,14.653,2.96 +2023-12-03 23:59:32.107436+01:00,20.61,4.766,8.89 +2023-12-04 23:59:32.116642+01:00,18.36,1.349,6.73 +2023-12-05 23:59:32.191254+01:00,16.93,0.869,6.17 +2023-12-06 23:59:32.176803+01:00,16.8,5.413,5.38 +2023-12-07 23:59:32.251031+01:00,17.67,8.089,5.98 +2023-12-08 23:59:32.255888+01:00,14.37,1.203,5.63 +2023-12-09 23:59:32.109040+01:00,11.94,0.814,5.08 +2023-12-10 23:59:32.103738+01:00,9.72,6.051,3.42 +2023-12-11 23:59:32.497717+01:00,9.83,1.459,3.87 +2023-12-12 23:59:32.502503+01:00,11.18,4.176,3.31 +2023-12-13 23:59:32.504794+01:00,11.09,2.91,3.1 +2023-12-14 23:59:32.177489+01:00,13.88,7.53,2.89 +2023-12-15 23:59:32.186292+01:00,12.18,2.129,5.68 +2023-12-16 23:59:32.176812+01:00,11.75,1.641,3.46 +2023-12-17 23:59:32.119874+01:00,12.18,14.868,3.46 +2023-12-18 23:59:32.120168+01:00,14.75,1.283,3.12 +2023-12-19 23:59:32.120101+01:00,12.82,0.09,5.07 +2023-12-20 23:59:32.249731+01:00,12.8,3.803,3.6 +2023-12-21 23:59:32.249135+01:00,8.73,2.096,3.55 +2023-12-22 23:59:32.385164+01:00,9.12,1.278,0.85 +2023-12-23 23:59:32.382910+01:00,8.99,1.848,0.0 +2023-12-24 23:59:32.382457+01:00,8.04,0.165,7.42 +2023-12-25 23:59:32.303520+01:00,7.56,1.028,2.93 +2023-12-26 23:59:32.105788+01:00,10.55,9.274,2.92 +2023-12-27 23:59:32.183107+01:00,11.78,2.026,3.39 +2023-12-28 23:59:32.183405+01:00,8.91,3.68,3.19 +2023-12-29 23:59:32.399740+01:00,9.35,2.464,2.95 +2023-12-30 23:59:32.091110+01:00,11.07,7.948,3.44 +2023-12-31 23:59:32.257530+01:00,10.51,3.5,3.48 +2024-01-01 
23:59:32.106161+01:00,12.75,4.046,3.08 +2024-01-02 23:59:32.103187+01:00,8.81,0.562,4.46 +2024-01-03 23:59:32.429947+01:00,10.03,2.184,3.26 +2024-01-04 23:59:32.436773+01:00,11.22,5.662,2.97 +2024-01-05 23:59:32.165969+01:00,12.42,1.199,3.6 +2024-01-06 23:59:32.110208+01:00,15.35,0.295,4.32 +2024-01-07 23:59:32.147775+01:00,19.88,0.896,6.19 +2024-01-08 23:59:32.242815+01:00,22.74,6.468,5.82 +2024-01-09 23:59:32.201342+01:00,24.38,21.307,6.92 +2024-01-10 23:59:32.411136+01:00,24.84,18.89,1.53 +2024-01-11 23:59:32.399433+01:00,23.57,19.27,3.05 +2024-01-12 23:59:32.467622+01:00,18.22,1.977,13.98 +2024-01-13 23:59:32.077428+01:00,17.9,0.472,6.93 +2024-01-14 23:59:32.127844+01:00,19.65,1.346,6.95 +2024-01-15 23:59:32.125062+01:00,19.49,4.35,7.82 +2024-01-16 23:59:32.280474+01:00,21.21,9.238,5.7 +2024-01-17 23:59:32.283951+01:00,23.17,1.193,7.37 +2024-01-18 23:59:32.361241+01:00,21.61,17.307,6.67 +2024-01-19 23:59:32.341654+01:00,22.06,21.004,6.24 +2024-01-20 23:59:32.359151+01:00,21.95,12.912,6.43 +2024-01-21 23:59:32.126221+01:00,17.38,3.28,7.45 +2024-01-22 23:59:32.126346+01:00,9.47,7.645,6.1 +2024-01-23 23:59:32.417727+01:00,11.87,7.689,4.76 +2024-01-24 23:59:32.420933+01:00,8.15,10.052,3.62 +2024-01-25 23:59:32.419138+01:00,12.38,3.785,3.98 +2024-01-26 23:59:32.422066+01:00,11.4,11.94,3.1 +2024-01-27 23:59:32.176538+01:00,17.96,19.741,3.45 +2024-01-28 23:59:32.168328+01:00,16.72,20.366,4.85 +2024-01-29 23:59:32.173916+01:00,13.11,16.972,4.51 +2024-01-30 23:59:32.503034+01:00,11.21,4.013,3.99 +2024-01-31 23:59:32.179265+01:00,12.79,4.766,3.73 +2024-02-01 23:59:32.487147+01:00,12.74,23.924,2.98 +2024-02-02 23:59:32.570084+01:00,13.0,2.98,5.04 +2024-02-03 23:59:32.484878+01:00,9.26,1.413,3.48 +2024-02-04 23:59:32.472168+01:00,8.35,4.306,3.47 +2024-02-05 23:59:32.409856+01:00,9.78,5.704,0.0 +2024-02-06 23:59:32.439147+01:00,9.15,2.431,6.56 +2024-02-07 23:59:32.235231+01:00,14.42,3.839,3.07 +2024-02-08 23:59:32.441543+01:00,13.9,1.412,5.94 +2024-02-09 23:59:32.443230+01:00,8.2,7.246,2.96 +2024-02-10 23:59:32.504326+01:00,8.37,8.567,3.48 +2024-02-11 23:59:32.452959+01:00,10.44,5.304,0.0 +2024-02-12 23:59:32.450999+01:00,12.65,16.004,3.42 +2024-02-13 23:59:32.343162+01:00,13.84,19.809,3.16 +2024-02-14 23:59:32.339408+01:00,8.48,1.98,4.52 +2024-02-15 23:59:32.339971+01:00,6.13,9.952,2.98 +2024-02-16 23:59:32.455273+01:00,7.66,3.675,3.06 +2024-02-17 23:59:32.097937+01:00,8.56,12.269,3.48 +2024-02-18 23:59:32.126377+01:00,9.59,2.205,3.04 +2024-02-19 23:59:32.421243+01:00,10.22,3.731,2.97 +2024-02-20 23:59:32.421985+01:00,11.61,13.775,0.0 +2024-02-21 23:59:32.371300+01:00,10.52,4.856,3.02 +2024-02-22 23:59:32.373153+01:00,9.53,4.256,3.48 +2024-02-23 23:59:32.372545+01:00,13.66,8.743,4.09 +2024-02-24 23:59:32.197044+01:00,14.44,7.842,4.3 +2024-02-25 23:59:32.196386+01:00,12.41,16.235,3.48 +2024-02-26 23:59:32.409648+01:00,14.63,2.096,5.05 +2024-02-27 23:59:32.373347+01:00,14.5,29.437,3.21 +2024-02-28 23:59:32.407538+01:00,15.38,6.475,4.88 +2024-02-29 23:59:32.194724+01:00,11.83,3.238,4.68 +2024-03-01 23:59:32.084520+01:00,10.56,14.352,3.8 +2024-03-02 23:59:32.066434+01:00,9.94,25.356,3.49 +2024-03-03 23:59:32.270878+01:00,8.9,10.577,3.19 +2024-03-04 23:59:32.274918+01:00,10.67,28.096,2.08 +2024-03-05 23:59:32.315023+01:00,12.19,10.553,2.95 +2024-03-06 23:59:32.441001+01:00,11.38,32.597,2.91 +2024-03-07 23:59:32.440044+01:00,12.39,28.856,2.96 +2024-03-08 23:59:32.228265+01:00,12.01,37.395,2.96 +2024-03-09 23:59:32.081874+01:00,8.72,17.66,3.5 +2024-03-10 23:59:32.335321+01:00,8.0,12.207,3.47 +2024-03-11 
23:59:32.139531+01:00,10.39,2.526,2.96 +2024-03-12 23:59:32.136709+01:00,10.24,8.211,2.98 +2024-03-13 23:59:32.407174+01:00,7.19,6.425,2.95 +2024-03-14 23:59:32.342436+01:00,6.06,33.389,1.64 +2024-03-15 23:59:32.266278+01:00,5.63,12.628,2.96 +2024-03-16 23:59:32.155245+01:00,9.57,12.103,3.0 +2024-03-17 23:59:32.366155+01:00,8.43,14.302,0.25 From b9ec17ada28ca9d006e3cf89cac5174b2ca2c3b5 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Fri, 19 Apr 2024 09:17:07 +0200 Subject: [PATCH 109/111] Adapt to the latest path changes --- src/emhass/command_line.py | 11 +++++------ tests/test_command_line_utils.py | 14 ++++---------- tests/test_machine_learning_regressor.py | 15 ++++++++------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index 2190aadf..b669e584 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -215,11 +215,11 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, timestamp = params["passed_data"]["timestamp"] if csv_file: if get_data_from_file: - base_path = base_path + "/data" + base_path = emhass_conf["data_path"] # + "/data" filename_path = pathlib.Path(base_path) / csv_file else: - filename_path = pathlib.Path(base_path) / csv_file + filename_path = emhass_conf["data_path"] / csv_file if filename_path.is_file(): df_input_data = pd.read_csv(filename_path, parse_dates=True) @@ -621,7 +621,6 @@ def regressor_model_fit( target = input_data_dict["params"]["passed_data"]["target"] timestamp = input_data_dict["params"]["passed_data"]["timestamp"] date_features = input_data_dict["params"]["passed_data"]["date_features"] - root = input_data_dict["root"] # The MLRegressor object mlr = MLRegressor( @@ -638,7 +637,8 @@ def regressor_model_fit( # Save model if not debug: filename = model_type + "_mlr.pkl" - with open(pathlib.Path(root) / filename, "wb") as outp: + filename_path = input_data_dict["emhass_conf"]["data_path"] / filename + with open(filename_path, "wb") as outp: pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) return mlr @@ -659,9 +659,8 @@ def regressor_model_predict( :type debug: Optional[bool], optional """ model_type = input_data_dict["params"]["passed_data"]["model_type"] - root = input_data_dict["root"] filename = model_type + "_mlr.pkl" - filename_path = pathlib.Path(root) / filename + filename_path = input_data_dict["emhass_conf"]["data_path"] / filename if not debug: if filename_path.is_file(): with open(filename_path, "rb") as inp: diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index c4482c07..a8194c62 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -329,8 +329,6 @@ def test_forecast_model_fit_predict_tune(self): self.assertIsInstance(injection_dict["figure_0"], str) def test_regressor_model_fit_predict(self): - config_path = pathlib.Path(root + "/config_emhass.yaml") - base_path = str(config_path.parent) costfun = "profit" action = "regressor-model-fit" # fit and predict methods params = TestCommandLineUtils.get_test_params() @@ -350,8 +348,7 @@ def test_regressor_model_fit_predict(self): runtimeparams_json = json.dumps(runtimeparams) params_json = json.dumps(params) input_data_dict = set_input_data_dict( - config_path, - base_path, + emhass_conf, costfun, params_json, runtimeparams_json, @@ -374,8 +371,6 @@ def test_regressor_model_fit_predict(self): mlr = regressor_model_fit(input_data_dict, logger, debug=True) # def test_regressor_model_predict(self): - config_path = pathlib.Path(root + 
"/config_emhass.yaml") - base_path = str(config_path.parent) # + "/data" costfun = "profit" action = "regressor-model-predict" # predict methods params = TestCommandLineUtils.get_test_params() @@ -397,8 +392,7 @@ def test_regressor_model_fit_predict(self): params_json = json.dumps(params) input_data_dict = set_input_data_dict( - config_path, - base_path, + emhass_conf, costfun, params_json, runtimeparams_json, @@ -540,7 +534,7 @@ def test_main_regressor_model_fit(self): "--action", "regressor-model-fit", "--config", - str(pathlib.Path(root + "/config_emhass.yaml")), + str(emhass_conf["config_path"]), "--params", params_json, "--runtimeparams", @@ -574,7 +568,7 @@ def test_main_regressor_model_predict(self): "--action", "regressor-model-predict", "--config", - str(pathlib.Path(root + "/config_emhass.yaml")), + str(emhass_conf["config_path"]), "--params", params_json, "--runtimeparams", diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 74702b6f..4201199a 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -15,15 +15,19 @@ # the root folder root = str(utils.get_root(__file__, num_parent=2)) +emhass_conf = {} +emhass_conf["config_path"] = pathlib.Path(root) / "config_emhass.yaml" +emhass_conf["data_path"] = pathlib.Path(root) / "data/" +emhass_conf["root_path"] = pathlib.Path(root) # create logger -logger, ch = utils.get_logger(__name__, root, save_to_file=False) +logger, ch = utils.get_logger(__name__, emhass_conf, save_to_file=False) class TestMLRegressor(unittest.TestCase): @staticmethod def get_test_params(): - with open(root + "/config_emhass.yaml", "r") as file: - params = yaml.load(file, Loader=yaml.FullLoader) + with open(emhass_conf["config_path"]) as file: + params = yaml.safe_load(file) params.update( { "params_secrets": { @@ -41,8 +45,6 @@ def get_test_params(): def setUp(self): params = TestMLRegressor.get_test_params() params_json = json.dumps(params) - config_path = pathlib.Path(root + "/config_emhass.yaml") - base_path = str(config_path.parent) # + "/data" costfun = "profit" action = "regressor-model-fit" # fit and predict methods params = copy.deepcopy(json.loads(params_json)) @@ -61,8 +63,7 @@ def setUp(self): params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) self.input_data_dict = set_input_data_dict( - config_path, - base_path, + emhass_conf, costfun, params_json, runtimeparams_json, From 69a114090f60f96d9ee4ad00c2573568bc2c186f Mon Sep 17 00:00:00 2001 From: GeoDerp <18461782+GeoDerp@users.noreply.github.com> Date: Fri, 19 Apr 2024 14:33:15 +0000 Subject: [PATCH 110/111] mlregressor, add web buttons for mlregressor, add some suggestions --- docs/develop.md | 15 ++++- docs/mlregressor.md | 59 +++++++++++++----- src/emhass/command_line.py | 77 ++++++++++++++++-------- src/emhass/static/advanced.html | 3 + src/emhass/static/script.js | 2 + src/emhass/utils.py | 23 ++++--- src/emhass/web_server.py | 14 +++-- tests/test_machine_learning_regressor.py | 2 +- 8 files changed, 139 insertions(+), 56 deletions(-) diff --git a/docs/develop.md b/docs/develop.md index 6716f5ec..da81ef25 100644 --- a/docs/develop.md +++ b/docs/develop.md @@ -221,6 +221,11 @@ For those who wish to mount/sync the local `data` folder with the data folder fr docker run ... -v $(pwd)/data/:/app/data ... ``` +You can also mount data (ex .csv) files separately +```bash +docker run... -v $(pwd)/data/heating_prediction.csv:/app/data/ ... 
+``` + #### Issue with TARGETARCH If your docker build fails with an error related to `TARGETARCH`. It may be best to add your devices architecture manually: @@ -301,7 +306,7 @@ git checkout $branch ```bash #testing addon (build and run) docker build -t emhass/docker --build-arg build_version=addon-local . -docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/options.json:/app/options.json -e LAT="45.83" -e LON="6.86" -e ALT="4807.8" -e TIME_ZONE="Europe/Paris" emhass/docker --url $HAURL --key $HAKEY +docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/options.json:/app/options.json -e LAT="45.83" -e LON="6.86" -e ALT="4807.8" -e TIME_ZONE="Europe/Paris" emhass/docker --url $HAURL --key $HAKEY ``` ```bash #run actions on a separate terminal @@ -311,6 +316,8 @@ curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/a curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-fit curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-tune +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/publish-data ``` @@ -326,7 +333,7 @@ lat: 45.83 lon: 6.86 alt: 4807.8 EOT -docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml emhass/docker +docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml emhass/docker ``` ```bash #run actions on a separate terminal @@ -336,10 +343,12 @@ curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/a curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-fit curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-tune +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", 
"mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/publish-data ``` -User may wish to re-test with tweaked parameters such as `lp_solver` and `weather_forecast_method`, in `config_emhass.yaml` *(standalone)* or `options.json` *(addon)*, to broaden the testing scope. +User may wish to re-test with tweaked parameters such as `lp_solver`, `weather_forecast_method` and `load_forecast_method`, in `config_emhass.yaml` *(standalone)* or `options.json` *(addon)*, to broaden the testing scope. *see [EMHASS & EMHASS-Add-on differences](https://emhass.readthedocs.io/en/latest/differences.html) for more information on how these config_emhass & options files differ* *Note: may need to set `--build-arg TARGETARCH=YOUR-ARCH` in docker build* diff --git a/docs/mlregressor.md b/docs/mlregressor.md index dee5fccd..7746e985 100644 --- a/docs/mlregressor.md +++ b/docs/mlregressor.md @@ -4,10 +4,9 @@ Starting with v0.9.0, a new framework is proposed within EMHASS. It provides a m This API provides two main methods: -- fit: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point. - -- predict: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. +- **fit**: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point. +- **predict**: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. ## A basic model fit @@ -29,10 +28,11 @@ Some paramters can be optionally defined at runtime: - `date_features`: A list of 'date_features' to take into account when fitting the model. 
Possibilities are `year`, `month`, `day_of_week` (monday=0, sunday=6), `day_of_year`, `day`(day_of_month) and `hour` -``` +### Examples: +```yaml runtimeparams = { "csv_file": "heating_prediction.csv", - "features":["degreeday", "solar"], + "features": ["degreeday", "solar"], "target": "heating_hours", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", @@ -43,12 +43,17 @@ runtimeparams = { A correct `curl` call to launch a model fit can look like this: +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "heating_hours"}' http://localhost:5000/action/regressor-model-fit ``` -curl -i -H "Content-Type:application/json" -X POST -d '{}' http://localhost:5000/action/regressor-model-fit +or +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit ``` + A Home Assistant `rest_command` can look like this: -``` +```yaml fit_heating_hours: url: http://127.0.0.1:5000/action/regressor-model-fit method: POST @@ -56,7 +61,7 @@ fit_heating_hours: payload: >- { "csv_file": "heating_prediction.csv", - "features":["degreeday", "solar"], + "features": ["degreeday", "solar"], "target": "hours", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", @@ -91,7 +96,8 @@ The list of parameters needed to set the data publish task is: - `model_type`: The model type that has to be predicted -``` +### Examples: +```yaml runtimeparams = { "mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": None, @@ -103,13 +109,17 @@ runtimeparams = { Pass the correct `model_type` like this: -``` +```bash curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict ``` +or +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict +``` A Home Assistant `rest_command` can look like this: -``` +```yaml predict_heating_hours: url: http://localhost:5001/action/regressor-model-predict method: POST @@ -136,9 +146,30 @@ After predicting the model the following information is logged by EMHASS: The predict method will publish the result to a Home Assistant sensor. 
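+
+If you prefer Python over `curl`, both endpoints can also be driven from a small script. The following is a minimal sketch using the `requests` library; the host, port and payload values are illustrative assumptions and should be adapted to your own setup:
+
+```python
+import requests
+
+# Assumed EMHASS base URL; adjust host and port to your installation.
+BASE_URL = "http://localhost:5000/action"
+
+# Train the regressor from the CSV file (same runtime parameters as above).
+fit_payload = {
+    "csv_file": "heating_prediction.csv",
+    "features": ["degreeday", "solar"],
+    "target": "hour",
+    "regression_model": "RandomForestRegression",
+    "model_type": "heating_hours_degreeday",
+    "timestamp": "timestamp",
+    "date_features": ["month", "day_of_week"],
+}
+requests.post(f"{BASE_URL}/regressor-model-fit", json=fit_payload).raise_for_status()
+
+# Predict from new feature values; EMHASS publishes the result to the given sensor.
+predict_payload = {
+    "model_type": "heating_hours_degreeday",
+    "new_values": [12.79, 4.766, 1, 2],
+    "mlr_predict_entity_id": "sensor.predicted_hours",
+    "mlr_predict_unit_of_measurement": "h",
+    "mlr_predict_friendly_name": "Predicted hours",
+}
+requests.post(f"{BASE_URL}/regressor-model-predict", json=predict_payload).raise_for_status()
+```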
-
-## How to store data in a csv file from Home Assistant
-Notify to a file
-```
+
+## Storing CSV files
+
+### Standalone container - how to mount .csv files in the data_path folder
+If running EMHASS as a standalone container, you will need to volume-mount a folder as the `data_path`, or mount a single .csv file inside `data_path`.
+
+Example of mounting a folder as the data_path *(.csv files stored inside)*:
+```bash
+docker run -it --restart always -p 5000:5000 -e LOCAL_COSTFUN="profit" -v $(pwd)/data:/app/data -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml --name DockerEMHASS <REPOSITORY:TAG>
+```
+Example of mounting a single .csv file:
+```bash
+docker run -it --restart always -p 5000:5000 -e LOCAL_COSTFUN="profit" -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml --name DockerEMHASS <REPOSITORY:TAG>
+```
+
+### Add-on - How to store data in a csv file from Home Assistant
+
+#### Change data_path
+If running the EMHASS add-on, you will likely need to change the `data_path` to a folder your Home Assistant can access.
+To do this, set the `data_path` to `/share/` in the addon *Configuration* page.
+
+#### Store sensor data to csv
+
+Notify to a file
+```yaml
 notify:
   - platform: file
     name: heating_hours_prediction
@@ -146,7 +177,7 @@ notify:
     timestamp: false
     filename: /share/heating_prediction.csv
 ```
 Then you need an automation to notify to this file
-```
+```yaml
 alias: "Heating csv"
 id: 157b1d57-73d9-4f39-82c6-13ce0cf42
 trigger:
diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py
index b669e584..f5b5281d 100644
--- a/src/emhass/command_line.py
+++ b/src/emhass/command_line.py
@@ -225,8 +225,9 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
             df_input_data = pd.read_csv(filename_path, parse_dates=True)
 
         else:
-            logger.error("The cvs file was not found.")
-            raise ValueError("The CSV file " + csv_file + " was not found.")
+            logger.error("The CSV file " + csv_file + " was not found in path: " + str(emhass_conf["data_path"]))
+            return False
+            #raise ValueError("The CSV file " + csv_file + " was not found.")
         required_columns = []
@@ -236,9 +237,11 @@ def set_input_data_dict(emhass_conf: dict, costfun: str,
         if not set(required_columns).issubset(df_input_data.columns):
             logger.error("The cvs file does not contain the required columns.")
             msg = f"CSV file should contain the following columns: {', '.join(required_columns)}"
-            raise ValueError(
-                msg,
-            )
+            logger.error(msg)
+            return False
+            #raise ValueError(
+            #    msg,
+            #)
 
     elif set_type == "publish-data":
         df_input_data, df_input_data_dayahead = None, None
@@ -615,12 +618,36 @@ def regressor_model_fit(
     :type debug: Optional[bool], optional
     """
     data = copy.deepcopy(input_data_dict["df_input_data"])
-    model_type = input_data_dict["params"]["passed_data"]["model_type"]
-    regression_model = input_data_dict["params"]["passed_data"]["regression_model"]
-    features = input_data_dict["params"]["passed_data"]["features"]
-    target = input_data_dict["params"]["passed_data"]["target"]
-    timestamp = input_data_dict["params"]["passed_data"]["timestamp"]
-    date_features = input_data_dict["params"]["passed_data"]["date_features"]
+    if "model_type" in input_data_dict["params"]["passed_data"]:
+        model_type = input_data_dict["params"]["passed_data"]["model_type"]
+    else:
+        logger.error("parameter: 'model_type' not passed")
+        return False
+    if "regression_model" 
in input_data_dict["params"]["passed_data"]: + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] + else: + logger.error("parameter: 'regression_model' not passed") + return False + if "features" in input_data_dict["params"]["passed_data"]: + features = input_data_dict["params"]["passed_data"]["features"] + else: + logger.error("parameter: 'features' not passed") + return False + if "target" in input_data_dict["params"]["passed_data"]: + target = input_data_dict["params"]["passed_data"]["target"] + else: + logger.error("parameter: 'target' not passed") + return False + if "timestamp" in input_data_dict["params"]["passed_data"]: + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + else: + logger.error("parameter: 'timestamp' not passed") + return False + if "date_features" in input_data_dict["params"]["passed_data"]: + date_features = input_data_dict["params"]["passed_data"]["date_features"] + else: + logger.error("parameter: 'date_features' not passed") + return False # The MLRegressor object mlr = MLRegressor( @@ -658,7 +685,11 @@ def regressor_model_predict( :param debug: True to debug, useful for unit testing, defaults to False :type debug: Optional[bool], optional """ - model_type = input_data_dict["params"]["passed_data"]["model_type"] + if "model_type" in input_data_dict["params"]["passed_data"]: + model_type = input_data_dict["params"]["passed_data"]["model_type"] + else: + logger.error("parameter: 'model_type' not passed") + return False filename = model_type + "_mlr.pkl" filename_path = input_data_dict["emhass_conf"]["data_path"] / filename if not debug: @@ -669,20 +700,18 @@ def regressor_model_predict( logger.error( "The ML forecaster file was not found, please run a model fit method before this predict method", ) - return - new_values = input_data_dict["params"]["passed_data"]["new_values"] + return False + if "new_values" in input_data_dict["params"]["passed_data"]: + new_values = input_data_dict["params"]["passed_data"]["new_values"] + else: + logger.error("parameter: 'new_values' not passed") + return False # Predict from csv file prediction = mlr.predict(new_values) - - mlr_predict_entity_id = input_data_dict["params"]["passed_data"][ - "mlr_predict_entity_id" - ] - mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ - "mlr_predict_unit_of_measurement" - ] - mlr_predict_friendly_name = input_data_dict["params"]["passed_data"][ - "mlr_predict_friendly_name" - ] + + mlr_predict_entity_id = input_data_dict["params"]["passed_data"].get("mlr_predict_entity_id","sensor.mlr_predict") + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"].get("mlr_predict_unit_of_measurement","h") + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"].get("mlr_predict_friendly_name","mlr predictor") # Publish prediction idx = 0 if not debug: diff --git a/src/emhass/static/advanced.html b/src/emhass/static/advanced.html index 38371583..6595520c 100644 --- a/src/emhass/static/advanced.html +++ b/src/emhass/static/advanced.html @@ -14,6 +14,9 @@

         <h4>Use the buttons below to fit, predict and tune a machine learning model for Load forecasting:</h4>
+        <h4>Use the buttons below to fit and predict a machine learning regressor model:</h4>
+        <button type="button" id="regressor-model-fit">ML regressor model fit</button>
+        <button type="button" id="regressor-model-predict">ML regressor model predict</button>
         <h4>Input Runtime Parameters</h4>
diff --git a/src/emhass/static/script.js b/src/emhass/static/script.js index 05861814..ad02b578 100644 --- a/src/emhass/static/script.js +++ b/src/emhass/static/script.js @@ -16,6 +16,8 @@ function loadButtons(page) { "forecast-model-fit", "forecast-model-predict", "forecast-model-tune", + "regressor-model-fit", + "regressor-model-predict", "perfect-optim", "publish-data", "naive-mpc-optim" diff --git a/src/emhass/utils.py b/src/emhass/utils.py index 8bb6f101..aa344732 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -216,13 +216,16 @@ def treat_runtimeparams( freq = int(retrieve_hass_conf["freq"].seconds / 60.0) delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) - if set_type == "regressor-model-fit": - csv_file = runtimeparams["csv_file"] - features = runtimeparams["features"] - target = runtimeparams["target"] - params["passed_data"]["csv_file"] = csv_file - params["passed_data"]["features"] = features - params["passed_data"]["target"] = target + if set_type == "regressor-model-fit": + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target if "timestamp" not in runtimeparams: params["passed_data"]["timestamp"] = None else: @@ -233,10 +236,10 @@ def treat_runtimeparams( else: date_features = runtimeparams["date_features"] params["passed_data"]["date_features"] = date_features - if set_type == "regressor-model-predict": - new_values = runtimeparams["new_values"] - params["passed_data"]["new_values"] = new_values + if "new_values" in runtimeparams: + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values if "csv_file" in runtimeparams: csv_file = runtimeparams["csv_file"] params["passed_data"]["csv_file"] = csv_file diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 9a100870..afb5370a 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -195,15 +195,21 @@ def action_call(action_name): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-fit': - app.logger.info(" >> Performing a machine learning regressor fit...") + ActionStr = " >> Performing a machine learning regressor fit..." + app.logger.info(ActionStr) regressor_model_fit(input_data_dict, app.logger) msg = f'EMHASS >> Action regressor-model-fit executed... \n' - return make_response(msg, 201) + if not checkFileLog(ActionStr): + return make_response(msg, 201) + return make_response(grabLog(ActionStr), 400) elif action_name == 'regressor-model-predict': - app.logger.info(" >> Performing a machine learning regressor predict...") + ActionStr = " >> Performing a machine learning regressor predict..." + app.logger.info(ActionStr) regressor_model_predict(input_data_dict, app.logger) msg = f'EMHASS >> Action regressor-model-predict executed... \n' - return make_response(msg, 201) + if not checkFileLog(ActionStr): + return make_response(msg, 201) + return make_response(grabLog(ActionStr), 400) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py index 4201199a..0d40ac0b 100644 --- a/tests/test_machine_learning_regressor.py +++ b/tests/test_machine_learning_regressor.py @@ -56,7 +56,7 @@ def setUp(self): "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], - "new_values": [12.79, 4.766, 1, 2], + "new_values": [12.79, 4.766, 1, 2] } runtimeparams_json = json.dumps(runtimeparams) params["passed_data"] = runtimeparams From a78ae79280202aa1e23553fcc7fc93d4c98456c6 Mon Sep 17 00:00:00 2001 From: Giel Janssens Date: Mon, 22 Apr 2024 09:32:50 +0200 Subject: [PATCH 111/111] is 0 -> == 0 --- src/emhass/retrieve_hass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index 4fb909b1..4bac582f 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -170,7 +170,7 @@ def get_data( df_raw = pd.DataFrame.from_dict(data) # self.logger.info(str(df_raw)) if len(df_raw) == 0: - if x is 0: + if x == 0: self.logger.error( "The retrieved Dataframe is empty, A sensor:" + var