diff --git a/.gitignore b/.gitignore index 5dc21af8..fa1f0d74 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,10 @@ secrets_emhass.yaml .vscode/launch.json .vscode/settings.json .vscode/tasks.json -*.csv *.html *.pkl data/actionLogs.txt +**/app # Byte-compiled / optimized / DLL files diff --git a/.vscode/launch.json b/.vscode/launch.json index 10313c97..f0ceae3a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,14 +14,15 @@ "request": "launch", "module": "emhass.web_server", "console": "integratedTerminal", - "purpose":["debug-in-terminal"], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", "OPTIONS_PATH": "/workspaces/emhass/options.json", "SECRETS_PATH": "/workspaces/emhass/secrets_emhass.yaml", "DATA_PATH": "/workspaces/emhass/data/", - "LOGGING_LEVEL": "DEBUG" } }, { @@ -30,8 +31,15 @@ "request": "launch", "module": "emhass.web_server", "console": "integratedTerminal", - "args": ["--addon", "true", "--no_response", "true"], - "purpose":["debug-in-terminal"], + "args": [ + "--addon", + "true", + "--no_response", + "true" + ], + "purpose": [ + "debug-in-terminal" + ], "justMyCode": true, "env": { "CONFIG_PATH": "/workspaces/emhass/config_emhass.yaml", @@ -44,9 +52,7 @@ "LAT": "45.83", //optional change "LON": "6.86", //optional change "ALT": "4807.8", //optional change - "LOGGING_LEVEL": "DEBUG" //optional change }, - - } + } ] } \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index ffe440eb..ee23d121 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,7 +9,11 @@ "isDefault": true }, "args": [ - "install", "--no-deps", "--force-reinstall", "." + "install", + "--no-deps", + "--force-reinstall", + "--editable", + "." 
], "presentation": { "echo": true, diff --git a/data/heating_prediction.csv b/data/heating_prediction.csv new file mode 100644 index 00000000..f50a8a49 --- /dev/null +++ b/data/heating_prediction.csv @@ -0,0 +1,130 @@ +timestamp,degreeday,solar,hour +2023-11-10 23:59:32.458039+01:00,12.23,3.982,2.87 +2023-11-11 23:59:32.459778+01:00,12.94,13.723,3.14 +2023-11-12 23:59:32.462220+01:00,14.45,4.925,3.5 +2023-11-13 23:59:32.462167+01:00,8.49,2.138,3.19 +2023-11-14 23:59:32.338942+01:00,8.61,2.444,2.91 +2023-11-15 23:59:32.195198+01:00,9.83,10.685,2.88 +2023-11-16 23:59:32.501044+01:00,12.8,1.955,1.28 +2023-11-17 23:59:32.316366+01:00,13.35,8.742,2.97 +2023-11-18 23:59:32.082785+01:00,11.84,0.849,3.42 +2023-11-19 23:59:32.077198+01:00,7.3,10.85,1.9 +2023-11-20 23:59:32.431964+01:00,9.91,6.395,2.48 +2023-11-21 23:59:32.295705+01:00,11.44,2.678,2.91 +2023-11-22 23:59:32.377740+01:00,16.14,2.994,2.96 +2023-11-23 23:59:32.385890+01:00,9.31,5.346,2.91 +2023-11-24 23:59:32.376194+01:00,12.96,8.61,2.9 +2023-11-25 23:59:32.373666+01:00,14.91,12.31,3.47 +2023-11-26 23:59:32.373647+01:00,14.79,2.589,3.69 +2023-11-27 23:59:32.379920+01:00,14.92,0.322,6.05 +2023-11-28 23:59:32.213947+01:00,18.59,20.342,2.94 +2023-11-29 23:59:32.217384+01:00,19.05,5.393,5.41 +2023-11-30 23:59:32.222641+01:00,21.27,1.899,6.77 +2023-12-01 23:59:32.224533+01:00,21.3,1.233,5.75 +2023-12-02 23:59:32.107119+01:00,21.97,14.653,2.96 +2023-12-03 23:59:32.107436+01:00,20.61,4.766,8.89 +2023-12-04 23:59:32.116642+01:00,18.36,1.349,6.73 +2023-12-05 23:59:32.191254+01:00,16.93,0.869,6.17 +2023-12-06 23:59:32.176803+01:00,16.8,5.413,5.38 +2023-12-07 23:59:32.251031+01:00,17.67,8.089,5.98 +2023-12-08 23:59:32.255888+01:00,14.37,1.203,5.63 +2023-12-09 23:59:32.109040+01:00,11.94,0.814,5.08 +2023-12-10 23:59:32.103738+01:00,9.72,6.051,3.42 +2023-12-11 23:59:32.497717+01:00,9.83,1.459,3.87 +2023-12-12 23:59:32.502503+01:00,11.18,4.176,3.31 +2023-12-13 23:59:32.504794+01:00,11.09,2.91,3.1 +2023-12-14 23:59:32.177489+01:00,13.88,7.53,2.89 +2023-12-15 23:59:32.186292+01:00,12.18,2.129,5.68 +2023-12-16 23:59:32.176812+01:00,11.75,1.641,3.46 +2023-12-17 23:59:32.119874+01:00,12.18,14.868,3.46 +2023-12-18 23:59:32.120168+01:00,14.75,1.283,3.12 +2023-12-19 23:59:32.120101+01:00,12.82,0.09,5.07 +2023-12-20 23:59:32.249731+01:00,12.8,3.803,3.6 +2023-12-21 23:59:32.249135+01:00,8.73,2.096,3.55 +2023-12-22 23:59:32.385164+01:00,9.12,1.278,0.85 +2023-12-23 23:59:32.382910+01:00,8.99,1.848,0.0 +2023-12-24 23:59:32.382457+01:00,8.04,0.165,7.42 +2023-12-25 23:59:32.303520+01:00,7.56,1.028,2.93 +2023-12-26 23:59:32.105788+01:00,10.55,9.274,2.92 +2023-12-27 23:59:32.183107+01:00,11.78,2.026,3.39 +2023-12-28 23:59:32.183405+01:00,8.91,3.68,3.19 +2023-12-29 23:59:32.399740+01:00,9.35,2.464,2.95 +2023-12-30 23:59:32.091110+01:00,11.07,7.948,3.44 +2023-12-31 23:59:32.257530+01:00,10.51,3.5,3.48 +2024-01-01 23:59:32.106161+01:00,12.75,4.046,3.08 +2024-01-02 23:59:32.103187+01:00,8.81,0.562,4.46 +2024-01-03 23:59:32.429947+01:00,10.03,2.184,3.26 +2024-01-04 23:59:32.436773+01:00,11.22,5.662,2.97 +2024-01-05 23:59:32.165969+01:00,12.42,1.199,3.6 +2024-01-06 23:59:32.110208+01:00,15.35,0.295,4.32 +2024-01-07 23:59:32.147775+01:00,19.88,0.896,6.19 +2024-01-08 23:59:32.242815+01:00,22.74,6.468,5.82 +2024-01-09 23:59:32.201342+01:00,24.38,21.307,6.92 +2024-01-10 23:59:32.411136+01:00,24.84,18.89,1.53 +2024-01-11 23:59:32.399433+01:00,23.57,19.27,3.05 +2024-01-12 23:59:32.467622+01:00,18.22,1.977,13.98 +2024-01-13 23:59:32.077428+01:00,17.9,0.472,6.93 +2024-01-14 
23:59:32.127844+01:00,19.65,1.346,6.95 +2024-01-15 23:59:32.125062+01:00,19.49,4.35,7.82 +2024-01-16 23:59:32.280474+01:00,21.21,9.238,5.7 +2024-01-17 23:59:32.283951+01:00,23.17,1.193,7.37 +2024-01-18 23:59:32.361241+01:00,21.61,17.307,6.67 +2024-01-19 23:59:32.341654+01:00,22.06,21.004,6.24 +2024-01-20 23:59:32.359151+01:00,21.95,12.912,6.43 +2024-01-21 23:59:32.126221+01:00,17.38,3.28,7.45 +2024-01-22 23:59:32.126346+01:00,9.47,7.645,6.1 +2024-01-23 23:59:32.417727+01:00,11.87,7.689,4.76 +2024-01-24 23:59:32.420933+01:00,8.15,10.052,3.62 +2024-01-25 23:59:32.419138+01:00,12.38,3.785,3.98 +2024-01-26 23:59:32.422066+01:00,11.4,11.94,3.1 +2024-01-27 23:59:32.176538+01:00,17.96,19.741,3.45 +2024-01-28 23:59:32.168328+01:00,16.72,20.366,4.85 +2024-01-29 23:59:32.173916+01:00,13.11,16.972,4.51 +2024-01-30 23:59:32.503034+01:00,11.21,4.013,3.99 +2024-01-31 23:59:32.179265+01:00,12.79,4.766,3.73 +2024-02-01 23:59:32.487147+01:00,12.74,23.924,2.98 +2024-02-02 23:59:32.570084+01:00,13.0,2.98,5.04 +2024-02-03 23:59:32.484878+01:00,9.26,1.413,3.48 +2024-02-04 23:59:32.472168+01:00,8.35,4.306,3.47 +2024-02-05 23:59:32.409856+01:00,9.78,5.704,0.0 +2024-02-06 23:59:32.439147+01:00,9.15,2.431,6.56 +2024-02-07 23:59:32.235231+01:00,14.42,3.839,3.07 +2024-02-08 23:59:32.441543+01:00,13.9,1.412,5.94 +2024-02-09 23:59:32.443230+01:00,8.2,7.246,2.96 +2024-02-10 23:59:32.504326+01:00,8.37,8.567,3.48 +2024-02-11 23:59:32.452959+01:00,10.44,5.304,0.0 +2024-02-12 23:59:32.450999+01:00,12.65,16.004,3.42 +2024-02-13 23:59:32.343162+01:00,13.84,19.809,3.16 +2024-02-14 23:59:32.339408+01:00,8.48,1.98,4.52 +2024-02-15 23:59:32.339971+01:00,6.13,9.952,2.98 +2024-02-16 23:59:32.455273+01:00,7.66,3.675,3.06 +2024-02-17 23:59:32.097937+01:00,8.56,12.269,3.48 +2024-02-18 23:59:32.126377+01:00,9.59,2.205,3.04 +2024-02-19 23:59:32.421243+01:00,10.22,3.731,2.97 +2024-02-20 23:59:32.421985+01:00,11.61,13.775,0.0 +2024-02-21 23:59:32.371300+01:00,10.52,4.856,3.02 +2024-02-22 23:59:32.373153+01:00,9.53,4.256,3.48 +2024-02-23 23:59:32.372545+01:00,13.66,8.743,4.09 +2024-02-24 23:59:32.197044+01:00,14.44,7.842,4.3 +2024-02-25 23:59:32.196386+01:00,12.41,16.235,3.48 +2024-02-26 23:59:32.409648+01:00,14.63,2.096,5.05 +2024-02-27 23:59:32.373347+01:00,14.5,29.437,3.21 +2024-02-28 23:59:32.407538+01:00,15.38,6.475,4.88 +2024-02-29 23:59:32.194724+01:00,11.83,3.238,4.68 +2024-03-01 23:59:32.084520+01:00,10.56,14.352,3.8 +2024-03-02 23:59:32.066434+01:00,9.94,25.356,3.49 +2024-03-03 23:59:32.270878+01:00,8.9,10.577,3.19 +2024-03-04 23:59:32.274918+01:00,10.67,28.096,2.08 +2024-03-05 23:59:32.315023+01:00,12.19,10.553,2.95 +2024-03-06 23:59:32.441001+01:00,11.38,32.597,2.91 +2024-03-07 23:59:32.440044+01:00,12.39,28.856,2.96 +2024-03-08 23:59:32.228265+01:00,12.01,37.395,2.96 +2024-03-09 23:59:32.081874+01:00,8.72,17.66,3.5 +2024-03-10 23:59:32.335321+01:00,8.0,12.207,3.47 +2024-03-11 23:59:32.139531+01:00,10.39,2.526,2.96 +2024-03-12 23:59:32.136709+01:00,10.24,8.211,2.98 +2024-03-13 23:59:32.407174+01:00,7.19,6.425,2.95 +2024-03-14 23:59:32.342436+01:00,6.06,33.389,1.64 +2024-03-15 23:59:32.266278+01:00,5.63,12.628,2.96 +2024-03-16 23:59:32.155245+01:00,9.57,12.103,3.0 +2024-03-17 23:59:32.366155+01:00,8.43,14.302,0.25 diff --git a/docs/develop.md b/docs/develop.md index 6716f5ec..da81ef25 100644 --- a/docs/develop.md +++ b/docs/develop.md @@ -221,6 +221,11 @@ For those who wish to mount/sync the local `data` folder with the data folder fr docker run ... -v $(pwd)/data/:/app/data ... 
``` +You can also mount individual data files (e.g. `.csv`) separately: +```bash +docker run ... -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv ... +``` + +#### Issue with TARGETARCH +If your docker build fails with an error related to `TARGETARCH`, it may be best to add your device's architecture manually: @@ -301,7 +306,7 @@ git checkout $branch ```bash #testing addon (build and run) docker build -t emhass/docker --build-arg build_version=addon-local . -docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/options.json:/app/options.json -e LAT="45.83" -e LON="6.86" -e ALT="4807.8" -e TIME_ZONE="Europe/Paris" emhass/docker --url $HAURL --key $HAKEY +docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/options.json:/app/options.json -e LAT="45.83" -e LON="6.86" -e ALT="4807.8" -e TIME_ZONE="Europe/Paris" emhass/docker --url $HAURL --key $HAKEY ``` ```bash #run actions on a separate terminal curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/a curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-fit curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-tune +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/publish-data ``` @@ -326,7 +333,7 @@ lat: 45.83 lon: 6.86 alt: 4807.8 EOT -docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml emhass/docker +docker run --rm -it -p 5000:5000 --name emhass-container -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml emhass/docker ``` ```bash #run actions on a separate terminal curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/a curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-fit curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/forecast-model-tune +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }'
http://localhost:5000/action/regressor-model-fit +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict curl -i -H 'Content-Type:application/json' -X POST -d {} http://localhost:5000/action/publish-data ``` -User may wish to re-test with tweaked parameters such as `lp_solver` and `weather_forecast_method`, in `config_emhass.yaml` *(standalone)* or `options.json` *(addon)*, to broaden the testing scope. +Users may wish to re-test with tweaked parameters such as `lp_solver`, `weather_forecast_method` and `load_forecast_method`, in `config_emhass.yaml` *(standalone)* or `options.json` *(addon)*, to broaden the testing scope. *see [EMHASS & EMHASS-Add-on differences](https://emhass.readthedocs.io/en/latest/differences.html) for more information on how these config_emhass & options files differ* *Note: may need to set `--build-arg TARGETARCH=YOUR-ARCH` in docker build* diff --git a/docs/index.md b/docs/index.md index cf015a3f..cc9f33a8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,6 +6,7 @@ # EMHASS: Energy Management for Home Assistant ```{image} images/emhass_logo.png + ``` Welcome to the documentation of EMHASS. With this package written in Python you will be able to implement a real Energy Management System for your household. This software was designed to be easily configurable and to integrate quickly with Home Assistant: @@ -21,6 +22,7 @@ differences.md lpems.md forecasts.md mlforecaster.md +mlregressor.md study_case.md config.md emhass.md @@ -32,5 +34,3 @@ develop.md - {ref}`genindex` - {ref}`modindex` - {ref}`search` - - diff --git a/docs/mlregressor.md b/docs/mlregressor.md new file mode 100644 index 00000000..7746e985 --- /dev/null +++ b/docs/mlregressor.md @@ -0,0 +1,196 @@ +# The machine learning regressor + +Starting with v0.9.0, a new framework is proposed within EMHASS. It provides a machine learning module to predict values from a csv file using different regression models. + +This API provides two main methods: + +- **fit**: To train a model with the passed data. This method is exposed with the `regressor-model-fit` end point. + +- **predict**: To obtain a prediction from a pre-trained model. This method is exposed with the `regressor-model-predict` end point. + +## A basic model fit + +To train a model, use the `regressor-model-fit` end point. + +Some parameters can optionally be defined at runtime: + +- `csv_file`: The name of the csv file containing your data. + +- `features`: A list of the feature (input) columns to use from the csv file. + +- `target`: The target column, i.e. the value that has to be predicted. + +- `model_type`: The name under which the trained regressor will be saved. For example: `heating_hours_degreeday`. This should be a unique name if you are using multiple custom regressor models. + +- `regression_model`: The regression model that will be used. For now only these options are possible: `LinearRegression`, `RidgeRegression`, `LassoRegression`, `RandomForestRegression`, `GradientBoostingRegression` and `AdaBoostRegression`. + +- `timestamp`: If defined, the column to be used as the timestamp. + +- `date_features`: A list of 'date_features' to take into account when fitting the model.
Possibilities are `year`, `month`, `day_of_week` (Monday=0, Sunday=6), `day_of_year`, `day` (day of the month) and `hour`. + +### Examples: +```yaml +runtimeparams = { + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], + "target": "heating_hours", + "regression_model": "RandomForestRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"] + } +``` + +A correct `curl` call to launch a model fit can look like this: + +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "heating_hours"}' http://localhost:5000/action/regressor-model-fit +``` +or +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"csv_file": "heating_prediction.csv", "features": ["degreeday", "solar"], "target": "hour", "regression_model": "RandomForestRegression", "model_type": "heating_hours_degreeday", "timestamp": "timestamp", "date_features": ["month", "day_of_week"], "new_values": [12.79, 4.766, 1, 2] }' http://localhost:5000/action/regressor-model-fit +``` + +A Home Assistant `rest_command` can look like this: + +```yaml +fit_heating_hours: + url: http://127.0.0.1:5000/action/regressor-model-fit + method: POST + content_type: "application/json" + payload: >- + { + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], + "target": "hours", + "regression_model": "RandomForestRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"] + } +``` +After fitting the model, the following information is logged by EMHASS: + + 2024-04-17 12:41:50,019 - web_server - INFO - Passed runtime parameters: {'csv_file': 'heating_prediction.csv', 'features': ['degreeday', 'solar'], 'target': 'heating_hours', 'regression_model': 'RandomForestRegression', 'model_type': 'heating_hours_degreeday', 'timestamp': 'timestamp', 'date_features': ['month', 'day_of_week']} + 2024-04-17 12:41:50,020 - web_server - INFO - >> Setting input data dict + 2024-04-17 12:41:50,021 - web_server - INFO - Setting up needed data + 2024-04-17 12:41:50,048 - web_server - INFO - >> Performing a machine learning regressor fit... + 2024-04-17 12:41:50,049 - web_server - INFO - Performing a MLRegressor fit for heating_hours_degreeday + 2024-04-17 12:41:50,064 - web_server - INFO - Training a RandomForestRegression model + 2024-04-17 12:41:57,852 - web_server - INFO - Elapsed time for model fit: 7.78800106048584 + 2024-04-17 12:41:57,862 - web_server - INFO - Prediction R2 score of fitted model on test data: -0.5667567505914477 + +## The predict method + +To obtain a prediction using a previously trained model, use the `regressor-model-predict` end point. + +The parameters needed to set up the data publish task are: + +- `mlr_predict_entity_id`: The unique `entity_id` to be used. + +- `mlr_predict_unit_of_measurement`: The `unit_of_measurement` to be used. + +- `mlr_predict_friendly_name`: The `friendly_name` to be used. + +- `new_values`: The new values for the features (in the same order as the features list). When using `date_features`, append their values to `new_values` in the same order.
+ +- `model_type`: The name of the model (as given at fit time) to use for the prediction. + +### Examples: +```yaml +runtimeparams = { + "mlr_predict_entity_id": "sensor.mlr_predict", + "mlr_predict_unit_of_measurement": None, + "mlr_predict_friendly_name": "mlr predictor", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" +} +``` + +Pass the correct `model_type` like this: + +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"model_type": "heating_hours_degreeday"}' http://localhost:5000/action/regressor-model-predict +``` +or +```bash +curl -i -H "Content-Type:application/json" -X POST -d '{"mlr_predict_entity_id": "sensor.mlr_predict", "mlr_predict_unit_of_measurement": "h", "mlr_predict_friendly_name": "mlr predictor", "new_values": [8.2, 7.23, 2, 6], "model_type": "heating_hours_degreeday" }' http://localhost:5000/action/regressor-model-predict +``` + +A Home Assistant `rest_command` can look like this: + +```yaml +predict_heating_hours: + url: http://localhost:5001/action/regressor-model-predict + method: POST + content_type: "application/json" + payload: >- + { + "mlr_predict_entity_id": "sensor.predicted_hours", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [8.2, 7.23, 2, 6], + "model_type": "heating_hours_degreeday" + } +``` +After running the prediction, the following information is logged by EMHASS: + +``` +2024-04-17 14:25:40,695 - web_server - INFO - Passed runtime parameters: {'mlr_predict_entity_id': 'sensor.predicted_hours', 'mlr_predict_unit_of_measurement': 'h', 'mlr_predict_friendly_name': 'Predicted hours', 'new_values': [8.2, 7.23, 2, 6], 'model_type': 'heating_hours_degreeday'} +2024-04-17 14:25:40,696 - web_server - INFO - >> Setting input data dict +2024-04-17 14:25:40,696 - web_server - INFO - Setting up needed data +2024-04-17 14:25:40,700 - web_server - INFO - >> Performing a machine learning regressor predict... +2024-04-17 14:25:40,715 - web_server - INFO - Performing a prediction for heating_hours_degreeday +2024-04-17 14:25:40,750 - web_server - INFO - Successfully posted to sensor.predicted_hours = 3.716600000000001 +``` +The predict method will publish the result to a Home Assistant sensor. + + +## Storing CSV files + +### Standalone container - how to mount a .csv file in the data_path folder +If running EMHASS as a standalone container, you will need to volume mount a folder as the `data_path`, or mount a single .csv file inside `data_path`. + +Example of mounting a folder as data_path *(.csv files stored inside)*: +```bash +docker run -it --restart always -p 5000:5000 -e LOCAL_COSTFUN="profit" -v $(pwd)/data:/app/data -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml --name DockerEMHASS +``` +Example of mounting a single csv file: +```bash +docker run -it --restart always -p 5000:5000 -e LOCAL_COSTFUN="profit" -v $(pwd)/data/heating_prediction.csv:/app/data/heating_prediction.csv -v $(pwd)/config_emhass.yaml:/app/config_emhass.yaml -v $(pwd)/secrets_emhass.yaml:/app/secrets_emhass.yaml --name DockerEMHASS +``` + +### Add-on - How to store data in a csv file from Home Assistant + +#### Change data_path +If running EMHASS-Add-On, you will likely need to change the `data_path` to a folder that your Home Assistant can access. +To do this, set the `data_path` to `/share/` in the addon *Configuration* page.
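As a quick end-to-end check (not part of this PR), the fit and predict end points documented above can also be called from a short Python script once the csv file is in place. This is only a minimal sketch, assuming EMHASS is reachable on `localhost:5000` and that the `requests` package is installed; the payloads simply mirror the curl examples above:

```python
import requests

BASE_URL = "http://localhost:5000/action"  # adjust host/port to your setup

# Train a regressor from the csv file stored in data_path
fit_payload = {
    "csv_file": "heating_prediction.csv",
    "features": ["degreeday", "solar"],
    "target": "hour",
    "regression_model": "RandomForestRegression",
    "model_type": "heating_hours_degreeday",
    "timestamp": "timestamp",
    "date_features": ["month", "day_of_week"],
}
r = requests.post(f"{BASE_URL}/regressor-model-fit", json=fit_payload, timeout=60)
r.raise_for_status()

# Predict with the trained model; new_values follows the order of the
# features list, then the date_features (here: month, day_of_week)
predict_payload = {
    "mlr_predict_entity_id": "sensor.mlr_predict",
    "mlr_predict_unit_of_measurement": "h",
    "mlr_predict_friendly_name": "mlr predictor",
    "new_values": [8.2, 7.23, 2, 6],
    "model_type": "heating_hours_degreeday",
}
r = requests.post(f"{BASE_URL}/regressor-model-predict", json=predict_payload, timeout=60)
r.raise_for_status()
print(r.status_code)
```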
+ +#### Store sensor data to csv + +Notify to a file +```yaml +notify: + - platform: file + name: heating_hours_prediction + timestamp: false + filename: /share/heating_prediction.csv +``` +Then you need an automation to notify to this file +```yaml +alias: "Heating csv" +id: 157b1d57-73d9-4f39-82c6-13ce0cf42 +trigger: + - platform: time + at: "23:59:32" +action: + - service: notify.heating_hours_prediction + data: + message: > + {% set degreeday = states('sensor.degree_day_daily') |float %} + {% set heating_hours = states('sensor.heating_hours_today') |float | round(2) %} + {% set solar = states('sensor.solar_daily') |float | round(3) %} + {% set time = now() %} + + {{time}},{{degreeday}},{{solar}},{{heating_hours}} +``` \ No newline at end of file diff --git a/src/emhass/command_line.py b/src/emhass/command_line.py index add45f71..77665f1d 100644 --- a/src/emhass/command_line.py +++ b/src/emhass/command_line.py @@ -8,18 +8,19 @@ import json import copy import pickle -import time -import numpy as np -import pandas as pd from datetime import datetime, timezone from typing import Optional, Tuple +from importlib.metadata import version +import numpy as np +import pandas as pd + from distutils.util import strtobool -from importlib.metadata import version from emhass.retrieve_hass import RetrieveHass from emhass.forecast import Forecast from emhass.machine_learning_forecaster import MLForecaster from emhass.optimization import Optimization +from emhass.machine_learning_regressor import MLRegressor from emhass import utils @@ -53,8 +54,14 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, emhass_conf, use_secrets=not(get_data_from_file), params=params) # Treat runtimeparams params, retrieve_hass_conf, optim_conf, plant_conf = utils.treat_runtimeparams( - runtimeparams, params, retrieve_hass_conf, - optim_conf, plant_conf, set_type, logger) + runtimeparams, + params, + retrieve_hass_conf, + optim_conf, + plant_conf, + set_type, + logger, + ) # Define main objects rh = RetrieveHass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'], retrieve_hass_conf['freq'], retrieve_hass_conf['time_zone'], @@ -75,35 +82,53 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, retrieve_hass_conf['var_interp'] = [retrieve_hass_conf['var_PV'], retrieve_hass_conf['var_load']] retrieve_hass_conf['var_replace_zero'] = [retrieve_hass_conf['var_PV']] else: - days_list = utils.get_days_list(retrieve_hass_conf['days_to_retrieve']) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): - return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + days_list = utils.get_days_list(retrieve_hass_conf["days_to_retrieve"]) + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): + return False + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # What we 
don't need for this type of action P_PV_forecast, P_load_forecast, df_input_data_dayahead = None, None, None elif set_type == "dayahead-optim": # Get PV and load forecasts - df_weather = fcst.get_weather_forecast(method=optim_conf['weather_forecast_method']) + df_weather = fcst.get_weather_forecast( + method=optim_conf["weather_forecast_method"] + ) P_PV_forecast = fcst.get_power_from_weather(df_weather) P_load_forecast = fcst.get_load_forecast(method=optim_conf['load_forecast_method']) if isinstance(P_load_forecast,bool) and not P_load_forecast: logger.error("Unable to get sensor power photovoltaics, or sensor power load no var loads. Check HA sensors and their daily data") return False - df_input_data_dayahead = pd.DataFrame(np.transpose(np.vstack([P_PV_forecast.values,P_load_forecast.values])), - index=P_PV_forecast.index, - columns=['P_PV_forecast', 'P_load_forecast']) + df_input_data_dayahead = pd.DataFrame( + np.transpose(np.vstack([P_PV_forecast.values, P_load_forecast.values])), + index=P_PV_forecast.index, + columns=["P_PV_forecast", "P_load_forecast"], + ) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] # What we don't need for this type of action df_input_data, days_list = None, None elif set_type == "naive-mpc-optim": @@ -117,14 +142,21 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, retrieve_hass_conf['var_replace_zero'] = [retrieve_hass_conf['var_PV']] else: days_list = utils.get_days_list(1) - var_list = [retrieve_hass_conf['var_load'], retrieve_hass_conf['var_PV']] - if not rh.get_data(days_list, var_list, - minimal_response=False, significant_changes_only=False): + var_list = [retrieve_hass_conf["var_load"], retrieve_hass_conf["var_PV"]] + if not rh.get_data( + days_list, + var_list, + minimal_response=False, + significant_changes_only=False, + ): return False - if not rh.prepare_data(retrieve_hass_conf['var_load'], load_negative = retrieve_hass_conf['load_negative'], - set_zero_min = retrieve_hass_conf['set_zero_min'], - var_replace_zero = retrieve_hass_conf['var_replace_zero'], - var_interp = retrieve_hass_conf['var_interp']): + if not rh.prepare_data( + retrieve_hass_conf["var_load"], + load_negative=retrieve_hass_conf["load_negative"], + set_zero_min=retrieve_hass_conf["set_zero_min"], + var_replace_zero=retrieve_hass_conf["var_replace_zero"], + var_interp=retrieve_hass_conf["var_interp"], + ): return False df_input_data = rh.df_final.copy() # Get PV and load forecasts @@ -136,38 +168,97 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, return False df_input_data_dayahead = pd.concat([P_PV_forecast, P_load_forecast], axis=1) df_input_data_dayahead = utils.set_df_index_freq(df_input_data_dayahead) - df_input_data_dayahead.columns = ['P_PV_forecast', 'P_load_forecast'] + df_input_data_dayahead.columns = ["P_PV_forecast", 
"P_load_forecast"] params = json.loads(params) - if 'prediction_horizon' in params['passed_data'] and params['passed_data']['prediction_horizon'] is not None: - prediction_horizon = params['passed_data']['prediction_horizon'] - df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[df_input_data_dayahead.index[0]:df_input_data_dayahead.index[prediction_horizon-1]] - elif set_type == "forecast-model-fit" or set_type == "forecast-model-predict" or set_type == "forecast-model-tune": + if ( + "prediction_horizon" in params["passed_data"] + and params["passed_data"]["prediction_horizon"] is not None + ): + prediction_horizon = params["passed_data"]["prediction_horizon"] + df_input_data_dayahead = copy.deepcopy(df_input_data_dayahead)[ + df_input_data_dayahead.index[0] : df_input_data_dayahead.index[ + prediction_horizon - 1 + ] + ] + elif ( + set_type == "forecast-model-fit" + or set_type == "forecast-model-predict" + or set_type == "forecast-model-tune" + ): df_input_data_dayahead = None P_PV_forecast, P_load_forecast = None, None params = json.loads(params) # Retrieve data from hass - days_to_retrieve = params['passed_data']['days_to_retrieve'] - model_type = params['passed_data']['model_type'] - var_model = params['passed_data']['var_model'] + days_to_retrieve = params["passed_data"]["days_to_retrieve"] + model_type = params["passed_data"]["model_type"] + var_model = params["passed_data"]["var_model"] if get_data_from_file: days_list = None filename = 'data_train_'+model_type+'.pkl' filename_path = emhass_conf['data_path'] / filename with open(filename_path, 'rb') as inp: df_input_data, _ = pickle.load(inp) - df_input_data = df_input_data[df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve):] + df_input_data = df_input_data[ + df_input_data.index[-1] - pd.offsets.Day(days_to_retrieve) : + ] else: days_list = utils.get_days_list(days_to_retrieve) var_list = [var_model] if not rh.get_data(days_list, var_list): return False df_input_data = rh.df_final.copy() + + elif set_type == "regressor-model-fit" or set_type == "regressor-model-predict": + + df_input_data, df_input_data_dayahead = None, None + P_PV_forecast, P_load_forecast = None, None + params = json.loads(params) + days_list = None + csv_file = params["passed_data"].get("csv_file", None) + if "features" in params["passed_data"]: + features = params["passed_data"]["features"] + if "target" in params["passed_data"]: + target = params["passed_data"]["target"] + if "timestamp" in params["passed_data"]: + timestamp = params["passed_data"]["timestamp"] + if csv_file: + if get_data_from_file: + base_path = emhass_conf["data_path"] # + "/data" + filename_path = pathlib.Path(base_path) / csv_file + + else: + filename_path = emhass_conf["data_path"] / csv_file + + if filename_path.is_file(): + df_input_data = pd.read_csv(filename_path, parse_dates=True) + + else: + logger.error("The CSV file " + csv_file + " was not found in path: " + str(emhass_conf["data_path"])) + return False + #raise ValueError("The CSV file " + csv_file + " was not found.") + required_columns = [] + required_columns.extend(features) + required_columns.append(target) + if timestamp is not None: + required_columns.append(timestamp) + + if not set(required_columns).issubset(df_input_data.columns): + logger.error("The cvs file does not contain the required columns.") + msg = f"CSV file should contain the following columns: {', '.join(required_columns)}" + logger.error(msg) + return False + #raise ValueError( + # msg, + #) + elif set_type == "publish-data": 
df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None else: - logger.error("The passed action argument and hence the set_type parameter for setup is not valid") + logger.error( + "The passed action argument and hence the set_type parameter for setup is not valid", + ) df_input_data, df_input_data_dayahead = None, None P_PV_forecast, P_load_forecast = None, None days_list = None @@ -188,12 +279,17 @@ def set_input_data_dict(emhass_conf: dict, costfun: str, 'days_list': days_list } return input_data_dict - -def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = True, debug: Optional[bool] = False) -> pd.DataFrame: + + +def perfect_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = True, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the perfect forecast optimization routine. - + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -222,18 +318,23 @@ def perfect_forecast_optim(input_data_dict: dict, logger: logging.Logger, opt_res = input_data_dict['opt'].perform_perfect_forecast_optim(df_input_data, input_data_dict['days_list']) # Save CSV file for analysis if save_data_to_file: - filename = 'opt_res_perfect_optim_'+input_data_dict['costfun']+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + filename = "opt_res_perfect_optim_" + input_data_dict["costfun"] + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res - -def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + + +def dayahead_forecast_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the day-ahead optimization routine. 
- + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -262,19 +363,26 @@ def dayahead_forecast_optim(input_data_dict: dict, logger: logging.Logger, df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast']) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res_dayahead.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res_dayahead -def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, debug: Optional[bool] = False) -> pd.DataFrame: + +def naive_mpc_optim( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + debug: Optional[bool] = False, +) -> pd.DataFrame: """ Perform a call to the naive Model Predictive Controller optimization routine. - + :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -299,27 +407,39 @@ def naive_mpc_optim(input_data_dict: dict, logger: logging.Logger, if isinstance(df_input_data_dayahead,bool) and not df_input_data_dayahead: return False # The specifics params for the MPC at runtime - prediction_horizon = input_data_dict['params']['passed_data']['prediction_horizon'] - soc_init = input_data_dict['params']['passed_data']['soc_init'] - soc_final = input_data_dict['params']['passed_data']['soc_final'] - def_total_hours = input_data_dict['params']['passed_data']['def_total_hours'] - def_start_timestep = input_data_dict['params']['passed_data']['def_start_timestep'] - def_end_timestep = input_data_dict['params']['passed_data']['def_end_timestep'] - opt_res_naive_mpc = input_data_dict['opt'].perform_naive_mpc_optim( - df_input_data_dayahead, input_data_dict['P_PV_forecast'], input_data_dict['P_load_forecast'], - prediction_horizon, soc_init, soc_final, def_total_hours, def_start_timestep, def_end_timestep) + prediction_horizon = input_data_dict["params"]["passed_data"]["prediction_horizon"] + soc_init = input_data_dict["params"]["passed_data"]["soc_init"] + soc_final = input_data_dict["params"]["passed_data"]["soc_final"] + def_total_hours = input_data_dict["params"]["passed_data"]["def_total_hours"] + def_start_timestep = input_data_dict["params"]["passed_data"]["def_start_timestep"] + def_end_timestep = input_data_dict["params"]["passed_data"]["def_end_timestep"] + opt_res_naive_mpc = input_data_dict["opt"].perform_naive_mpc_optim( + df_input_data_dayahead, + input_data_dict["P_PV_forecast"], + input_data_dict["P_load_forecast"], + prediction_horizon, + soc_init, + soc_final, + def_total_hours, + def_start_timestep, + def_end_timestep, + ) # Save CSV file for publish_data if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_naive_mpc_'+today.strftime("%Y_%m_%d")+'.csv' - else: # Just save the 
latest optimization results - filename = 'opt_res_latest.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_naive_mpc_" + today.strftime("%Y_%m_%d") + ".csv" + else: # Just save the latest optimization results + filename = "opt_res_latest.csv" if not debug: opt_res_naive_mpc.to_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_label='timestamp') return opt_res_naive_mpc -def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: + +def forecast_model_fit( + input_data_dict: dict, logger: logging.Logger, debug: Optional[bool] = False +) -> Tuple[pd.DataFrame, pd.DataFrame, MLForecaster]: """Perform a forecast model fit from training data retrieved from Home Assistant. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -341,8 +461,9 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, # The ML forecaster object mlf = MLForecaster(data, model_type, var_model, sklearn_model, num_lags, input_data_dict['emhass_conf'], logger) # Fit the ML model - df_pred, df_pred_backtest = mlf.fit(split_date_delta=split_date_delta, - perform_backtest=perform_backtest) + df_pred, df_pred_backtest = mlf.fit( + split_date_delta=split_date_delta, perform_backtest=perform_backtest + ) # Save model if not debug: filename = model_type+'_mlf.pkl' @@ -351,9 +472,14 @@ def forecast_model_fit(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred, df_pred_backtest, mlf -def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, - use_last_window: Optional[bool] = True, debug: Optional[bool] = False, - mlf: Optional[MLForecaster] = None) -> pd.DataFrame: + +def forecast_model_predict( + input_data_dict: dict, + logger: logging.Logger, + use_last_window: Optional[bool] = True, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> pd.DataFrame: r"""Perform a forecast model predict using a previously trained skforecast model. 
:param input_data_dict: A dictionnary with multiple data used by the action functions @@ -380,46 +506,73 @@ def forecast_model_predict(input_data_dict: dict, logger: logging.Logger, filename_path = input_data_dict['emhass_conf']['data_path'] / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this predict method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method", + ) return # Make predictions if use_last_window: - data_last_window = copy.deepcopy(input_data_dict['df_input_data']) + data_last_window = copy.deepcopy(input_data_dict["df_input_data"]) else: data_last_window = None predictions = mlf.predict(data_last_window) # Publish data to a Home Assistant sensor - model_predict_publish = input_data_dict['params']['passed_data']['model_predict_publish'] - model_predict_entity_id = input_data_dict['params']['passed_data']['model_predict_entity_id'] - model_predict_unit_of_measurement = input_data_dict['params']['passed_data']['model_predict_unit_of_measurement'] - model_predict_friendly_name = input_data_dict['params']['passed_data']['model_predict_friendly_name'] - publish_prefix = input_data_dict['params']['passed_data']['publish_prefix'] + model_predict_publish = input_data_dict["params"]["passed_data"][ + "model_predict_publish" + ] + model_predict_entity_id = input_data_dict["params"]["passed_data"][ + "model_predict_entity_id" + ] + model_predict_unit_of_measurement = input_data_dict["params"]["passed_data"][ + "model_predict_unit_of_measurement" + ] + model_predict_friendly_name = input_data_dict["params"]["passed_data"][ + "model_predict_friendly_name" + ] + publish_prefix = input_data_dict["params"]["passed_data"]["publish_prefix"] if model_predict_publish is True: # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = predictions.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = predictions.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = predictions.index.get_indexer([now_precise], method="ffill")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = predictions.index.get_indexer([now_precise], method="bfill")[ + 0 + ] if idx_closest == -1: - idx_closest = predictions.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = predictions.index.get_indexer( + [now_precise], method="nearest" + )[0] # Publish Load forecast - input_data_dict['rh'].post_data(predictions, idx_closest, - model_predict_entity_id, - model_predict_unit_of_measurement, - 
model_predict_friendly_name, - type_var = 'mlforecaster', - publish_prefix=publish_prefix) + input_data_dict["rh"].post_data( + predictions, + idx_closest, + model_predict_entity_id, + model_predict_unit_of_measurement, + model_predict_friendly_name, + type_var="mlforecaster", + publish_prefix=publish_prefix, + ) return predictions -def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, - debug: Optional[bool] = False, mlf: Optional[MLForecaster] = None - ) -> Tuple[pd.DataFrame, MLForecaster]: + +def forecast_model_tune( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlf: Optional[MLForecaster] = None, +) -> Tuple[pd.DataFrame, MLForecaster]: """Tune a forecast model hyperparameters using bayesian optimization. :param input_data_dict: A dictionnary with multiple data used by the action functions @@ -440,10 +593,12 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, filename_path = input_data_dict['emhass_conf']['data_path'] / filename if not debug: if filename_path.is_file(): - with open(filename_path, 'rb') as inp: + with open(filename_path, "rb") as inp: mlf = pickle.load(inp) else: - logger.error("The ML forecaster file was not found, please run a model fit method before this tune method") + logger.error( + "The ML forecaster file was not found, please run a model fit method before this tune method", + ) return None, None # Tune the model df_pred_optim = mlf.tune(debug=debug) @@ -455,12 +610,139 @@ def forecast_model_tune(input_data_dict: dict, logger: logging.Logger, pickle.dump(mlf, outp, pickle.HIGHEST_PROTOCOL) return df_pred_optim, mlf -def publish_data(input_data_dict: dict, logger: logging.Logger, - save_data_to_file: Optional[bool] = False, - opt_res_latest: Optional[pd.DataFrame] = None) -> pd.DataFrame: + +def regressor_model_fit( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, +) -> None: + """Perform a forecast model fit from training data retrieved from Home Assistant. + + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional """ - Publish the data obtained from the optimization results. 
+ data = copy.deepcopy(input_data_dict["df_input_data"]) + if "model_type" in input_data_dict["params"]["passed_data"]: + model_type = input_data_dict["params"]["passed_data"]["model_type"] + else: + logger.error("parameter: 'model_type' not passed") + return False + if "regression_model" in input_data_dict["params"]["passed_data"]: + regression_model = input_data_dict["params"]["passed_data"]["regression_model"] + else: + logger.error("parameter: 'regression_model' not passed") + return False + if "features" in input_data_dict["params"]["passed_data"]: + features = input_data_dict["params"]["passed_data"]["features"] + else: + logger.error("parameter: 'features' not passed") + return False + if "target" in input_data_dict["params"]["passed_data"]: + target = input_data_dict["params"]["passed_data"]["target"] + else: + logger.error("parameter: 'target' not passed") + return False + if "timestamp" in input_data_dict["params"]["passed_data"]: + timestamp = input_data_dict["params"]["passed_data"]["timestamp"] + else: + logger.error("parameter: 'timestamp' not passed") + return False + if "date_features" in input_data_dict["params"]["passed_data"]: + date_features = input_data_dict["params"]["passed_data"]["date_features"] + else: + logger.error("parameter: 'date_features' not passed") + return False + + # The MLRegressor object + mlr = MLRegressor( + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, + ) + # Fit the ML model + mlr.fit(date_features=date_features) + # Save model + if not debug: + filename = model_type + "_mlr.pkl" + filename_path = input_data_dict["emhass_conf"]["data_path"] / filename + with open(filename_path, "wb") as outp: + pickle.dump(mlr, outp, pickle.HIGHEST_PROTOCOL) + return mlr + + +def regressor_model_predict( + input_data_dict: dict, + logger: logging.Logger, + debug: Optional[bool] = False, + mlr: Optional[MLRegressor] = None, +) -> None: + """Perform a prediction from csv file. 
+ + :param input_data_dict: A dictionnary with multiple data used by the action functions + :type input_data_dict: dict + :param logger: The passed logger object + :type logger: logging.Logger + :param debug: True to debug, useful for unit testing, defaults to False + :type debug: Optional[bool], optional + """ + if "model_type" in input_data_dict["params"]["passed_data"]: + model_type = input_data_dict["params"]["passed_data"]["model_type"] + else: + logger.error("parameter: 'model_type' not passed") + return False + filename = model_type + "_mlr.pkl" + filename_path = input_data_dict["emhass_conf"]["data_path"] / filename + if not debug: + if filename_path.is_file(): + with open(filename_path, "rb") as inp: + mlr = pickle.load(inp) + else: + logger.error( + "The ML forecaster file was not found, please run a model fit method before this predict method", + ) + return False + if "new_values" in input_data_dict["params"]["passed_data"]: + new_values = input_data_dict["params"]["passed_data"]["new_values"] + else: + logger.error("parameter: 'new_values' not passed") + return False + # Predict from csv file + prediction = mlr.predict(new_values) + mlr_predict_entity_id = input_data_dict["params"]["passed_data"].get("mlr_predict_entity_id","sensor.mlr_predict") + mlr_predict_unit_of_measurement = input_data_dict["params"]["passed_data"].get("mlr_predict_unit_of_measurement","h") + mlr_predict_friendly_name = input_data_dict["params"]["passed_data"].get("mlr_predict_friendly_name","mlr predictor") + # Publish prediction + idx = 0 + if not debug: + input_data_dict["rh"].post_data( + prediction, + idx, + mlr_predict_entity_id, + mlr_predict_unit_of_measurement, + mlr_predict_friendly_name, + type_var="mlregressor", + ) + return prediction + + +def publish_data( + input_data_dict: dict, + logger: logging.Logger, + save_data_to_file: Optional[bool] = False, + opt_res_latest: Optional[pd.DataFrame] = None, +) -> pd.DataFrame: + """ + Publish the data obtained from the optimization results. 
+ :param input_data_dict: A dictionnary with multiple data used by the action functions :type input_data_dict: dict :param logger: The passed logger object @@ -474,10 +756,12 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, logger.info("Publishing data to HASS instance") # Check if a day ahead optimization has been performed (read CSV file) if save_data_to_file: - today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) - filename = 'opt_res_dayahead_'+today.strftime("%Y_%m_%d")+'.csv' + today = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + filename = "opt_res_dayahead_" + today.strftime("%Y_%m_%d") + ".csv" else: - filename = 'opt_res_latest.csv' + filename = "opt_res_latest.csv" if opt_res_latest is None: if not os.path.isfile(input_data_dict['emhass_conf']['data_path'] / filename): logger.error("File not found error, run an optimization task first.") @@ -485,144 +769,191 @@ def publish_data(input_data_dict: dict, logger: logging.Logger, else: opt_res_latest = pd.read_csv(input_data_dict['emhass_conf']['data_path'] / filename, index_col='timestamp') opt_res_latest.index = pd.to_datetime(opt_res_latest.index) - opt_res_latest.index.freq = input_data_dict['retrieve_hass_conf']['freq'] + opt_res_latest.index.freq = input_data_dict["retrieve_hass_conf"]["freq"] # Estimate the current index - now_precise = datetime.now(input_data_dict['retrieve_hass_conf']['time_zone']).replace(second=0, microsecond=0) - if input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'nearest': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'first': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='ffill')[0] - elif input_data_dict['retrieve_hass_conf']['method_ts_round'] == 'last': - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='bfill')[0] + now_precise = datetime.now( + input_data_dict["retrieve_hass_conf"]["time_zone"] + ).replace(second=0, microsecond=0) + if input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "nearest": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "first": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="ffill")[0] + elif input_data_dict["retrieve_hass_conf"]["method_ts_round"] == "last": + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="bfill")[0] if idx_closest == -1: - idx_closest = opt_res_latest.index.get_indexer([now_precise], method='nearest')[0] + idx_closest = opt_res_latest.index.get_indexer([now_precise], method="nearest")[ + 0 + ] # Publish the data - params = json.loads(input_data_dict['params']) - publish_prefix = params['passed_data']['publish_prefix'] + params = json.loads(input_data_dict["params"]) + publish_prefix = params["passed_data"]["publish_prefix"] # Publish PV forecast - custom_pv_forecast_id = params['passed_data']['custom_pv_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_PV'], idx_closest, - custom_pv_forecast_id["entity_id"], - custom_pv_forecast_id["unit_of_measurement"], - custom_pv_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) + custom_pv_forecast_id = params["passed_data"]["custom_pv_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_PV"], + idx_closest, + 
custom_pv_forecast_id["entity_id"], + custom_pv_forecast_id["unit_of_measurement"], + custom_pv_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) # Publish Load forecast - custom_load_forecast_id = params['passed_data']['custom_load_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_Load'], idx_closest, - custom_load_forecast_id["entity_id"], - custom_load_forecast_id["unit_of_measurement"], - custom_load_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = ['P_PV', 'P_Load'] + custom_load_forecast_id = params["passed_data"]["custom_load_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_Load"], + idx_closest, + custom_load_forecast_id["entity_id"], + custom_load_forecast_id["unit_of_measurement"], + custom_load_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = ["P_PV", "P_Load"] # Publish deferrable loads - custom_deferrable_forecast_id = params['passed_data']['custom_deferrable_forecast_id'] - for k in range(input_data_dict['opt'].optim_conf['num_def_loads']): + custom_deferrable_forecast_id = params["passed_data"][ + "custom_deferrable_forecast_id" + ] + for k in range(input_data_dict["opt"].optim_conf["num_def_loads"]): if "P_deferrable{}".format(k) not in opt_res_latest.columns: - logger.error("P_deferrable{}".format(k)+" was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + logger.error( + "P_deferrable{}".format(k) + + " was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.", + ) else: - input_data_dict['rh'].post_data(opt_res_latest["P_deferrable{}".format(k)], idx_closest, - custom_deferrable_forecast_id[k]["entity_id"], - custom_deferrable_forecast_id[k]["unit_of_measurement"], - custom_deferrable_forecast_id[k]["friendly_name"], - type_var = 'deferrable', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_deferrable{}".format(k)] + input_data_dict["rh"].post_data( + opt_res_latest["P_deferrable{}".format(k)], + idx_closest, + custom_deferrable_forecast_id[k]["entity_id"], + custom_deferrable_forecast_id[k]["unit_of_measurement"], + custom_deferrable_forecast_id[k]["friendly_name"], + type_var="deferrable", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_deferrable{}".format(k)] # Publish battery power - if input_data_dict['opt'].optim_conf['set_use_battery']: - if 'P_batt' not in opt_res_latest.columns: - logger.error("P_batt was not found in results DataFrame. Optimization task may need to be relaunched or it did not converge to a solution.") + if input_data_dict["opt"].optim_conf["set_use_battery"]: + if "P_batt" not in opt_res_latest.columns: + logger.error( + "P_batt was not found in results DataFrame. 
Optimization task may need to be relaunched or it did not converge to a solution.", + ) else: - custom_batt_forecast_id = params['passed_data']['custom_batt_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_batt'], idx_closest, - custom_batt_forecast_id["entity_id"], - custom_batt_forecast_id["unit_of_measurement"], - custom_batt_forecast_id["friendly_name"], - type_var = 'batt', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_batt"] - custom_batt_soc_forecast_id = params['passed_data']['custom_batt_soc_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['SOC_opt']*100, idx_closest, - custom_batt_soc_forecast_id["entity_id"], - custom_batt_soc_forecast_id["unit_of_measurement"], - custom_batt_soc_forecast_id["friendly_name"], - type_var = 'SOC', - publish_prefix = publish_prefix) - cols_published = cols_published+["SOC_opt"] + custom_batt_forecast_id = params["passed_data"]["custom_batt_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_batt"], + idx_closest, + custom_batt_forecast_id["entity_id"], + custom_batt_forecast_id["unit_of_measurement"], + custom_batt_forecast_id["friendly_name"], + type_var="batt", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_batt"] + custom_batt_soc_forecast_id = params["passed_data"][ + "custom_batt_soc_forecast_id" + ] + input_data_dict["rh"].post_data( + opt_res_latest["SOC_opt"] * 100, + idx_closest, + custom_batt_soc_forecast_id["entity_id"], + custom_batt_soc_forecast_id["unit_of_measurement"], + custom_batt_soc_forecast_id["friendly_name"], + type_var="SOC", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["SOC_opt"] # Publish grid power - custom_grid_forecast_id = params['passed_data']['custom_grid_forecast_id'] - input_data_dict['rh'].post_data(opt_res_latest['P_grid'], idx_closest, - custom_grid_forecast_id["entity_id"], - custom_grid_forecast_id["unit_of_measurement"], - custom_grid_forecast_id["friendly_name"], - type_var = 'power', - publish_prefix = publish_prefix) - cols_published = cols_published+["P_grid"] + custom_grid_forecast_id = params["passed_data"]["custom_grid_forecast_id"] + input_data_dict["rh"].post_data( + opt_res_latest["P_grid"], + idx_closest, + custom_grid_forecast_id["entity_id"], + custom_grid_forecast_id["unit_of_measurement"], + custom_grid_forecast_id["friendly_name"], + type_var="power", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["P_grid"] # Publish total value of cost function - custom_cost_fun_id = params['passed_data']['custom_cost_fun_id'] - col_cost_fun = [i for i in opt_res_latest.columns if 'cost_fun_' in i] - input_data_dict['rh'].post_data(opt_res_latest[col_cost_fun], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'cost_fun', - publish_prefix = publish_prefix) + custom_cost_fun_id = params["passed_data"]["custom_cost_fun_id"] + col_cost_fun = [i for i in opt_res_latest.columns if "cost_fun_" in i] + input_data_dict["rh"].post_data( + opt_res_latest[col_cost_fun], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="cost_fun", + publish_prefix=publish_prefix, + ) # Publish the optimization status - custom_cost_fun_id = params['passed_data']['custom_optim_status_id'] + custom_cost_fun_id = params["passed_data"]["custom_optim_status_id"] if "optim_status" not in 
opt_res_latest: - opt_res_latest["optim_status"] = 'Optimal' - logger.warning("no optim_status in opt_res_latest, run an optimization task first") - input_data_dict['rh'].post_data(opt_res_latest['optim_status'], idx_closest, - custom_cost_fun_id["entity_id"], - custom_cost_fun_id["unit_of_measurement"], - custom_cost_fun_id["friendly_name"], - type_var = 'optim_status', - publish_prefix = publish_prefix) - cols_published = cols_published+["optim_status"] + opt_res_latest["optim_status"] = "Optimal" + logger.warning( + "no optim_status in opt_res_latest, run an optimization task first", + ) + input_data_dict["rh"].post_data( + opt_res_latest["optim_status"], + idx_closest, + custom_cost_fun_id["entity_id"], + custom_cost_fun_id["unit_of_measurement"], + custom_cost_fun_id["friendly_name"], + type_var="optim_status", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["optim_status"] # Publish unit_load_cost - custom_unit_load_cost_id = params['passed_data']['custom_unit_load_cost_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_load_cost'], idx_closest, - custom_unit_load_cost_id["entity_id"], - custom_unit_load_cost_id["unit_of_measurement"], - custom_unit_load_cost_id["friendly_name"], - type_var = 'unit_load_cost', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_load_cost"] + custom_unit_load_cost_id = params["passed_data"]["custom_unit_load_cost_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_load_cost"], + idx_closest, + custom_unit_load_cost_id["entity_id"], + custom_unit_load_cost_id["unit_of_measurement"], + custom_unit_load_cost_id["friendly_name"], + type_var="unit_load_cost", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_load_cost"] # Publish unit_prod_price - custom_unit_prod_price_id = params['passed_data']['custom_unit_prod_price_id'] - input_data_dict['rh'].post_data(opt_res_latest['unit_prod_price'], idx_closest, - custom_unit_prod_price_id["entity_id"], - custom_unit_prod_price_id["unit_of_measurement"], - custom_unit_prod_price_id["friendly_name"], - type_var = 'unit_prod_price', - publish_prefix = publish_prefix) - cols_published = cols_published+["unit_prod_price"] + custom_unit_prod_price_id = params["passed_data"]["custom_unit_prod_price_id"] + input_data_dict["rh"].post_data( + opt_res_latest["unit_prod_price"], + idx_closest, + custom_unit_prod_price_id["entity_id"], + custom_unit_prod_price_id["unit_of_measurement"], + custom_unit_prod_price_id["friendly_name"], + type_var="unit_prod_price", + publish_prefix=publish_prefix, + ) + cols_published = cols_published + ["unit_prod_price"] # Create a DF resuming what has been published opt_res = opt_res_latest[cols_published].loc[[opt_res_latest.index[idx_closest]]] return opt_res - - + + def main(): r"""Define the main command line entry function. This function may take several arguments as inputs. 
You can type `emhass --help` to see the list of options: - + - action: Set the desired action, options are: perfect-optim, dayahead-optim, naive-mpc-optim, publish-data, forecast-model-fit, forecast-model-predict, forecast-model-tune - + - config: Define path to the config.yaml file - + - costfun: Define the type of cost function, options are: profit, cost, self-consumption - + - log2file: Define if we should log to a file or not - + - params: Configuration parameters passed from data/options.json if using the add-on - + - runtimeparams: Pass runtime optimization parameters as dictionnary - + - debug: Use True for testing purposes - + """ # Parsing arguments parser = argparse.ArgumentParser() @@ -683,39 +1014,66 @@ def main(): # Additionnal argument try: - parser.add_argument('--version', action='version', version='%(prog)s '+version('emhass')) + parser.add_argument( + "--version", + action="version", + version="%(prog)s " + version("emhass"), + ) args = parser.parse_args() except Exception: - logger.info("Version not found for emhass package. Or importlib exited with PackageNotFoundError.") + logger.info( + "Version not found for emhass package. Or importlib exited with PackageNotFoundError.", + ) # Setup parameters input_data_dict = set_input_data_dict(emhass_conf, args.costfun, args.params, args.runtimeparams, args.action, logger, args.debug) # Perform selected action - if args.action == 'perfect-optim': + if args.action == "perfect-optim": opt_res = perfect_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'dayahead-optim': + elif args.action == "dayahead-optim": opt_res = dayahead_forecast_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'naive-mpc-optim': + elif args.action == "naive-mpc-optim": opt_res = naive_mpc_optim(input_data_dict, logger, debug=args.debug) - elif args.action == 'forecast-model-fit': - df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) + elif args.action == "forecast-model-fit": + df_fit_pred, df_fit_pred_backtest, mlf = forecast_model_fit( + input_data_dict, logger, debug=args.debug + ) opt_res = None - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred = forecast_model_predict(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred = forecast_model_predict( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) opt_res = None - elif args.action == 'forecast-model-tune': + elif args.action == "forecast-model-tune": if args.debug: _, _, mlf = forecast_model_fit(input_data_dict, logger, debug=args.debug) else: mlf = None - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=args.debug, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=args.debug, mlf=mlf + ) + opt_res = None + elif args.action == "regressor-model-fit": + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + opt_res = None + elif args.action == "regressor-model-predict": + if args.debug: + mlr = regressor_model_fit(input_data_dict, logger, debug=args.debug) + else: + mlr = None + prediction = regressor_model_predict( + input_data_dict, + logger, + debug=args.debug, + mlr=mlr, + ) opt_res = None - elif args.action == 'publish-data': + elif args.action == "publish-data": opt_res = publish_data(input_data_dict, logger) else: logger.error("The passed action 
argument is not valid") @@ -725,17 +1083,26 @@ def main(): # Flush the logger ch.close() logger.removeHandler(ch) - if args.action == 'perfect-optim' or args.action == 'dayahead-optim' or \ - args.action == 'naive-mpc-optim' or args.action == 'publish-data': + if ( + args.action == "perfect-optim" + or args.action == "dayahead-optim" + or args.action == "naive-mpc-optim" + or args.action == "publish-data" + ): return opt_res - elif args.action == 'forecast-model-fit': + elif args.action == "forecast-model-fit": return df_fit_pred, df_fit_pred_backtest, mlf - elif args.action == 'forecast-model-predict': + elif args.action == "forecast-model-predict": return df_pred - elif args.action == 'forecast-model-tune': + elif args.action == "regressor-model-fit": + return mlr + elif args.action == "regressor-model-predict": + return prediction + elif args.action == "forecast-model-tune": return df_pred_optim, mlf else: return opt_res -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/src/emhass/machine_learning_regressor.py b/src/emhass/machine_learning_regressor.py new file mode 100644 index 00000000..f0d3c532 --- /dev/null +++ b/src/emhass/machine_learning_regressor.py @@ -0,0 +1,290 @@ +"""Machine learning regressor module.""" + +from __future__ import annotations + +import copy +import time +import warnings +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd +from sklearn.ensemble import ( + AdaBoostRegressor, + GradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.linear_model import Lasso, LinearRegression, Ridge +from sklearn.metrics import r2_score +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler + +if TYPE_CHECKING: + import logging + +warnings.filterwarnings("ignore", category=DeprecationWarning) + +REGRESSION_METHODS = { + "LinearRegression": { + "model": LinearRegression(), + "param_grid": { + "linearregression__fit_intercept": [True, False], + "linearregression__positive": [True, False], + }, + }, + "RidgeRegression": { + "model": Ridge(), + "param_grid": {"ridge__alpha": [0.1, 1.0, 10.0]}, + }, + "LassoRegression": { + "model": Lasso(), + "param_grid": {"lasso__alpha": [0.1, 1.0, 10.0]}, + }, + "RandomForestRegression": { + "model": RandomForestRegressor(), + "param_grid": {"randomforestregressor__n_estimators": [50, 100, 200]}, + }, + "GradientBoostingRegression": { + "model": GradientBoostingRegressor(), + "param_grid": { + "gradientboostingregressor__n_estimators": [50, 100, 200], + "gradientboostingregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, + "AdaBoostRegression": { + "model": AdaBoostRegressor(), + "param_grid": { + "adaboostregressor__n_estimators": [50, 100, 200], + "adaboostregressor__learning_rate": [0.01, 0.1, 0.2], + }, + }, +} + + +class MLRegressor: + r"""A forecaster class using machine learning models. + + This class uses the `sklearn` module and the machine learning models are \ + from `scikit-learn`. + + It exposes two main methods: + + - `fit`: to train a model with the passed data. + + - `predict`: to obtain a forecast from a pre-trained model. + + """ + + def __init__( # noqa: PLR0913 + self: MLRegressor, + data: pd.DataFrame, + model_type: str, + regression_model: str, + features: list, + target: str, + timestamp: str, + logger: logging.Logger, + ) -> None: + r"""Define constructor for the forecast class. 
+ + :param data: The data that will be used for train/test + :type data: pd.DataFrame + :param model_type: A unique name defining this model and useful to identify \ + for what it will be used for. + :type model_type: str + :param regression_model: The model that will be used. For now only \ + this options are possible: `LinearRegression`, `RidgeRegression`, \ + `LassoRegression`, `RandomForestRegression`, \ + `GradientBoostingRegression` and `AdaBoostRegression`. + :type regression_model: str + :param features: A list of features. \ + Example: [`solar_production`, `degree_days`]. + :type features: list + :param target: The target(to be predicted). \ + Example: `heating_hours`. + :type target: str + :param timestamp: If defined, the column key that has to be used of timestamp. + :type timestamp: str + :param logger: The passed logger object + :type logger: logging.Logger + """ + self.data = data + self.features = features + self.target = target + self.timestamp = timestamp + self.model_type = model_type + self.regression_model = regression_model + self.logger = logger + self.data = self.data.sort_index() + self.data = self.data[~self.data.index.duplicated(keep="first")] + self.data_exo = None + self.steps = None + self.model = None + self.grid_search = None + + @staticmethod + def add_date_features( + data: pd.DataFrame, + date_features: list, + timestamp: str, + ) -> pd.DataFrame: + """Add date features from the input DataFrame timestamp. + + :param data: The input DataFrame + :type data: pd.DataFrame + :param timestamp: The column containing the timestamp + :type timestamp: str + :return: The DataFrame with the added features + :rtype: pd.DataFrame + """ + df = copy.deepcopy(data) # noqa: PD901 + df[timestamp] = pd.to_datetime(df["timestamp"]) + if "year" in date_features: + df["year"] = [i.year for i in df["timestamp"]] + if "month" in date_features: + df["month"] = [i.month for i in df["timestamp"]] + if "day_of_week" in date_features: + df["day_of_week"] = [i.dayofweek for i in df["timestamp"]] + if "day_of_year" in date_features: + df["day_of_year"] = [i.dayofyear for i in df["timestamp"]] + if "day" in date_features: + df["day"] = [i.day for i in df["timestamp"]] + if "hour" in date_features: + df["hour"] = [i.day for i in df["timestamp"]] + + return df + + def get_regression_model(self: MLRegressor) -> tuple[str, str]: + """Get the base model and parameter grid for the specified regression model. + + Returns a tuple containing the base model and parameter grid corresponding to \ + the specified regression model. + + Args: + ---- + self: The instance of the MLRegressor class. + + Returns: + ------- + A tuple containing the base model and parameter grid. 
+ + """ + if self.regression_model == "LinearRegression": + base_model = REGRESSION_METHODS["LinearRegression"]["model"] + param_grid = REGRESSION_METHODS["LinearRegression"]["param_grid"] + elif self.regression_model == "RidgeRegression": + base_model = REGRESSION_METHODS["RidgeRegression"]["model"] + param_grid = REGRESSION_METHODS["RidgeRegression"]["param_grid"] + elif self.regression_model == "LassoRegression": + base_model = REGRESSION_METHODS["LassoRegression"]["model"] + param_grid = REGRESSION_METHODS["LassoRegression"]["param_grid"] + elif self.regression_model == "RandomForestRegression": + base_model = REGRESSION_METHODS["RandomForestRegression"]["model"] + param_grid = REGRESSION_METHODS["RandomForestRegression"]["param_grid"] + elif self.regression_model == "GradientBoostingRegression": + base_model = REGRESSION_METHODS["GradientBoostingRegression"]["model"] + param_grid = REGRESSION_METHODS["GradientBoostingRegression"]["param_grid"] + elif self.regression_model == "AdaBoostRegression": + base_model = REGRESSION_METHODS["AdaBoostRegression"]["model"] + param_grid = REGRESSION_METHODS["AdaBoostRegression"]["param_grid"] + else: + self.logger.error( + "Passed model %s is not valid", + self.regression_model, + ) + return None + return base_model, param_grid + + def fit(self: MLRegressor, date_features: list | None = None) -> None: + """Fit the model using the provided data. + + :param date_features: A list of 'date_features' to take into account when \ + fitting the model. + :type data: list + """ + self.logger.info("Performing a MLRegressor fit for %s", self.model_type) + self.data_exo = pd.DataFrame(self.data) + self.data_exo[self.features] = self.data[self.features] + self.data_exo[self.target] = self.data[self.target] + keep_columns = [] + keep_columns.extend(self.features) + if self.timestamp is not None: + keep_columns.append(self.timestamp) + keep_columns.append(self.target) + self.data_exo = self.data_exo[self.data_exo.columns.intersection(keep_columns)] + self.data_exo = self.data_exo.reset_index(drop=True) + if date_features is not None: + if self.timestamp is not None: + self.data_exo = MLRegressor.add_date_features( + self.data_exo, + date_features, + self.timestamp, + ) + else: + self.logger.error( + "If no timestamp provided, you can't use date_features, going \ + further without date_features.", + ) + + y = self.data_exo[self.target] + self.data_exo = self.data_exo.drop(self.target, axis=1) + if self.timestamp is not None: + self.data_exo = self.data_exo.drop(self.timestamp, axis=1) + X = self.data_exo # noqa: N806 + + X_train, X_test, y_train, y_test = train_test_split( # noqa: N806 + X, + y, + test_size=0.2, + random_state=42, + ) + + self.steps = len(X_test) + + base_model, param_grid = self.get_regression_model() + + self.model = make_pipeline(StandardScaler(), base_model) + + # Create a grid search object + self.grid_search = GridSearchCV( + self.model, + param_grid, + cv=5, + scoring="neg_mean_squared_error", + refit=True, + verbose=0, + n_jobs=-1, + ) + + # Fit the grid search object to the data + self.logger.info("Training a %s model", self.regression_model) + start_time = time.time() + self.grid_search.fit(X_train.values, y_train.values) + self.logger.info("Elapsed time for model fit: %s", time.time() - start_time) + + self.model = self.grid_search.best_estimator_ + + # Make predictions + predictions = self.model.predict(X_test.values) + predictions = pd.Series(predictions, index=X_test.index) + pred_metric = r2_score(y_test, predictions) + 
self.logger.info( + "Prediction R2 score of fitted model on test data: %s", + pred_metric, + ) + + def predict(self: MLRegressor, new_values: list) -> np.ndarray: + """Predict a new value. + + :param new_values: The new values for the features \ + (in the same order as the features list). \ + Example: [2.24, 5.68]. + :type new_values: list + :return: The np.ndarray containing the predicted value. + :rtype: np.ndarray + """ + self.logger.info("Performing a prediction for %s", self.model_type) + new_values = np.array([new_values]) + + return self.model.predict(new_values) diff --git a/src/emhass/retrieve_hass.py b/src/emhass/retrieve_hass.py index f5fa82de..4bac582f 100644 --- a/src/emhass/retrieve_hass.py +++ b/src/emhass/retrieve_hass.py @@ -35,7 +35,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, get_data_from_file: Optional[bool] = False) -> None: """ Define constructor for RetrieveHass class. - + :param hass_url: The URL of the Home Assistant instance :type hass_url: str :param long_lived_token: The long lived token retrieved from the configuration pane @@ -50,7 +50,7 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, :type emhass_conf: dict :param logger: The passed logger object :type logger: logging object - :param get_data_from_file: Select if data should be retrieved from a + :param get_data_from_file: Select if data should be retrieved from a previously saved pickle useful for testing or directly from connection to hass database :type get_data_from_file: bool, optional @@ -65,9 +65,14 @@ def __init__(self, hass_url: str, long_lived_token: str, freq: pd.Timedelta, self.logger = logger self.get_data_from_file = get_data_from_file - def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: Optional[bool] = False, - significant_changes_only: Optional[bool] = False, - test_url: Optional[str] = 'empty') -> None: + def get_data( + self, + days_list: pd.date_range, + var_list: list, + minimal_response: Optional[bool] = False, + significant_changes_only: Optional[bool] = False, + test_url: Optional[str] = "empty", + ) -> None: r""" Retrieve the actual data from hass. 
@@ -92,20 +97,36 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O """ self.logger.info("Retrieve hass get data method initiated...") self.df_final = pd.DataFrame() - x = 0 #iterate based on days + x = 0 # iterate based on days # Looping on each day from days list for day in days_list: - + for i, var in enumerate(var_list): - - if test_url == 'empty': - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/history/period/"+day.isoformat()+"?filter_entity_id="+var - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/history/period/"+day.isoformat()+"?filter_entity_id="+var - if minimal_response: # A support for minimal response + + if test_url == "empty": + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = ( + self.hass_url + + "/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + else: # Otherwise the Home Assistant Core API it is + url = ( + self.hass_url + + "api/history/period/" + + day.isoformat() + + "?filter_entity_id=" + + var + ) + if minimal_response: # A support for minimal response url = url + "?minimal_response" - if significant_changes_only: # And for signicant changes only (check the HASS restful API for more info) + if ( + significant_changes_only + ): # And for signicant changes only (check the HASS restful API for more info) url = url + "?significant_changes_only" else: url = test_url @@ -116,21 +137,29 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O try: response = get(url, headers=headers) except Exception: - self.logger.error("Unable to access Home Assistance instance, check URL") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistance instance, check URL" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'" + ) return False else: if response.status_code == 401: - self.logger.error("Unable to access Home Assistance instance, TOKEN/KEY") - self.logger.error("If using addon, try setting url and token to 'empty'") + self.logger.error( + "Unable to access Home Assistance instance, TOKEN/KEY" + ) + self.logger.error( + "If using addon, try setting url and token to 'empty'" + ) return False if response.status_code > 299: return f"Request Get Error: {response.status_code}" - '''import bz2 # Uncomment to save a serialized data for tests + """import bz2 # Uncomment to save a serialized data for tests import _pickle as cPickle with bz2.BZ2File("data/test_response_get_data_get_method.pbz2", "w") as f: - cPickle.dump(response, f)''' - try: # Sometimes when there are connection problems we need to catch empty retrieved json + cPickle.dump(response, f)""" + try: # Sometimes when there are connection problems we need to catch empty retrieved json data = response.json()[0] except IndexError: if x == 0: @@ -142,7 +171,11 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O # self.logger.info(str(df_raw)) if len(df_raw) == 0: if x == 0: - self.logger.error("The retrieved Dataframe is empty, A sensor:" + var + " may have 0 days of history or passed sensor may not be correct") + self.logger.error( + "The retrieved Dataframe is empty, A sensor:" + + var + + " may have 0 days of history or passed sensor may not be correct" + ) else: self.logger.error("Retrieved empty Dataframe for day:"+ str(day) +", days_to_retrieve may be 
larger than the recorded history of sensor:" + var + " (check your recorder settings)") return False @@ -156,10 +189,17 @@ def get_data(self, days_list: pd.date_range, var_list: list, minimal_response: O format='%Y-%d-%m %H:%M').round(self.freq, ambiguous='infer', nonexistent='shift_forward') df_day = pd.DataFrame(index = ts) # Caution with undefined string data: unknown, unavailable, etc. - df_tp = df_raw.copy()[['state']].replace( - ['unknown', 'unavailable', ''], np.nan).astype(float).rename(columns={'state': var}) + df_tp = ( + df_raw.copy()[["state"]] + .replace(["unknown", "unavailable", ""], np.nan) + .astype(float) + .rename(columns={"state": var}) + ) # Setting index, resampling and concatenation - df_tp.set_index(pd.to_datetime(df_raw['last_changed'], format="ISO8601"), inplace=True) + df_tp.set_index( + pd.to_datetime(df_raw["last_changed"], format="ISO8601"), + inplace=True, + ) df_tp = df_tp.resample(self.freq).mean() df_day = pd.concat([df_day, df_tp], axis=1) self.df_final = pd.concat([self.df_final, df_day], axis=0) @@ -196,18 +236,24 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set """ try: - if load_negative: # Apply the correct sign to load power - self.df_final[var_load+'_positive'] = -self.df_final[var_load] + if load_negative: # Apply the correct sign to load power + self.df_final[var_load + "_positive"] = -self.df_final[var_load] else: - self.df_final[var_load+'_positive'] = self.df_final[var_load] + self.df_final[var_load + "_positive"] = self.df_final[var_load] self.df_final.drop([var_load], inplace=True, axis=1) except KeyError: - self.logger.error("Variable "+var_load+" was not found. This is typically because no data could be retrieved from Home Assistant") + self.logger.error( + "Variable " + + var_load + + " was not found. 
This is typically because no data could be retrieved from Home Assistant" + ) return False except ValueError: - self.logger.error("sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same") - return False - if set_zero_min: # Apply minimum values + self.logger.error( + "sensor.power_photovoltaics and sensor.power_load_no_var_loads should not be the same" + ) + return False + if set_zero_min: # Apply minimum values self.df_final.clip(lower=0.0, inplace=True, axis=1) self.df_final.replace(to_replace=0.0, value=np.nan, inplace=True) new_var_replace_zero = [] @@ -215,59 +261,74 @@ def prepare_data(self, var_load: str, load_negative: Optional[bool] = False, set # Just changing the names of variables to contain the fact that they are considered positive if var_replace_zero is not None: for string in var_replace_zero: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_replace_zero.append(new_string) else: new_var_replace_zero = None if var_interp is not None: for string in var_interp: - new_string = string.replace(var_load, var_load+'_positive') + new_string = string.replace(var_load, var_load + "_positive") new_var_interp.append(new_string) else: new_var_interp = None # Treating NaN replacement: either by zeros or by linear interpolation if new_var_replace_zero is not None: - self.df_final[new_var_replace_zero] = self.df_final[new_var_replace_zero].fillna(0.0) + self.df_final[new_var_replace_zero] = self.df_final[ + new_var_replace_zero + ].fillna(0.0) if new_var_interp is not None: self.df_final[new_var_interp] = self.df_final[new_var_interp].interpolate( - method='linear', axis=0, limit=None) + method="linear", axis=0, limit=None + ) self.df_final[new_var_interp] = self.df_final[new_var_interp].fillna(0.0) # Setting the correct time zone on DF index if self.time_zone is not None: self.df_final.index = self.df_final.index.tz_convert(self.time_zone) # Drop datetimeindex duplicates on final DF - self.df_final = self.df_final[~self.df_final.index.duplicated(keep='first')] + self.df_final = self.df_final[~self.df_final.index.duplicated(keep="first")] return True - + @staticmethod - def get_attr_data_dict(data_df: pd.DataFrame, idx: int, entity_id: str, - unit_of_measurement: str, friendly_name: str, - list_name: str, state: float) -> dict: - list_df = copy.deepcopy(data_df).loc[data_df.index[idx]:].reset_index() - list_df.columns = ['timestamps', entity_id] - ts_list = [str(i) for i in list_df['timestamps'].tolist()] - vals_list = [str(np.round(i,2)) for i in list_df[entity_id].tolist()] + def get_attr_data_dict( + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + list_name: str, + state: float, + ) -> dict: + list_df = copy.deepcopy(data_df).loc[data_df.index[idx] :].reset_index() + list_df.columns = ["timestamps", entity_id] + ts_list = [str(i) for i in list_df["timestamps"].tolist()] + vals_list = [str(np.round(i, 2)) for i in list_df[entity_id].tolist()] forecast_list = [] for i, ts in enumerate(ts_list): datum = {} datum["date"] = ts - datum[entity_id.split('sensor.')[1]] = vals_list[i] + datum[entity_id.split("sensor.")[1]] = vals_list[i] forecast_list.append(datum) data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, "friendly_name": friendly_name, - list_name: forecast_list - } + list_name: forecast_list, + }, } return data - - def post_data(self, data_df: pd.DataFrame, idx: int, 
entity_id: str, - unit_of_measurement: str, friendly_name: str, - type_var: str, - from_mlforecaster: Optional[bool]=False, - publish_prefix: Optional[str]="") -> None: + + def post_data( + self, + data_df: pd.DataFrame, + idx: int, + entity_id: str, + unit_of_measurement: str, + friendly_name: str, + type_var: str, + from_mlforecaster: Optional[bool] = False, + publish_prefix: Optional[str] = "", + ) -> None: r""" Post passed data to hass. @@ -290,72 +351,139 @@ def post_data(self, data_df: pd.DataFrame, idx: int, entity_id: str, """ # Add a possible prefix to the entity ID - entity_id = entity_id.replace('sensor.', 'sensor.'+publish_prefix) + entity_id = entity_id.replace("sensor.", "sensor." + publish_prefix) # Set the URL - if self.hass_url == "http://supervisor/core/api": # If we are using the supervisor API - url = self.hass_url+"/states/"+entity_id - else: # Otherwise the Home Assistant Core API it is - url = self.hass_url+"api/states/"+entity_id + if ( + self.hass_url == "http://supervisor/core/api" + ): # If we are using the supervisor API + url = self.hass_url + "/states/" + entity_id + else: # Otherwise the Home Assistant Core API it is + url = self.hass_url + "api/states/" + entity_id headers = { "Authorization": "Bearer " + self.long_lived_token, "content-type": "application/json", } # Preparing the data dict to be published - if type_var == 'cost_fun': - state = np.round(data_df.sum()[0],2) - elif type_var == 'unit_load_cost' or type_var == 'unit_prod_price': - state = np.round(data_df.loc[data_df.index[idx]],4) - elif type_var == 'optim_status': + if type_var == "cost_fun": + state = np.round(data_df.sum()[0], 2) + elif type_var == "unit_load_cost" or type_var == "unit_prod_price": + state = np.round(data_df.loc[data_df.index[idx]], 4) + elif type_var == "optim_status": state = data_df.loc[data_df.index[idx]] + elif type_var == "mlregressor": + state = data_df[idx] else: - state = np.round(data_df.loc[data_df.index[idx]],2) - if type_var == 'power': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "forecasts", state) - elif type_var == 'deferrable': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "deferrables_schedule", state) - elif type_var == 'batt': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_power", state) - elif type_var == 'SOC': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "battery_scheduled_soc", state) - elif type_var == 'unit_load_cost': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_load_cost_forecasts", state) - elif type_var == 'unit_prod_price': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "unit_prod_price_forecasts", state) - elif type_var == 'mlforecaster': - data = RetrieveHass.get_attr_data_dict(data_df, idx, entity_id, unit_of_measurement, - friendly_name, "scheduled_forecast", state) - elif type_var == 'optim_status': + state = np.round(data_df.loc[data_df.index[idx]], 2) + if type_var == "power": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "forecasts", + state, + ) + elif type_var == "deferrable": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + 
"deferrables_schedule", + state, + ) + elif type_var == "batt": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_power", + state, + ) + elif type_var == "SOC": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "battery_scheduled_soc", + state, + ) + elif type_var == "unit_load_cost": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_load_cost_forecasts", + state, + ) + elif type_var == "unit_prod_price": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "unit_prod_price_forecasts", + state, + ) + elif type_var == "mlforecaster": + data = RetrieveHass.get_attr_data_dict( + data_df, + idx, + entity_id, + unit_of_measurement, + friendly_name, + "scheduled_forecast", + state, + ) + elif type_var == "optim_status": data = { "state": state, "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, + } + elif type_var == "mlregressor": + data = { + "state": state, + "attributes": { + "unit_of_measurement": unit_of_measurement, + "friendly_name": friendly_name, + }, } else: data = { "state": "{:.2f}".format(state), "attributes": { "unit_of_measurement": unit_of_measurement, - "friendly_name": friendly_name - } + "friendly_name": friendly_name, + }, } # Actually post the data if self.get_data_from_file: - class response: pass + + class response: + pass + response.status_code = 200 response.ok = True else: response = post(url, headers=headers, data=json.dumps(data)) # Treating the response status and posting them on the logger if response.ok: - self.logger.info("Successfully posted to "+entity_id+" = "+str(state)) + self.logger.info("Successfully posted to " + entity_id + " = " + str(state)) else: - self.logger.info("The status code for received curl command response is: "+str(response.status_code)) + self.logger.info( + "The status code for received curl command response is: " + + str(response.status_code) + ) return response, data diff --git a/src/emhass/static/advanced.html b/src/emhass/static/advanced.html index 38371583..6595520c 100644 --- a/src/emhass/static/advanced.html +++ b/src/emhass/static/advanced.html @@ -14,6 +14,9 @@

 Use the buttons below to fit, predict and tune a machine learning model for
+
+ <!-- added button markup (HTML tags stripped during extraction; presumably the new regressor-model-fit and regressor-model-predict buttons) -->
+
Input Runtime Parameters
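
For orientation only, a minimal sketch (not part of the patch) of how the new MLRegressor class added above in src/emhass/machine_learning_regressor.py can be driven on its own. The column names follow the docstring examples in that file; the DataFrame contents, model name and predicted values are invented placeholders.

import logging

import numpy as np
import pandas as pd

from emhass.machine_learning_regressor import MLRegressor

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hypothetical training table; in EMHASS the data would instead come from a CSV
# passed at runtime (see the csv_file/features/target runtime parameters in utils.py).
rng = np.random.default_rng(42)
data = pd.DataFrame(
    {
        "timestamp": pd.date_range("2024-01-01", periods=120, freq="D").astype(str),
        "degree_days": rng.uniform(5.0, 25.0, 120),
        "solar_production": rng.uniform(0.0, 20.0, 120),
        "heating_hours": rng.uniform(0.0, 10.0, 120),
    }
)

mlr = MLRegressor(
    data,
    model_type="heating_hours_model",       # free-form identifier (placeholder)
    regression_model="AdaBoostRegression",  # any key of REGRESSION_METHODS
    features=["degree_days", "solar_production"],
    target="heating_hours",
    timestamp="timestamp",
    logger=logger,
)
mlr.fit()  # grid-search fit as defined in MLRegressor.fit (date_features omitted here)
prediction = mlr.predict([12.5, 4.2])  # new feature values, same order as `features`
print(prediction)

The grid-search settings (5-fold cross-validation, negative-MSE scoring, refit on the best estimator) come directly from the fit method shown above; within EMHASS this class is exercised through the regressor-model-fit and regressor-model-predict actions wired into command_line.py and the web UI.
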

diff --git a/src/emhass/static/script.js b/src/emhass/static/script.js index 05861814..ad02b578 100644 --- a/src/emhass/static/script.js +++ b/src/emhass/static/script.js @@ -16,6 +16,8 @@ function loadButtons(page) { "forecast-model-fit", "forecast-model-predict", "forecast-model-tune", + "regressor-model-fit", + "regressor-model-predict", "perfect-optim", "publish-data", "naive-mpc-optim" diff --git a/src/emhass/utils.py b/src/emhass/utils.py index f64161fc..46fa0d46 100644 --- a/src/emhass/utils.py +++ b/src/emhass/utils.py @@ -2,10 +2,19 @@ # -*- coding: utf-8 -*- from typing import Tuple, Optional -import numpy as np, pandas as pd -import yaml, pytz, logging, pathlib, json, copy from datetime import datetime, timedelta, timezone +import logging +import pathlib +import json +import copy +import numpy as np +import pandas as pd +import yaml +import pytz + + import plotly.express as px + pd.options.plotting.backend = "plotly" from emhass.machine_learning_forecaster import MLForecaster @@ -14,13 +23,13 @@ def get_root(file: str, num_parent: Optional[int] = 3) -> str: """ Get the root absolute path of the working directory. - + :param file: The passed file path with __file__ :return: The root path :param num_parent: The number of parents levels up to desired root folder :type num_parent: int, optional :rtype: str - + """ if num_parent == 3: root = pathlib.Path(file).resolve().parent.parent.parent @@ -36,7 +45,7 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = logging_level: Optional[str] = "DEBUG") -> Tuple[logging.Logger, logging.StreamHandler]: """ Create a simple logger object. - + :param fun_name: The Python function object name where the logger will be used :type fun_name: str :param emhass_conf: Dictionary containing the needed emhass paths @@ -45,9 +54,9 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = :type save_to_file: bool, optional :return: The logger object and the handler :rtype: object - + """ - # create logger object + # create logger object logger = logging.getLogger(fun_name) logger.propagate = True logger.fileSetting = save_to_file @@ -70,14 +79,18 @@ def get_logger(fun_name: str, emhass_conf: dict, save_to_file: Optional[bool] = else: logger.setLevel(logging.DEBUG) ch.setLevel(logging.DEBUG) - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) ch.setFormatter(formatter) logger.addHandler(ch) return logger, ch -def get_forecast_dates(freq: int, delta_forecast: int, - timedelta_days: Optional[int] = 0) -> pd.core.indexes.datetimes.DatetimeIndex: + +def get_forecast_dates( + freq: int, delta_forecast: int, timedelta_days: Optional[int] = 0 +) -> pd.core.indexes.datetimes.DatetimeIndex: """ Get the date_range list of the needed future dates using the delta_forecast parameter. @@ -89,7 +102,7 @@ def get_forecast_dates(freq: int, delta_forecast: int, :type timedelta_days: Optional[int], optional :return: A list of future forecast dates. 
:rtype: pd.core.indexes.datetimes.DatetimeIndex - + """ freq = pd.to_timedelta(freq, "minutes") start_forecast = pd.Timestamp(datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) @@ -99,11 +112,19 @@ def get_forecast_dates(freq: int, delta_forecast: int, freq=freq).round(freq, ambiguous='infer', nonexistent='shift_forward') return forecast_dates -def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dict, optim_conf: dict, plant_conf: dict, - set_type: str, logger: logging.Logger) -> Tuple[str, dict]: + +def treat_runtimeparams( + runtimeparams: str, + params: str, + retrieve_hass_conf: dict, + optim_conf: dict, + plant_conf: dict, + set_type: str, + logger: logging.Logger, +) -> Tuple[str, dict]: """ - Treat the passed optimization runtime parameters. - + Treat the passed optimization runtime parameters. + :param runtimeparams: Json string containing the runtime parameters dict. :type runtimeparams: str :param params: Configuration parameters passed from data/options.json @@ -120,93 +141,167 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic :type logger: logging.Logger :return: Returning the params and optimization parameter container. :rtype: Tuple[str, dict] - + """ - if (params != None) and (params != 'null'): + if (params != None) and (params != "null"): params = json.loads(params) else: params = {} # Some default data needed custom_deferrable_forecast_id = [] - for k in range(optim_conf['num_def_loads']): - custom_deferrable_forecast_id.append({ - "entity_id": "sensor.p_deferrable{}".format(k), - "unit_of_measurement": "W", - "friendly_name": "Deferrable Load {}".format(k) - }) - default_passed_dict = {'custom_pv_forecast_id': {"entity_id": "sensor.p_pv_forecast", "unit_of_measurement": "W", "friendly_name": "PV Power Forecast"}, - 'custom_load_forecast_id': {"entity_id": "sensor.p_load_forecast", "unit_of_measurement": "W", "friendly_name": "Load Power Forecast"}, - 'custom_batt_forecast_id': {"entity_id": "sensor.p_batt_forecast", "unit_of_measurement": "W", "friendly_name": "Battery Power Forecast"}, - 'custom_batt_soc_forecast_id': {"entity_id": "sensor.soc_batt_forecast", "unit_of_measurement": "%", "friendly_name": "Battery SOC Forecast"}, - 'custom_grid_forecast_id': {"entity_id": "sensor.p_grid_forecast", "unit_of_measurement": "W", "friendly_name": "Grid Power Forecast"}, - 'custom_cost_fun_id': {"entity_id": "sensor.total_cost_fun_value", "unit_of_measurement": "", "friendly_name": "Total cost function value"}, - 'custom_optim_status_id': {"entity_id": "sensor.optim_status", "unit_of_measurement": "", "friendly_name": "EMHASS optimization status"}, - 'custom_unit_load_cost_id': {"entity_id": "sensor.unit_load_cost", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Load Cost"}, - 'custom_unit_prod_price_id': {"entity_id": "sensor.unit_prod_price", "unit_of_measurement": "€/kWh", "friendly_name": "Unit Prod Price"}, - 'custom_deferrable_forecast_id': custom_deferrable_forecast_id, - 'publish_prefix': ""} - if 'passed_data' in params.keys(): + for k in range(optim_conf["num_def_loads"]): + custom_deferrable_forecast_id.append( + { + "entity_id": "sensor.p_deferrable{}".format(k), + "unit_of_measurement": "W", + "friendly_name": "Deferrable Load {}".format(k), + } + ) + default_passed_dict = { + "custom_pv_forecast_id": { + "entity_id": "sensor.p_pv_forecast", + "unit_of_measurement": "W", + "friendly_name": "PV Power Forecast", + }, + "custom_load_forecast_id": { + "entity_id": 
"sensor.p_load_forecast", + "unit_of_measurement": "W", + "friendly_name": "Load Power Forecast", + }, + "custom_batt_forecast_id": { + "entity_id": "sensor.p_batt_forecast", + "unit_of_measurement": "W", + "friendly_name": "Battery Power Forecast", + }, + "custom_batt_soc_forecast_id": { + "entity_id": "sensor.soc_batt_forecast", + "unit_of_measurement": "%", + "friendly_name": "Battery SOC Forecast", + }, + "custom_grid_forecast_id": { + "entity_id": "sensor.p_grid_forecast", + "unit_of_measurement": "W", + "friendly_name": "Grid Power Forecast", + }, + "custom_cost_fun_id": { + "entity_id": "sensor.total_cost_fun_value", + "unit_of_measurement": "", + "friendly_name": "Total cost function value", + }, + "custom_optim_status_id": { + "entity_id": "sensor.optim_status", + "unit_of_measurement": "", + "friendly_name": "EMHASS optimization status", + }, + "custom_unit_load_cost_id": { + "entity_id": "sensor.unit_load_cost", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Load Cost", + }, + "custom_unit_prod_price_id": { + "entity_id": "sensor.unit_prod_price", + "unit_of_measurement": "€/kWh", + "friendly_name": "Unit Prod Price", + }, + "custom_deferrable_forecast_id": custom_deferrable_forecast_id, + "publish_prefix": "", + } + if "passed_data" in params.keys(): for key, value in default_passed_dict.items(): - params['passed_data'][key] = value + params["passed_data"][key] = value else: - params['passed_data'] = default_passed_dict + params["passed_data"] = default_passed_dict if runtimeparams is not None: runtimeparams = json.loads(runtimeparams) - freq = int(retrieve_hass_conf['freq'].seconds/60.0) - delta_forecast = int(optim_conf['delta_forecast'].days) + freq = int(retrieve_hass_conf["freq"].seconds / 60.0) + delta_forecast = int(optim_conf["delta_forecast"].days) forecast_dates = get_forecast_dates(freq, delta_forecast) + if set_type == "regressor-model-fit": + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target + if "timestamp" not in runtimeparams: + params["passed_data"]["timestamp"] = None + else: + timestamp = runtimeparams["timestamp"] + params["passed_data"]["timestamp"] = timestamp + if "date_features" not in runtimeparams: + params["passed_data"]["date_features"] = [] + else: + date_features = runtimeparams["date_features"] + params["passed_data"]["date_features"] = date_features + if set_type == "regressor-model-predict": + if "new_values" in runtimeparams: + new_values = runtimeparams["new_values"] + params["passed_data"]["new_values"] = new_values + if "csv_file" in runtimeparams: + csv_file = runtimeparams["csv_file"] + params["passed_data"]["csv_file"] = csv_file + if "features" in runtimeparams: + features = runtimeparams["features"] + params["passed_data"]["features"] = features + if "target" in runtimeparams: + target = runtimeparams["target"] + params["passed_data"]["target"] = target + # Treating special data passed for MPC control case - if set_type == 'naive-mpc-optim': - if 'prediction_horizon' not in runtimeparams.keys(): - prediction_horizon = 10 # 10 time steps by default + if set_type == "naive-mpc-optim": + if "prediction_horizon" not in runtimeparams.keys(): + prediction_horizon = 10 # 10 time steps by default else: - prediction_horizon = 
runtimeparams['prediction_horizon'] - params['passed_data']['prediction_horizon'] = prediction_horizon - if 'soc_init' not in runtimeparams.keys(): - soc_init = plant_conf['SOCtarget'] + prediction_horizon = runtimeparams["prediction_horizon"] + params["passed_data"]["prediction_horizon"] = prediction_horizon + if "soc_init" not in runtimeparams.keys(): + soc_init = plant_conf["SOCtarget"] else: - soc_init = runtimeparams['soc_init'] - params['passed_data']['soc_init'] = soc_init - if 'soc_final' not in runtimeparams.keys(): - soc_final = plant_conf['SOCtarget'] + soc_init = runtimeparams["soc_init"] + params["passed_data"]["soc_init"] = soc_init + if "soc_final" not in runtimeparams.keys(): + soc_final = plant_conf["SOCtarget"] else: - soc_final = runtimeparams['soc_final'] - params['passed_data']['soc_final'] = soc_final - if 'def_total_hours' not in runtimeparams.keys(): - def_total_hours = optim_conf['def_total_hours'] + soc_final = runtimeparams["soc_final"] + params["passed_data"]["soc_final"] = soc_final + if "def_total_hours" not in runtimeparams.keys(): + def_total_hours = optim_conf["def_total_hours"] else: - def_total_hours = runtimeparams['def_total_hours'] - params['passed_data']['def_total_hours'] = def_total_hours - if 'def_start_timestep' not in runtimeparams.keys(): - def_start_timestep = optim_conf['def_start_timestep'] + def_total_hours = runtimeparams["def_total_hours"] + params["passed_data"]["def_total_hours"] = def_total_hours + if "def_start_timestep" not in runtimeparams.keys(): + def_start_timestep = optim_conf["def_start_timestep"] else: - def_start_timestep = runtimeparams['def_start_timestep'] - params['passed_data']['def_start_timestep'] = def_start_timestep - if 'def_end_timestep' not in runtimeparams.keys(): - def_end_timestep = optim_conf['def_end_timestep'] + def_start_timestep = runtimeparams["def_start_timestep"] + params["passed_data"]["def_start_timestep"] = def_start_timestep + if "def_end_timestep" not in runtimeparams.keys(): + def_end_timestep = optim_conf["def_end_timestep"] else: - def_end_timestep = runtimeparams['def_end_timestep'] - params['passed_data']['def_end_timestep'] = def_end_timestep - if 'alpha' not in runtimeparams.keys(): + def_end_timestep = runtimeparams["def_end_timestep"] + params["passed_data"]["def_end_timestep"] = def_end_timestep + if "alpha" not in runtimeparams.keys(): alpha = 0.5 else: - alpha = runtimeparams['alpha'] - params['passed_data']['alpha'] = alpha - if 'beta' not in runtimeparams.keys(): + alpha = runtimeparams["alpha"] + params["passed_data"]["alpha"] = alpha + if "beta" not in runtimeparams.keys(): beta = 0.5 else: - beta = runtimeparams['beta'] - params['passed_data']['beta'] = beta + beta = runtimeparams["beta"] + params["passed_data"]["beta"] = beta forecast_dates = copy.deepcopy(forecast_dates)[0:prediction_horizon] else: - params['passed_data']['prediction_horizon'] = None - params['passed_data']['soc_init'] = None - params['passed_data']['soc_final'] = None - params['passed_data']['def_total_hours'] = None - params['passed_data']['def_start_timestep'] = None - params['passed_data']['def_end_timestep'] = None - params['passed_data']['alpha'] = None - params['passed_data']['beta'] = None + params["passed_data"]["prediction_horizon"] = None + params["passed_data"]["soc_init"] = None + params["passed_data"]["soc_final"] = None + params["passed_data"]["def_total_hours"] = None + params["passed_data"]["def_start_timestep"] = None + params["passed_data"]["def_end_timestep"] = None + 
params["passed_data"]["alpha"] = None + params["passed_data"]["beta"] = None # Treat passed forecast data lists list_forecast_key = ['pv_power_forecast', 'load_power_forecast', 'load_cost_forecast', 'prod_price_forecast'] forecast_methods = ['weather_forecast_method', 'load_forecast_method', 'load_cost_forecast_method', 'prod_price_forecast_method'] @@ -226,121 +321,184 @@ def treat_runtimeparams(runtimeparams: str, params: str, retrieve_hass_conf: dic else: params['passed_data'][forecast_key] = None # Treat passed data for forecast model fit/predict/tune at runtime - if 'days_to_retrieve' not in runtimeparams.keys(): + if "days_to_retrieve" not in runtimeparams.keys(): days_to_retrieve = 9 else: - days_to_retrieve = runtimeparams['days_to_retrieve'] - params['passed_data']['days_to_retrieve'] = days_to_retrieve - if 'model_type' not in runtimeparams.keys(): + days_to_retrieve = runtimeparams["days_to_retrieve"] + params["passed_data"]["days_to_retrieve"] = days_to_retrieve + if "model_type" not in runtimeparams.keys(): model_type = "load_forecast" else: - model_type = runtimeparams['model_type'] - params['passed_data']['model_type'] = model_type - if 'var_model' not in runtimeparams.keys(): + model_type = runtimeparams["model_type"] + params["passed_data"]["model_type"] = model_type + if "var_model" not in runtimeparams.keys(): var_model = "sensor.power_load_no_var_loads" else: - var_model = runtimeparams['var_model'] - params['passed_data']['var_model'] = var_model - if 'sklearn_model' not in runtimeparams.keys(): + var_model = runtimeparams["var_model"] + params["passed_data"]["var_model"] = var_model + if "sklearn_model" not in runtimeparams.keys(): sklearn_model = "KNeighborsRegressor" else: - sklearn_model = runtimeparams['sklearn_model'] - params['passed_data']['sklearn_model'] = sklearn_model - if 'num_lags' not in runtimeparams.keys(): + sklearn_model = runtimeparams["sklearn_model"] + params["passed_data"]["sklearn_model"] = sklearn_model + if "regression_model" not in runtimeparams.keys(): + regression_model = "AdaBoostRegression" + else: + regression_model = runtimeparams["regression_model"] + params["passed_data"]["regression_model"] = regression_model + if "num_lags" not in runtimeparams.keys(): num_lags = 48 else: - num_lags = runtimeparams['num_lags'] - params['passed_data']['num_lags'] = num_lags - if 'split_date_delta' not in runtimeparams.keys(): - split_date_delta = '48h' + num_lags = runtimeparams["num_lags"] + params["passed_data"]["num_lags"] = num_lags + if "split_date_delta" not in runtimeparams.keys(): + split_date_delta = "48h" else: - split_date_delta = runtimeparams['split_date_delta'] - params['passed_data']['split_date_delta'] = split_date_delta - if 'perform_backtest' not in runtimeparams.keys(): + split_date_delta = runtimeparams["split_date_delta"] + params["passed_data"]["split_date_delta"] = split_date_delta + if "perform_backtest" not in runtimeparams.keys(): perform_backtest = False else: - perform_backtest = eval(str(runtimeparams['perform_backtest']).capitalize()) - params['passed_data']['perform_backtest'] = perform_backtest - if 'model_predict_publish' not in runtimeparams.keys(): + perform_backtest = eval(str(runtimeparams["perform_backtest"]).capitalize()) + params["passed_data"]["perform_backtest"] = perform_backtest + if "model_predict_publish" not in runtimeparams.keys(): model_predict_publish = False else: - model_predict_publish = eval(str(runtimeparams['model_predict_publish']).capitalize()) - params['passed_data']['model_predict_publish'] 
= model_predict_publish - if 'model_predict_entity_id' not in runtimeparams.keys(): + model_predict_publish = eval( + str(runtimeparams["model_predict_publish"]).capitalize() + ) + params["passed_data"]["model_predict_publish"] = model_predict_publish + if "model_predict_entity_id" not in runtimeparams.keys(): model_predict_entity_id = "sensor.p_load_forecast_custom_model" else: - model_predict_entity_id = runtimeparams['model_predict_entity_id'] - params['passed_data']['model_predict_entity_id'] = model_predict_entity_id - if 'model_predict_unit_of_measurement' not in runtimeparams.keys(): + model_predict_entity_id = runtimeparams["model_predict_entity_id"] + params["passed_data"]["model_predict_entity_id"] = model_predict_entity_id + if "model_predict_unit_of_measurement" not in runtimeparams.keys(): model_predict_unit_of_measurement = "W" else: - model_predict_unit_of_measurement = runtimeparams['model_predict_unit_of_measurement'] - params['passed_data']['model_predict_unit_of_measurement'] = model_predict_unit_of_measurement - if 'model_predict_friendly_name' not in runtimeparams.keys(): + model_predict_unit_of_measurement = runtimeparams[ + "model_predict_unit_of_measurement" + ] + params["passed_data"][ + "model_predict_unit_of_measurement" + ] = model_predict_unit_of_measurement + if "model_predict_friendly_name" not in runtimeparams.keys(): model_predict_friendly_name = "Load Power Forecast custom ML model" else: - model_predict_friendly_name = runtimeparams['model_predict_friendly_name'] - params['passed_data']['model_predict_friendly_name'] = model_predict_friendly_name - # Treat optimization configuration parameters passed at runtime - if 'num_def_loads' in runtimeparams.keys(): - optim_conf['num_def_loads'] = runtimeparams['num_def_loads'] - if 'P_deferrable_nom' in runtimeparams.keys(): - optim_conf['P_deferrable_nom'] = runtimeparams['P_deferrable_nom'] - if 'def_total_hours' in runtimeparams.keys(): - optim_conf['def_total_hours'] = runtimeparams['def_total_hours'] - if 'def_start_timestep' in runtimeparams.keys(): - optim_conf['def_start_timestep'] = runtimeparams['def_start_timestep'] - if 'def_end_timestep' in runtimeparams.keys(): - optim_conf['def_end_timestep'] = runtimeparams['def_end_timestep'] - if 'treat_def_as_semi_cont' in runtimeparams.keys(): - optim_conf['treat_def_as_semi_cont'] = [eval(str(k).capitalize()) for k in runtimeparams['treat_def_as_semi_cont']] - if 'set_def_constant' in runtimeparams.keys(): - optim_conf['set_def_constant'] = [eval(str(k).capitalize()) for k in runtimeparams['set_def_constant']] - if 'solcast_api_key' in runtimeparams.keys(): - retrieve_hass_conf['solcast_api_key'] = runtimeparams['solcast_api_key'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solcast_rooftop_id' in runtimeparams.keys(): - retrieve_hass_conf['solcast_rooftop_id'] = runtimeparams['solcast_rooftop_id'] - optim_conf['weather_forecast_method'] = 'solcast' - if 'solar_forecast_kwp' in runtimeparams.keys(): - retrieve_hass_conf['solar_forecast_kwp'] = runtimeparams['solar_forecast_kwp'] - optim_conf['weather_forecast_method'] = 'solar.forecast' + model_predict_friendly_name = runtimeparams["model_predict_friendly_name"] + params["passed_data"][ + "model_predict_friendly_name" + ] = model_predict_friendly_name + if "mlr_predict_entity_id" not in runtimeparams.keys(): + mlr_predict_entity_id = "sensor.mlr_predict" + else: + mlr_predict_entity_id = runtimeparams["mlr_predict_entity_id"] + params["passed_data"]["mlr_predict_entity_id"] = mlr_predict_entity_id 
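        # Illustrative payloads, a sketch and not part of this patch: runtime-parameter dicts of
        # the shape the regressor branches in this function consume. The key names are the ones
        # handled above; the concrete values (CSV file, column names, unit, friendly name, new
        # values) are assumptions.
        #   regressor-model-fit:
        #     {"csv_file": "heating_prediction.csv",
        #      "features": ["degree_days", "solar_production"],
        #      "target": "heating_hours",
        #      "timestamp": "timestamp",
        #      "date_features": ["month", "day_of_week"],
        #      "regression_model": "AdaBoostRegression"}
        #   regressor-model-predict:
        #     {"new_values": [12.5, 4.2],
        #      "mlr_predict_entity_id": "sensor.mlr_predict",
        #      "mlr_predict_unit_of_measurement": "h",
        #      "mlr_predict_friendly_name": "Heating hours prediction"}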
+ if "mlr_predict_unit_of_measurement" not in runtimeparams.keys(): + mlr_predict_unit_of_measurement = None + else: + mlr_predict_unit_of_measurement = runtimeparams[ + "mlr_predict_unit_of_measurement" + ] + params["passed_data"][ + "mlr_predict_unit_of_measurement" + ] = mlr_predict_unit_of_measurement + if "mlr_predict_friendly_name" not in runtimeparams.keys(): + mlr_predict_friendly_name = "mlr predictor" + else: + mlr_predict_friendly_name = runtimeparams["mlr_predict_friendly_name"] + params["passed_data"]["mlr_predict_friendly_name"] = mlr_predict_friendly_name + # Treat optimization configuration parameters passed at runtime + if "num_def_loads" in runtimeparams.keys(): + optim_conf["num_def_loads"] = runtimeparams["num_def_loads"] + if "P_deferrable_nom" in runtimeparams.keys(): + optim_conf["P_deferrable_nom"] = runtimeparams["P_deferrable_nom"] + if "def_total_hours" in runtimeparams.keys(): + optim_conf["def_total_hours"] = runtimeparams["def_total_hours"] + if "def_start_timestep" in runtimeparams.keys(): + optim_conf["def_start_timestep"] = runtimeparams["def_start_timestep"] + if "def_end_timestep" in runtimeparams.keys(): + optim_conf["def_end_timestep"] = runtimeparams["def_end_timestep"] + if "treat_def_as_semi_cont" in runtimeparams.keys(): + optim_conf["treat_def_as_semi_cont"] = [ + eval(str(k).capitalize()) + for k in runtimeparams["treat_def_as_semi_cont"] + ] + if "set_def_constant" in runtimeparams.keys(): + optim_conf["set_def_constant"] = [ + eval(str(k).capitalize()) for k in runtimeparams["set_def_constant"] + ] + if "solcast_api_key" in runtimeparams.keys(): + retrieve_hass_conf["solcast_api_key"] = runtimeparams["solcast_api_key"] + optim_conf["weather_forecast_method"] = "solcast" + if "solcast_rooftop_id" in runtimeparams.keys(): + retrieve_hass_conf["solcast_rooftop_id"] = runtimeparams[ + "solcast_rooftop_id" + ] + optim_conf["weather_forecast_method"] = "solcast" + if "solar_forecast_kwp" in runtimeparams.keys(): + retrieve_hass_conf["solar_forecast_kwp"] = runtimeparams[ + "solar_forecast_kwp" + ] + optim_conf["weather_forecast_method"] = "solar.forecast" + if "weight_battery_discharge" in runtimeparams.keys(): + optim_conf["weight_battery_discharge"] = runtimeparams[ + "weight_battery_discharge" + ] + if "weight_battery_charge" in runtimeparams.keys(): + optim_conf["weight_battery_charge"] = runtimeparams["weight_battery_charge"] if 'freq' in runtimeparams.keys(): retrieve_hass_conf['freq'] = pd.to_timedelta(runtimeparams['freq'], "minutes") - if 'weight_battery_discharge' in runtimeparams.keys(): - optim_conf['weight_battery_discharge'] = runtimeparams['weight_battery_discharge'] - if 'weight_battery_charge' in runtimeparams.keys(): - optim_conf['weight_battery_charge'] = runtimeparams['weight_battery_charge'] # Treat plant configuration parameters passed at runtime - if 'SOCtarget' in runtimeparams.keys(): - plant_conf['SOCtarget'] = runtimeparams['SOCtarget'] + if "SOCtarget" in runtimeparams.keys(): + plant_conf["SOCtarget"] = runtimeparams["SOCtarget"] # Treat custom entities id's and friendly names for variables - if 'custom_pv_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_pv_forecast_id'] = runtimeparams['custom_pv_forecast_id'] - if 'custom_load_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_load_forecast_id'] = runtimeparams['custom_load_forecast_id'] - if 'custom_batt_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_forecast_id'] = 
runtimeparams['custom_batt_forecast_id'] - if 'custom_batt_soc_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_batt_soc_forecast_id'] = runtimeparams['custom_batt_soc_forecast_id'] - if 'custom_grid_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_grid_forecast_id'] = runtimeparams['custom_grid_forecast_id'] - if 'custom_cost_fun_id' in runtimeparams.keys(): - params['passed_data']['custom_cost_fun_id'] = runtimeparams['custom_cost_fun_id'] - if 'custom_optim_status_id' in runtimeparams.keys(): - params['passed_data']['custom_optim_status_id'] = runtimeparams['custom_optim_status_id'] - if 'custom_unit_load_cost_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_load_cost_id'] = runtimeparams['custom_unit_load_cost_id'] - if 'custom_unit_prod_price_id' in runtimeparams.keys(): - params['passed_data']['custom_unit_prod_price_id'] = runtimeparams['custom_unit_prod_price_id'] - if 'custom_deferrable_forecast_id' in runtimeparams.keys(): - params['passed_data']['custom_deferrable_forecast_id'] = runtimeparams['custom_deferrable_forecast_id'] + if "custom_pv_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_pv_forecast_id"] = runtimeparams[ + "custom_pv_forecast_id" + ] + if "custom_load_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_load_forecast_id"] = runtimeparams[ + "custom_load_forecast_id" + ] + if "custom_batt_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_forecast_id"] = runtimeparams[ + "custom_batt_forecast_id" + ] + if "custom_batt_soc_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_batt_soc_forecast_id"] = runtimeparams[ + "custom_batt_soc_forecast_id" + ] + if "custom_grid_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_grid_forecast_id"] = runtimeparams[ + "custom_grid_forecast_id" + ] + if "custom_cost_fun_id" in runtimeparams.keys(): + params["passed_data"]["custom_cost_fun_id"] = runtimeparams[ + "custom_cost_fun_id" + ] + if "custom_optim_status_id" in runtimeparams.keys(): + params["passed_data"]["custom_optim_status_id"] = runtimeparams[ + "custom_optim_status_id" + ] + if "custom_unit_load_cost_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_load_cost_id"] = runtimeparams[ + "custom_unit_load_cost_id" + ] + if "custom_unit_prod_price_id" in runtimeparams.keys(): + params["passed_data"]["custom_unit_prod_price_id"] = runtimeparams[ + "custom_unit_prod_price_id" + ] + if "custom_deferrable_forecast_id" in runtimeparams.keys(): + params["passed_data"]["custom_deferrable_forecast_id"] = runtimeparams[ + "custom_deferrable_forecast_id" + ] # A condition to put a prefix on all published data - if 'publish_prefix' not in runtimeparams.keys(): + if "publish_prefix" not in runtimeparams.keys(): publish_prefix = "" else: - publish_prefix = runtimeparams['publish_prefix'] - params['passed_data']['publish_prefix'] = publish_prefix + publish_prefix = runtimeparams["publish_prefix"] + params["passed_data"]["publish_prefix"] = publish_prefix # Serialize the final params params = json.dumps(params) return params, retrieve_hass_conf, optim_conf, plant_conf @@ -371,40 +529,45 @@ def get_yaml_parse(emhass_conf: dict, use_secrets: Optional[bool] = True, with open(emhass_conf["root_path"] / 'secrets_emhass.yaml', 'r') as file: #assume secrets file is in root path input_secrets = yaml.load(file, Loader=yaml.FullLoader) else: - input_secrets = input_conf.pop('params_secrets', None) - - if 
(type(input_conf['retrieve_hass_conf']) == list): #if using old config version - retrieve_hass_conf = dict({key:d[key] for d in input_conf['retrieve_hass_conf'] for key in d}) + input_secrets = input_conf.pop("params_secrets", None) + + if type(input_conf["retrieve_hass_conf"]) == list: # if using old config version + retrieve_hass_conf = dict( + {key: d[key] for d in input_conf["retrieve_hass_conf"] for key in d} + ) else: - retrieve_hass_conf = input_conf.get('retrieve_hass_conf', {}) - + retrieve_hass_conf = input_conf.get("retrieve_hass_conf", {}) + if use_secrets: retrieve_hass_conf.update(input_secrets) else: - retrieve_hass_conf['hass_url'] = 'http://supervisor/core/api' - retrieve_hass_conf['long_lived_token'] = '${SUPERVISOR_TOKEN}' - retrieve_hass_conf['time_zone'] = 'Europe/Paris' - retrieve_hass_conf['lat'] = 45.83 - retrieve_hass_conf['lon'] = 6.86 - retrieve_hass_conf['alt'] = 4807.8 - retrieve_hass_conf['freq'] = pd.to_timedelta(retrieve_hass_conf['freq'], "minutes") - retrieve_hass_conf['time_zone'] = pytz.timezone(retrieve_hass_conf['time_zone']) - - if (type(input_conf['optim_conf']) == list): - optim_conf = dict({key:d[key] for d in input_conf['optim_conf'] for key in d}) + retrieve_hass_conf["hass_url"] = "http://supervisor/core/api" + retrieve_hass_conf["long_lived_token"] = "${SUPERVISOR_TOKEN}" + retrieve_hass_conf["time_zone"] = "Europe/Paris" + retrieve_hass_conf["lat"] = 45.83 + retrieve_hass_conf["lon"] = 6.86 + retrieve_hass_conf["alt"] = 4807.8 + retrieve_hass_conf["freq"] = pd.to_timedelta(retrieve_hass_conf["freq"], "minutes") + retrieve_hass_conf["time_zone"] = pytz.timezone(retrieve_hass_conf["time_zone"]) + + if type(input_conf["optim_conf"]) == list: + optim_conf = dict({key: d[key] for d in input_conf["optim_conf"] for key in d}) else: - optim_conf = input_conf.get('optim_conf', {}) + optim_conf = input_conf.get("optim_conf", {}) - optim_conf['list_hp_periods'] = dict((key,d[key]) for d in optim_conf['list_hp_periods'] for key in d) - optim_conf['delta_forecast'] = pd.Timedelta(days=optim_conf['delta_forecast']) - - if (type(input_conf['plant_conf']) == list): - plant_conf = dict({key:d[key] for d in input_conf['plant_conf'] for key in d}) + optim_conf["list_hp_periods"] = dict( + (key, d[key]) for d in optim_conf["list_hp_periods"] for key in d + ) + optim_conf["delta_forecast"] = pd.Timedelta(days=optim_conf["delta_forecast"]) + + if type(input_conf["plant_conf"]) == list: + plant_conf = dict({key: d[key] for d in input_conf["plant_conf"] for key in d}) else: - plant_conf = input_conf.get('plant_conf', {}) - + plant_conf = input_conf.get("plant_conf", {}) + return retrieve_hass_conf, optim_conf, plant_conf + def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dict: """ Build a dictionary with graphs and tables for the webui. 
@@ -415,61 +578,86 @@ def get_injection_dict(df: pd.DataFrame, plot_size: Optional[int] = 1366) -> dic :type plot_size: Optional[int], optional :return: A dictionary containing the graphs and tables in html format :rtype: dict - + """ - cols_p = [i for i in df.columns.to_list() if 'P_' in i] + cols_p = [i for i in df.columns.to_list() if "P_" in i] # Let's round the data in the DF - optim_status = df['optim_status'].unique().item() - df.drop('optim_status', axis=1, inplace=True) - cols_else = [i for i in df.columns.to_list() if 'P_' not in i] + optim_status = df["optim_status"].unique().item() + df.drop("optim_status", axis=1, inplace=True) + cols_else = [i for i in df.columns.to_list() if "P_" not in i] df = df.apply(pd.to_numeric) df[cols_p] = df[cols_p].astype(int) df[cols_else] = df[cols_else].round(3) # Create plots n_colors = len(cols_p) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_0 = px.line(df[cols_p], title='Systems powers schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_0.update_layout(xaxis_title='Timestamp', yaxis_title='System powers (W)') - if 'SOC_opt' in df.columns.to_list(): - fig_1 = px.line(df['SOC_opt'], title='Battery state of charge schedule after optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_1.update_layout(xaxis_title='Timestamp', yaxis_title='Battery SOC (%)') - cols_cost = [i for i in df.columns.to_list() if 'cost_' in i or 'unit_' in i] + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_0 = px.line( + df[cols_p], + title="Systems powers schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_0.update_layout(xaxis_title="Timestamp", yaxis_title="System powers (W)") + if "SOC_opt" in df.columns.to_list(): + fig_1 = px.line( + df["SOC_opt"], + title="Battery state of charge schedule after optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_1.update_layout(xaxis_title="Timestamp", yaxis_title="Battery SOC (%)") + cols_cost = [i for i in df.columns.to_list() if "cost_" in i or "unit_" in i] n_colors = len(cols_cost) - colors = px.colors.sample_colorscale("jet", [n/(n_colors -1) for n in range(n_colors)]) - fig_2 = px.line(df[cols_cost], title='Systems costs obtained from optimization results', - template='presentation', line_shape="hv", - color_discrete_sequence=colors) - fig_2.update_layout(xaxis_title='Timestamp', yaxis_title='System costs (currency)') + colors = px.colors.sample_colorscale( + "jet", [n / (n_colors - 1) for n in range(n_colors)] + ) + fig_2 = px.line( + df[cols_cost], + title="Systems costs obtained from optimization results", + template="presentation", + line_shape="hv", + color_discrete_sequence=colors, + ) + fig_2.update_layout(xaxis_title="Timestamp", yaxis_title="System costs (currency)") # Get full path to image - image_path_0 = fig_0.to_html(full_html=False, default_width='75%') - if 'SOC_opt' in df.columns.to_list(): - image_path_1 = fig_1.to_html(full_html=False, default_width='75%') - image_path_2 = fig_2.to_html(full_html=False, default_width='75%') + image_path_0 = fig_0.to_html(full_html=False, default_width="75%") + if "SOC_opt" in df.columns.to_list(): + image_path_1 = fig_1.to_html(full_html=False, default_width="75%") + image_path_2 = fig_2.to_html(full_html=False, 
default_width="75%") # The tables - table1 = df.reset_index().to_html(classes='mystyle', index=False) - cost_cols = [i for i in df.columns if 'cost_' in i] + table1 = df.reset_index().to_html(classes="mystyle", index=False) + cost_cols = [i for i in df.columns if "cost_" in i] table2 = df[cost_cols].reset_index().sum(numeric_only=True) - table2['optim_status'] = optim_status - table2 = table2.to_frame(name='Value').reset_index(names='Variable').to_html(classes='mystyle', index=False) + table2["optim_status"] = optim_status + table2 = ( + table2.to_frame(name="Value") + .reset_index(names="Variable") + .to_html(classes="mystyle", index=False) + ) # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>EMHASS optimization results</h2>' - injection_dict['subsubtitle0'] = '<h4>Plotting latest optimization results</h4>' - injection_dict['figure_0'] = image_path_0 - if 'SOC_opt' in df.columns.to_list(): - injection_dict['figure_1'] = image_path_1 - injection_dict['figure_2'] = image_path_2 - injection_dict['subsubtitle1'] = '<h4>Last run optimization results table</h4>' - injection_dict['table1'] = table1 - injection_dict['subsubtitle2'] = '<h4>Summary table for latest optimization results</h4>' - injection_dict['table2'] = table2 + injection_dict["title"] = "<h2>EMHASS optimization results</h2>" + injection_dict["subsubtitle0"] = "<h4>Plotting latest optimization results</h4>" + injection_dict["figure_0"] = image_path_0 + if "SOC_opt" in df.columns.to_list(): + injection_dict["figure_1"] = image_path_1 + injection_dict["figure_2"] = image_path_2 + injection_dict["subsubtitle1"] = "<h4>Last run optimization results table</h4>" + injection_dict["table1"] = table1 + injection_dict["subsubtitle2"] = ( + "<h4>Summary table for latest optimization results</h4>
" + ) + injection_dict["table2"] = table2 return injection_dict -def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLForecaster) -> dict: + +def get_injection_dict_forecast_model_fit( + df_fit_pred: pd.DataFrame, mlf: MLForecaster +) -> dict: """ Build a dictionary with graphs and tables for the webui for special MLF fit case. @@ -481,19 +669,26 @@ def get_injection_dict_forecast_model_fit(df_fit_pred: pd.DataFrame, mlf: MLFore :rtype: dict """ fig = df_fit_pred.plot() - fig.layout.template = 'presentation' - fig.update_yaxes(title_text = mlf.model_type) - fig.update_xaxes(title_text = "Time") - image_path_0 = fig.to_html(full_html=False, default_width='75%') + fig.layout.template = "presentation" + fig.update_yaxes(title_text=mlf.model_type) + fig.update_xaxes(title_text="Time") + image_path_0 = fig.to_html(full_html=False, default_width="75%") # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>Custom machine learning forecast model fit</h2>' - injection_dict['subsubtitle0'] = '<h4>Plotting train/test forecast model results for '+mlf.model_type+'</h4>' - injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>' - injection_dict['figure_0'] = image_path_0 + injection_dict["title"] = "<h2>Custom machine learning forecast model fit</h2>" + injection_dict["subsubtitle0"] = ( + "<h4>Plotting train/test forecast model results for " + mlf.model_type + "</h4>" + ) + injection_dict["subsubtitle0"] = ( + "<h4>Forecasting variable " + mlf.var_model + "</h4>
" + ) + injection_dict["figure_0"] = image_path_0 return injection_dict -def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLForecaster) -> dict: + +def get_injection_dict_forecast_model_tune( + df_pred_optim: pd.DataFrame, mlf: MLForecaster +) -> dict: """ Build a dictionary with graphs and tables for the webui for special MLF tune case. @@ -505,19 +700,32 @@ def get_injection_dict_forecast_model_tune(df_pred_optim: pd.DataFrame, mlf: MLF :rtype: dict """ fig = df_pred_optim.plot() - fig.layout.template = 'presentation' - fig.update_yaxes(title_text = mlf.model_type) - fig.update_xaxes(title_text = "Time") - image_path_0 = fig.to_html(full_html=False, default_width='75%') + fig.layout.template = "presentation" + fig.update_yaxes(title_text=mlf.model_type) + fig.update_xaxes(title_text="Time") + image_path_0 = fig.to_html(full_html=False, default_width="75%") # The dict of plots injection_dict = {} - injection_dict['title'] = '
<h2>Custom machine learning forecast model tune</h2>' - injection_dict['subsubtitle0'] = '<h4>Performed a tuning routine using bayesian optimization for '+mlf.model_type+'</h4>' - injection_dict['subsubtitle0'] = '<h4>Forecasting variable '+mlf.var_model+'</h4>' - injection_dict['figure_0'] = image_path_0 + injection_dict["title"] = "<h2>Custom machine learning forecast model tune</h2>" + injection_dict["subsubtitle0"] = ( + "<h4>Performed a tuning routine using bayesian optimization for " + + mlf.model_type + + "</h4>" + ) + injection_dict["subsubtitle0"] = ( + "<h4>Forecasting variable " + mlf.var_model + "</h4>
" + ) + injection_dict["figure_0"] = image_path_0 return injection_dict -def build_params(params: dict, params_secrets: dict, options: dict, addon: int, logger: logging.Logger) -> dict: + +def build_params( + params: dict, + params_secrets: dict, + options: dict, + addon: int, + logger: logging.Logger, +) -> dict: """ Build the main params dictionary from the loaded options.json when using the add-on. @@ -536,45 +744,120 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, """ if addon == 1: # Updating variables in retrieve_hass_conf - params['retrieve_hass_conf']['freq'] = options.get('optimization_time_step',params['retrieve_hass_conf']['freq']) - params['retrieve_hass_conf']['days_to_retrieve'] = options.get('historic_days_to_retrieve',params['retrieve_hass_conf']['days_to_retrieve']) - params['retrieve_hass_conf']['var_PV'] = options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']) - params['retrieve_hass_conf']['var_load'] = options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load']) - params['retrieve_hass_conf']['load_negative'] = options.get('load_negative',params['retrieve_hass_conf']['load_negative']) - params['retrieve_hass_conf']['set_zero_min'] = options.get('set_zero_min',params['retrieve_hass_conf']['set_zero_min']) - params['retrieve_hass_conf']['var_replace_zero'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_replace_zero'])] - params['retrieve_hass_conf']['var_interp'] = [options.get('sensor_power_photovoltaics',params['retrieve_hass_conf']['var_PV']), options.get('sensor_power_load_no_var_loads',params['retrieve_hass_conf']['var_load'])] - params['retrieve_hass_conf']['method_ts_round'] = options.get('method_ts_round',params['retrieve_hass_conf']['method_ts_round']) + params["retrieve_hass_conf"]["freq"] = options.get( + "optimization_time_step", params["retrieve_hass_conf"]["freq"] + ) + params["retrieve_hass_conf"]["days_to_retrieve"] = options.get( + "historic_days_to_retrieve", + params["retrieve_hass_conf"]["days_to_retrieve"], + ) + params["retrieve_hass_conf"]["var_PV"] = options.get( + "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"] + ) + params["retrieve_hass_conf"]["var_load"] = options.get( + "sensor_power_load_no_var_loads", params["retrieve_hass_conf"]["var_load"] + ) + params["retrieve_hass_conf"]["load_negative"] = options.get( + "load_negative", params["retrieve_hass_conf"]["load_negative"] + ) + params["retrieve_hass_conf"]["set_zero_min"] = options.get( + "set_zero_min", params["retrieve_hass_conf"]["set_zero_min"] + ) + params["retrieve_hass_conf"]["var_replace_zero"] = [ + options.get( + "sensor_power_photovoltaics", + params["retrieve_hass_conf"]["var_replace_zero"], + ) + ] + params["retrieve_hass_conf"]["var_interp"] = [ + options.get( + "sensor_power_photovoltaics", params["retrieve_hass_conf"]["var_PV"] + ), + options.get( + "sensor_power_load_no_var_loads", + params["retrieve_hass_conf"]["var_load"], + ), + ] + params["retrieve_hass_conf"]["method_ts_round"] = options.get( + "method_ts_round", params["retrieve_hass_conf"]["method_ts_round"] + ) # Update params Secrets if specified - params['params_secrets'] = params_secrets - params['params_secrets']['time_zone'] = options.get('time_zone',params_secrets['time_zone']) - params['params_secrets']['lat'] = options.get('Latitude',params_secrets['lat']) - params['params_secrets']['lon'] = options.get('Longitude',params_secrets['lon']) - params['params_secrets']['alt'] = 
options.get('Altitude',params_secrets['alt']) + params["params_secrets"] = params_secrets + params["params_secrets"]["time_zone"] = options.get( + "time_zone", params_secrets["time_zone"] + ) + params["params_secrets"]["lat"] = options.get("Latitude", params_secrets["lat"]) + params["params_secrets"]["lon"] = options.get( + "Longitude", params_secrets["lon"] + ) + params["params_secrets"]["alt"] = options.get("Altitude", params_secrets["alt"]) # Updating variables in optim_conf - params['optim_conf']['set_use_battery'] = options.get('set_use_battery',params['optim_conf']['set_use_battery']) - params['optim_conf']['num_def_loads'] = options.get('number_of_deferrable_loads',params['optim_conf']['num_def_loads']) - if options.get('list_nominal_power_of_deferrable_loads',None) != None: - params['optim_conf']['P_deferrable_nom'] = [i['nominal_power_of_deferrable_loads'] for i in options.get('list_nominal_power_of_deferrable_loads')] - if options.get('list_operating_hours_of_each_deferrable_load',None) != None: - params['optim_conf']['def_total_hours'] = [i['operating_hours_of_each_deferrable_load'] for i in options.get('list_operating_hours_of_each_deferrable_load')] - if options.get('list_treat_deferrable_load_as_semi_cont',None) != None: - params['optim_conf']['treat_def_as_semi_cont'] = [i['treat_deferrable_load_as_semi_cont'] for i in options.get('list_treat_deferrable_load_as_semi_cont')] - params['optim_conf']['weather_forecast_method'] = options.get('weather_forecast_method',params['optim_conf']['weather_forecast_method']) + params["optim_conf"]["set_use_battery"] = options.get( + "set_use_battery", params["optim_conf"]["set_use_battery"] + ) + params["optim_conf"]["num_def_loads"] = options.get( + "number_of_deferrable_loads", params["optim_conf"]["num_def_loads"] + ) + if options.get("list_nominal_power_of_deferrable_loads", None) != None: + params["optim_conf"]["P_deferrable_nom"] = [ + i["nominal_power_of_deferrable_loads"] + for i in options.get("list_nominal_power_of_deferrable_loads") + ] + if options.get("list_operating_hours_of_each_deferrable_load", None) != None: + params["optim_conf"]["def_total_hours"] = [ + i["operating_hours_of_each_deferrable_load"] + for i in options.get("list_operating_hours_of_each_deferrable_load") + ] + if options.get("list_treat_deferrable_load_as_semi_cont", None) != None: + params["optim_conf"]["treat_def_as_semi_cont"] = [ + i["treat_deferrable_load_as_semi_cont"] + for i in options.get("list_treat_deferrable_load_as_semi_cont") + ] + params["optim_conf"]["weather_forecast_method"] = options.get( + "weather_forecast_method", params["optim_conf"]["weather_forecast_method"] + ) # Update optional param secrets - if params['optim_conf']['weather_forecast_method'] == "solcast": - params['params_secrets']['solcast_api_key'] = options.get('optional_solcast_api_key',params_secrets.get('solcast_api_key',"123456")) - params['params_secrets']['solcast_rooftop_id'] = options.get('optional_solcast_rooftop_id',params_secrets.get('solcast_rooftop_id',"123456")) - elif params['optim_conf']['weather_forecast_method'] == "solar.forecast": - params['params_secrets']['solar_forecast_kwp'] = options.get('optional_solar_forecast_kwp',params_secrets.get('solar_forecast_kwp',5)) - params['optim_conf']['load_forecast_method'] = options.get('load_forecast_method',params['optim_conf']['load_forecast_method']) - params['optim_conf']['delta_forecast'] = options.get('delta_forecast_daily',params['optim_conf']['delta_forecast']) - 
params['optim_conf']['load_cost_forecast_method'] = options.get('load_cost_forecast_method',params['optim_conf']['load_cost_forecast_method']) - if options.get('list_set_deferrable_load_single_constant',None) != None: - params['optim_conf']['set_def_constant'] = [i['set_deferrable_load_single_constant'] for i in options.get('list_set_deferrable_load_single_constant')] - if options.get('list_peak_hours_periods_start_hours',None) != None and options.get('list_peak_hours_periods_end_hours',None) != None: - start_hours_list = [i['peak_hours_periods_start_hours'] for i in options['list_peak_hours_periods_start_hours']] - end_hours_list = [i['peak_hours_periods_end_hours'] for i in options['list_peak_hours_periods_end_hours']] + if params["optim_conf"]["weather_forecast_method"] == "solcast": + params["params_secrets"]["solcast_api_key"] = options.get( + "optional_solcast_api_key", + params_secrets.get("solcast_api_key", "123456"), + ) + params["params_secrets"]["solcast_rooftop_id"] = options.get( + "optional_solcast_rooftop_id", + params_secrets.get("solcast_rooftop_id", "123456"), + ) + elif params["optim_conf"]["weather_forecast_method"] == "solar.forecast": + params["params_secrets"]["solar_forecast_kwp"] = options.get( + "optional_solar_forecast_kwp", + params_secrets.get("solar_forecast_kwp", 5), + ) + params["optim_conf"]["load_forecast_method"] = options.get( + "load_forecast_method", params["optim_conf"]["load_forecast_method"] + ) + params["optim_conf"]["delta_forecast"] = options.get( + "delta_forecast_daily", params["optim_conf"]["delta_forecast"] + ) + params["optim_conf"]["load_cost_forecast_method"] = options.get( + "load_cost_forecast_method", + params["optim_conf"]["load_cost_forecast_method"], + ) + if options.get("list_set_deferrable_load_single_constant", None) != None: + params["optim_conf"]["set_def_constant"] = [ + i["set_deferrable_load_single_constant"] + for i in options.get("list_set_deferrable_load_single_constant") + ] + if ( + options.get("list_peak_hours_periods_start_hours", None) != None + and options.get("list_peak_hours_periods_end_hours", None) != None + ): + start_hours_list = [ + i["peak_hours_periods_start_hours"] + for i in options["list_peak_hours_periods_start_hours"] + ] + end_hours_list = [ + i["peak_hours_periods_end_hours"] + for i in options["list_peak_hours_periods_end_hours"] + ] num_peak_hours = len(start_hours_list) list_hp_periods_list = [{'period_hp_'+str(i+1):[{'start':start_hours_list[i]},{'end':end_hours_list[i]}]} for i in range(num_peak_hours)] params['optim_conf']['list_hp_periods'] = list_hp_periods_list @@ -646,20 +929,35 @@ def build_params(params: dict, params_secrets: dict, options: dict, addon: int, for x in range(len(params['optim_conf']['P_deferrable_nom']), params['optim_conf']['num_def_loads']): params['optim_conf']['P_deferrable_nom'].append(0) # days_to_retrieve should be no less then 2 - if params['retrieve_hass_conf']['days_to_retrieve'] < 2: - params['retrieve_hass_conf']['days_to_retrieve'] = 2 - logger.warning("days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. Make sure your sensors also have at least 2 days of history") + if params["retrieve_hass_conf"]["days_to_retrieve"] < 2: + params["retrieve_hass_conf"]["days_to_retrieve"] = 2 + logger.warning( + "days_to_retrieve should not be lower then 2, setting days_to_retrieve to 2. 
Make sure your sensors also have at least 2 days of history" + ) else: - params['params_secrets'] = params_secrets + params["params_secrets"] = params_secrets # The params dict - params['passed_data'] = {'pv_power_forecast':None,'load_power_forecast':None,'load_cost_forecast':None,'prod_price_forecast':None, - 'prediction_horizon':None,'soc_init':None,'soc_final':None,'def_total_hours':None,'def_start_timestep':None,'def_end_timestep':None,'alpha':None,'beta':None} + params["passed_data"] = { + "pv_power_forecast": None, + "load_power_forecast": None, + "load_cost_forecast": None, + "prod_price_forecast": None, + "prediction_horizon": None, + "soc_init": None, + "soc_final": None, + "def_total_hours": None, + "def_start_timestep": None, + "def_end_timestep": None, + "alpha": None, + "beta": None, + } return params + def get_days_list(days_to_retrieve: int) -> pd.date_range: """ Get list of past days from today to days_to_retrieve. - + :param days_to_retrieve: Total number of days to retrieve from the past :type days_to_retrieve: int :return: The list of days @@ -668,19 +966,20 @@ def get_days_list(days_to_retrieve: int) -> pd.date_range: """ today = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0) d = (today - timedelta(days=days_to_retrieve)).isoformat() - days_list = pd.date_range(start=d, end=today.isoformat(), freq='D') - + days_list = pd.date_range(start=d, end=today.isoformat(), freq="D") + return days_list + def set_df_index_freq(df: pd.DataFrame) -> pd.DataFrame: """ Set the freq of a DataFrame DateTimeIndex. - + :param df: Input DataFrame :type df: pd.DataFrame :return: Input DataFrame with freq defined :rtype: pd.DataFrame - + """ idx_diff = np.diff(df.index) sampling = pd.to_timedelta(np.median(idx_diff)) diff --git a/src/emhass/web_server.py b/src/emhass/web_server.py index 2fdd2861..afb5370a 100644 --- a/src/emhass/web_server.py +++ b/src/emhass/web_server.py @@ -13,6 +13,7 @@ from emhass.command_line import set_input_data_dict from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import regressor_model_fit, regressor_model_predict from emhass.command_line import publish_data from emhass.utils import get_injection_dict, get_injection_dict_forecast_model_fit, \ get_injection_dict_forecast_model_tune, build_params @@ -193,6 +194,22 @@ def action_call(action_name): if not checkFileLog(ActionStr): return make_response(msg, 201) return make_response(grabLog(ActionStr), 400) + elif action_name == 'regressor-model-fit': + ActionStr = " >> Performing a machine learning regressor fit..." + app.logger.info(ActionStr) + regressor_model_fit(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-model-fit executed... \n' + if not checkFileLog(ActionStr): + return make_response(msg, 201) + return make_response(grabLog(ActionStr), 400) + elif action_name == 'regressor-model-predict': + ActionStr = " >> Performing a machine learning regressor predict..." + app.logger.info(ActionStr) + regressor_model_predict(input_data_dict, app.logger) + msg = f'EMHASS >> Action regressor-model-predict executed... \n' + if not checkFileLog(ActionStr): + return make_response(msg, 201) + return make_response(grabLog(ActionStr), 400) else: app.logger.error("ERROR: passed action is not valid") msg = f'EMHASS >> ERROR: Passed action is not valid... 
\n' diff --git a/tests/test_command_line_utils.py b/tests/test_command_line_utils.py index fb775f86..91aa9f00 100644 --- a/tests/test_command_line_utils.py +++ b/tests/test_command_line_utils.py @@ -5,10 +5,21 @@ from unittest.mock import patch import pandas as pd import pathlib, json, yaml, copy +import numpy as np from emhass.command_line import set_input_data_dict -from emhass.command_line import perfect_forecast_optim, dayahead_forecast_optim, naive_mpc_optim -from emhass.command_line import forecast_model_fit, forecast_model_predict, forecast_model_tune +from emhass.command_line import ( + perfect_forecast_optim, + dayahead_forecast_optim, + naive_mpc_optim, +) +from emhass.command_line import ( + forecast_model_fit, + forecast_model_predict, + forecast_model_tune, + regressor_model_fit, + regressor_model_predict, +) from emhass.command_line import publish_data from emhass.command_line import main from emhass import utils @@ -311,13 +322,99 @@ def test_forecast_model_fit_predict_tune(self): self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) # Test the tune method - df_pred_optim, mlf = forecast_model_tune(input_data_dict, logger, debug=True, mlf=mlf) + df_pred_optim, mlf = forecast_model_tune( + input_data_dict, logger, debug=True, mlf=mlf + ) self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) - # Test ijection_dict for tune method on webui + # Test injection_dict for tune method on webui injection_dict = utils.get_injection_dict_forecast_model_tune(df_fit_pred, mlf) self.assertIsInstance(injection_dict, dict) - self.assertIsInstance(injection_dict['figure_0'], str) + self.assertIsInstance(injection_dict["figure_0"], str) + + def test_regressor_model_fit_predict(self): + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + "mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params_json = json.dumps(params) + input_data_dict = set_input_data_dict( + emhass_conf, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["regression_model"] + == "AdaBoostRegression", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["csv_file"] + == "heating_prediction.csv", + ) + mlr = regressor_model_fit(input_data_dict, logger, debug=True) + + # def test_regressor_model_predict(self): + costfun = "profit" + action = "regressor-model-predict" # predict methods + params = TestCommandLineUtils.get_test_params() + runtimeparams = { + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "mlr_predict_entity_id": "sensor.predicted_hours_test", + 
"mlr_predict_unit_of_measurement": "h", + "mlr_predict_friendly_name": "Predicted hours", + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + + input_data_dict = set_input_data_dict( + emhass_conf, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["model_type"] + == "heating_hours_degreeday", + ) + self.assertTrue( + input_data_dict["params"]["passed_data"]["mlr_predict_friendly_name"] + == "Predicted hours", + ) + + regressor_model_predict(input_data_dict, logger, debug=True, mlr=mlr) + @patch('sys.argv', ['main', '--action', 'test', '--config', str(emhass_conf['config_path']), '--debug', 'True']) @@ -330,27 +427,30 @@ def test_main_wrong_action(self): def test_main_perfect_forecast_optim(self): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) + self.assertTrue(opt_res.isnull().sum().sum() == 0) self.assertIsInstance(opt_res.index, pd.core.indexes.datetimes.DatetimeIndex) - self.assertIsInstance(opt_res.index.dtype, pd.core.dtypes.dtypes.DatetimeTZDtype) - + self.assertIsInstance( + opt_res.index.dtype, + pd.core.dtypes.dtypes.DatetimeTZDtype, + ) + def test_main_dayahead_forecast_optim(self): with patch('sys.argv', ['main', '--action', 'dayahead-optim', '--config', str(emhass_conf['config_path']), '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, '--debug', 'True']): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + def test_main_naive_mpc_optim(self): with patch('sys.argv', ['main', '--action', 'naive-mpc-optim', '--config', str(emhass_conf['config_path']), '--params', self.params_json, '--runtimeparams', self.runtimeparams_json, '--debug', 'True']): opt_res = main() self.assertIsInstance(opt_res, pd.DataFrame) - self.assertTrue(opt_res.isnull().sum().sum()==0) - self.assertTrue(len(opt_res)==10) - + self.assertTrue(opt_res.isnull().sum().sum() == 0) + self.assertTrue(len(opt_res) == 10) + def test_main_forecast_model_fit(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -381,12 +481,12 @@ def test_main_forecast_model_predict(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": "KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) with patch('sys.argv', ['main', '--action', 'forecast-model-predict', '--config', str(emhass_conf['config_path']), '--params', params_json, '--runtimeparams', runtimeparams_json, @@ -394,7 +494,7 @@ def test_main_forecast_model_predict(self): df_pred = main() self.assertIsInstance(df_pred, pd.Series) self.assertTrue(df_pred.isnull().sum().sum() == 0) - + def test_main_forecast_model_tune(self): params = copy.deepcopy(json.loads(self.params_json)) runtimeparams = { @@ -403,12 +503,12 @@ def test_main_forecast_model_tune(self): "var_model": "sensor.power_load_no_var_loads", "sklearn_model": 
"KNeighborsRegressor", "num_lags": 48, - "split_date_delta": '48h', - "perform_backtest": False + "split_date_delta": "48h", + "perform_backtest": False, } runtimeparams_json = json.dumps(runtimeparams) - params['passed_data'] = runtimeparams - params['optim_conf']['load_forecast_method'] = 'skforecast' + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" params_json = json.dumps(params) with patch('sys.argv', ['main', '--action', 'forecast-model-tune', '--config', str(emhass_conf['config_path']), '--params', params_json, '--runtimeparams', runtimeparams_json, @@ -416,6 +516,74 @@ def test_main_forecast_model_tune(self): df_pred_optim, mlf = main() self.assertIsInstance(df_pred_optim, pd.DataFrame) self.assertTrue(mlf.is_tuned == True) + + def test_main_regressor_model_fit(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-fit", + "--config", + str(emhass_conf["config_path"]), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + mlr = main() + + def test_main_regressor_model_predict(self): + params = copy.deepcopy(json.loads(self.params_json)) + runtimeparams = { + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2], + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + with patch( + "sys.argv", + [ + "main", + "--action", + "regressor-model-predict", + "--config", + str(emhass_conf["config_path"]), + "--params", + params_json, + "--runtimeparams", + runtimeparams_json, + "--debug", + "True", + ], + ): + prediction = main() + self.assertIsInstance(prediction, np.ndarray) + @patch('sys.argv', ['main', '--action', 'publish-data', '--config', str(emhass_conf['config_path']), '--debug', 'True']) diff --git a/tests/test_machine_learning_regressor.py b/tests/test_machine_learning_regressor.py new file mode 100644 index 00000000..0d40ac0b --- /dev/null +++ b/tests/test_machine_learning_regressor.py @@ -0,0 +1,111 @@ +"""Machine learning regressor test module.""" + +import copy +import json +import pathlib +import unittest + +import numpy as np +import pandas as pd +import yaml +from emhass import utils +from emhass.command_line import set_input_data_dict +from emhass.machine_learning_regressor import MLRegressor +from sklearn.pipeline import Pipeline + +# the root folder +root = str(utils.get_root(__file__, num_parent=2)) +emhass_conf = {} +emhass_conf["config_path"] = pathlib.Path(root) / "config_emhass.yaml" +emhass_conf["data_path"] = pathlib.Path(root) / "data/" +emhass_conf["root_path"] = pathlib.Path(root) +# create logger +logger, ch = utils.get_logger(__name__, emhass_conf, save_to_file=False) + + +class 
TestMLRegressor(unittest.TestCase): + @staticmethod + def get_test_params(): + with open(emhass_conf["config_path"]) as file: + params = yaml.safe_load(file) + params.update( + { + "params_secrets": { + "hass_url": "http://supervisor/core/api", + "long_lived_token": "${SUPERVISOR_TOKEN}", + "time_zone": "Europe/Paris", + "lat": 45.83, + "lon": 6.86, + "alt": 8000.0, + }, + }, + ) + return params + + def setUp(self): + params = TestMLRegressor.get_test_params() + params_json = json.dumps(params) + costfun = "profit" + action = "regressor-model-fit" # fit and predict methods + params = copy.deepcopy(json.loads(params_json)) + runtimeparams = { + "csv_file": "heating_prediction.csv", + "features": ["degreeday", "solar"], + "target": "hour", + "regression_model": "AdaBoostRegression", + "model_type": "heating_hours_degreeday", + "timestamp": "timestamp", + "date_features": ["month", "day_of_week"], + "new_values": [12.79, 4.766, 1, 2] + } + runtimeparams_json = json.dumps(runtimeparams) + params["passed_data"] = runtimeparams + params["optim_conf"]["load_forecast_method"] = "skforecast" + params_json = json.dumps(params) + self.input_data_dict = set_input_data_dict( + emhass_conf, + costfun, + params_json, + runtimeparams_json, + action, + logger, + get_data_from_file=True, + ) + data = copy.deepcopy(self.input_data_dict["df_input_data"]) + self.assertIsInstance(data, pd.DataFrame) + self.csv_file = self.input_data_dict["params"]["passed_data"]["csv_file"] + features = self.input_data_dict["params"]["passed_data"]["features"] + target = self.input_data_dict["params"]["passed_data"]["target"] + regression_model = self.input_data_dict["params"]["passed_data"][ + "regression_model" + ] + model_type = self.input_data_dict["params"]["passed_data"]["model_type"] + timestamp = self.input_data_dict["params"]["passed_data"]["timestamp"] + self.date_features = self.input_data_dict["params"]["passed_data"][ + "date_features" + ] + self.new_values = self.input_data_dict["params"]["passed_data"]["new_values"] + self.mlr = MLRegressor( + data, + model_type, + regression_model, + features, + target, + timestamp, + logger, + ) + + def test_fit(self): + self.mlr.fit(self.date_features) + self.assertIsInstance(self.mlr.model, Pipeline) + + def test_predict(self): + self.mlr.fit(self.date_features) + predictions = self.mlr.predict(self.new_values) + self.assertIsInstance(predictions, np.ndarray) + + +if __name__ == "__main__": + unittest.main() + ch.close() + logger.removeHandler(ch)
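
For reviewers, a minimal usage sketch of the MLRegressor class exercised by the new tests above. This is not part of the patch: the constructor arguments, fit/predict calls, column names, and sample values mirror tests/test_machine_learning_regressor.py and data/heating_prediction.csv, while the plain pd.read_csv load and the logger setup are illustrative assumptions only.

import logging

import pandas as pd
from emhass.machine_learning_regressor import MLRegressor

logger = logging.getLogger(__name__)

# Columns of data/heating_prediction.csv: timestamp, degreeday, solar, hour
data = pd.read_csv("data/heating_prediction.csv")

mlr = MLRegressor(
    data,                         # training DataFrame
    "heating_hours_degreeday",    # model_type
    "AdaBoostRegression",         # regression_model
    ["degreeday", "solar"],       # features
    "hour",                       # target
    "timestamp",                  # timestamp column
    logger,
)
mlr.fit(["month", "day_of_week"])               # optional date features, as in the tests
prediction = mlr.predict([12.79, 4.766, 1, 2])  # new feature values, returns a numpy.ndarray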