Skip to content

Commit

Permalink
Feat - Prepared a new version release
Browse files Browse the repository at this point in the history
  • Loading branch information
davidusb-geek committed Oct 19, 2023
1 parent c49ffb5 commit bec6f76
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 2 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## [0.5.1] - 2023-10-19
### Improvement
- Improved documentation, thanks to @g1za
- Bumped skforecast to 0.10.1
- Added a new initial script for exploration of time series clustering. This will one day replace the need to configure the house load sensor with subtracted deferrable load consumption
### Fix
- Updated automated testing, dropped support for Python 3.8

## [0.5.0] - 2023-09-03
### Improvement
- Finally added support for ingress thanks to the work from @siku2
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'David HERNANDEZ'

# The full version, including alpha/beta/rc tags
release = '0.5.0'
release = '0.5.1'

# -- General configuration ---------------------------------------------------

Expand Down
117 changes: 117 additions & 0 deletions scripts/load_clustering_stumpy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
import pathlib, pickle, copy, time
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'browser'
pd.options.plotting.backend = "plotly"

from emhass.retrieve_hass import retrieve_hass
from emhass.forecast import forecast
from emhass.utils import get_root, get_yaml_parse, get_days_list, get_logger

from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, silhouette_score

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import bayesian_search_forecaster
from skforecast.model_selection import backtesting_forecaster
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster
from skopt.space import Categorical, Real, Integer


# Resolve the project root folder (two parent levels up from this script);
# get_root is provided by emhass.utils.
root = str(get_root(__file__, num_parent=2))
# Create the module logger and its console handler; save_to_file=True
# presumably also writes a log file under root — see emhass.utils.get_logger.
logger, ch = get_logger(__name__, root, save_to_file=True)


if __name__ == '__main__':
    # Exploratory script: retrieve (or reload) the household load power history,
    # build a 1-step lag embedding of the series, evaluate cluster counts with
    # the silhouette score, then fit a final KMeans and visualize the clusters.

    days_to_retrieve = 240  # history window pulled from Home Assistant
    model_type = "load_clustering"
    var_model = "sensor.power_load_positive"

    data_path = pathlib.Path(root+'/data/data_train_'+model_type+'.pkl')
    params = None
    template = 'presentation'  # plotly template used for every figure

    if data_path.is_file():
        # Reuse previously retrieved data to avoid hitting Home Assistant again
        logger.info("Loading a previous data file")
        with open(data_path, "rb") as fid:
            data, var_model = pickle.load(fid)
    else:
        logger.info("Using EMHASS methods to retrieve the new forecast model train data")
        retrieve_hass_conf, _, _ = get_yaml_parse(pathlib.Path(root+'/config_emhass.yaml'), use_secrets=True)
        rh = retrieve_hass(retrieve_hass_conf['hass_url'], retrieve_hass_conf['long_lived_token'],
                           retrieve_hass_conf['freq'], retrieve_hass_conf['time_zone'],
                           params, root, logger, get_data_from_file=False)

        days_list = get_days_list(days_to_retrieve)
        var_list = [var_model]
        rh.get_data(days_list, var_list)

        # Cache the retrieved DataFrame for the next run
        with open(data_path, 'wb') as fid:
            pickle.dump((rh.df_final, var_model), fid, pickle.HIGHEST_PROTOCOL)

        data = copy.deepcopy(rh.df_final)

    logger.info(data.describe())

    # Plot the input data
    fig = data.plot()
    fig.layout.template = template
    fig.update_yaxes(title_text = "Power (W)")
    fig.update_xaxes(title_text = "Time")
    fig.show()

    # Build a 1-step lag embedding: each row is (y(t), y(t+1)).
    # dropna() removes the first row, so data_lag is one row shorter than data.
    data_lag = pd.concat([data, data.shift()], axis=1)
    data_lag.columns = ['power_load y(t)', 'power_load y(t+1)']
    data_lag = data_lag.dropna()

    fig2 = data_lag.plot.scatter(x='power_load y(t)', y='power_load y(t+1)', c='DarkBlue')
    fig2.layout.template = template
    fig2.show()

    # Elbow method to check how many clusters (alternative to the silhouette
    # method below, kept for reference)
    # distortions = []
    # K = range(1,12)

    # for cluster_size in K:
    #     kmeans = KMeans(n_clusters=cluster_size, init='k-means++')
    #     kmeans = kmeans.fit(data_lag)
    #     distortions.append(kmeans.inertia_)

    # df = pd.DataFrame({'Clusters': K, 'Distortions': distortions})
    # fig = (px.line(df, x='Clusters', y='Distortions', template=template)).update_traces(mode='lines+markers')
    # fig.show()

    # The silhouette method: score each candidate cluster count on a sample
    silhouette_scores = []
    K = range(2,12)

    for cluster_size in K:
        kmeans = KMeans(n_clusters=cluster_size, init='k-means++', random_state=200)
        labels = kmeans.fit(data_lag).labels_
        silhouette_score_tmp = silhouette_score(data_lag, labels, metric='euclidean',
                                                sample_size=1000, random_state=200)
        silhouette_scores.append(silhouette_score_tmp)

    df = pd.DataFrame({'Clusters': K, 'Silhouette Score': silhouette_scores})
    fig = (px.line(df, x='Clusters', y='Silhouette Score', template=template)).update_traces(mode='lines+markers')
    fig.show()

    # The final clustering
    kmeans = KMeans(n_clusters=6, init='k-means++')
    kmeans = kmeans.fit(data_lag)
    # Attach the labels to data_lag, the frame KMeans was fit on. Assigning to
    # `data` (as before) fails: dropna() made data_lag one row shorter, so the
    # label array length does not match len(data).
    data_lag['cluster_group'] = kmeans.labels_

    # Scatter on data_lag: the 'power_load y(t)'/'power_load y(t+1)' columns
    # only exist there, not in the raw `data` frame.
    fig = px.scatter(data_lag, x='power_load y(t)', y='power_load y(t+1)', color='cluster_group', template=template)
    fig.show()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

setup(
name='emhass', # Required
version='0.5.0', # Required
version='0.5.1', # Required
description='An Energy Management System for Home Assistant', # Optional
long_description=long_description, # Optional
long_description_content_type='text/markdown', # Optional (see note above)
Expand Down

0 comments on commit bec6f76

Please sign in to comment.