Skip to content

Commit

Permalink
Merge pull request #55 from mindsdb/staging
Browse files Browse the repository at this point in the history
Release 24.5.1.0
  • Loading branch information
paxcema authored May 13, 2024
2 parents 257e3c6 + 5a73ea3 commit e30f30d
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 2 deletions.
2 changes: 1 addition & 1 deletion dataprep_ml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataprep_ml.base import StatisticalAnalysis, DataAnalysis

-__version__ = '0.0.22'
+__version__ = '24.5.1.0'
__name__ = "dataprep_ml"


Expand Down
4 changes: 4 additions & 0 deletions dataprep_ml/cleaners.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,10 @@ def clean_timeseries(df: pd.DataFrame, tss: dict) -> pd.DataFrame:

# save original order of columns
orig_cols = deepcopy(df.columns.to_list())

# cast order_by as numerical
df[tss['order_by']] = pd.to_numeric(df[tss['order_by']], errors='raise')

# fix duplicates by group
if tss.get('group_by', False):
correct_dfs = []
Expand Down
7 changes: 7 additions & 0 deletions dataprep_ml/splitters.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ def splitter(
else:
train, dev, test = simple_split(data, pct_train, pct_dev, pct_test)

# Final assertions for time series
if min(len(train), len(dev)) < tss.get('window', 1):
raise Exception(f"Dataset size is too small for the specified window size ({tss.get('window', 1)})")

if min(len(train), len(dev), len(test)) < tss.get('horizon', 1):
raise Exception(f"Dataset size is too small for the specified horizon size ({tss.get('horizon', 1)})")

return {"train": train, "test": test, "dev": dev, "stratified_on": stratify_on}


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dataprep-ml"
-version = "0.0.23"
+version = "24.5.1.0"
description = "Automated dataframe analysis for Machine Learning pipelines."
authors = ["MindsDB Inc. <[email protected]>"]
license = "GPL-3.0"
Expand Down

0 comments on commit e30f30d

Please sign in to comment.