From 52f3ac3a0068d0838d1ce8f9afbeef80572413e4 Mon Sep 17 00:00:00 2001 From: liangs6212 <80952198+liangs6212@users.noreply.github.com> Date: Thu, 1 Sep 2022 08:49:48 +0800 Subject: [PATCH] Chronos: Remove UTs for all `Deprecated` API and `tf1` (#5530) * delete deprecated UTs * remove tf1 UTs --- python/chronos/dev/test/run-pytests-tf1.sh | 39 -- .../chronos/autots/deprecated/__init__.py | 18 - .../autots/deprecated/config/__init__.py | 15 - .../autots/deprecated/config/test_recipe.py | 73 --- .../chronos/autots/deprecated/conftest.py | 42 -- .../autots/deprecated/feature/__init__.py | 15 - .../feature/test_time_sequence_feature.py | 541 ------------------ .../autots/deprecated/pipeline/__init__.py | 15 - .../autots/deprecated/pipeline/conftest.py | 42 -- .../deprecated/pipeline/test_time_sequence.py | 296 ---------- .../deprecated/preprocessing/__init__.py | 15 - .../deprecated/preprocessing/test_fill.py | 71 --- .../deprecated/preprocessing/test_util.py | 50 -- .../autots/deprecated/regression/__init__.py | 15 - .../autots/deprecated/regression/conftest.py | 42 -- .../test_time_sequence_predictor.py | 111 ---- .../chronos/autots/deprecated/test_auto_ts.py | 142 ----- .../test/bigdl/chronos/model/tf1/__init__.py | 15 - .../chronos/model/tf1/test_Seq2Seq_keras.py | 236 -------- .../model/tf1/test_VanillaLSTM_keras.py | 105 ---- .../chronos/model/tf1/test_mtnet_keras.py | 116 ---- 21 files changed, 2014 deletions(-) delete mode 100755 python/chronos/dev/test/run-pytests-tf1.sh delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py delete mode 100644 python/chronos/test/bigdl/chronos/model/tf1/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/model/tf1/test_Seq2Seq_keras.py delete mode 100644 python/chronos/test/bigdl/chronos/model/tf1/test_VanillaLSTM_keras.py delete mode 100644 python/chronos/test/bigdl/chronos/model/tf1/test_mtnet_keras.py diff --git a/python/chronos/dev/test/run-pytests-tf1.sh b/python/chronos/dev/test/run-pytests-tf1.sh deleted file mode 100755 index 545318ff1f3..00000000000 --- a/python/chronos/dev/test/run-pytests-tf1.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -cd "`dirname $0`" -cd ../.. - -export PYSPARK_PYTHON=python -export PYSPARK_DRIVER_PYTHON=python -if [ -z "${OMP_NUM_THREADS}" ]; then - export OMP_NUM_THREADS=1 -fi - -ray stop -f - -echo "Running chronos tests TF1 and Deprecated API" -python -m pytest -v test/bigdl/chronos/model/tf1 - -exit_status_0=$? -if [ $exit_status_0 -ne 0 ]; -then - exit $exit_status_0 -fi - -ray stop -f diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py deleted file mode 100644 index eb5543fce91..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest - -skip_method = pytest.mark.skip(reason="We will remove deprecated API.") diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py deleted file mode 100644 index 2151a805423..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py b/python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py deleted file mode 100644 index 6cb31bf5818..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py +++ /dev/null @@ -1,73 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.config.recipe import * -from .. import skip_method - - -@skip_method -class TestTimeSequencePredictor(ZooTestCase): - - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def test_SmokeRecipe(self): - recipe = SmokeRecipe() - assert recipe.num_samples == 1 - assert recipe.training_iteration == 1 - assert recipe.reward_metric is None - - def test_MTNetSmokeRecipe(self): - recipe = MTNetSmokeRecipe() - assert recipe.num_samples == 1 - assert recipe.training_iteration == 1 - assert recipe.reward_metric is None - - def test_GridRandomRecipe(self): - recipe = GridRandomRecipe(num_rand_samples=100) - search_space = recipe.search_space() - assert search_space is not None - - def test_RandomRecipe(self): - recipe = GridRandomRecipe(num_rand_samples=100) - search_space = recipe.search_space() - assert search_space is not None - - def test_LSTMGridRandomRecipe(self): - recipe = LSTMGridRandomRecipe(num_rand_samples=100) - search_space = recipe.search_space() - assert search_space is not None - - def test_MTNetGridRandomRecipe(self): - recipe = MTNetGridRandomRecipe() - search_space = recipe.search_space() - assert search_space is not None - - def test_BayesRecipe(self): - recipe = BayesRecipe(num_samples=10) - search_space = recipe.search_space() - assert search_space is not None - assert recipe.reward_metric is not None - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py b/python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py deleted file mode 100644 index f5654719ca6..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest -sc = None -ray_ctx = None - - -@pytest.fixture(autouse=False, scope='class') -def init_ray_context_fixture(): - from bigdl.dllib.nncontext import init_spark_on_local - from bigdl.orca.ray import OrcaRayContext - sc = init_spark_on_local(cores=4, spark_log_level="INFO") - ray_ctx = OrcaRayContext(sc=sc, object_store_memory="1g") - ray_ctx.init() - yield - ray_ctx.stop() - sc.stop() - - -# @pytest.fixture() -# def setUpModule(): -# sc = init_spark_on_local(cores=4, spark_log_level="INFO") -# ray_ctx = RayContext(sc=sc) -# ray_ctx.init() -# -# -# def tearDownModule(): -# ray_ctx.stop() -# sc.stop() diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py deleted file mode 100644 index 2151a805423..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py b/python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py deleted file mode 100644 index 5a3bb812331..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py +++ /dev/null @@ -1,541 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.feature.utils import save, restore -from bigdl.chronos.autots.deprecated.feature.time_sequence import * -from numpy.testing import assert_array_almost_equal -import json -import tempfile -import shutil - -from .. import skip_method -from bigdl.chronos.autots.deprecated.preprocessing.utils import train_val_test_split - - -@skip_method -class TestTimeSequenceFeature(ZooTestCase): - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def test_get_feature_list(self): - dates = pd.date_range('1/1/2019', periods=8) - data = np.random.randn(8, 3) - df = pd.DataFrame({"datetime": dates, "values": data[:, 0], - "A": data[:, 1], "B": data[:, 2]}) - feat = TimeSequenceFeatureTransformer(dt_col="datetime", - target_col="values", - extra_features_col=["A", "B"], - drop_missing=True) - feature_list = feat.get_feature_list() - assert set(feature_list) == {'IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - 'DAY(datetime)', - 'IS_WEEKEND(datetime)', - 'WEEKDAY(datetime)', - 'MONTH(datetime)', - 'DAYOFYEAR(datetime)', - 'WEEKOFYEAR(datetime)', - 'MINUTE(datetime)', - 'A', - 'B'} - - feat = TimeSequenceFeatureTransformer(dt_col="datetime", - target_col="values", - extra_features_col=["A", "B"], - drop_missing=True, - time_features=False) - feature_list = feat.get_feature_list() - assert set(feature_list) == {'A', 'B'} - - def test_fit_transform(self): - sample_num = 8 - past_seq_len = 2 - dates = pd.date_range('1/1/2019', periods=sample_num) - data = np.random.randn(sample_num, 3) - df = pd.DataFrame({"datetime": dates, "values": data[:, 0], - "A": data[:, 1], "B": data[:, 2]}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - 'A']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - x, y = feat.fit_transform(df, **config) - assert x.shape == (sample_num - past_seq_len, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert y.shape == (sample_num - past_seq_len, 1) - assert np.mean(np.concatenate((x[0, :, 0], y[:, 0]), axis=None)) < 1e-5 - - def test_fit_transform_df_list(self): - sample_num = 8 - past_seq_len = 2 - dates = pd.date_range('1/1/2019', periods=sample_num) - data = np.random.randn(sample_num, 3) - df = pd.DataFrame({"datetime": dates, "values": data[:, 0], - "A": data[:, 1], "B": data[:, 2]}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - 'A']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - - df_list = [df] * 3 - x, y = feat.fit_transform(df_list, **config) - single_result_len = sample_num - past_seq_len - assert x.shape == (single_result_len * 3, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert y.shape == (single_result_len * 3, 1) - assert_array_almost_equal(x[:single_result_len], - x[single_result_len: 2 * single_result_len], decimal=2) - assert_array_almost_equal(x[:single_result_len], x[2 * single_result_len:], decimal=2) - assert_array_almost_equal(y[:single_result_len], - y[single_result_len: 2 * single_result_len], - decimal=2) - assert_array_almost_equal(y[:single_result_len], y[2 * single_result_len:], decimal=2) - - assert np.mean(np.concatenate((x[0, :, 0], y[:single_result_len, 0]), axis=None)) < 1e-5 - - def test_fit_transform_input_datetime(self): - # if the type of input datetime is not datetime64, raise an error - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - df = pd.DataFrame({"datetime": dates.strftime('%m/%d/%Y'), "values": values}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": 2} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - - with pytest.raises(RuntimeError) as excinfo: - feat.fit_transform(df, **config) - assert 'np.datetime64' in str(excinfo.value) - - # if there is NaT in datetime, raise an error - df.loc[1, "datetime"] = None - with pytest.raises(RuntimeError, match=r".* datetime .*"): - feat.fit_transform(df, **config) - - def test_input_data_len(self): - sample_num = 100 - past_seq_len = 20 - dates = pd.date_range('1/1/2019', periods=sample_num) - values = np.random.randn(sample_num) - df = pd.DataFrame({"datetime": dates, "values": values}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - train_df, val_df, test_df = train_val_test_split(df, - val_ratio=0.1, - test_ratio=0.1, - look_back=10) - - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - with pytest.raises(RuntimeError, match=r".*past sequence length.*"): - feat.fit_transform(train_df[:20], **config) - - feat.fit_transform(train_df, **config) - with pytest.raises(RuntimeError, match=r".*past sequence length.*"): - feat.transform(val_df, is_train=True) - - with pytest.raises(RuntimeError, match=r".*past sequence length.*"): - feat.transform(test_df[:-1], is_train=False) - out_x, out_y = feat.transform(test_df, is_train=False) - assert len(out_x) == 1 - assert out_y is None - - def test_fit_transform_input_data(self): - # if there is NaN in data other than datetime, drop the training sample. - num_samples = 8 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples) - df = pd.DataFrame({"datetime": dates, "values": values}) - df.loc[2, "values"] = None - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - - x, y = feat.fit_transform(df, **config) - # mask_x = [1, 0, 0, 1, 1, 1] - # mask_y = [0, 1, 1, 1, 1, 1] - # mask = [0, 0, 0, 1, 1, 1] - assert x.shape == (3, past_seq_len, len(json.loads(config["selected_features"])) + 1) - assert y.shape == (3, 1) - - def test_transform_train_true(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - val_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - - feat.fit_transform(train_df, **config) - val_x, val_y = feat.transform(val_df, is_train=True) - assert val_x.shape == (val_df.shape[0] - past_seq_len, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert val_y.shape == (val_df.shape[0] - past_seq_len, 1) - - def test_transform_train_true_df_list(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - val_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - - train_df_list = [train_df] * 3 - feat.fit_transform(train_df_list, **config) - val_df_list = [val_df] * 3 - val_x, val_y = feat.transform(val_df_list, is_train=True) - single_result_len = val_df.shape[0] - past_seq_len - assert val_x.shape == (single_result_len * 3, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert val_y.shape == (single_result_len * 3, 1) - - def test_transform_train_false(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - test_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - feat.fit_transform(train_df, **config) - test_x, _ = feat.transform(test_df, is_train=False) - assert test_x.shape == (test_df.shape[0] - past_seq_len + 1, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - - def test_transform_train_false_df_list(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - test_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - train_df_list = [train_df] * 3 - feat.fit_transform(train_df_list, **config) - test_df_list = [test_df] * 3 - test_x, _ = feat.transform(test_df_list, is_train=False) - assert test_x.shape == ((test_df.shape[0] - past_seq_len + 1) * 3, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - - def test_save_restore(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - df = pd.DataFrame({"dt": dates, "v": values}) - - future_seq_len = 2 - dt_col = "dt" - target_col = "v" - drop_missing = True - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, - dt_col=dt_col, - target_col=target_col, - drop_missing=drop_missing) - - feature_list = feat.get_feature_list() - config = {"selected_features": json.dumps(feature_list), - "past_seq_len": 2 - } - - train_x, train_y = feat.fit_transform(df, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - assert new_ft.future_seq_len == future_seq_len - assert new_ft.dt_col == dt_col - assert new_ft.target_col[0] == target_col - assert new_ft.extra_features_col is None - assert new_ft.drop_missing == drop_missing - - test_x, _ = new_ft.transform(df[:-future_seq_len], is_train=False) - - assert_array_almost_equal(test_x, train_x, decimal=2) - - finally: - shutil.rmtree(dirname) - - def test_post_processing_train(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - - train_x, train_y = feat.fit_transform(df, **config) - y_unscale, y_unscale_1 = feat.post_processing(df, train_y, is_train=True) - y_input = df[past_seq_len:][[value_col]].values - msg = "y_unscale is {}, y_unscale_1 is {}".format(y_unscale, y_unscale_1) - assert_array_almost_equal(y_unscale, y_unscale_1, decimal=2), msg - msg = "y_unscale is {}, y_input is {}".format(y_unscale, y_input) - assert_array_almost_equal(y_unscale, y_input, decimal=2), msg - - def test_post_processing_train_df_list(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - df_list = [df] * 3 - train_x, train_y = feat.fit_transform(df_list, **config) - y_unscale, y_unscale_1 = feat.post_processing(df_list, train_y, is_train=True) - y_input = df[past_seq_len:][[value_col]].values - target_y = np.concatenate([y_input] * 3) - msg = "y_unscale is {}, y_unscale_1 is {}".format(y_unscale, y_unscale_1) - assert_array_almost_equal(y_unscale, y_unscale_1, decimal=2), msg - msg = "y_unscale is {}, y_input is {}".format(y_unscale, target_y) - assert_array_almost_equal(y_unscale, target_y, decimal=2), msg - - def test_post_processing_test_1(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - - train_x, train_y = feat.fit_transform(df, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature_") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - test_df = df[:-future_seq_len] - new_ft.transform(test_df, is_train=False) - output_value_df = new_ft.post_processing(test_df, train_y, is_train=False) - - # train_y is generated from df[past_seq_len:] - target_df = df[past_seq_len:].copy().reset_index(drop=True) - - assert output_value_df[dt_col].equals(target_df[dt_col]) - assert_array_almost_equal(output_value_df[value_col].values, - target_df[value_col].values, decimal=2) - - finally: - shutil.rmtree(dirname) - - def test_post_processing_test_df_list(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - df_list = [df] * 3 - train_x, train_y = feat.fit_transform(df_list, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature_") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - test_df = df[:-future_seq_len] - test_df_list = [test_df] * 3 - new_ft.transform(test_df_list, is_train=False) - output_value_df_list = new_ft.post_processing(test_df_list, train_y, is_train=False) - - # train_y is generated from df[past_seq_len:] - target_df = df[past_seq_len:].copy().reset_index(drop=True) - - assert output_value_df_list[0].equals(output_value_df_list[1]) - assert output_value_df_list[0].equals(output_value_df_list[2]) - assert output_value_df_list[0][dt_col].equals(target_df[dt_col]) - assert_array_almost_equal(output_value_df_list[0][value_col].values, - target_df[value_col].values, decimal=2) - - finally: - shutil.rmtree(dirname) - - def test_post_processing_test_2(self): - sample_num = 8 - dates = pd.date_range('1/1/2019', periods=sample_num) - values = np.random.randn(sample_num) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 2 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - - train_x, train_y = feat.fit_transform(df, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature_") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - test_df = df[:-future_seq_len] - new_ft.transform(test_df, is_train=False) - output_value_df = new_ft.post_processing(test_df, train_y, is_train=False) - assert output_value_df.shape == (sample_num - past_seq_len - future_seq_len + 1, - future_seq_len + 1) - - columns = ["{}_{}".format(value_col, i) for i in range(future_seq_len)] - output_value = output_value_df[columns].values - target_df = df[past_seq_len:].copy().reset_index(drop=True) - target_value = feat._roll_test(target_df["values"], future_seq_len) - - assert output_value_df[dt_col].equals(target_df[:-future_seq_len + 1][dt_col]) - msg = "output_value is {}, target_value is {}".format(output_value, target_value) - assert_array_almost_equal(output_value, target_value, decimal=2), msg - - finally: - shutil.rmtree(dirname) - - def test_future_time_validation(self): - sample_num = 8 - dates = pd.date_range('1/1/2100', periods=sample_num) - values = np.random.randn(sample_num) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - x, y = feat.fit_transform(df, **config) - assert x.shape == (sample_num - past_seq_len, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert y.shape == (sample_num - past_seq_len, 1) - - -if __name__ == "__main__": - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py deleted file mode 100644 index 2151a805423..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py b/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py deleted file mode 100644 index fff8c5a29b9..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest -sc = None -ray_ctx = None - - -@pytest.fixture(autouse=True, scope='function') -def automl_fixture(): - from bigdl.dllib.nncontext import init_spark_on_local - from bigdl.orca.ray import OrcaRayContext - sc = init_spark_on_local(cores=4, spark_log_level="INFO") - ray_ctx = OrcaRayContext(sc=sc, object_store_memory="1g") - ray_ctx.init() - yield - ray_ctx.stop() - sc.stop() - - -# @pytest.fixture() -# def setUpModule(): -# sc = init_spark_on_local(cores=4, spark_log_level="INFO") -# ray_ctx = RayContext(sc=sc) -# ray_ctx.init() -# -# -# def tearDownModule(): -# ray_ctx.stop() -# sc.stop() diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py b/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py deleted file mode 100644 index 29100f4cd09..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py +++ /dev/null @@ -1,296 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import shutil -import tempfile - -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.pipeline.time_sequence import load_ts_pipeline -from bigdl.chronos.autots.deprecated.regression.time_sequence_predictor import TimeSequencePredictor -from bigdl.chronos.autots.deprecated.feature.time_sequence import TimeSequenceFeatureTransformer -from bigdl.chronos.autots.deprecated.model.time_sequence import TimeSequenceModel -from bigdl.orca.automl.metrics import Evaluator -from bigdl.chronos.autots.deprecated.config.recipe import * - -import numpy as np -import pandas as pd -from numpy.testing import assert_array_almost_equal -import os -from .. import skip_method - -default_past_seq_len = 2 - - -@skip_method -class TestTimeSequencePipeline(ZooTestCase): - - def setup_method(self, method): - pass - - def teardown_method(self, method): - """ - Teardown any state that was previously setup with a setup_method call. - """ - pass - - def get_input_tsp(self, future_seq_len, target_col): - sample_num = np.random.randint(100, 200) - test_sample_num = np.random.randint(20, 30) - if isinstance(target_col, str): - train_df = pd.DataFrame({"datetime": pd.date_range('1/1/2019', - periods=sample_num), - target_col: np.random.randn(sample_num)}) - test_df = pd.DataFrame({"datetime": pd.date_range('1/1/2019', - periods=test_sample_num), - target_col: np.random.randn(test_sample_num)}) - else: - train_df = pd.DataFrame({t: np.random.randn(sample_num) for t in target_col}) - train_df["datetime"] = pd.date_range('1/1/2019', periods=sample_num) - test_df = pd.DataFrame({t: np.random.randn(test_sample_num) for t in target_col}) - test_df["datetime"] = pd.date_range('1/1/2019', periods=test_sample_num) - tsp = TimeSequencePredictor(dt_col="datetime", - target_col=target_col, - future_seq_len=future_seq_len, - extra_features_col=None, ) - return train_df, test_df, tsp, test_sample_num - - # def test_evaluate_predict_future_equal_1(self): - # target_col = "values" - # metrics = ["mse", "r2"] - # future_seq_len = 1 - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # pipeline = tsp.fit(train_df, test_df) - # mse, rs = pipeline.evaluate(test_df, metrics=metrics) - # assert isinstance(mse, np.float) - # assert isinstance(rs, np.float) - # y_pred = pipeline.predict(test_df) - # assert y_pred.shape == (test_sample_num - default_past_seq_len + 1, - # future_seq_len + 1) - # - # y_pred_df = pipeline.predict(test_df[:-future_seq_len]) - # y_df = test_df[default_past_seq_len:] - # - # mse_pred_eval, rs_pred_eval = [Evaluator.evaluate(m, - # y_df[target_col].values, - # y_pred_df[target_col].values) - # for m in metrics] - # mse_eval, rs_eval = pipeline.evaluate(test_df, metrics) - # assert mse_pred_eval == mse_eval - # assert rs_pred_eval == rs_eval - - def test_save_restore_future_equal_1(self): - target_col = "values" - metrics = ["mse", "r2"] - future_seq_len = 1 - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - pipeline = tsp.fit(train_df, test_df) - y_pred = pipeline.predict(test_df) - mse, rs = pipeline.evaluate(test_df, metrics=metrics) - print("Evaluation result: Mean square error is: {}, R square is: {}.".format(mse, rs)) - - dirname = tempfile.mkdtemp(prefix="saved_pipeline") - try: - save_pipeline_file = os.path.join(dirname, "my.ppl") - pipeline.save(save_pipeline_file) - assert os.path.isfile(save_pipeline_file) - new_pipeline = load_ts_pipeline(save_pipeline_file) - assert new_pipeline.config is not None - assert isinstance(new_pipeline.model, TimeSequenceModel) - new_pipeline.describe() - new_pred = new_pipeline.predict(test_df) - assert_array_almost_equal(y_pred[target_col].values, new_pred[target_col].values, - decimal=2) - finally: - shutil.rmtree(dirname) - - new_pipeline.fit(train_df, epoch_num=1) - new_mse, new_rs = new_pipeline.evaluate(test_df, - metrics=["mse", "r2"]) - print("Evaluation result after restore and fit: " - "Mean square error is: {}, R square is: {}.".format(new_mse, new_rs)) - - def test_evaluate_predict_future_more_1(self): - target_col = "values" - metrics = ["mse", "r2"] - future_seq_len = np.random.randint(2, 6) - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - pipeline = tsp.fit(train_df, test_df) - mse, rs = pipeline.evaluate(test_df, metrics=metrics) - assert len(mse) == future_seq_len - assert len(rs) == future_seq_len - y_pred = pipeline.predict(test_df) - assert y_pred.shape == (test_sample_num - default_past_seq_len + 1, - future_seq_len + 1) - - y_pred_df = pipeline.predict(test_df[:-future_seq_len]) - columns = ["{}_{}".format(target_col, i) for i in range(future_seq_len)] - y_pred_value = y_pred_df[columns].values - - y_df = test_df[default_past_seq_len:] - y_value = TimeSequenceFeatureTransformer()._roll_test(y_df[target_col], future_seq_len) - - mse_pred_eval, rs_pred_eval = [Evaluator.evaluate(m, y_value, y_pred_value) - for m in metrics] - mse_eval, rs_eval = pipeline.evaluate(test_df, metrics) - assert_array_almost_equal(mse_pred_eval, mse_eval, decimal=2) - assert_array_almost_equal(rs_pred_eval, rs_eval, decimal=2) - - # def test_save_restore_future_more_1(self): - # target_col = "values" - # metrics = ["mse", "r2"] - # future_seq_len = np.random.randint(2, 6) - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # pipeline = tsp.fit(train_df, test_df) - # y_pred = pipeline.predict(test_df) - # mse, rs = pipeline.evaluate(test_df, metrics=metrics) - # print("Evaluation result: Mean square error is: {}, R square is: {}.".format(mse, rs)) - # - # dirname = tempfile.mkdtemp(prefix="saved_pipeline") - # try: - # save_pipeline_file = os.path.join(dirname, "my.ppl") - # pipeline.save(save_pipeline_file) - # assert os.path.isfile(save_pipeline_file) - # new_pipeline = load_ts_pipeline(save_pipeline_file) - # - # new_pred = new_pipeline.predict(test_df) - # columns = ["{}_{}".format(target_col, i) for i in range(future_seq_len)] - # assert_array_almost_equal(y_pred[columns].values, new_pred[columns].values, decimal=2) - # - # finally: - # shutil.rmtree(dirname) - # - # new_pipeline.fit(train_df, epoch_num=1) - # new_mse, new_rs = new_pipeline.evaluate(test_df, - # metrics=["mse", "r2"]) - # print("Evaluation result after restore and fit: " - # "Mean square error is: {}, R square is: {}.".format(new_mse, new_rs)) - - # def test_look_back_future_1(self): - # target_col = "values" - # min_past_seq_len = np.random.randint(3, 5) - # max_past_seq_len = np.random.randint(5, 8) - # future_seq_len = 1 - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # - # random_pipeline = tsp.fit(train_df, test_df, recipe=RandomRecipe( - # look_back=(min_past_seq_len, max_past_seq_len))) - # y_pred_random = random_pipeline.predict(test_df) - # assert y_pred_random.shape[0] >= test_sample_num - max_past_seq_len + 1 - # assert y_pred_random.shape[0] <= test_sample_num - min_past_seq_len + 1 - # assert y_pred_random.shape[1] == future_seq_len + 1 - # mse, rs = random_pipeline.evaluate(test_df, metrics=["mse", "r2"]) - # assert isinstance(mse, np.float) - # assert isinstance(rs, np.float) - # - # def test_look_back_future_more_1(self): - # target_col = "values" - # min_past_seq_len = np.random.randint(3, 5) - # max_past_seq_len = np.random.randint(5, 8) - # future_seq_len = np.random.randint(2, 6) - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # - # random_pipeline = tsp.fit(train_df, test_df, recipe=RandomRecipe( - # look_back=(min_past_seq_len, max_past_seq_len))) - # y_pred_random = random_pipeline.predict(test_df) - # assert y_pred_random.shape[0] >= test_sample_num - max_past_seq_len + 1 - # assert y_pred_random.shape[0] <= test_sample_num - min_past_seq_len + 1 - # assert y_pred_random.shape[1] == future_seq_len + 1 - # mse, rs = random_pipeline.evaluate(test_df, metrics=["mse", "r2"]) - # assert len(mse) == future_seq_len - # assert len(rs) == future_seq_len - - def test_look_back_value(self): - target_col = "values" - future_seq_len = np.random.randint(2, 6) - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # test min_past_seq_len < 2 - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=(1, 2))) - # test max_past_seq_len < 2 - with pytest.raises(RuntimeError, match=r".*max look back value*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=(0, 1))) - # test look_back value < 2 - with pytest.raises(RuntimeError, match=r".*look back value should not be smaller than 2*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=1)) - - # test look back is None - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=None)) - # test look back is str - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back="a")) - # test look back is float - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=2.5)) - # test look back range is float - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=(2.5, 3))) - - def test_predict_with_uncertainty(self): - target_col = "values" - future_seq_len = np.random.randint(2, 6) - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - - # test future_seq_len = 3 - pipeline = tsp.fit(train_df, mc=True, validation_df=test_df) - y_out, y_pred_uncertainty = pipeline.predict_with_uncertainty(test_df, n_iter=2) - assert y_out.shape == (test_sample_num - default_past_seq_len + 1, - future_seq_len + 1) - assert y_pred_uncertainty.shape == (test_sample_num - default_past_seq_len + 1, - future_seq_len) - assert np.any(y_pred_uncertainty) - - # def test_predict_with_uncertainty_multivariate(self): - # target_cols = ["value1", "value2"] - # future_seq_len = np.random.randint(2, 6) - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_cols) - # - # # test future_seq_len = 3 - # pipeline = tsp.fit(train_df, mc=True, validation_df=test_df, recipe=Seq2SeqRandomRecipe) - # y_out, y_pred_uncertainty = pipeline.predict_with_uncertainty(test_df, n_iter=2) - # assert y_out.shape == (test_sample_num - default_past_seq_len + 1, - # future_seq_len + 1, len(target_cols)) - # assert y_pred_uncertainty.shape == (test_sample_num - default_past_seq_len + 1, - # future_seq_len, len(target_cols)) - # assert np.any(y_pred_uncertainty) - # - # def test_fit_predict_with_uncertainty(self): - # # test future_seq_len = 1 - # self.pipeline_1 = self.tsp_1.fit(self.train_df, mc=True, validation_df=self.validation_df) - # self.pipeline_1.fit(self.validation_df, mc=True, epoch_num=1) - # y_out, y_pred_uncertainty = self.pipeline_1.predict_with_uncertainty(self.test_df, - # n_iter=2) - # assert y_out.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_1 + 1) - # assert y_pred_uncertainty.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_1) - # assert np.any(y_pred_uncertainty) - # - # # test future_seq_len = 3 - # self.pipeline_3 = self.tsp_3.fit(self.train_df, mc=True, validation_df=self.validation_df) - # self.pipeline_3.fit(self.validation_df, mc=True, epoch_num=1) - # y_out, y_pred_uncertainty = self.pipeline_3.predict_with_uncertainty(self.test_df, - # n_iter=2) - # assert y_out.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_3 + 1) - # assert y_pred_uncertainty.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_3) - # assert np.any(y_pred_uncertainty) - # - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py deleted file mode 100644 index 2151a805423..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py b/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py deleted file mode 100644 index c8efab2287b..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py +++ /dev/null @@ -1,71 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pandas as pd -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.feature.time_sequence import TimeSequenceFeatureTransformer -from bigdl.chronos.autots.deprecated.preprocessing.impute.LastFill import LastFill -from bigdl.chronos.autots.deprecated.preprocessing.impute import FillZeroImpute -from .. import skip_method - - -@skip_method -class TestDataImputation(ZooTestCase): - - def setup_method(self, method): - self.ft = TimeSequenceFeatureTransformer() - self.create_data() - - def teardown_method(self, method): - pass - - def create_data(self): - data = np.random.random_sample((5, 50)) - mask = np.random.random_sample((5, 50)) - mask[mask >= 0.4] = 2 - mask[mask < 0.4] = 1 - mask[mask < 0.2] = 0 - data[mask == 0] = None - data[mask == 1] = np.nan - df = pd.DataFrame(data) - idx = pd.date_range( - start='2020-07-01 00:00:00', - end='2020-07-01 08:00:00', - freq='2H') - df.index = idx - self.data = df - - def test_lastfill(self): - last_fill = LastFill() - mse_missing = last_fill.evaluate(self.data, 0.1) - imputed_data = last_fill.impute(self.data) - assert imputed_data.isna().sum().sum() == 0 - mse = last_fill.evaluate(imputed_data, 0.1) - - def test_fillzero(self): - data = [[np.nan, 1], [np.nan, 2]] - df = pd.DataFrame(data) - imputor = FillZeroImpute() - imputed_df = imputor.impute(df) - assert df.isna().sum().sum() != 0 - assert imputed_df.isna().sum().sum() == 0 - - -if __name__ == "__main__": - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py b/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py deleted file mode 100644 index 234296293b2..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py +++ /dev/null @@ -1,50 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.preprocessing.utils import train_val_test_split -import pandas as pd -import numpy as np -from .. import skip_method - - -@skip_method -class TestUtil(ZooTestCase): - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def test_train_val_test_split(self): - # length test - sample_num = 100 - look_back = 10 - horizon = 1 - dates = pd.date_range('1/1/2020', periods=sample_num) - values = np.random.randn(sample_num) - df = pd.DataFrame({"values": values}, index=dates) - train_df, val_df, test_df = train_val_test_split(df, - val_ratio=0.1, - test_ratio=0.1, - look_back=look_back, - horizon=horizon) - assert len(train_df) == sample_num * 0.8 - assert len(val_df) == sample_num * 0.1 + look_back + horizon - 1 - assert len(test_df) == sample_num * 0.1 + look_back + horizon - 1 - # index test - assert pd.api.types.is_datetime64_any_dtype(test_df.index.dtype) - assert pd.api.types.is_datetime64_any_dtype(val_df.index.dtype) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py deleted file mode 100644 index 2151a805423..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py b/python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py deleted file mode 100644 index fff8c5a29b9..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest -sc = None -ray_ctx = None - - -@pytest.fixture(autouse=True, scope='function') -def automl_fixture(): - from bigdl.dllib.nncontext import init_spark_on_local - from bigdl.orca.ray import OrcaRayContext - sc = init_spark_on_local(cores=4, spark_log_level="INFO") - ray_ctx = OrcaRayContext(sc=sc, object_store_memory="1g") - ray_ctx.init() - yield - ray_ctx.stop() - sc.stop() - - -# @pytest.fixture() -# def setUpModule(): -# sc = init_spark_on_local(cores=4, spark_log_level="INFO") -# ray_ctx = RayContext(sc=sc) -# ray_ctx.init() -# -# -# def tearDownModule(): -# ray_ctx.stop() -# sc.stop() diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py b/python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py deleted file mode 100644 index ed2b0b83c76..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py +++ /dev/null @@ -1,111 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.model.time_sequence import TimeSequenceModel -from bigdl.chronos.autots.deprecated.regression.time_sequence_predictor import TimeSequencePredictor -import pandas as pd -import numpy as np -from bigdl.chronos.autots.deprecated.pipeline.time_sequence import TimeSequencePipeline -from .. import skip_method - - -@skip_method -class TestTimeSequencePredictor(ZooTestCase): - - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def create_dataset(self): - sample_num = np.random.randint(100, 200) - train_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=sample_num), "value": np.random.randn(sample_num)}) - val_sample_num = np.random.randint(20, 30) - validation_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=val_sample_num), "value": np.random.randn(val_sample_num)}) - future_seq_len = np.random.randint(1, 6) - return train_df, validation_df, future_seq_len - - def test_fit_SmokeRecipe(self): - train_df, validation_df, future_seq_len = self.create_dataset() - tsp = TimeSequencePredictor(dt_col="datetime", - target_col="value", - future_seq_len=future_seq_len, - extra_features_col=None, ) - pipeline = tsp.fit(train_df, validation_df) - assert isinstance(pipeline, TimeSequencePipeline) - assert isinstance(pipeline.model, TimeSequenceModel) - - def test_fit_LSTMGridRandomRecipe(self): - from bigdl.chronos.autots.deprecated.config.recipe import LSTMGridRandomRecipe - train_df, _, future_seq_len = self.create_dataset() - tsp = TimeSequencePredictor(dt_col="datetime", - target_col="value", - future_seq_len=future_seq_len, - extra_features_col=None, ) - pipeline = tsp.fit(train_df, - recipe=LSTMGridRandomRecipe( - lstm_2_units=[4], - batch_size=[1024], - num_rand_samples=5, - look_back=2, - training_iteration=1, - epochs=1)) - assert isinstance(pipeline, TimeSequencePipeline) - assert isinstance(pipeline.model, TimeSequenceModel) - assert pipeline.config is not None - assert 'past_seq_len' in pipeline.config - assert pipeline.config["past_seq_len"] == 2 - - def test_fit_BayesRecipe(self): - from bigdl.chronos.autots.deprecated.config.recipe import BayesRecipe - train_df, _, future_seq_len = self.create_dataset() - tsp = TimeSequencePredictor(dt_col="datetime", - target_col="value", - future_seq_len=future_seq_len, - extra_features_col=None, - search_alg="BayesOpt", - search_alg_params={ - "utility_kwargs": { - "kind": "ucb", - "kappa": 2.5, - "xi": 0.0 - } - } - ) - pipeline = tsp.fit( - train_df, recipe=BayesRecipe( - num_samples=1, - training_iteration=2, - epochs=1, - look_back=(3, 5) - )) - assert isinstance(pipeline, TimeSequencePipeline) - assert isinstance(pipeline.model, TimeSequenceModel) - assert pipeline.config is not None - assert "epochs" in pipeline.config - assert [config_name for config_name in pipeline.config - if config_name.endswith('float')] == [] - assert 'past_seq_len' in pipeline.config - assert 3 <= pipeline.config["past_seq_len"] <= 5 - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py b/python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py deleted file mode 100644 index 465a44b08de..00000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py +++ /dev/null @@ -1,142 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest -import numpy as np -from bigdl.orca.test_zoo_utils import ZooTestCase - -from bigdl.chronos.autots.deprecated.config.recipe import LSTMGridRandomRecipe, MTNetGridRandomRecipe -from bigdl.chronos.autots.deprecated.forecast import AutoTSTrainer -from bigdl.chronos.autots.deprecated.forecast import TSPipeline -from . import skip_method - -import pandas as pd - - -@pytest.mark.usefixtures("init_ray_context_fixture") -@skip_method -class TestChronosAutoTS(ZooTestCase): - - def setup_method(self, method): - # super(TestChronosAutoTS, self).setup_method(method) - self.create_data() - - def teardown_method(self, method): - pass - - def create_data(self): - sample_num = np.random.randint(100, 200) - self.train_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=sample_num), "value": np.random.randn(sample_num)}) - val_sample_num = np.random.randint(20, 30) - self.validation_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=val_sample_num), "value": np.random.randn(val_sample_num)}) - - def test_AutoTSTrainer_smoke(self): - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df) - assert isinstance(pipeline, TSPipeline) - assert pipeline.internal.config is not None - evaluate_result = pipeline.evaluate(self.validation_df) - if horizon > 1: - assert evaluate_result[0].shape[0] == horizon - else: - assert evaluate_result[0] - predict_df = pipeline.predict(self.validation_df) - assert not predict_df.empty - - def test_AutoTrainer_LstmRecipe(self): - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df, - self.validation_df, - recipe=LSTMGridRandomRecipe( - num_rand_samples=5, - batch_size=[1024], - lstm_2_units=[8], - training_iteration=1, - epochs=1 - )) - assert isinstance(pipeline, TSPipeline) - assert pipeline.internal.config is not None - evaluate_result = pipeline.evaluate(self.validation_df) - if horizon > 1: - assert evaluate_result[0].shape[0] == horizon - else: - assert evaluate_result[0] - predict_df = pipeline.predict(self.validation_df) - assert not predict_df.empty - - def test_AutoTrainer_MTNetRecipe(self): - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df, - self.validation_df, - recipe=MTNetGridRandomRecipe( - num_rand_samples=1, - time_step=[5], - long_num=[2], - batch_size=[1024], - cnn_hid_size=[32, 50], - training_iteration=1, - epochs=1 - )) - assert isinstance(pipeline, TSPipeline) - assert pipeline.internal.config is not None - evaluate_result = pipeline.evaluate(self.validation_df) - if horizon > 1: - assert evaluate_result[0].shape[0] == horizon - else: - assert evaluate_result[0] - predict_df = pipeline.predict(self.validation_df) - assert not predict_df.empty - - def test_save_load(self): - import tempfile - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df, - self.validation_df, - ) - assert isinstance(pipeline, TSPipeline) - # save & restore the pipeline - with tempfile.TemporaryDirectory() as tempdirname: - my_ppl_file_path = pipeline.save(tempdirname + "saved_pipeline/nyc_taxi.ppl") - loaded_ppl = TSPipeline.load(my_ppl_file_path) - loaded_ppl.evaluate(self.validation_df) - predict_df = loaded_ppl.predict(self.validation_df) - assert not predict_df.empty - - -if __name__ == "__main__": - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/model/tf1/__init__.py b/python/chronos/test/bigdl/chronos/model/tf1/__init__.py deleted file mode 100644 index 2151a805423..00000000000 --- a/python/chronos/test/bigdl/chronos/model/tf1/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/model/tf1/test_Seq2Seq_keras.py b/python/chronos/test/bigdl/chronos/model/tf1/test_Seq2Seq_keras.py deleted file mode 100644 index b7f80212858..00000000000 --- a/python/chronos/test/bigdl/chronos/model/tf1/test_Seq2Seq_keras.py +++ /dev/null @@ -1,236 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import tensorflow as tf -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.model.tf1.Seq2Seq_keras import LSTMSeq2Seq -from bigdl.chronos.autots.deprecated.feature.time_sequence import TimeSequenceFeatureTransformer -from numpy.testing import assert_array_almost_equal -import pandas as pd -import numpy as np - - -@pytest.mark.skipif(tf.__version__ > '2.0.0', reason="Run only when tf==1.15.0.") -class TestSeq2Seq(ZooTestCase): - - def setup_method(self, method): - # super().setup_method(method) - self.train_data = pd.DataFrame(data=np.random.randn(64, 4)) - self.val_data = pd.DataFrame(data=np.random.randn(16, 4)) - self.test_data = pd.DataFrame(data=np.random.randn(16, 4)) - - self.past_seq_len = 6 - self.future_seq_len_1 = 1 - self.future_seq_len_2 = 2 - - # use roll method in time_sequence - self.feat = TimeSequenceFeatureTransformer() - - self.config = { - 'batch_size': 32, - 'epochs': 1, - 'latent_dim': 8 - } - - self.model_1 = LSTMSeq2Seq(check_optional_config=False, - future_seq_len=self.future_seq_len_1) - self.model_2 = LSTMSeq2Seq(check_optional_config=False, - future_seq_len=self.future_seq_len_2) - - self.fitted = False - self.predict_1 = None - self.predict_2 = None - - def teardown_method(self, method): - pass - - def test_fit_eval_1(self): - x_train_1, y_train_1 = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_1) - print("fit_eval_future_seq_len_1:", - self.model_1.fit_eval((x_train_1, y_train_1), **self.config)) - assert self.model_1.past_seq_len == 6 - assert self.model_1.feature_num == 4 - assert self.model_1.future_seq_len == 1 - assert self.model_1.target_col_num == 1 - - def test_fit_eval(self): - past_seq_len = 6 - future_seq_len = 2 - input_dim = 5 - output_dim = 4 - x_train = np.random.rand(100, past_seq_len, input_dim) - y_train = np.random.rand(100, future_seq_len, output_dim) - x_test = np.random.rand(100, past_seq_len, input_dim) - y_test = np.random.rand(100, future_seq_len, output_dim) - model = LSTMSeq2Seq(check_optional_config=False, - future_seq_len=future_seq_len) - model_config = { - 'batch_size': 32, - 'epochs': 1, - 'latent_dim': 8, - 'dropout': 0.2 - } - model.fit_eval((x_train, y_train), **model_config) - y_pred = model.predict(x_test) - rmse, smape = model.evaluate(x=x_test, y=y_test, metric=["rmse", "smape"]) - assert rmse.shape == smape.shape - assert rmse.shape == (future_seq_len, output_dim) - - assert model.past_seq_len == past_seq_len - assert model.future_seq_len == future_seq_len - assert model.feature_num == input_dim - assert model.target_col_num == output_dim - assert y_pred.shape == y_test.shape - - def test_fit_eval_2(self): - x_train_2, y_train_2 = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_2) - print("fit_eval_future_seq_len_2:", - self.model_2.fit_eval((x_train_2, y_train_2), **self.config)) - assert self.model_2.future_seq_len == 2 - - self.fitted = True - - def test_evaluate_1(self): - x_train_1, y_train_1 = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_1) - x_val_1, y_val_1 = self.feat._roll_train(self.val_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_1) - - self.model_1.fit_eval((x_train_1, y_train_1), **self.config) - - print("evaluate_future_seq_len_1:", self.model_1.evaluate(x_val_1, - y_val_1, - metric=['mse', - 'r2'])) - - def test_evaluate_2(self): - x_train_2, y_train_2 = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_2) - x_val_2, y_val_2 = self.feat._roll_train(self.val_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_2) - - self.model_2.fit_eval((x_train_2, y_train_2), **self.config) - - print("evaluate_future_seq_len_2:", self.model_2.evaluate(x_val_2, - y_val_2, - metric=['mse', - 'r2'])) - - def test_predict_1(self): - x_train_1, y_train_1 = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_1) - x_test_1 = self.feat._roll_test(self.test_data, past_seq_len=self.past_seq_len) - self.model_1.fit_eval((x_train_1, y_train_1), **self.config) - - predict_1 = self.model_1.predict(x_test_1) - assert predict_1.shape == (x_test_1.shape[0], self.future_seq_len_1) - - def test_predict_2(self): - x_train_2, y_train_2 = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=self.future_seq_len_2) - x_test_2 = self.feat._roll_test(self.test_data, past_seq_len=self.past_seq_len) - self.model_2.fit_eval((x_train_2, y_train_2), **self.config) - - predict_2 = self.model_2.predict(x_test_2) - assert predict_2.shape == (x_test_2.shape[0], self.future_seq_len_2) - - def test_save_restore_single_step(self): - future_seq_len = 1 - x_train, y_train = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=future_seq_len) - x_test = self.feat._roll_test(self.test_data, past_seq_len=self.past_seq_len) - model = LSTMSeq2Seq(future_seq_len=future_seq_len) - model.fit_eval((x_train, y_train), **self.config) - - predict_before = model.predict(x_test) - new_model = LSTMSeq2Seq() - - ckpt = os.path.join("/tmp", "seq2seq.ckpt") - model.save(ckpt) - new_model.restore(ckpt) - predict_after = new_model.predict(x_test) - assert_array_almost_equal(predict_before, predict_after, decimal=2), \ - "Prediction values are not the same after restore: " \ - "predict before is {}, and predict after is {}".format(predict_before, predict_after) - new_config = {'epochs': 1, 'latent_dim': 8} - new_model.fit_eval((x_train, y_train), **new_config) - os.remove(ckpt) - - def test_save_restore_multistep(self): - future_seq_len = np.random.randint(2, 6) - x_train, y_train = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=future_seq_len) - x_test = self.feat._roll_test(self.test_data, past_seq_len=self.past_seq_len) - model = LSTMSeq2Seq(future_seq_len=future_seq_len) - model.fit_eval((x_train, y_train), **self.config) - - predict_before = model.predict(x_test) - new_model = LSTMSeq2Seq() - - ckpt = os.path.join("/tmp", "seq2seq.ckpt") - model.save(ckpt) - new_model.restore(ckpt) - predict_after = new_model.predict(x_test) - assert_array_almost_equal(predict_before, predict_after, decimal=2), \ - "Prediction values are not the same after restore: " \ - "predict before is {}, and predict after is {}".format(predict_before, predict_after) - new_config = {'epochs': 1, 'latent_dim': 8} - new_model.fit_eval((x_train, y_train), **new_config) - os.remove(ckpt) - - def test_predict_with_uncertainty(self,): - future_seq_len = np.random.randint(2, 6) - x_train, y_train = self.feat._roll_train(self.train_data, - past_seq_len=self.past_seq_len, - future_seq_len=future_seq_len) - x_test = self.feat._roll_test(self.test_data, past_seq_len=self.past_seq_len) - model = LSTMSeq2Seq(future_seq_len=future_seq_len) - model.fit_eval((x_train, y_train), mc=True, **self.config) - - prediction, uncertainty = model.predict_with_uncertainty(x_test, n_iter=2) - assert prediction.shape == (x_test.shape[0], future_seq_len) - assert uncertainty.shape == (x_test.shape[0], future_seq_len) - assert np.any(uncertainty) - - new_model = LSTMSeq2Seq() - - ckpt = os.path.join("/tmp", "seq2seq.ckpt") - model.save(ckpt) - new_model.restore(ckpt) - prediction_after, uncertainty_after = new_model.predict_with_uncertainty(x_test, n_iter=2) - assert prediction_after.shape == (x_test.shape[0], future_seq_len) - assert uncertainty_after.shape == (x_test.shape[0], future_seq_len) - assert np.any(uncertainty_after) - - os.remove(ckpt) - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/model/tf1/test_VanillaLSTM_keras.py b/python/chronos/test/bigdl/chronos/model/tf1/test_VanillaLSTM_keras.py deleted file mode 100644 index 236185ef0a4..00000000000 --- a/python/chronos/test/bigdl/chronos/model/tf1/test_VanillaLSTM_keras.py +++ /dev/null @@ -1,105 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest -from unittest import TestCase -from bigdl.chronos.model.tf1.VanillaLSTM_keras import VanillaLSTM -import numpy as np -import tempfile -import os -import tensorflow as tf - - -def create_data(): - num_train_samples = 1000 - num_val_samples = 400 - num_test_samples = 400 - input_time_steps = 7 - input_feature_dim = 4 - output_dim = np.random.randint(1, 5) - - def get_x_y(num_samples): - x = np.random.rand(num_samples, input_time_steps, input_feature_dim) - y = np.random.randn(num_samples, output_dim) - return x, y - - train_data = get_x_y(num_train_samples) - val_data = get_x_y(num_val_samples) - test_data = get_x_y(num_test_samples) - return train_data, val_data, test_data - -@pytest.mark.skipif(tf.__version__ > '2.0.0', reason="Run only when tf==1.15.0.") -class TestVanillaLSTM(TestCase): - train_data, val_data, test_data = create_data() - model = VanillaLSTM() - - def test_fit_evaluate(self): - config = {"batch_size": 128} - self.model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - mse, smape = self.model.evaluate(self.val_data[0], self.val_data[1], - metrics=["mse", "smape"]) - - def test_config(self): - config = {"lstm_units": [128] * 2, - "dropouts": [0.2] * 2} - self.model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - - config = {"lstm_units": 128, - "dropouts": 0.2} - self.model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - - with pytest.raises(RuntimeError): - config = {"lstm_units": 0.1, - "dropouts": 0.2} - self.model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - - with pytest.raises(RuntimeError): - config = {"lstm_units": [128] * 2, - "dropouts": [0.2] * 3} - self.model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - - with pytest.raises(RuntimeError): - config = {"lstm_units": 128, - "dropouts": [0.2] * 2} - self.model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - - def test_predict_save_restore(self): - model = VanillaLSTM() - config = {"lstm_units": [128] * 2, - "dropouts": [0.2] * 2, - "batch_size": 128} - model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - pred = model.predict(self.test_data[0]) - assert pred.shape == self.test_data[1].shape - with tempfile.TemporaryDirectory() as tmp_dir_name: - ckpt_name = os.path.join(tmp_dir_name, "ckpt") - model.save(ckpt_name) - model_1 = VanillaLSTM() - model_1.restore(ckpt_name) - pred_1 = model_1.predict(self.test_data[0]) - assert np.allclose(pred, pred_1) - - def test_predict_with_uncertainty(self): - config = {"batch_size": 128} - self.model.fit_eval((self.train_data[0], self.train_data[1]), self.val_data, **config) - prediction, uncertainty = self.model.predict_with_uncertainty(self.test_data[0], n_iter=100) - assert prediction.shape == self.test_data[1].shape - assert uncertainty.shape == self.test_data[1].shape - assert np.any(uncertainty) - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/model/tf1/test_mtnet_keras.py b/python/chronos/test/bigdl/chronos/model/tf1/test_mtnet_keras.py deleted file mode 100644 index 5b1f1241eda..00000000000 --- a/python/chronos/test/bigdl/chronos/model/tf1/test_mtnet_keras.py +++ /dev/null @@ -1,116 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import shutil - -import pytest - -from unittest import TestCase -from bigdl.chronos.model.tf1.MTNet_keras import MTNetKeras -from bigdl.chronos.data import TSDataset -import pandas as pd -import numpy as np -import tensorflow as tf -from numpy.testing import assert_array_almost_equal - - -def create_data(): - lookback = 3 - horizon = 1 - def get_data(num_samples): - values = np.random.randn(num_samples) - df = pd.DataFrame({'timestep': pd.date_range(start='2010-01-01', - freq='m', - periods=num_samples), - 'value 1': values, - 'value 2': values, - 'value 3': values, - 'value 4': values}) - return df - tsdata_train = TSDataset.from_pandas(get_data(32), - target_col=['value 1', 'value 2', 'value 3', 'value 4'], - dt_col='timestep', - with_split=False) - tsdata_test = TSDataset.from_pandas(get_data(16), - target_col=['value 1', 'value 2', 'value 3', 'value 4'], - dt_col='timestep', - with_split=False) - for tsdata in [tsdata_train, tsdata_test]: - tsdata.roll(lookback=lookback, horizon=horizon) - return tsdata_train, tsdata_test - - -@pytest.mark.skipif(tf.__version__ > '2.0.0', reason="Run only when tf==1.15.0.") -class TestMTNetKeras(TestCase): - - def setup_method(self, method): - tf.keras.backend.clear_session() - train_data, test_data = create_data() - self.x_train, y_train = train_data.to_numpy() - self.y_train = y_train[:, :, 0] - self.x_val, y_val = test_data.to_numpy() - self.y_val = y_val[:, :, 0] - self.x_test, _ = test_data.to_numpy() - self.model = MTNetKeras() - self.config = {"long_num": 2, - "time_step": 1, - "ar_window": 1, # np.random.randint(1, 3), - "cnn_height": 1, # np.random.randint(1, 3), - "cnn_hid_size": 2, - "rnn_hid_sizes": [2, 2], - "epochs": 1} - - def teardown_method(self, method): - pass - - def test_fit_evaluate(self): - self.model.fit_eval(data=(self.x_train, self.y_train), - validation_data=(self.x_val, self.y_val), - **self.config) - self.model.evaluate(self.x_val, self.y_val) - - def test_save_restore(self): - import os - self.model.fit_eval(data=(self.x_train, self.y_train), - validation_data=(self.x_val, self.y_val), - **self.config) - y_pred = self.model.predict(self.x_test) - assert y_pred.shape == (self.x_test.shape[0], self.y_train.shape[1]) - dirname = "/tmp" - restored_model = MTNetKeras() - ckpt = os.path.join(dirname, "mtnet.ckpt") - self.model.save(checkpoint_file=ckpt) - restored_model.restore(checkpoint_file=ckpt) - predict_after = restored_model.predict(self.x_test) - assert_array_almost_equal(y_pred, predict_after, decimal=2), \ - "Prediction values are not the same after restore: " \ - "predict before is {}, and predict after is {}".format(y_pred, predict_after) - restored_model.fit_eval((self.x_train, self.y_train), epochs=1) - restored_model.evaluate(self.x_val, self.y_val) - os.remove(ckpt) - - def test_predict_with_uncertainty(self): - self.model.fit_eval(data=(self.x_train, self.y_train), - validation_data=(self.x_val, self.y_val), - mc=True, - **self.config) - pred, uncertainty = self.model.predict_with_uncertainty(self.x_test, n_iter=2) - assert pred.shape == (self.x_test.shape[0], self.y_train.shape[1]) - assert uncertainty.shape == pred.shape - # assert np.any(uncertainty) It may happen that all results are dropped out. - - -if __name__ == '__main__': - pytest.main([__file__])