From ae72e0d012950cb5b0b84f064cdb7371df3906f5 Mon Sep 17 00:00:00 2001 From: liangs6212 Date: Thu, 25 Aug 2022 11:07:27 +0800 Subject: [PATCH] delete deprecated UTs --- .../chronos/autots/deprecated/__init__.py | 18 - .../autots/deprecated/config/__init__.py | 15 - .../autots/deprecated/config/test_recipe.py | 73 --- .../chronos/autots/deprecated/conftest.py | 42 -- .../autots/deprecated/feature/__init__.py | 15 - .../feature/test_time_sequence_feature.py | 541 ------------------ .../autots/deprecated/pipeline/__init__.py | 15 - .../autots/deprecated/pipeline/conftest.py | 42 -- .../deprecated/pipeline/test_time_sequence.py | 296 ---------- .../deprecated/preprocessing/__init__.py | 15 - .../deprecated/preprocessing/test_fill.py | 71 --- .../deprecated/preprocessing/test_util.py | 50 -- .../autots/deprecated/regression/__init__.py | 15 - .../autots/deprecated/regression/conftest.py | 42 -- .../test_time_sequence_predictor.py | 111 ---- .../chronos/autots/deprecated/test_auto_ts.py | 142 ----- 16 files changed, 1503 deletions(-) delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py delete mode 100644 python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py deleted file mode 100644 index eb5543fce91d..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest - -skip_method = pytest.mark.skip(reason="We will remove deprecated API.") diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py deleted file mode 100644 index 2151a805423a..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/config/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py b/python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py deleted file mode 100644 index 6cb31bf58186..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/config/test_recipe.py +++ /dev/null @@ -1,73 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.config.recipe import * -from .. import skip_method - - -@skip_method -class TestTimeSequencePredictor(ZooTestCase): - - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def test_SmokeRecipe(self): - recipe = SmokeRecipe() - assert recipe.num_samples == 1 - assert recipe.training_iteration == 1 - assert recipe.reward_metric is None - - def test_MTNetSmokeRecipe(self): - recipe = MTNetSmokeRecipe() - assert recipe.num_samples == 1 - assert recipe.training_iteration == 1 - assert recipe.reward_metric is None - - def test_GridRandomRecipe(self): - recipe = GridRandomRecipe(num_rand_samples=100) - search_space = recipe.search_space() - assert search_space is not None - - def test_RandomRecipe(self): - recipe = GridRandomRecipe(num_rand_samples=100) - search_space = recipe.search_space() - assert search_space is not None - - def test_LSTMGridRandomRecipe(self): - recipe = LSTMGridRandomRecipe(num_rand_samples=100) - search_space = recipe.search_space() - assert search_space is not None - - def test_MTNetGridRandomRecipe(self): - recipe = MTNetGridRandomRecipe() - search_space = recipe.search_space() - assert search_space is not None - - def test_BayesRecipe(self): - recipe = BayesRecipe(num_samples=10) - search_space = recipe.search_space() - assert search_space is not None - assert recipe.reward_metric is not None - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py b/python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py deleted file mode 100644 index f5654719ca65..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/conftest.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest -sc = None -ray_ctx = None - - -@pytest.fixture(autouse=False, scope='class') -def init_ray_context_fixture(): - from bigdl.dllib.nncontext import init_spark_on_local - from bigdl.orca.ray import OrcaRayContext - sc = init_spark_on_local(cores=4, spark_log_level="INFO") - ray_ctx = OrcaRayContext(sc=sc, object_store_memory="1g") - ray_ctx.init() - yield - ray_ctx.stop() - sc.stop() - - -# @pytest.fixture() -# def setUpModule(): -# sc = init_spark_on_local(cores=4, spark_log_level="INFO") -# ray_ctx = RayContext(sc=sc) -# ray_ctx.init() -# -# -# def tearDownModule(): -# ray_ctx.stop() -# sc.stop() diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py deleted file mode 100644 index 2151a805423a..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py b/python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py deleted file mode 100644 index 5a3bb8123318..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/feature/test_time_sequence_feature.py +++ /dev/null @@ -1,541 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.feature.utils import save, restore -from bigdl.chronos.autots.deprecated.feature.time_sequence import * -from numpy.testing import assert_array_almost_equal -import json -import tempfile -import shutil - -from .. import skip_method -from bigdl.chronos.autots.deprecated.preprocessing.utils import train_val_test_split - - -@skip_method -class TestTimeSequenceFeature(ZooTestCase): - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def test_get_feature_list(self): - dates = pd.date_range('1/1/2019', periods=8) - data = np.random.randn(8, 3) - df = pd.DataFrame({"datetime": dates, "values": data[:, 0], - "A": data[:, 1], "B": data[:, 2]}) - feat = TimeSequenceFeatureTransformer(dt_col="datetime", - target_col="values", - extra_features_col=["A", "B"], - drop_missing=True) - feature_list = feat.get_feature_list() - assert set(feature_list) == {'IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - 'DAY(datetime)', - 'IS_WEEKEND(datetime)', - 'WEEKDAY(datetime)', - 'MONTH(datetime)', - 'DAYOFYEAR(datetime)', - 'WEEKOFYEAR(datetime)', - 'MINUTE(datetime)', - 'A', - 'B'} - - feat = TimeSequenceFeatureTransformer(dt_col="datetime", - target_col="values", - extra_features_col=["A", "B"], - drop_missing=True, - time_features=False) - feature_list = feat.get_feature_list() - assert set(feature_list) == {'A', 'B'} - - def test_fit_transform(self): - sample_num = 8 - past_seq_len = 2 - dates = pd.date_range('1/1/2019', periods=sample_num) - data = np.random.randn(sample_num, 3) - df = pd.DataFrame({"datetime": dates, "values": data[:, 0], - "A": data[:, 1], "B": data[:, 2]}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - 'A']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - x, y = feat.fit_transform(df, **config) - assert x.shape == (sample_num - past_seq_len, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert y.shape == (sample_num - past_seq_len, 1) - assert np.mean(np.concatenate((x[0, :, 0], y[:, 0]), axis=None)) < 1e-5 - - def test_fit_transform_df_list(self): - sample_num = 8 - past_seq_len = 2 - dates = pd.date_range('1/1/2019', periods=sample_num) - data = np.random.randn(sample_num, 3) - df = pd.DataFrame({"datetime": dates, "values": data[:, 0], - "A": data[:, 1], "B": data[:, 2]}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - 'A']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - - df_list = [df] * 3 - x, y = feat.fit_transform(df_list, **config) - single_result_len = sample_num - past_seq_len - assert x.shape == (single_result_len * 3, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert y.shape == (single_result_len * 3, 1) - assert_array_almost_equal(x[:single_result_len], - x[single_result_len: 2 * single_result_len], decimal=2) - assert_array_almost_equal(x[:single_result_len], x[2 * single_result_len:], decimal=2) - assert_array_almost_equal(y[:single_result_len], - y[single_result_len: 2 * single_result_len], - decimal=2) - assert_array_almost_equal(y[:single_result_len], y[2 * single_result_len:], decimal=2) - - assert np.mean(np.concatenate((x[0, :, 0], y[:single_result_len, 0]), axis=None)) < 1e-5 - - def test_fit_transform_input_datetime(self): - # if the type of input datetime is not datetime64, raise an error - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - df = pd.DataFrame({"datetime": dates.strftime('%m/%d/%Y'), "values": values}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": 2} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - - with pytest.raises(RuntimeError) as excinfo: - feat.fit_transform(df, **config) - assert 'np.datetime64' in str(excinfo.value) - - # if there is NaT in datetime, raise an error - df.loc[1, "datetime"] = None - with pytest.raises(RuntimeError, match=r".* datetime .*"): - feat.fit_transform(df, **config) - - def test_input_data_len(self): - sample_num = 100 - past_seq_len = 20 - dates = pd.date_range('1/1/2019', periods=sample_num) - values = np.random.randn(sample_num) - df = pd.DataFrame({"datetime": dates, "values": values}) - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - train_df, val_df, test_df = train_val_test_split(df, - val_ratio=0.1, - test_ratio=0.1, - look_back=10) - - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - with pytest.raises(RuntimeError, match=r".*past sequence length.*"): - feat.fit_transform(train_df[:20], **config) - - feat.fit_transform(train_df, **config) - with pytest.raises(RuntimeError, match=r".*past sequence length.*"): - feat.transform(val_df, is_train=True) - - with pytest.raises(RuntimeError, match=r".*past sequence length.*"): - feat.transform(test_df[:-1], is_train=False) - out_x, out_y = feat.transform(test_df, is_train=False) - assert len(out_x) == 1 - assert out_y is None - - def test_fit_transform_input_data(self): - # if there is NaN in data other than datetime, drop the training sample. - num_samples = 8 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples) - df = pd.DataFrame({"datetime": dates, "values": values}) - df.loc[2, "values"] = None - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", drop_missing=True) - - x, y = feat.fit_transform(df, **config) - # mask_x = [1, 0, 0, 1, 1, 1] - # mask_y = [0, 1, 1, 1, 1, 1] - # mask = [0, 0, 0, 1, 1, 1] - assert x.shape == (3, past_seq_len, len(json.loads(config["selected_features"])) + 1) - assert y.shape == (3, 1) - - def test_transform_train_true(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - val_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - - feat.fit_transform(train_df, **config) - val_x, val_y = feat.transform(val_df, is_train=True) - assert val_x.shape == (val_df.shape[0] - past_seq_len, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert val_y.shape == (val_df.shape[0] - past_seq_len, 1) - - def test_transform_train_true_df_list(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - val_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - - train_df_list = [train_df] * 3 - feat.fit_transform(train_df_list, **config) - val_df_list = [val_df] * 3 - val_x, val_y = feat.transform(val_df_list, is_train=True) - single_result_len = val_df.shape[0] - past_seq_len - assert val_x.shape == (single_result_len * 3, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert val_y.shape == (single_result_len * 3, 1) - - def test_transform_train_false(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - test_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - feat.fit_transform(train_df, **config) - test_x, _ = feat.transform(test_df, is_train=False) - assert test_x.shape == (test_df.shape[0] - past_seq_len + 1, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - - def test_transform_train_false_df_list(self): - num_samples = 16 - dates = pd.date_range('1/1/2019', periods=num_samples) - values = np.random.randn(num_samples, 2) - df = pd.DataFrame({"datetime": dates, "values": values[:, 0], "feature_1": values[:, 1]}) - train_sample_num = 10 - train_df = df[:train_sample_num] - test_df = df[train_sample_num:] - past_seq_len = 2 - - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)', - "feature_1"]), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=1, dt_col="datetime", - target_col="values", - extra_features_col="feature_1", - drop_missing=True) - train_df_list = [train_df] * 3 - feat.fit_transform(train_df_list, **config) - test_df_list = [test_df] * 3 - test_x, _ = feat.transform(test_df_list, is_train=False) - assert test_x.shape == ((test_df.shape[0] - past_seq_len + 1) * 3, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - - def test_save_restore(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - df = pd.DataFrame({"dt": dates, "v": values}) - - future_seq_len = 2 - dt_col = "dt" - target_col = "v" - drop_missing = True - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, - dt_col=dt_col, - target_col=target_col, - drop_missing=drop_missing) - - feature_list = feat.get_feature_list() - config = {"selected_features": json.dumps(feature_list), - "past_seq_len": 2 - } - - train_x, train_y = feat.fit_transform(df, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - assert new_ft.future_seq_len == future_seq_len - assert new_ft.dt_col == dt_col - assert new_ft.target_col[0] == target_col - assert new_ft.extra_features_col is None - assert new_ft.drop_missing == drop_missing - - test_x, _ = new_ft.transform(df[:-future_seq_len], is_train=False) - - assert_array_almost_equal(test_x, train_x, decimal=2) - - finally: - shutil.rmtree(dirname) - - def test_post_processing_train(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - - train_x, train_y = feat.fit_transform(df, **config) - y_unscale, y_unscale_1 = feat.post_processing(df, train_y, is_train=True) - y_input = df[past_seq_len:][[value_col]].values - msg = "y_unscale is {}, y_unscale_1 is {}".format(y_unscale, y_unscale_1) - assert_array_almost_equal(y_unscale, y_unscale_1, decimal=2), msg - msg = "y_unscale is {}, y_input is {}".format(y_unscale, y_input) - assert_array_almost_equal(y_unscale, y_input, decimal=2), msg - - def test_post_processing_train_df_list(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - df_list = [df] * 3 - train_x, train_y = feat.fit_transform(df_list, **config) - y_unscale, y_unscale_1 = feat.post_processing(df_list, train_y, is_train=True) - y_input = df[past_seq_len:][[value_col]].values - target_y = np.concatenate([y_input] * 3) - msg = "y_unscale is {}, y_unscale_1 is {}".format(y_unscale, y_unscale_1) - assert_array_almost_equal(y_unscale, y_unscale_1, decimal=2), msg - msg = "y_unscale is {}, y_input is {}".format(y_unscale, target_y) - assert_array_almost_equal(y_unscale, target_y, decimal=2), msg - - def test_post_processing_test_1(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - - train_x, train_y = feat.fit_transform(df, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature_") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - test_df = df[:-future_seq_len] - new_ft.transform(test_df, is_train=False) - output_value_df = new_ft.post_processing(test_df, train_y, is_train=False) - - # train_y is generated from df[past_seq_len:] - target_df = df[past_seq_len:].copy().reset_index(drop=True) - - assert output_value_df[dt_col].equals(target_df[dt_col]) - assert_array_almost_equal(output_value_df[value_col].values, - target_df[value_col].values, decimal=2) - - finally: - shutil.rmtree(dirname) - - def test_post_processing_test_df_list(self): - dates = pd.date_range('1/1/2019', periods=8) - values = np.random.randn(8) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - df_list = [df] * 3 - train_x, train_y = feat.fit_transform(df_list, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature_") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - test_df = df[:-future_seq_len] - test_df_list = [test_df] * 3 - new_ft.transform(test_df_list, is_train=False) - output_value_df_list = new_ft.post_processing(test_df_list, train_y, is_train=False) - - # train_y is generated from df[past_seq_len:] - target_df = df[past_seq_len:].copy().reset_index(drop=True) - - assert output_value_df_list[0].equals(output_value_df_list[1]) - assert output_value_df_list[0].equals(output_value_df_list[2]) - assert output_value_df_list[0][dt_col].equals(target_df[dt_col]) - assert_array_almost_equal(output_value_df_list[0][value_col].values, - target_df[value_col].values, decimal=2) - - finally: - shutil.rmtree(dirname) - - def test_post_processing_test_2(self): - sample_num = 8 - dates = pd.date_range('1/1/2019', periods=sample_num) - values = np.random.randn(sample_num) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 2 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - - train_x, train_y = feat.fit_transform(df, **config) - - dirname = tempfile.mkdtemp(prefix="automl_test_feature_") - try: - save(dirname, feature_transformers=feat) - new_ft = TimeSequenceFeatureTransformer() - restore(dirname, feature_transformers=new_ft, config=config) - - test_df = df[:-future_seq_len] - new_ft.transform(test_df, is_train=False) - output_value_df = new_ft.post_processing(test_df, train_y, is_train=False) - assert output_value_df.shape == (sample_num - past_seq_len - future_seq_len + 1, - future_seq_len + 1) - - columns = ["{}_{}".format(value_col, i) for i in range(future_seq_len)] - output_value = output_value_df[columns].values - target_df = df[past_seq_len:].copy().reset_index(drop=True) - target_value = feat._roll_test(target_df["values"], future_seq_len) - - assert output_value_df[dt_col].equals(target_df[:-future_seq_len + 1][dt_col]) - msg = "output_value is {}, target_value is {}".format(output_value, target_value) - assert_array_almost_equal(output_value, target_value, decimal=2), msg - - finally: - shutil.rmtree(dirname) - - def test_future_time_validation(self): - sample_num = 8 - dates = pd.date_range('1/1/2100', periods=sample_num) - values = np.random.randn(sample_num) - dt_col = "datetime" - value_col = "values" - df = pd.DataFrame({dt_col: dates, value_col: values}) - - past_seq_len = 2 - future_seq_len = 1 - config = {"selected_features": json.dumps(['IS_AWAKE(datetime)', - 'IS_BUSY_HOURS(datetime)', - 'HOUR(datetime)']), - "past_seq_len": past_seq_len} - feat = TimeSequenceFeatureTransformer(future_seq_len=future_seq_len, dt_col="datetime", - target_col="values", drop_missing=True) - x, y = feat.fit_transform(df, **config) - assert x.shape == (sample_num - past_seq_len, - past_seq_len, - len(json.loads(config["selected_features"])) + 1) - assert y.shape == (sample_num - past_seq_len, 1) - - -if __name__ == "__main__": - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py deleted file mode 100644 index 2151a805423a..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py b/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py deleted file mode 100644 index fff8c5a29b94..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/conftest.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest -sc = None -ray_ctx = None - - -@pytest.fixture(autouse=True, scope='function') -def automl_fixture(): - from bigdl.dllib.nncontext import init_spark_on_local - from bigdl.orca.ray import OrcaRayContext - sc = init_spark_on_local(cores=4, spark_log_level="INFO") - ray_ctx = OrcaRayContext(sc=sc, object_store_memory="1g") - ray_ctx.init() - yield - ray_ctx.stop() - sc.stop() - - -# @pytest.fixture() -# def setUpModule(): -# sc = init_spark_on_local(cores=4, spark_log_level="INFO") -# ray_ctx = RayContext(sc=sc) -# ray_ctx.init() -# -# -# def tearDownModule(): -# ray_ctx.stop() -# sc.stop() diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py b/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py deleted file mode 100644 index 29100f4cd096..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/pipeline/test_time_sequence.py +++ /dev/null @@ -1,296 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import shutil -import tempfile - -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.pipeline.time_sequence import load_ts_pipeline -from bigdl.chronos.autots.deprecated.regression.time_sequence_predictor import TimeSequencePredictor -from bigdl.chronos.autots.deprecated.feature.time_sequence import TimeSequenceFeatureTransformer -from bigdl.chronos.autots.deprecated.model.time_sequence import TimeSequenceModel -from bigdl.orca.automl.metrics import Evaluator -from bigdl.chronos.autots.deprecated.config.recipe import * - -import numpy as np -import pandas as pd -from numpy.testing import assert_array_almost_equal -import os -from .. import skip_method - -default_past_seq_len = 2 - - -@skip_method -class TestTimeSequencePipeline(ZooTestCase): - - def setup_method(self, method): - pass - - def teardown_method(self, method): - """ - Teardown any state that was previously setup with a setup_method call. - """ - pass - - def get_input_tsp(self, future_seq_len, target_col): - sample_num = np.random.randint(100, 200) - test_sample_num = np.random.randint(20, 30) - if isinstance(target_col, str): - train_df = pd.DataFrame({"datetime": pd.date_range('1/1/2019', - periods=sample_num), - target_col: np.random.randn(sample_num)}) - test_df = pd.DataFrame({"datetime": pd.date_range('1/1/2019', - periods=test_sample_num), - target_col: np.random.randn(test_sample_num)}) - else: - train_df = pd.DataFrame({t: np.random.randn(sample_num) for t in target_col}) - train_df["datetime"] = pd.date_range('1/1/2019', periods=sample_num) - test_df = pd.DataFrame({t: np.random.randn(test_sample_num) for t in target_col}) - test_df["datetime"] = pd.date_range('1/1/2019', periods=test_sample_num) - tsp = TimeSequencePredictor(dt_col="datetime", - target_col=target_col, - future_seq_len=future_seq_len, - extra_features_col=None, ) - return train_df, test_df, tsp, test_sample_num - - # def test_evaluate_predict_future_equal_1(self): - # target_col = "values" - # metrics = ["mse", "r2"] - # future_seq_len = 1 - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # pipeline = tsp.fit(train_df, test_df) - # mse, rs = pipeline.evaluate(test_df, metrics=metrics) - # assert isinstance(mse, np.float) - # assert isinstance(rs, np.float) - # y_pred = pipeline.predict(test_df) - # assert y_pred.shape == (test_sample_num - default_past_seq_len + 1, - # future_seq_len + 1) - # - # y_pred_df = pipeline.predict(test_df[:-future_seq_len]) - # y_df = test_df[default_past_seq_len:] - # - # mse_pred_eval, rs_pred_eval = [Evaluator.evaluate(m, - # y_df[target_col].values, - # y_pred_df[target_col].values) - # for m in metrics] - # mse_eval, rs_eval = pipeline.evaluate(test_df, metrics) - # assert mse_pred_eval == mse_eval - # assert rs_pred_eval == rs_eval - - def test_save_restore_future_equal_1(self): - target_col = "values" - metrics = ["mse", "r2"] - future_seq_len = 1 - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - pipeline = tsp.fit(train_df, test_df) - y_pred = pipeline.predict(test_df) - mse, rs = pipeline.evaluate(test_df, metrics=metrics) - print("Evaluation result: Mean square error is: {}, R square is: {}.".format(mse, rs)) - - dirname = tempfile.mkdtemp(prefix="saved_pipeline") - try: - save_pipeline_file = os.path.join(dirname, "my.ppl") - pipeline.save(save_pipeline_file) - assert os.path.isfile(save_pipeline_file) - new_pipeline = load_ts_pipeline(save_pipeline_file) - assert new_pipeline.config is not None - assert isinstance(new_pipeline.model, TimeSequenceModel) - new_pipeline.describe() - new_pred = new_pipeline.predict(test_df) - assert_array_almost_equal(y_pred[target_col].values, new_pred[target_col].values, - decimal=2) - finally: - shutil.rmtree(dirname) - - new_pipeline.fit(train_df, epoch_num=1) - new_mse, new_rs = new_pipeline.evaluate(test_df, - metrics=["mse", "r2"]) - print("Evaluation result after restore and fit: " - "Mean square error is: {}, R square is: {}.".format(new_mse, new_rs)) - - def test_evaluate_predict_future_more_1(self): - target_col = "values" - metrics = ["mse", "r2"] - future_seq_len = np.random.randint(2, 6) - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - pipeline = tsp.fit(train_df, test_df) - mse, rs = pipeline.evaluate(test_df, metrics=metrics) - assert len(mse) == future_seq_len - assert len(rs) == future_seq_len - y_pred = pipeline.predict(test_df) - assert y_pred.shape == (test_sample_num - default_past_seq_len + 1, - future_seq_len + 1) - - y_pred_df = pipeline.predict(test_df[:-future_seq_len]) - columns = ["{}_{}".format(target_col, i) for i in range(future_seq_len)] - y_pred_value = y_pred_df[columns].values - - y_df = test_df[default_past_seq_len:] - y_value = TimeSequenceFeatureTransformer()._roll_test(y_df[target_col], future_seq_len) - - mse_pred_eval, rs_pred_eval = [Evaluator.evaluate(m, y_value, y_pred_value) - for m in metrics] - mse_eval, rs_eval = pipeline.evaluate(test_df, metrics) - assert_array_almost_equal(mse_pred_eval, mse_eval, decimal=2) - assert_array_almost_equal(rs_pred_eval, rs_eval, decimal=2) - - # def test_save_restore_future_more_1(self): - # target_col = "values" - # metrics = ["mse", "r2"] - # future_seq_len = np.random.randint(2, 6) - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # pipeline = tsp.fit(train_df, test_df) - # y_pred = pipeline.predict(test_df) - # mse, rs = pipeline.evaluate(test_df, metrics=metrics) - # print("Evaluation result: Mean square error is: {}, R square is: {}.".format(mse, rs)) - # - # dirname = tempfile.mkdtemp(prefix="saved_pipeline") - # try: - # save_pipeline_file = os.path.join(dirname, "my.ppl") - # pipeline.save(save_pipeline_file) - # assert os.path.isfile(save_pipeline_file) - # new_pipeline = load_ts_pipeline(save_pipeline_file) - # - # new_pred = new_pipeline.predict(test_df) - # columns = ["{}_{}".format(target_col, i) for i in range(future_seq_len)] - # assert_array_almost_equal(y_pred[columns].values, new_pred[columns].values, decimal=2) - # - # finally: - # shutil.rmtree(dirname) - # - # new_pipeline.fit(train_df, epoch_num=1) - # new_mse, new_rs = new_pipeline.evaluate(test_df, - # metrics=["mse", "r2"]) - # print("Evaluation result after restore and fit: " - # "Mean square error is: {}, R square is: {}.".format(new_mse, new_rs)) - - # def test_look_back_future_1(self): - # target_col = "values" - # min_past_seq_len = np.random.randint(3, 5) - # max_past_seq_len = np.random.randint(5, 8) - # future_seq_len = 1 - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # - # random_pipeline = tsp.fit(train_df, test_df, recipe=RandomRecipe( - # look_back=(min_past_seq_len, max_past_seq_len))) - # y_pred_random = random_pipeline.predict(test_df) - # assert y_pred_random.shape[0] >= test_sample_num - max_past_seq_len + 1 - # assert y_pred_random.shape[0] <= test_sample_num - min_past_seq_len + 1 - # assert y_pred_random.shape[1] == future_seq_len + 1 - # mse, rs = random_pipeline.evaluate(test_df, metrics=["mse", "r2"]) - # assert isinstance(mse, np.float) - # assert isinstance(rs, np.float) - # - # def test_look_back_future_more_1(self): - # target_col = "values" - # min_past_seq_len = np.random.randint(3, 5) - # max_past_seq_len = np.random.randint(5, 8) - # future_seq_len = np.random.randint(2, 6) - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # - # random_pipeline = tsp.fit(train_df, test_df, recipe=RandomRecipe( - # look_back=(min_past_seq_len, max_past_seq_len))) - # y_pred_random = random_pipeline.predict(test_df) - # assert y_pred_random.shape[0] >= test_sample_num - max_past_seq_len + 1 - # assert y_pred_random.shape[0] <= test_sample_num - min_past_seq_len + 1 - # assert y_pred_random.shape[1] == future_seq_len + 1 - # mse, rs = random_pipeline.evaluate(test_df, metrics=["mse", "r2"]) - # assert len(mse) == future_seq_len - # assert len(rs) == future_seq_len - - def test_look_back_value(self): - target_col = "values" - future_seq_len = np.random.randint(2, 6) - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - # test min_past_seq_len < 2 - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=(1, 2))) - # test max_past_seq_len < 2 - with pytest.raises(RuntimeError, match=r".*max look back value*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=(0, 1))) - # test look_back value < 2 - with pytest.raises(RuntimeError, match=r".*look back value should not be smaller than 2*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=1)) - - # test look back is None - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=None)) - # test look back is str - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back="a")) - # test look back is float - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=2.5)) - # test look back range is float - with pytest.raises(RuntimeError, match=r".*look_back should be either*."): - tsp.fit(train_df, test_df, recipe=RandomRecipe(look_back=(2.5, 3))) - - def test_predict_with_uncertainty(self): - target_col = "values" - future_seq_len = np.random.randint(2, 6) - train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_col) - - # test future_seq_len = 3 - pipeline = tsp.fit(train_df, mc=True, validation_df=test_df) - y_out, y_pred_uncertainty = pipeline.predict_with_uncertainty(test_df, n_iter=2) - assert y_out.shape == (test_sample_num - default_past_seq_len + 1, - future_seq_len + 1) - assert y_pred_uncertainty.shape == (test_sample_num - default_past_seq_len + 1, - future_seq_len) - assert np.any(y_pred_uncertainty) - - # def test_predict_with_uncertainty_multivariate(self): - # target_cols = ["value1", "value2"] - # future_seq_len = np.random.randint(2, 6) - # train_df, test_df, tsp, test_sample_num = self.get_input_tsp(future_seq_len, target_cols) - # - # # test future_seq_len = 3 - # pipeline = tsp.fit(train_df, mc=True, validation_df=test_df, recipe=Seq2SeqRandomRecipe) - # y_out, y_pred_uncertainty = pipeline.predict_with_uncertainty(test_df, n_iter=2) - # assert y_out.shape == (test_sample_num - default_past_seq_len + 1, - # future_seq_len + 1, len(target_cols)) - # assert y_pred_uncertainty.shape == (test_sample_num - default_past_seq_len + 1, - # future_seq_len, len(target_cols)) - # assert np.any(y_pred_uncertainty) - # - # def test_fit_predict_with_uncertainty(self): - # # test future_seq_len = 1 - # self.pipeline_1 = self.tsp_1.fit(self.train_df, mc=True, validation_df=self.validation_df) - # self.pipeline_1.fit(self.validation_df, mc=True, epoch_num=1) - # y_out, y_pred_uncertainty = self.pipeline_1.predict_with_uncertainty(self.test_df, - # n_iter=2) - # assert y_out.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_1 + 1) - # assert y_pred_uncertainty.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_1) - # assert np.any(y_pred_uncertainty) - # - # # test future_seq_len = 3 - # self.pipeline_3 = self.tsp_3.fit(self.train_df, mc=True, validation_df=self.validation_df) - # self.pipeline_3.fit(self.validation_df, mc=True, epoch_num=1) - # y_out, y_pred_uncertainty = self.pipeline_3.predict_with_uncertainty(self.test_df, - # n_iter=2) - # assert y_out.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_3 + 1) - # assert y_pred_uncertainty.shape == (self.test_sample_num - self.default_past_seq_len + 1, - # self.future_seq_len_3) - # assert np.any(y_pred_uncertainty) - # - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py deleted file mode 100644 index 2151a805423a..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py b/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py deleted file mode 100644 index c8efab2287bd..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_fill.py +++ /dev/null @@ -1,71 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pandas as pd -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.feature.time_sequence import TimeSequenceFeatureTransformer -from bigdl.chronos.autots.deprecated.preprocessing.impute.LastFill import LastFill -from bigdl.chronos.autots.deprecated.preprocessing.impute import FillZeroImpute -from .. import skip_method - - -@skip_method -class TestDataImputation(ZooTestCase): - - def setup_method(self, method): - self.ft = TimeSequenceFeatureTransformer() - self.create_data() - - def teardown_method(self, method): - pass - - def create_data(self): - data = np.random.random_sample((5, 50)) - mask = np.random.random_sample((5, 50)) - mask[mask >= 0.4] = 2 - mask[mask < 0.4] = 1 - mask[mask < 0.2] = 0 - data[mask == 0] = None - data[mask == 1] = np.nan - df = pd.DataFrame(data) - idx = pd.date_range( - start='2020-07-01 00:00:00', - end='2020-07-01 08:00:00', - freq='2H') - df.index = idx - self.data = df - - def test_lastfill(self): - last_fill = LastFill() - mse_missing = last_fill.evaluate(self.data, 0.1) - imputed_data = last_fill.impute(self.data) - assert imputed_data.isna().sum().sum() == 0 - mse = last_fill.evaluate(imputed_data, 0.1) - - def test_fillzero(self): - data = [[np.nan, 1], [np.nan, 2]] - df = pd.DataFrame(data) - imputor = FillZeroImpute() - imputed_df = imputor.impute(df) - assert df.isna().sum().sum() != 0 - assert imputed_df.isna().sum().sum() == 0 - - -if __name__ == "__main__": - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py b/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py deleted file mode 100644 index 234296293b23..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/preprocessing/test_util.py +++ /dev/null @@ -1,50 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.preprocessing.utils import train_val_test_split -import pandas as pd -import numpy as np -from .. import skip_method - - -@skip_method -class TestUtil(ZooTestCase): - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def test_train_val_test_split(self): - # length test - sample_num = 100 - look_back = 10 - horizon = 1 - dates = pd.date_range('1/1/2020', periods=sample_num) - values = np.random.randn(sample_num) - df = pd.DataFrame({"values": values}, index=dates) - train_df, val_df, test_df = train_val_test_split(df, - val_ratio=0.1, - test_ratio=0.1, - look_back=look_back, - horizon=horizon) - assert len(train_df) == sample_num * 0.8 - assert len(val_df) == sample_num * 0.1 + look_back + horizon - 1 - assert len(test_df) == sample_num * 0.1 + look_back + horizon - 1 - # index test - assert pd.api.types.is_datetime64_any_dtype(test_df.index.dtype) - assert pd.api.types.is_datetime64_any_dtype(val_df.index.dtype) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py b/python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py deleted file mode 100644 index 2151a805423a..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py b/python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py deleted file mode 100644 index fff8c5a29b94..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/conftest.py +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest -sc = None -ray_ctx = None - - -@pytest.fixture(autouse=True, scope='function') -def automl_fixture(): - from bigdl.dllib.nncontext import init_spark_on_local - from bigdl.orca.ray import OrcaRayContext - sc = init_spark_on_local(cores=4, spark_log_level="INFO") - ray_ctx = OrcaRayContext(sc=sc, object_store_memory="1g") - ray_ctx.init() - yield - ray_ctx.stop() - sc.stop() - - -# @pytest.fixture() -# def setUpModule(): -# sc = init_spark_on_local(cores=4, spark_log_level="INFO") -# ray_ctx = RayContext(sc=sc) -# ray_ctx.init() -# -# -# def tearDownModule(): -# ray_ctx.stop() -# sc.stop() diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py b/python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py deleted file mode 100644 index ed2b0b83c765..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/regression/test_time_sequence_predictor.py +++ /dev/null @@ -1,111 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pytest - -from bigdl.orca.test_zoo_utils import ZooTestCase -from bigdl.chronos.autots.deprecated.model.time_sequence import TimeSequenceModel -from bigdl.chronos.autots.deprecated.regression.time_sequence_predictor import TimeSequencePredictor -import pandas as pd -import numpy as np -from bigdl.chronos.autots.deprecated.pipeline.time_sequence import TimeSequencePipeline -from .. import skip_method - - -@skip_method -class TestTimeSequencePredictor(ZooTestCase): - - def setup_method(self, method): - pass - - def teardown_method(self, method): - pass - - def create_dataset(self): - sample_num = np.random.randint(100, 200) - train_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=sample_num), "value": np.random.randn(sample_num)}) - val_sample_num = np.random.randint(20, 30) - validation_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=val_sample_num), "value": np.random.randn(val_sample_num)}) - future_seq_len = np.random.randint(1, 6) - return train_df, validation_df, future_seq_len - - def test_fit_SmokeRecipe(self): - train_df, validation_df, future_seq_len = self.create_dataset() - tsp = TimeSequencePredictor(dt_col="datetime", - target_col="value", - future_seq_len=future_seq_len, - extra_features_col=None, ) - pipeline = tsp.fit(train_df, validation_df) - assert isinstance(pipeline, TimeSequencePipeline) - assert isinstance(pipeline.model, TimeSequenceModel) - - def test_fit_LSTMGridRandomRecipe(self): - from bigdl.chronos.autots.deprecated.config.recipe import LSTMGridRandomRecipe - train_df, _, future_seq_len = self.create_dataset() - tsp = TimeSequencePredictor(dt_col="datetime", - target_col="value", - future_seq_len=future_seq_len, - extra_features_col=None, ) - pipeline = tsp.fit(train_df, - recipe=LSTMGridRandomRecipe( - lstm_2_units=[4], - batch_size=[1024], - num_rand_samples=5, - look_back=2, - training_iteration=1, - epochs=1)) - assert isinstance(pipeline, TimeSequencePipeline) - assert isinstance(pipeline.model, TimeSequenceModel) - assert pipeline.config is not None - assert 'past_seq_len' in pipeline.config - assert pipeline.config["past_seq_len"] == 2 - - def test_fit_BayesRecipe(self): - from bigdl.chronos.autots.deprecated.config.recipe import BayesRecipe - train_df, _, future_seq_len = self.create_dataset() - tsp = TimeSequencePredictor(dt_col="datetime", - target_col="value", - future_seq_len=future_seq_len, - extra_features_col=None, - search_alg="BayesOpt", - search_alg_params={ - "utility_kwargs": { - "kind": "ucb", - "kappa": 2.5, - "xi": 0.0 - } - } - ) - pipeline = tsp.fit( - train_df, recipe=BayesRecipe( - num_samples=1, - training_iteration=2, - epochs=1, - look_back=(3, 5) - )) - assert isinstance(pipeline, TimeSequencePipeline) - assert isinstance(pipeline.model, TimeSequenceModel) - assert pipeline.config is not None - assert "epochs" in pipeline.config - assert [config_name for config_name in pipeline.config - if config_name.endswith('float')] == [] - assert 'past_seq_len' in pipeline.config - assert 3 <= pipeline.config["past_seq_len"] <= 5 - - -if __name__ == '__main__': - pytest.main([__file__]) diff --git a/python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py b/python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py deleted file mode 100644 index 465a44b08de7..000000000000 --- a/python/chronos/test/bigdl/chronos/autots/deprecated/test_auto_ts.py +++ /dev/null @@ -1,142 +0,0 @@ -# -# Copyright 2016 The BigDL Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest -import numpy as np -from bigdl.orca.test_zoo_utils import ZooTestCase - -from bigdl.chronos.autots.deprecated.config.recipe import LSTMGridRandomRecipe, MTNetGridRandomRecipe -from bigdl.chronos.autots.deprecated.forecast import AutoTSTrainer -from bigdl.chronos.autots.deprecated.forecast import TSPipeline -from . import skip_method - -import pandas as pd - - -@pytest.mark.usefixtures("init_ray_context_fixture") -@skip_method -class TestChronosAutoTS(ZooTestCase): - - def setup_method(self, method): - # super(TestChronosAutoTS, self).setup_method(method) - self.create_data() - - def teardown_method(self, method): - pass - - def create_data(self): - sample_num = np.random.randint(100, 200) - self.train_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=sample_num), "value": np.random.randn(sample_num)}) - val_sample_num = np.random.randint(20, 30) - self.validation_df = pd.DataFrame({"datetime": pd.date_range( - '1/1/2019', periods=val_sample_num), "value": np.random.randn(val_sample_num)}) - - def test_AutoTSTrainer_smoke(self): - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df) - assert isinstance(pipeline, TSPipeline) - assert pipeline.internal.config is not None - evaluate_result = pipeline.evaluate(self.validation_df) - if horizon > 1: - assert evaluate_result[0].shape[0] == horizon - else: - assert evaluate_result[0] - predict_df = pipeline.predict(self.validation_df) - assert not predict_df.empty - - def test_AutoTrainer_LstmRecipe(self): - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df, - self.validation_df, - recipe=LSTMGridRandomRecipe( - num_rand_samples=5, - batch_size=[1024], - lstm_2_units=[8], - training_iteration=1, - epochs=1 - )) - assert isinstance(pipeline, TSPipeline) - assert pipeline.internal.config is not None - evaluate_result = pipeline.evaluate(self.validation_df) - if horizon > 1: - assert evaluate_result[0].shape[0] == horizon - else: - assert evaluate_result[0] - predict_df = pipeline.predict(self.validation_df) - assert not predict_df.empty - - def test_AutoTrainer_MTNetRecipe(self): - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df, - self.validation_df, - recipe=MTNetGridRandomRecipe( - num_rand_samples=1, - time_step=[5], - long_num=[2], - batch_size=[1024], - cnn_hid_size=[32, 50], - training_iteration=1, - epochs=1 - )) - assert isinstance(pipeline, TSPipeline) - assert pipeline.internal.config is not None - evaluate_result = pipeline.evaluate(self.validation_df) - if horizon > 1: - assert evaluate_result[0].shape[0] == horizon - else: - assert evaluate_result[0] - predict_df = pipeline.predict(self.validation_df) - assert not predict_df.empty - - def test_save_load(self): - import tempfile - horizon = np.random.randint(1, 6) - tsp = AutoTSTrainer(dt_col="datetime", - target_col="value", - horizon=horizon, - extra_features_col=None - ) - pipeline = tsp.fit(self.train_df, - self.validation_df, - ) - assert isinstance(pipeline, TSPipeline) - # save & restore the pipeline - with tempfile.TemporaryDirectory() as tempdirname: - my_ppl_file_path = pipeline.save(tempdirname + "saved_pipeline/nyc_taxi.ppl") - loaded_ppl = TSPipeline.load(my_ppl_file_path) - loaded_ppl.evaluate(self.validation_df) - predict_df = loaded_ppl.predict(self.validation_df) - assert not predict_df.empty - - -if __name__ == "__main__": - pytest.main([__file__])