shuffling of run table

S2-group · Jul 22, 2022 · 0039fd5 · 0039fd5
1 parent 462b563
commit 0039fd5
Show file tree

Hide file tree

Showing 8 changed files with 208 additions and 13 deletions.
diff --git a/experiment-runner/ConfigValidator/Config/Models/RunTableModel.py b/experiment-runner/ConfigValidator/Config/Models/RunTableModel.py
@@ -1,6 +1,8 @@
 import itertools
+import random
 from typing import Dict, List, Tuple
 
+from ConfigValidator.CustomErrors.BaseError import BaseError
 from ExtendedTyping.Typing import SupportsStr
 from ProgressManager.RunTable.Models.RunProgress import RunProgress
 from ConfigValidator.Config.Models.FactorModel import FactorModel
@@ -10,16 +12,24 @@ class RunTableModel:
     def __init__(self,
                  factors: List[FactorModel],
                  exclude_variations: List[Dict[FactorModel, List[SupportsStr]]] = None,
-                 data_columns: List[str] = None
+                 data_columns: List[str] = None,
+                 shuffle: bool = False
                  ):
         if exclude_variations is None:
             exclude_variations = {}
         if data_columns is None:
             data_columns = []
-        # TODO: Prevent duplicate factors with the same name
+
+        if len(set([factor.factor_name for factor in factors])) != len(factors):
+            raise BaseError("Duplicate factor name detected!")
+
+        if len(set(data_columns)) != len(data_columns):
+            raise BaseError("Duplicate data column detected!")
+
         self.__factors = factors
         self.__exclude_variations = exclude_variations
         self.__data_columns = data_columns
+        self.__shuffle = shuffle
 
     def get_factors(self) -> List[FactorModel]:
         return self.__factors
@@ -72,8 +82,10 @@ def __filter_list(full_list: List[Tuple]):
             row_list.insert(1, RunProgress.TODO)  # __done
 
             if self.__data_columns:
-                for data_column in self.__data_columns:
+                for _ in self.__data_columns:
                     row_list.append(" ")
-
             experiment_run_table.append(dict(zip(column_names, row_list)))
+
+        if self.__shuffle:
+            random.shuffle(experiment_run_table)
         return experiment_run_table
diff --git a/experiment-runner/ExperimentOrchestrator/Experiment/ExperimentController.py b/experiment-runner/ExperimentOrchestrator/Experiment/ExperimentController.py
@@ -73,22 +73,32 @@ def __init__(self, config: RunnerConfig, metadata: Metadata):
                 self.json_data_manager.write_metadata(self.metadata)
 
             self.restarted = True
+            assert(len(existing_run_table) == len(self.run_table))
+
+            # Re-order the generated run table to match the already existing one
+            tmp_run_table = []
+            for existing_var in existing_run_table:
+                for generated_var in self.run_table:
+                    if existing_var['__run_id'] == generated_var['__run_id']:
+                        tmp_run_table.append(generated_var)
+                        break
+            self.run_table = tmp_run_table
+            for existing_var, generated_var in zip(existing_run_table, self.run_table):
+                assert(existing_var['__run_id'] == generated_var['__run_id'])
 
             # Fill in the run_table.
             # Note that the stored run_table has only a str() representation of the factor treatment levels.
             # The generated one can have arbitrary python objects.
-            for i, variation in enumerate(existing_run_table):
-                upd_variation = self.run_table[i]  # variation that will be updated
-                assert (i == int(variation['__run_id'][4:]))
-                assert (i == int(upd_variation['__run_id'][4:]))
+            for existing_var, generated_var in zip(existing_run_table, self.run_table):
+                assert (existing_var['__run_id'] == generated_var['__run_id'])
 
                 for k in map(lambda factor: factor.factor_name,
                              self.config.run_table_model.get_factors()):  # treatment levels remain the same
-                    assert (str(upd_variation[k]) == str(variation[k]))
+                    assert (str(generated_var[k]) == str(existing_var[k]))
 
                 for k in set(self.config.run_table_model.get_data_columns()).union(
                         ['__done']):  # update data columns and __done column
-                    upd_variation[k] = variation[k]
+                    generated_var[k] = existing_var[k]
 
             output.console_log_WARNING(">> WARNING << -- Experiment is restarted!")
         if not self.restarted:

diff --git a/test-standalone/core/shuffling/Crasher.py b/test-standalone/core/shuffling/Crasher.py
@@ -0,0 +1,26 @@
+
+from copy import deepcopy
+import shutil
+from ConfigValidator.Config.RunnerConfig import RunnerConfig as OriginalRunnerConfig
+from ProgressManager.Output.CSVOutputManager import CSVOutputManager
+from ProgressManager.RunTable.Models.RunProgress import RunProgress
+
+import TestUtilities
+
+if __name__ == '__main__':
+    TEST_DIR = TestUtilities.get_test_dir(__file__)
+
+    config_file = TestUtilities.load_and_get_config_file_as_module(TEST_DIR)
+    RunnerConfig: OriginalRunnerConfig = config_file.RunnerConfig
+
+    csv_data_manager = CSVOutputManager(RunnerConfig.results_output_path / RunnerConfig.name)
+    run_table = csv_data_manager.read_run_table()
+
+    # keep old successful run table for comparison in the validator
+    shutil.move(csv_data_manager._experiment_path / 'run_table.csv', csv_data_manager._experiment_path / 'run_table.old.csv')
+
+    for row in run_table:
+        if row['__run_id'] in ['run_2', 'run_5']:
+            row['__done']  = RunProgress.TODO
+            row['avg_cpu'] = 0
+    csv_data_manager.write_run_table(run_table)
diff --git a/test-standalone/core/shuffling/RunnerConfig.py b/test-standalone/core/shuffling/RunnerConfig.py
@@ -0,0 +1,90 @@
+from EventManager.Models.RunnerEvents import RunnerEvents
+from EventManager.EventSubscriptionController import EventSubscriptionController
+from ConfigValidator.Config.Models.RunTableModel import RunTableModel
+from ConfigValidator.Config.Models.FactorModel import FactorModel
+from ConfigValidator.Config.Models.RunnerContext import RunnerContext
+from ConfigValidator.Config.Models.OperationType import OperationType
+from ExtendedTyping.Typing import SupportsStr
+from ProgressManager.Output.OutputProcedure import OutputProcedure as output
+
+from typing import Dict, List, Any, Optional
+from pathlib import Path
+from os.path import dirname, realpath
+
+'''
+Test Description:
+
+Test functionality for shuffling
+  * When recovering from a crash, the order of the run table should remain the same
+'''
+
+class RunnerConfig:
+    ROOT_DIR = Path(dirname(realpath(__file__)))
+
+    # ================================ USER SPECIFIC CONFIG ================================
+    name:                       str             = "new_runner_experiment"
+    results_output_path:        Path             = ROOT_DIR / 'experiments'
+    operation_type:             OperationType   = OperationType.AUTO
+    time_between_runs_in_ms:    int             = 100
+
+    def __init__(self):
+        """Executes immediately after program start, on config load"""
+
+        EventSubscriptionController.subscribe_to_multiple_events([
+            (RunnerEvents.BEFORE_EXPERIMENT, self.before_experiment),
+            (RunnerEvents.BEFORE_RUN       , self.before_run       ),
+            (RunnerEvents.START_RUN        , self.start_run        ),
+            (RunnerEvents.START_MEASUREMENT, self.start_measurement),
+            (RunnerEvents.INTERACT         , self.interact         ),
+            (RunnerEvents.STOP_MEASUREMENT , self.stop_measurement ),
+            (RunnerEvents.STOP_RUN         , self.stop_run         ),
+            (RunnerEvents.POPULATE_RUN_DATA, self.populate_run_data),
+            (RunnerEvents.AFTER_EXPERIMENT , self.after_experiment )
+        ])
+        self.run_table_model = None  # Initialized later
+
+        output.console_log("Custom config loaded")
+
+    def create_run_table_model(self) -> RunTableModel:
+        factor1 = FactorModel("example_factor1", ["level1", "level2", "level3"])
+        factor2 = FactorModel("example_factor2", [True, False])
+        self.run_table_model = RunTableModel(
+            factors=[factor1, factor2],
+            data_columns=['avg_cpu', 'avg_mem'],
+            shuffle=True
+        )
+        return self.run_table_model
+
+    def before_experiment(self) -> None:
+        output.console_log("Config.before_experiment() called!")
+
+    def before_run(self) -> None:
+        output.console_log("Config.before_run() called!")
+
+    def start_run(self, context: RunnerContext) -> None:
+        output.console_log("Config.start_run() called!")
+
+    def start_measurement(self, context: RunnerContext) -> None:
+        output.console_log("Config.start_measurement() called!")
+
+    def interact(self, context: RunnerContext) -> None:
+        output.console_log("Config.interact() called!")
+
+    def stop_measurement(self, context: RunnerContext) -> None:
+        output.console_log("Config.stop_measurement called!")
+
+    def stop_run(self, context: RunnerContext) -> None:
+        output.console_log("Config.stop_run() called!")
+
+    def populate_run_data(self, context: RunnerContext) -> Optional[Dict[str, SupportsStr]]:
+        output.console_log("Config.populate_run_data() called!")
+        return {
+            'avg_cpu': 13,
+            'avg_mem': 18.1
+        }
+
+    def after_experiment(self) -> None:
+        output.console_log("Config.after_experiment() called!")
+
+    # ================================ DO NOT ALTER BELOW THIS LINE ================================
+    experiment_path:            Path             = None
diff --git a/test-standalone/core/shuffling/Validator.py b/test-standalone/core/shuffling/Validator.py
@@ -0,0 +1,26 @@
+
+import csv
+
+from ConfigValidator.Config.RunnerConfig import RunnerConfig as OriginalRunnerConfig
+from ProgressManager.Output.CSVOutputManager import CSVOutputManager
+from ProgressManager.RunTable.Models.RunProgress import RunProgress
+
+import TestUtilities
+
+if __name__ == 'main':
+    TEST_DIR = TestUtilities.get_test_dir(__file__)
+
+    config_file = TestUtilities.load_and_get_config_file_as_module(TEST_DIR)
+    RunnerConfig: OriginalRunnerConfig = config_file.RunnerConfig
+
+    with open(RunnerConfig.results_output_path / RunnerConfig.name / 'run_table.old.csv') as f:
+        old = f.read() # this is before the crash.
+    with open(RunnerConfig.results_output_path / RunnerConfig.name / 'run_table.csv') as f:
+        new = f.read()
+    assert(old == new)
+
+    csv_data_manager = CSVOutputManager(RunnerConfig.results_output_path / RunnerConfig.name)
+    run_table = csv_data_manager.read_run_table()
+    for row in run_table:
+        assert(row['__done']) == RunProgress.DONE.name
+        assert(int(row['avg_cpu'])) == 13
diff --git a/test-standalone/runner.sh b/test-standalone/runner.sh
@@ -15,6 +15,7 @@ fi
 
 set -e
 tests=( # TODO: gather_tests recursively
+  "${PROJECT_DIR}/test-standalone/core/shuffling"
   "${PROJECT_DIR}/test-standalone/core/arbitrary-objects"
   "${PROJECT_DIR}/test-standalone/plugins/CodecarbonWrapper/individual"
   "${PROJECT_DIR}/test-standalone/plugins/CodecarbonWrapper/combined"

diff --git a/test/ConfigValidator/Config/Models/test_FactorModel.py b/test/ConfigValidator/Config/Models/test_FactorModel.py
@@ -7,8 +7,8 @@
 class TestFactorModelUniqueness(unittest.TestCase):
     def test_uniqueness(self):
         try:
-            factorModel = FactorModel('example_factore', [1, 2, 1])
-            self.assertFalse()
+            factorModel = FactorModel('example_factor', [1, 2, 1])
+            self.assert_(False)
         except BaseError:
             pass
 

diff --git a/test/ConfigValidator/Config/Models/test_RunTableModel.py b/test/ConfigValidator/Config/Models/test_RunTableModel.py
@@ -3,17 +3,47 @@
 
 from ConfigValidator.Config.Models.FactorModel import FactorModel
 from ConfigValidator.Config.Models.RunTableModel import RunTableModel
+from ConfigValidator.CustomErrors.BaseError import BaseError
 from ProgressManager.RunTable.Models.RunProgress import RunProgress
 
 
+class TestRunTableModelDuplicateNames(unittest.TestCase):
+
+    def test_duplicate_factor_names(self):
+        try:
+            RunTableModel(
+                factors=[
+                    FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
+                    FactorModel("example_factor1", [True, False]),
+                ]
+            )
+            self.assert_(False)
+        except BaseError:
+            pass
+
+    def test_duplicate_data_columns(self):
+        try:
+            RunTableModel(
+                factors=[
+                    FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
+                    FactorModel("example_factor2", [True, False]),
+                ],
+                data_columns=['data_col1', 'data_col2', 'data_col1']
+            )
+            self.assert_(False)
+        except BaseError:
+            pass
+
+
 class TestRunTableModelSimple(unittest.TestCase):
     def setUp(self):
         self.runTableModel = RunTableModel(
             factors=[
                 FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
                 FactorModel("example_factor2", [True, False]),
             ],
-            data_columns=['avg_cpu', 'avg_mem']
+            data_columns=['avg_cpu', 'avg_mem'],
+            shuffle=True
         )
 
     def test_generate_experiment_run_table(self):