Skip to content

Commit

Permalink
shuffling of run table
Browse files Browse the repository at this point in the history
  • Loading branch information
nikosChalk committed Jul 22, 2022
1 parent 462b563 commit 0039fd5
Show file tree
Hide file tree
Showing 8 changed files with 208 additions and 13 deletions.
20 changes: 16 additions & 4 deletions experiment-runner/ConfigValidator/Config/Models/RunTableModel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import itertools
import random
from typing import Dict, List, Tuple

from ConfigValidator.CustomErrors.BaseError import BaseError
from ExtendedTyping.Typing import SupportsStr
from ProgressManager.RunTable.Models.RunProgress import RunProgress
from ConfigValidator.Config.Models.FactorModel import FactorModel
Expand All @@ -10,16 +12,24 @@ class RunTableModel:
def __init__(self,
             factors: List[FactorModel],
             exclude_variations: List[Dict[FactorModel, List[SupportsStr]]] = None,
             data_columns: List[str] = None,
             shuffle: bool = False
             ):
    """Model of the experiment run table.

    Args:
        factors: the experiment factors; each factor name must be unique.
        exclude_variations: treatment combinations to omit from the table.
            Defaults to empty. (NOTE(review): the fallback default is `{}`
            although the annotation says List — confirm intended type.)
        data_columns: extra (initially empty) measurement columns; names
            must be unique.
        shuffle: when True, the generated run table rows are shuffled
            (execution order is randomized).

    Raises:
        BaseError: on a duplicate factor name or duplicate data column.
    """
    if exclude_variations is None:
        exclude_variations = {}
    if data_columns is None:
        data_columns = []

    # Duplicate factor names would make treatment columns ambiguous.
    if len({factor.factor_name for factor in factors}) != len(factors):
        raise BaseError("Duplicate factor name detected!")

    # Duplicate data columns would silently collapse into one dict key.
    if len(set(data_columns)) != len(data_columns):
        raise BaseError("Duplicate data column detected!")

    self.__factors = factors
    self.__exclude_variations = exclude_variations
    self.__data_columns = data_columns
    self.__shuffle = shuffle

def get_factors(self) -> List[FactorModel]:
    """Return the factors of this run table, in declaration order."""
    return self.__factors
Expand Down Expand Up @@ -72,8 +82,10 @@ def __filter_list(full_list: List[Tuple]):
row_list.insert(1, RunProgress.TODO) # __done

if self.__data_columns:
for data_column in self.__data_columns:
for _ in self.__data_columns:
row_list.append(" ")

experiment_run_table.append(dict(zip(column_names, row_list)))

if self.__shuffle:
random.shuffle(experiment_run_table)
return experiment_run_table
Original file line number Diff line number Diff line change
Expand Up @@ -73,22 +73,32 @@ def __init__(self, config: RunnerConfig, metadata: Metadata):
self.json_data_manager.write_metadata(self.metadata)

self.restarted = True
assert(len(existing_run_table) == len(self.run_table))

# Re-order the generated run table to match the already existing one
tmp_run_table = []
for existing_var in existing_run_table:
for generated_var in self.run_table:
if existing_var['__run_id'] == generated_var['__run_id']:
tmp_run_table.append(generated_var)
break
self.run_table = tmp_run_table
for existing_var, generated_var in zip(existing_run_table, self.run_table):
assert(existing_var['__run_id'] == generated_var['__run_id'])

# Fill in the run_table.
# Note that the stored run_table has only a str() representation of the factor treatment levels.
# The generated one can have arbitrary python objects.
for i, variation in enumerate(existing_run_table):
upd_variation = self.run_table[i] # variation that will be updated
assert (i == int(variation['__run_id'][4:]))
assert (i == int(upd_variation['__run_id'][4:]))
for existing_var, generated_var in zip(existing_run_table, self.run_table):
assert (existing_var['__run_id'] == generated_var['__run_id'])

for k in map(lambda factor: factor.factor_name,
self.config.run_table_model.get_factors()): # treatment levels remain the same
assert (str(upd_variation[k]) == str(variation[k]))
assert (str(generated_var[k]) == str(existing_var[k]))

for k in set(self.config.run_table_model.get_data_columns()).union(
['__done']): # update data columns and __done column
upd_variation[k] = variation[k]
generated_var[k] = existing_var[k]

output.console_log_WARNING(">> WARNING << -- Experiment is restarted!")
if not self.restarted:
Expand Down
26 changes: 26 additions & 0 deletions test-standalone/core/shuffling/Crasher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

from copy import deepcopy
import shutil
from ConfigValidator.Config.RunnerConfig import RunnerConfig as OriginalRunnerConfig
from ProgressManager.Output.CSVOutputManager import CSVOutputManager
from ProgressManager.RunTable.Models.RunProgress import RunProgress

import TestUtilities

if __name__ == '__main__':
    # Simulate a crash mid-experiment: rewind two runs of a finished run table.
    TEST_DIR = TestUtilities.get_test_dir(__file__)

    config_file = TestUtilities.load_and_get_config_file_as_module(TEST_DIR)
    RunnerConfig: OriginalRunnerConfig = config_file.RunnerConfig

    csv_data_manager = CSVOutputManager(RunnerConfig.results_output_path / RunnerConfig.name)
    run_table = csv_data_manager.read_run_table()

    # keep old successful run table for comparison in the validator
    shutil.move(csv_data_manager._experiment_path / 'run_table.csv',
                csv_data_manager._experiment_path / 'run_table.old.csv')

    rewound_runs = ('run_2', 'run_5')
    for variation in run_table:
        if variation['__run_id'] not in rewound_runs:
            continue
        # Mark the run as not done and clear its measurement value so the
        # runner will pick it up again on restart.
        variation['__done'] = RunProgress.TODO
        variation['avg_cpu'] = 0
    csv_data_manager.write_run_table(run_table)
90 changes: 90 additions & 0 deletions test-standalone/core/shuffling/RunnerConfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from EventManager.Models.RunnerEvents import RunnerEvents
from EventManager.EventSubscriptionController import EventSubscriptionController
from ConfigValidator.Config.Models.RunTableModel import RunTableModel
from ConfigValidator.Config.Models.FactorModel import FactorModel
from ConfigValidator.Config.Models.RunnerContext import RunnerContext
from ConfigValidator.Config.Models.OperationType import OperationType
from ExtendedTyping.Typing import SupportsStr
from ProgressManager.Output.OutputProcedure import OutputProcedure as output

from typing import Dict, List, Any, Optional
from pathlib import Path
from os.path import dirname, realpath

'''
Test Description:
Test functionality for shuffling
* When recovering from a crash, the order of the run table should remain the same
'''

class RunnerConfig:
    """Experiment config for the standalone shuffling test.

    Builds a run table with ``shuffle=True`` so the test can verify that a
    restarted experiment keeps the same (shuffled) row order after a crash
    (see the module docstring above).
    """

    ROOT_DIR = Path(dirname(realpath(__file__)))

    # ================================ USER SPECIFIC CONFIG ================================
    # Experiment name; results go under results_output_path / name.
    name: str = "new_runner_experiment"
    results_output_path: Path = ROOT_DIR / 'experiments'
    operation_type: OperationType = OperationType.AUTO
    # Cooldown between consecutive runs, in milliseconds.
    time_between_runs_in_ms: int = 100

    def __init__(self):
        """Executes immediately after program start, on config load"""

        # Wire every lifecycle event to its handler method below.
        EventSubscriptionController.subscribe_to_multiple_events([
            (RunnerEvents.BEFORE_EXPERIMENT, self.before_experiment),
            (RunnerEvents.BEFORE_RUN       , self.before_run       ),
            (RunnerEvents.START_RUN        , self.start_run        ),
            (RunnerEvents.START_MEASUREMENT, self.start_measurement),
            (RunnerEvents.INTERACT         , self.interact         ),
            (RunnerEvents.STOP_MEASUREMENT , self.stop_measurement ),
            (RunnerEvents.STOP_RUN         , self.stop_run         ),
            (RunnerEvents.POPULATE_RUN_DATA, self.populate_run_data),
            (RunnerEvents.AFTER_EXPERIMENT , self.after_experiment )
        ])
        self.run_table_model = None  # Initialized later, by create_run_table_model()

        output.console_log("Custom config loaded")

    def create_run_table_model(self) -> RunTableModel:
        """Build the run table model: 3x2 factor design, two data columns,
        with shuffling enabled (the behavior under test)."""
        factor1 = FactorModel("example_factor1", ["level1", "level2", "level3"])
        factor2 = FactorModel("example_factor2", [True, False])
        self.run_table_model = RunTableModel(
            factors=[factor1, factor2],
            data_columns=['avg_cpu', 'avg_mem'],
            shuffle=True
        )
        return self.run_table_model

    def before_experiment(self) -> None:
        """Handler for RunnerEvents.BEFORE_EXPERIMENT (no-op beyond logging)."""
        output.console_log("Config.before_experiment() called!")

    def before_run(self) -> None:
        """Handler for RunnerEvents.BEFORE_RUN (no-op beyond logging)."""
        output.console_log("Config.before_run() called!")

    def start_run(self, context: RunnerContext) -> None:
        """Handler for RunnerEvents.START_RUN (no-op beyond logging)."""
        output.console_log("Config.start_run() called!")

    def start_measurement(self, context: RunnerContext) -> None:
        """Handler for RunnerEvents.START_MEASUREMENT (no-op beyond logging)."""
        output.console_log("Config.start_measurement() called!")

    def interact(self, context: RunnerContext) -> None:
        """Handler for RunnerEvents.INTERACT (no-op beyond logging)."""
        output.console_log("Config.interact() called!")

    def stop_measurement(self, context: RunnerContext) -> None:
        """Handler for RunnerEvents.STOP_MEASUREMENT (no-op beyond logging)."""
        output.console_log("Config.stop_measurement called!")

    def stop_run(self, context: RunnerContext) -> None:
        """Handler for RunnerEvents.STOP_RUN (no-op beyond logging)."""
        output.console_log("Config.stop_run() called!")

    def populate_run_data(self, context: RunnerContext) -> Optional[Dict[str, SupportsStr]]:
        """Handler for RunnerEvents.POPULATE_RUN_DATA.

        Returns fixed values for the data columns; the validator script
        checks avg_cpu == 13 after the experiment completes.
        """
        output.console_log("Config.populate_run_data() called!")
        return {
            'avg_cpu': 13,
            'avg_mem': 18.1
        }

    def after_experiment(self) -> None:
        """Handler for RunnerEvents.AFTER_EXPERIMENT (no-op beyond logging)."""
        output.console_log("Config.after_experiment() called!")

    # ================================ DO NOT ALTER BELOW THIS LINE ================================
    # NOTE(review): presumably filled in by the experiment runner at runtime — verify.
    experiment_path: Optional[Path] = None
26 changes: 26 additions & 0 deletions test-standalone/core/shuffling/Validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

import csv

from ConfigValidator.Config.RunnerConfig import RunnerConfig as OriginalRunnerConfig
from ProgressManager.Output.CSVOutputManager import CSVOutputManager
from ProgressManager.RunTable.Models.RunProgress import RunProgress

import TestUtilities

# BUG FIX: was `if __name__ == 'main':` — that never matches, so this
# validator silently did nothing and the test always "passed".
if __name__ == '__main__':
    TEST_DIR = TestUtilities.get_test_dir(__file__)

    config_file = TestUtilities.load_and_get_config_file_as_module(TEST_DIR)
    RunnerConfig: OriginalRunnerConfig = config_file.RunnerConfig

    # The recovered run table must be byte-identical to the pre-crash one
    # (same shuffled row order); the crasher saved it as run_table.old.csv.
    with open(RunnerConfig.results_output_path / RunnerConfig.name / 'run_table.old.csv') as f:
        old = f.read()  # this is before the crash.
    with open(RunnerConfig.results_output_path / RunnerConfig.name / 'run_table.csv') as f:
        new = f.read()
    assert old == new

    # Every run must have completed, with the data populate_run_data() writes.
    # (Rewritten from the misleading `assert(expr) == value` spelling.)
    csv_data_manager = CSVOutputManager(RunnerConfig.results_output_path / RunnerConfig.name)
    run_table = csv_data_manager.read_run_table()
    for row in run_table:
        assert row['__done'] == RunProgress.DONE.name
        assert int(row['avg_cpu']) == 13
1 change: 1 addition & 0 deletions test-standalone/runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ fi

set -e
tests=( # TODO: gather_tests recursively
"${PROJECT_DIR}/test-standalone/core/shuffling"
"${PROJECT_DIR}/test-standalone/core/arbitrary-objects"
"${PROJECT_DIR}/test-standalone/plugins/CodecarbonWrapper/individual"
"${PROJECT_DIR}/test-standalone/plugins/CodecarbonWrapper/combined"
Expand Down
4 changes: 2 additions & 2 deletions test/ConfigValidator/Config/Models/test_FactorModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
class TestFactorModelUniqueness(unittest.TestCase):
    """FactorModel must reject duplicate treatment levels."""

    def test_uniqueness(self):
        # assertRaises replaces the try/except + self.assert_(False) pattern:
        # assert_ is a deprecated alias removed in Python 3.12, and the old
        # pattern would pass silently if no exception was raised at all.
        with self.assertRaises(BaseError):
            FactorModel('example_factor', [1, 2, 1])

Expand Down
32 changes: 31 additions & 1 deletion test/ConfigValidator/Config/Models/test_RunTableModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,47 @@

from ConfigValidator.Config.Models.FactorModel import FactorModel
from ConfigValidator.Config.Models.RunTableModel import RunTableModel
from ConfigValidator.CustomErrors.BaseError import BaseError
from ProgressManager.RunTable.Models.RunProgress import RunProgress


class TestRunTableModelDuplicateNames(unittest.TestCase):
    """RunTableModel must reject duplicate factor names and data columns."""

    def test_duplicate_factor_names(self):
        # assertRaises replaces try/except + self.assert_(False): assert_ is a
        # deprecated alias removed in Python 3.12, and the old pattern passed
        # silently when no exception was raised.
        with self.assertRaises(BaseError):
            RunTableModel(
                factors=[
                    FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
                    FactorModel("example_factor1", [True, False]),
                ]
            )

    def test_duplicate_data_columns(self):
        with self.assertRaises(BaseError):
            RunTableModel(
                factors=[
                    FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
                    FactorModel("example_factor2", [True, False]),
                ],
                data_columns=['data_col1', 'data_col2', 'data_col1']
            )


class TestRunTableModelSimple(unittest.TestCase):
def setUp(self):
    """Build the model under test: 3x2 factors, two data columns, shuffling on.

    (Reconstructed: the span contained a diff artifact — the pre-change
    `data_columns=...` line duplicated next to its replacement.)
    """
    self.runTableModel = RunTableModel(
        factors=[
            FactorModel("example_factor1", ['example_treatment1', 'example_treatment2', 'example_treatment3']),
            FactorModel("example_factor2", [True, False]),
        ],
        data_columns=['avg_cpu', 'avg_mem'],
        shuffle=True
    )

def test_generate_experiment_run_table(self):
Expand Down

0 comments on commit 0039fd5

Please sign in to comment.