Merge pull request #561 from runame/scoring
Fix scoring issues
priyakasimbeg authored Nov 3, 2023
2 parents 931f71f + 4151e09 commit 0943802
Showing 5 changed files with 48 additions and 17 deletions.
3 changes: 1 addition & 2 deletions scoring/score_submission.py
@@ -5,8 +5,7 @@
 from absl import logging
 import scoring_utils
 
-from algorithmic_efficiency import workloads
-import scoring
+from scoring import scoring
 
 flags.DEFINE_string(
     'experiment_path',
15 changes: 11 additions & 4 deletions scoring/scoring.py
@@ -40,6 +40,12 @@
 WORKLOADS = workloads_registry.WORKLOADS
 WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)'
 BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/'
+# These global variables have to be set according to the current set of
+# workloads and rules for the scoring to be correct.
+# We do not use the workload registry since it contains test and development
+# workloads as well.
+NUM_WORKLOADS = 8
+NUM_TRIALS = 5
 
 MIN_EVAL_METRICS = [
     'ce_loss',
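
Because NUM_WORKLOADS and NUM_TRIALS must be kept in sync with the rules by hand, a drift check along these lines could catch a stale value. This is a hypothetical sketch, not part of this commit; it assumes each base workload has its own subdirectory under BASE_WORKLOADS_DIR, which may overcount if development or test workloads share the tree:

import os
import warnings

from scoring.scoring import BASE_WORKLOADS_DIR, NUM_WORKLOADS

def check_workload_count():
  # Count the subdirectories under the base workloads directory; warn rather
  # than error, since the layout assumption may not hold.
  found = sum(
      os.path.isdir(os.path.join(BASE_WORKLOADS_DIR, d))
      for d in os.listdir(BASE_WORKLOADS_DIR))
  if found != NUM_WORKLOADS:
    warnings.warn(f'NUM_WORKLOADS is {NUM_WORKLOADS} but {found} workload '
                  f'directories exist under {BASE_WORKLOADS_DIR}.')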
@@ -133,9 +139,10 @@ def get_index_that_reaches_target(workload_df,
   # Remove trials that never reach the target
   target_reached = target_reached[target_reached.apply(np.any)]
 
-  # If we have no trials that have reached the target, return -1. Else, return
-  # the eval index of the earliest point the target is reached.
-  if target_reached.empty:
+  # If fewer than 3 trials reach the target, the submission will be scored as
+  # missing the target on this workload; return -1. Else, return the eval index
+  # of the earliest point the target is reached.
+  if len(target_reached) < 3:
     return -1, -1
   else:
     index_reached = target_reached.apply(np.argmax)
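
The effect of the new threshold on toy data (a standalone sketch, not from the repo; the Series of boolean eval histories mirrors the shape of target_reached above):

import numpy as np
import pandas as pd

# One boolean array per trial: did the trial meet the target at each eval?
target_reached = pd.Series({
    'trial_1': np.array([False, False, True, True]),
    'trial_2': np.array([False, True, True, True]),
    'trial_3': np.array([False, False, False, False]),  # never reaches it
})
# Same filter as above: drop trials that never reach the target.
target_reached = target_reached[target_reached.apply(np.any)]
# Old rule: any surviving trial counts (2 survive here). New rule: fewer
# than 3 surviving trials means the workload is scored as missed.
print(len(target_reached) < 3)  # True -> the function returns (-1, -1)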
@@ -287,7 +294,7 @@ def compute_performance_profiles(results,
       np.log10(min_tau), np.log10(max_tau), num=num_points, base=10.0)
 
   def rho(r, tau):
-    return (r <= tau).sum(axis=1) / len(r.columns)
+    return (r <= tau).sum(axis=1) / NUM_WORKLOADS
 
   perf_df = pd.concat([rho(df, tau) for tau in points], axis=1)
 
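Why dividing by NUM_WORKLOADS instead of len(r.columns) matters: if a submission produced results for only some workloads, the old denominator silently renormalized over the workloads that are present. A toy illustration (standalone sketch; r stands in for a submission's row of performance ratios, with np.inf marking a missed target):

import numpy as np
import pandas as pd

NUM_WORKLOADS = 8
# Ratios for only 6 of the 8 workloads, e.g. because two runs never finished.
r = pd.DataFrame([[1.0, 1.2, 2.0, 1.1, 3.0, np.inf]], index=['submission_a'])
tau = 2.0
print((r <= tau).sum(axis=1) / len(r.columns))  # old: 4/6 ~ 0.67
print((r <= tau).sum(axis=1) / NUM_WORKLOADS)   # new: 4/8 = 0.50, since the
                                                # two missing workloads now
                                                # count as failures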
21 changes: 17 additions & 4 deletions scoring/scoring_utils.py
@@ -1,10 +1,14 @@
 import json
 import os
 import re
+import warnings
 
 from absl import logging
 import pandas as pd
 
+from scoring.scoring import NUM_TRIALS
+from scoring.scoring import NUM_WORKLOADS
+
 TRIAL_LINE_REGEX = '(.*) --- Tuning run (\d+)/(\d+) ---'
 METRICS_LINE_REGEX = '(.*) Metrics: ({.*})'
 TRIAL_DIR_REGEX = 'trial_(\d+)'
@@ -103,8 +107,7 @@ def get_trials_df_dict(logfile):
   """
   trials_dict = get_trials_dict(logfile)
   trials_df_dict = {}
-  for trial in trials_dict.keys():
-    metrics = trials_dict[trial]
+  for trial, metrics in trials_dict.items():
     trials_df_dict[trial] = pd.DataFrame(metrics)
   return trials_df_dict
 
@@ -156,6 +159,10 @@ def get_experiment_df(experiment_dir):
   """
   df = pd.DataFrame()
   workload_dirs = os.listdir(experiment_dir)
+  num_workloads = len(workload_dirs)
+  if num_workloads != NUM_WORKLOADS:
+    warnings.warn(f'There should be {NUM_WORKLOADS} workloads but there are '
+                  f'{num_workloads}.')
   for workload in workload_dirs:
     data = {
         'workload': workload,
@@ -164,6 +171,7 @@
         t for t in os.listdir(os.path.join(experiment_dir, workload))
         if re.match(TRIAL_DIR_REGEX, t)
     ]
+    workload_df = pd.DataFrame()
     for trial in trial_dirs:
       eval_measurements_filepath = os.path.join(
           experiment_dir,
@@ -173,13 +181,18 @@
       )
       try:
         trial_df = pd.read_csv(eval_measurements_filepath)
-      except FileNotFoundError as e:
+      except FileNotFoundError:
         logging.info(f'Could not read {eval_measurements_filepath}')
         continue
       data['trial'] = trial
       for column in trial_df.columns:
         values = trial_df[column].to_numpy()
         data[column] = values
       trial_df = pd.DataFrame([data])
-      df = pd.concat([df, trial_df], ignore_index=True)
+      workload_df = pd.concat([workload_df, trial_df], ignore_index=True)
+    num_trials = len(workload_df)
+    if num_trials != NUM_TRIALS:
+      warnings.warn(f'There should be {NUM_TRIALS} trials for workload '
+                    f'{workload} but there are only {num_trials}.')
+    df = pd.concat([df, workload_df], ignore_index=True)
   return df
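
A usage sketch for callers who would rather fail hard on incomplete experiments (assuming the scoring package is importable from the repo root; the experiment path is a placeholder):

import warnings

from scoring import scoring_utils

with warnings.catch_warnings():
  # Promote the completeness warnings emitted above into exceptions.
  warnings.simplefilter('error')
  try:
    df = scoring_utils.get_experiment_df('experiments/my_submission')
  except UserWarning as e:
    raise SystemExit(f'Experiment directory is incomplete: {e}')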
25 changes: 19 additions & 6 deletions scoring/test_scoring_utils.py
@@ -1,8 +1,11 @@
 from absl.testing import absltest
-import scoring_utils
 
-TEST_LOGFILE = 'test_data/adamw_fastmri_jax_04-18-2023-13-10-58.log'
-TEST_DIR = 'test_data/experiment_dir'
+from scoring import scoring_utils
+from scoring.scoring import NUM_TRIALS
+from scoring.scoring import NUM_WORKLOADS
+
+TEST_LOGFILE = 'scoring/test_data/adamw_fastmri_jax_04-18-2023-13-10-58.log'
+TEST_DIR = 'scoring/test_data/experiment_dir'
 NUM_EVALS = 18
 
 
@@ -14,8 +17,7 @@ def test_get_trials_dict(self):
 
   def test_get_trials_df_dict(self):
     trials_dict = scoring_utils.get_trials_df_dict(TEST_LOGFILE)
-    for trial in trials_dict:
-      df = trials_dict[trial]
+    for df in trials_dict.values():
       self.assertEqual(len(df.index), NUM_EVALS)
 
   def test_get_trials_df(self):
@@ -24,7 +26,18 @@ def test_get_trials_df(self):
       self.assertEqual(len(df.at['1', column]), NUM_EVALS)
 
   def test_get_experiment_df(self):
-    df = scoring_utils.get_experiment_df(TEST_DIR)
+    _ = scoring_utils.get_experiment_df(TEST_DIR)
+    self.assertWarnsRegex(
+        Warning,
+        f'There should be {NUM_WORKLOADS} workloads but there are 1.',
+        scoring_utils.get_experiment_df,
+        TEST_DIR)
+    self.assertWarnsRegex(
+        Warning,
+        f'There should be {NUM_TRIALS} trials for workload mnist_jax but there '
+        'are only 1.',
+        scoring_utils.get_experiment_df,
+        TEST_DIR)
 
 
 if __name__ == '__main__':
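
For reference, the assertWarnsRegex pattern used above in miniature (a self-contained sketch, not from the repo; unittest treats the message as a regex and calls the callable with the arguments that follow it):

import unittest
import warnings


def load_workloads(n):
  if n != 8:
    warnings.warn(f'There should be 8 workloads but there are {n}.')


class WarnTest(unittest.TestCase):

  def test_warns_on_missing_workloads(self):
    # (category, regex, callable, *args) -- the call happens inside the assert.
    self.assertWarnsRegex(Warning, 'should be 8 workloads', load_workloads, 1)


if __name__ == '__main__':
  unittest.main()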
1 change: 0 additions & 1 deletion setup.cfg
@@ -37,7 +37,6 @@ install_requires =
     absl-py==1.4.0
     numpy>=1.23
     pandas>=2.0.1
-    tabulate==0.9.0
     tensorflow==2.12.0
     tensorflow-datasets==4.9.2
     tensorflow-probability==0.20.0
