From 750410c6d5e9330af13984ef716464b6047e905d Mon Sep 17 00:00:00 2001 From: caetano melone Date: Tue, 8 Oct 2024 16:08:19 -0300 Subject: [PATCH] Store consts in one file Closes #112 Decouples configuration from code in a centralized manner. --- gantry/routes/collection.py | 8 +++----- gantry/routes/prediction.py | 32 +++++++++----------------------- gantry/util/const.py | 36 ++++++++++++++++++++++++++++++++++++ gantry/util/k8s.py | 7 +++---- gantry/util/spec.py | 6 +++--- gantry/util/time.py | 11 ++++------- 6 files changed, 58 insertions(+), 42 deletions(-) create mode 100644 gantry/util/const.py diff --git a/gantry/routes/collection.py b/gantry/routes/collection.py index 7908672..abbe807 100644 --- a/gantry/routes/collection.py +++ b/gantry/routes/collection.py @@ -9,9 +9,7 @@ from gantry.clients.prometheus import PrometheusClient from gantry.clients.prometheus.util import IncompleteData from gantry.models import Job - -MB_IN_BYTES = 1_000_000 -BUILD_STAGE_REGEX = r"^stage-\d+$" +from gantry.util import const logger = logging.getLogger(__name__) @@ -47,7 +45,7 @@ async def fetch_job( if ( job.status != "success" # if the stage is not stage-NUMBER, it's not a build job - or not re.match(BUILD_STAGE_REGEX, payload["build_stage"]) + or not re.match(const.BUILD_STAGE_REGEX, payload["build_stage"]) # some jobs don't have runners..? or payload["runner"] is None # uo runners are not in Prometheus @@ -135,7 +133,7 @@ async def fetch_node( "hostname": hostname, "cores": node_labels["cores"], # convert to bytes to be consistent with other resource metrics - "mem": node_labels["mem"] * MB_IN_BYTES, + "mem": node_labels["mem"] * const.MB_IN_BYTES, "arch": node_labels["arch"], "os": node_labels["os"], "instance_type": node_labels["instance_type"], diff --git a/gantry/routes/prediction.py b/gantry/routes/prediction.py index e8d696a..123f0e6 100644 --- a/gantry/routes/prediction.py +++ b/gantry/routes/prediction.py @@ -2,24 +2,10 @@ import aiosqlite -from gantry.util import k8s +from gantry.util import const, k8s logger = logging.getLogger(__name__) -IDEAL_SAMPLE = 5 -DEFAULT_CPU_REQUEST = 1 -DEFAULT_MEM_REQUEST = 2 * 1_000_000_000 # 2GB in bytes -EXPENSIVE_VARIANTS = { - "sycl", - "mpi", - "rocm", - "cuda", - "python", - "fortran", - "openmp", - "hdf5", -} - async def predict(db: aiosqlite.Connection, spec: dict) -> dict: """ @@ -37,8 +23,8 @@ async def predict(db: aiosqlite.Connection, spec: dict) -> dict: predictions = {} if not sample: predictions = { - "cpu_request": DEFAULT_CPU_REQUEST, - "mem_request": DEFAULT_MEM_REQUEST, + "cpu_request": const.DEFAULT_CPU_REQUEST, + "mem_request": const.DEFAULT_MEM_REQUEST, } else: # mapping of sample: [0] cpu_mean, [1] cpu_max, [2] mem_mean, [3] mem_max @@ -51,10 +37,10 @@ async def predict(db: aiosqlite.Connection, spec: dict) -> dict: # warn if the prediction is below some thresholds if predictions["cpu_request"] < 0.2: logger.warning(f"Warning: CPU request for {spec} is below 0.2 cores") - predictions["cpu_request"] = DEFAULT_CPU_REQUEST + predictions["cpu_request"] = const.DEFAULT_CPU_REQUEST if predictions["mem_request"] < 10_000_000: logger.warning(f"Warning: Memory request for {spec} is below 10MB") - predictions["mem_request"] = DEFAULT_MEM_REQUEST + predictions["mem_request"] = const.DEFAULT_MEM_REQUEST # convert predictions to k8s friendly format for k, v in predictions.items(): @@ -104,7 +90,7 @@ async def select_sample(query: str, filters: dict, extra_params: list = []) -> l async with db.execute(query, list(filters.values()) + extra_params) as cursor: sample = await cursor.fetchall() # we can accept the sample if it's 1 shorter - if len(sample) >= IDEAL_SAMPLE - 1: + if len(sample) >= const.TRAINING_SAMPLES - 1: return sample return [] @@ -116,7 +102,7 @@ async def select_sample(query: str, filters: dict, extra_params: list = []) -> l query = f""" SELECT cpu_mean, cpu_max, mem_mean, mem_max FROM jobs WHERE ref='develop' AND {' AND '.join(f'{param}=?' for param in filters.keys())} - ORDER BY end DESC LIMIT {IDEAL_SAMPLE} + ORDER BY end DESC LIMIT {const.TRAINING_SAMPLES} """ if sample := await select_sample(query, filters): @@ -132,7 +118,7 @@ async def select_sample(query: str, filters: dict, extra_params: list = []) -> l # iterate through all the expensive variants and create a set of conditions # for the select query - for var in EXPENSIVE_VARIANTS: + for var in const.EXPENSIVE_VARIANTS: variant_value = spec["pkg_variants_dict"].get(var) # check against specs where hdf5=none like quantum-espresso @@ -157,7 +143,7 @@ async def select_sample(query: str, filters: dict, extra_params: list = []) -> l SELECT cpu_mean, cpu_max, mem_mean, mem_max FROM jobs WHERE ref='develop' AND {' AND '.join(f'{param}=?' for param in filters.keys())} AND {' AND '.join(exp_variant_conditions)} - ORDER BY end DESC LIMIT {IDEAL_SAMPLE} + ORDER BY end DESC LIMIT {const.TRAINING_SAMPLES} """ if sample := await select_sample(query, filters, exp_variant_values): diff --git a/gantry/util/const.py b/gantry/util/const.py new file mode 100644 index 0000000..b982941 --- /dev/null +++ b/gantry/util/const.py @@ -0,0 +1,36 @@ +# centralized constants for the project + +# resources +MB_IN_BYTES = 1_000_000 +BYTES_IN_MB = 1 / MB_IN_BYTES +MILLICORES_IN_CORES = 1_000 + +# spec +# example: emacs@29.2 +json+native+treesitter arch=x86_64%gcc@12.3.0 +# this regex accommodates versions made up of any non-space characters +SPACK_SPEC_PATTERN = r"(.+?)@(\S+)\s+(.+?)\s+arch=(\S+)%([\w-]+)@(\S+)" + +# gitlab +# sends dates in 2021-02-23 02:41:37 UTC format +# documentation says they use iso 8601, but they don't consistently apply it +# https://docs.gitlab.com/ee/user/project/integrations/webhook_events.html#job-events +GITLAB_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S %Z" + +# collection +# all build jobs will match this pattern (eg: stage-1 but not stage-index) +BUILD_STAGE_REGEX = r"^stage-\d+$" + +# prediction +TRAINING_SAMPLES = 5 # number of past builds to use for prediction +DEFAULT_CPU_REQUEST = 1 # cores +DEFAULT_MEM_REQUEST = 2 * 1_000_000_000 # 2GB in bytes +EXPENSIVE_VARIANTS = { + "sycl", + "mpi", + "rocm", + "cuda", + "python", + "fortran", + "openmp", + "hdf5", +} diff --git a/gantry/util/k8s.py b/gantry/util/k8s.py index 470f945..2740c07 100644 --- a/gantry/util/k8s.py +++ b/gantry/util/k8s.py @@ -1,5 +1,4 @@ -BYTES_TO_MEGABYTES = 1 / 1_000_000 -CORES_TO_MILLICORES = 1_000 +from gantry.util.const import BYTES_IN_MB, MILLICORES_IN_CORES # these functions convert the predictions to k8s friendly format # https://kubernetes.io/docs/concepts/configuration/manage-resources-containers @@ -7,9 +6,9 @@ def convert_bytes(bytes: float) -> str: """bytes to megabytes""" - return str(int(round(bytes * BYTES_TO_MEGABYTES))) + "M" + return str(int(round(bytes * BYTES_IN_MB))) + "M" def convert_cores(cores: float) -> str: """cores to millicores""" - return str(int(round(cores * CORES_TO_MILLICORES))) + "m" + return str(int(round(cores * MILLICORES_IN_CORES))) + "m" diff --git a/gantry/util/spec.py b/gantry/util/spec.py index 55dfbce..70e7d52 100644 --- a/gantry/util/spec.py +++ b/gantry/util/spec.py @@ -1,6 +1,8 @@ import json import re +from gantry.util.const import SPACK_SPEC_PATTERN + def spec_variants(spec: str) -> dict: """Given a spec's concrete variants, return a dict in name: value format.""" @@ -51,9 +53,7 @@ def parse_alloc_spec(spec: str) -> dict: for the client. """ - # example: emacs@29.2 +json+native+treesitter arch=x86_64%gcc@12.3.0 - # this regex accommodates versions made up of any non-space characters - spec_pattern = re.compile(r"(.+?)@(\S+)\s+(.+?)\s+arch=(\S+)%([\w-]+)@(\S+)") + spec_pattern = re.compile(SPACK_SPEC_PATTERN) match = spec_pattern.match(spec) if not match: diff --git a/gantry/util/time.py b/gantry/util/time.py index 1989c79..e018f93 100644 --- a/gantry/util/time.py +++ b/gantry/util/time.py @@ -1,15 +1,12 @@ import datetime +from gantry.util.const import GITLAB_DATETIME_FORMAT + def webhook_timestamp(dt: str) -> float: """Converts a gitlab webhook datetime to a unix timestamp.""" - # gitlab sends dates in 2021-02-23 02:41:37 UTC format - # documentation says they use iso 8601, but they don't consistently apply it - # https://docs.gitlab.com/ee/user/project/integrations/webhook_events.html#job-events - GITLAB_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S %Z" - # strptime doesn't tag with timezone by default return ( datetime.datetime.strptime(dt, GITLAB_DATETIME_FORMAT) - .replace(tzinfo=datetime.timezone.utc) - .timestamp() + # strptime doesn't tag with timezone by default + .replace(tzinfo=datetime.timezone.utc).timestamp() )