From 774c10f7ba2379789fbcb5912be80e56307ac9ee Mon Sep 17 00:00:00 2001
From: Zain Rizvi
Date: Mon, 21 Oct 2024 12:16:06 -0500
Subject: [PATCH] Expect scale-configs to all live in test-infra now (#5788)

Part of the workflow to move the LF scale-config.yml files from
pytorch/pytorch to test-infra (details in
https://github.com/pytorch/test-infra/pull/5767)

This updates the validation script so that it no longer expects to update
the pytorch/pytorch version of these scale configs, which also made many
other aspects of that file simpler.
---
 .github/scripts/validate_scale_config.py | 121 +++++++----------------
 1 file changed, 33 insertions(+), 88 deletions(-)

diff --git a/.github/scripts/validate_scale_config.py b/.github/scripts/validate_scale_config.py
index bb8098c9c2..15df04b525 100644
--- a/.github/scripts/validate_scale_config.py
+++ b/.github/scripts/validate_scale_config.py
@@ -1,18 +1,17 @@
 # Takes the scale-config.yml file in test-infra/.github/scale-config.yml and runs the following
 # validations against it:
-# 1. Internal validation: Ensure that every linux runner type listed has the corresponding Amazon 2023 variant
+# 1. Internal validation: Runs a custom set of sanity checks against the runner types defined in the file
 # 2. External validation: Ensure that every runner type listed (linux & windows) has corresponding runner types in
-# pytorch/pytorch's .github/lf-scale-config.yml and .github/lf-canary-scale-config.yml that have the "lf."
-# "lf.c." prefixes added correspondingly
-# This script assumes that it is being run from the root of the test-infra repository
+# the Linux Foundation fleet's scale config files (.github/lf-scale-config.yml and .github/lf-canary-scale-config.yml).
+# Those files are expected to have the "lf." and "lf.c." prefixes added to each runner type
 
 import argparse
 import copy
 import json
 import os
-import tempfile
 import urllib.request
+from pathlib import Path
 
 from typing import Any, cast, Dict, List, NamedTuple
 
@@ -29,8 +28,6 @@
 
 RUNNER_TYPE_CONFIG_KEY = "runner_types"
 
-GITHUB_PYTORCH_REPO_RAW_URL = "https://raw.githubusercontent.com/pytorch/pytorch/main/"
-
 PREFIX_META = ""
 PREFIX_LF = "lf."
 PREFIX_LF_CANARY = "lf.c."
@@ -71,23 +68,19 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Validate scale-config.yml file")
 
     parser.add_argument(
-        "--test-infra-repo-root",
-        type=str,
-        required=False,
-        default=".",
-        help="Path to the root of the local test-infra repository. Default is the current directory",
-    )
-    parser.add_argument(
-        "--pytorch-repo-root",
-        type=str,
-        required=False,
-        help="Path to the root of the local pytorch repository. If omitted, uses the "
-        "main branch from github pytorch/pytorch",
+        "--generate",
+        "-g",
+        action="store_true",
+        help="Update the generated scale configs based on the source scale config",
     )
 
     return parser.parse_args()
 
 
+def get_repo_root() -> Path:
+    return Path(__file__).resolve().parent.parent.parent
+
+
 def runner_types_are_equivalent(
     runner1_type: str,
     runner1_config: Dict[str, str],
@@ -146,8 +139,8 @@
     return are_same
 
 
-def is_config_consistent_internally(runner_types: Dict[str, Dict[str, str]]) -> bool:
-    f"""
+def is_config_valid_internally(runner_types: Dict[str, Dict[str, str]]) -> bool:
+    """
     Ensure that for every linux runner type in the config:
 
     1 - they match RunnerTypeScaleConfig
 https://github.com/pytorch/test-infra/blob/f3c58fea68ec149391570d15a4d0a03bc26fbe4f/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/runners.ts#L50
@@ -218,8 +211,6 @@ def generate_repo_scale_config(
     Generate the new scale config file with the same layout as the original file, but with the
     expected_prefix added to the runner types
     """
-
-    print(f"Generating updated {dest_config_file}")
     source_config = load_yaml_file(source_config_file)
 
     base_runner_types = set(source_config[RUNNER_TYPE_CONFIG_KEY].keys())
@@ -245,9 +236,9 @@
             f.write(line)
 
 
-def load_yaml_file(scale_config_path: str) -> Dict[str, Any]:
+def load_yaml_file(scale_config_path: Path) -> Dict[str, Any]:
     # Verify file exists
-    if not os.path.exists(scale_config_path):
+    if not scale_config_path.exists():
         print(
             f"Could not find file {scale_config_path}. Please verify the path given on the command line."
         )
@@ -268,97 +259,51 @@ def download_file(url: str, local_filename: str) -> None:
             f.write(content)
 
 
-def pull_temp_config_from_github_repo(config_path: str) -> str:
-    config_url = GITHUB_PYTORCH_REPO_RAW_URL + config_path
-
-    temp_dir = tempfile.mkdtemp()
-    config_path = os.path.join(temp_dir, config_path)
-    download_file(config_url, config_path)
-
-    return config_path
-
-
 class ScaleConfigInfo(NamedTuple):
-    path: str  # full path to scale config file
+    path: Path  # full path to scale config file
     prefix: str  # prefix this fleet's runner types should have
 
 
 def main() -> None:
+    repo_root = get_repo_root()
+
     args = parse_args()
 
     source_scale_config_info = ScaleConfigInfo(
-        path=os.path.join(args.test_infra_repo_root, META_SCALE_CONFIG_PATH),
+        path=repo_root / META_SCALE_CONFIG_PATH,
         prefix=PREFIX_META,
     )
 
     # Contains scale configs that are generated from the source scale config
     generated_scale_config_infos: List[ScaleConfigInfo] = [
         ScaleConfigInfo(
-            path=os.path.join(args.test_infra_repo_root, LF_SCALE_CONFIG_PATH),
+            path=repo_root / LF_SCALE_CONFIG_PATH,
             prefix=PREFIX_LF,
         ),
         ScaleConfigInfo(
-            path=os.path.join(args.test_infra_repo_root, LF_CANARY_SCALE_CONFIG_PATH),
+            path=repo_root / LF_CANARY_SCALE_CONFIG_PATH,
             prefix=PREFIX_LF_CANARY,
         ),
     ]
 
-    generate_files = True
-    if args.pytorch_repo_root is None:
-        # This is expected during a CI run
-        generate_files = False
-        print(
-            "Using github's pytorch/pytorch repository as the source for the pytorch scale config files"
-        )
-
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=pull_temp_config_from_github_repo(LF_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF,
-            )
-        )
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=pull_temp_config_from_github_repo(LF_CANARY_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF_CANARY,
-            )
-        )
-    else:
-        # This is expected during a local run
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=os.path.join(args.pytorch_repo_root, LF_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF,
-            )
-        )
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=os.path.join(args.pytorch_repo_root, LF_CANARY_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF_CANARY,
-            )
-        )
-
     source_scale_config = load_yaml_file(source_scale_config_info.path)
 
     validation_success = True
-    if not is_config_consistent_internally(source_scale_config[RUNNER_TYPE_CONFIG_KEY]):
-        validation_success = False
-        print("scale-config.yml is not internally consistent\n")
-    else:
-        print("scale-config.yml is internally consistent\n")
+    validation_success = is_config_valid_internally(
+        source_scale_config[RUNNER_TYPE_CONFIG_KEY]
+    )
+    print(f"scale-config.yml is {'valid' if validation_success else 'invalid'}\n")
 
     def validate_config(generated_config_info: ScaleConfigInfo) -> bool:
-        if generate_files:
+        if args.generate:
+            print(f"Generating updated {generated_config_info.path}")
+
             generate_repo_scale_config(
                 source_scale_config_info.path,
                 generated_config_info.path,
                 generated_config_info.prefix,
             )
 
-            print(
-                f"Generated updated pytorch/pytorch scale config file at {generated_config_info.path}\n"
-            )
-
         cloned_scale_config = load_yaml_file(generated_config_info.path)
 
         if not is_consistent_across_configs(
@@ -367,7 +312,7 @@ def validate_config(generated_config_info: ScaleConfigInfo) -> bool:
             generated_config_info.prefix,
         ):
             print(
-                f"Consistency validation failed between {source_scale_config.path} and {generated_config_info.path}\n"
+                f"Consistency validation failed between {source_scale_config_info.path} and {generated_config_info.path}\n"
             )
             return False
         else:
@@ -380,9 +325,9 @@ def validate_config(generated_config_info: ScaleConfigInfo) -> bool:
     if not validation_success:
         print(
             "Validation failed\n\n"
-            "Please run `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path] "
-            "--pytorch-repo-root [path]` locally to validate the scale-config.yml file and generate the "
-            "updated pytorch/pytorch scale config files.\n\n"
+            "Please run `python .github/scripts/validate_scale_config.py --generate` "
+            "locally to validate the scale-config.yml file and generate the updated "
+            "variant scale config files.\n\n"
             "Note: You still need to fix internal consistency errors yourself.\n\n"
            "If this script passes locally and you already have a PR open on pytorch/pytorch with the "
            "relevant changes, you can merge that pytorch/pytorch PR first to make this job pass."
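
For context, the core relationship this patch validates is the runner-type
prefix mapping between the source scale config and the generated Linux
Foundation configs. A minimal sketch of that mapping follows (illustrative
only: the helper `expected_variant_names` and the sample runner type
`linux.4xlarge` are hypothetical, while the "lf." and "lf.c." prefixes mirror
the PREFIX_LF and PREFIX_LF_CANARY constants in the script):

    from typing import Dict

    PREFIX_LF = "lf."
    PREFIX_LF_CANARY = "lf.c."

    def expected_variant_names(runner_type: str) -> Dict[str, str]:
        # A runner type defined in test-infra's scale-config.yml must appear
        # in each generated config with that fleet's prefix prepended.
        return {
            PREFIX_LF: PREFIX_LF + runner_type,
            PREFIX_LF_CANARY: PREFIX_LF_CANARY + runner_type,
        }

    # e.g. "linux.4xlarge" maps to "lf.linux.4xlarge" in lf-scale-config.yml
    # and "lf.c.linux.4xlarge" in lf-canary-scale-config.yml
    assert expected_variant_names("linux.4xlarge") == {
        "lf.": "lf.linux.4xlarge",
        "lf.c.": "lf.c.linux.4xlarge",
    }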