From 774c10f7ba2379789fbcb5912be80e56307ac9ee Mon Sep 17 00:00:00 2001
From: Zain Rizvi
Date: Mon, 21 Oct 2024 12:16:06 -0500
Subject: [PATCH] Expect scale-configs to all live in test-infra now (#5788)

Part of the workflow to move the LF scale-config.yml files from
pytorch/pytorch to test-infra (details in
https://github.com/pytorch/test-infra/pull/5767)

This updates the validation script so that it no longer expects to update
the pytorch/pytorch version of these scale configs, which also made many
other aspects of that file simpler.
---
 .github/scripts/validate_scale_config.py | 121 +++++++----------------
 1 file changed, 33 insertions(+), 88 deletions(-)

diff --git a/.github/scripts/validate_scale_config.py b/.github/scripts/validate_scale_config.py
index bb8098c9c2..15df04b525 100644
--- a/.github/scripts/validate_scale_config.py
+++ b/.github/scripts/validate_scale_config.py
@@ -1,18 +1,17 @@
 # Takes the scale-config.yml file in test-infra/.github/scale-config.yml and runs the following
 # validations against it:
-# 1. Internal validation: Ensure that every linux runner type listed has the corresponding Amazon 2023 variant
+# 1. Internal validation: Runs a custom set of sanity checks against the runner types defined in the file
 # 2. External validation: Ensure that every runner type listed (linux & windows) has corresponding runner types in
-# pytorch/pytorch's .github/lf-scale-config.yml and .github/lf-canary-scale-config.yml that have the "lf."
-# "lf.c." prefixes added correspondingly
-# This script assumes that it is being run from the root of the test-infra repository
+# the Linux Foundation fleet's scale config files (.github/lf-scale-config.yml and .github/lf-canary-scale-config.yml).
+# Those files are expected to have the "lf." and "lf.c." prefixes added to each runner type
 
 import argparse
 import copy
 import json
 import os
-import tempfile
 import urllib.request
+from pathlib import Path
 
 from typing import Any, cast, Dict, List, NamedTuple
 
@@ -29,8 +28,6 @@
 
 RUNNER_TYPE_CONFIG_KEY = "runner_types"
 
-GITHUB_PYTORCH_REPO_RAW_URL = "https://raw.githubusercontent.com/pytorch/pytorch/main/"
-
 PREFIX_META = ""
 PREFIX_LF = "lf."
 PREFIX_LF_CANARY = "lf.c."
@@ -71,23 +68,19 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Validate scale-config.yml file")
 
     parser.add_argument(
-        "--test-infra-repo-root",
-        type=str,
-        required=False,
-        default=".",
-        help="Path to the root of the local test-infra repository. Default is the current directory",
-    )
-    parser.add_argument(
-        "--pytorch-repo-root",
-        type=str,
-        required=False,
-        help="Path to the root of the local pytorch repository. If omitted, uses the "
-        "main branch from github pytorch/pytorch",
+        "--generate",
+        "-g",
+        action="store_true",
+        help="Update the generated scale configs based on the source scale config",
     )
 
     return parser.parse_args()
 
 
+def get_repo_root() -> Path:
+    return Path(__file__).resolve().parent.parent.parent
+
+
 def runner_types_are_equivalent(
     runner1_type: str,
     runner1_config: Dict[str, str],
@@ -146,8 +139,8 @@
     return are_same
 
 
-def is_config_consistent_internally(runner_types: Dict[str, Dict[str, str]]) -> bool:
-    f"""
+def is_config_valid_internally(runner_types: Dict[str, Dict[str, str]]) -> bool:
+    """
     Ensure that for every linux runner type in the config:
 
     1 - they match RunnerTypeScaleConfig
 https://github.com/pytorch/test-infra/blob/f3c58fea68ec149391570d15a4d0a03bc26fbe4f/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/runners.ts#L50
@@ -218,8 +211,6 @@ def generate_repo_scale_config(
     Generate the new scale config file with the same layout as the original file, but with the
     expected_prefix added to the runner types
     """
-
-    print(f"Generating updated {dest_config_file}")
     source_config = load_yaml_file(source_config_file)
 
     base_runner_types = set(source_config[RUNNER_TYPE_CONFIG_KEY].keys())
@@ -245,9 +236,9 @@
             f.write(line)
 
 
-def load_yaml_file(scale_config_path: str) -> Dict[str, Any]:
+def load_yaml_file(scale_config_path: Path) -> Dict[str, Any]:
     # Verify file exists
-    if not os.path.exists(scale_config_path):
+    if not scale_config_path.exists():
         print(
             f"Could not find file {scale_config_path}. Please verify the path given on the command line."
         )
@@ -268,97 +259,51 @@ def download_file(url: str, local_filename: str) -> None:
             f.write(content)
 
 
-def pull_temp_config_from_github_repo(config_path: str) -> str:
-    config_url = GITHUB_PYTORCH_REPO_RAW_URL + config_path
-
-    temp_dir = tempfile.mkdtemp()
-    config_path = os.path.join(temp_dir, config_path)
-    download_file(config_url, config_path)
-
-    return config_path
-
-
 class ScaleConfigInfo(NamedTuple):
-    path: str  # full path to scale config file
+    path: Path  # full path to scale config file
     prefix: str  # prefix this fleet's runner types should have
 
 
 def main() -> None:
+    repo_root = get_repo_root()
+
     args = parse_args()
 
     source_scale_config_info = ScaleConfigInfo(
-        path=os.path.join(args.test_infra_repo_root, META_SCALE_CONFIG_PATH),
+        path=repo_root / META_SCALE_CONFIG_PATH,
         prefix=PREFIX_META,
     )
 
     # Contains scale configs that are generated from the source scale config
     generated_scale_config_infos: List[ScaleConfigInfo] = [
         ScaleConfigInfo(
-            path=os.path.join(args.test_infra_repo_root, LF_SCALE_CONFIG_PATH),
+            path=repo_root / LF_SCALE_CONFIG_PATH,
             prefix=PREFIX_LF,
         ),
         ScaleConfigInfo(
-            path=os.path.join(args.test_infra_repo_root, LF_CANARY_SCALE_CONFIG_PATH),
+            path=repo_root / LF_CANARY_SCALE_CONFIG_PATH,
             prefix=PREFIX_LF_CANARY,
         ),
     ]
 
-    generate_files = True
-    if args.pytorch_repo_root is None:
-        # This is expected during a CI run
-        generate_files = False
-        print(
-            "Using github's pytorch/pytorch repository as the source for the pytorch scale config files"
-        )
-
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=pull_temp_config_from_github_repo(LF_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF,
-            )
-        )
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=pull_temp_config_from_github_repo(LF_CANARY_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF_CANARY,
-            )
-        )
-    else:
-        # This is expected during a local run
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=os.path.join(args.pytorch_repo_root, LF_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF,
-            )
-        )
-        generated_scale_config_infos.append(
-            ScaleConfigInfo(
-                path=os.path.join(args.pytorch_repo_root, LF_CANARY_SCALE_CONFIG_PATH),
-                prefix=PREFIX_LF_CANARY,
-            )
-        )
-
     source_scale_config = load_yaml_file(source_scale_config_info.path)
 
     validation_success = True
-    if not is_config_consistent_internally(source_scale_config[RUNNER_TYPE_CONFIG_KEY]):
-        validation_success = False
-        print("scale-config.yml is not internally consistent\n")
-    else:
-        print("scale-config.yml is internally consistent\n")
+    validation_success = is_config_valid_internally(
+        source_scale_config[RUNNER_TYPE_CONFIG_KEY]
+    )
+    print(f"scale-config.yml is {'valid' if validation_success else 'invalid'}\n")
 
     def validate_config(generated_config_info: ScaleConfigInfo) -> bool:
-        if generate_files:
+        if args.generate:
+            print(f"Generating updated {generated_config_info.path}")
+
             generate_repo_scale_config(
                 source_scale_config_info.path,
                 generated_config_info.path,
                 generated_config_info.prefix,
             )
 
-            print(
-                f"Generated updated pytorch/pytorch scale config file at {generated_config_info.path}\n"
-            )
-
         cloned_scale_config = load_yaml_file(generated_config_info.path)
 
         if not is_consistent_across_configs(
@@ -367,7 +312,7 @@ def validate_config(generated_config_info: ScaleConfigInfo) -> bool:
             generated_config_info.prefix,
         ):
             print(
-                f"Consistency validation failed between {source_scale_config.path} and {generated_config_info.path}\n"
+                f"Consistency validation failed between {source_scale_config_info.path} and {generated_config_info.path}\n"
             )
             return False
         else:
@@ -380,9 +325,9 @@ def validate_config(generated_config_info: ScaleConfigInfo) -> bool:
     if not validation_success:
         print(
             "Validation failed\n\n"
-            "Please run `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path] "
-            "--pytorch-repo-root [path]` locally to validate the scale-config.yml file and generate the "
-            "updated pytorch/pytorch scale config files.\n\n"
+            "Please run `python .github/scripts/validate_scale_config.py --generate` "
+            "locally to validate the scale-config.yml file and generate the updated "
+            "variant scale config files.\n\n"
             "Note: You still need to fix internal consistency errors yourself.\n\n"
            "If this script passes locally and you already have a PR open on pytorch/pytorch with the "
            "relevant changes, you can merge that pytorch/pytorch PR first to make this job pass."
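
For context, the core relationship this patch validates is the runner-type
prefix mapping between the source scale config and the generated Linux
Foundation configs. A minimal sketch of that mapping follows (illustrative
only: the helper `expected_variant_names` and the sample runner type
`linux.4xlarge` are hypothetical, while the "lf." and "lf.c." prefixes mirror
the PREFIX_LF and PREFIX_LF_CANARY constants in the script):

    from typing import Dict

    PREFIX_LF = "lf."
    PREFIX_LF_CANARY = "lf.c."

    def expected_variant_names(runner_type: str) -> Dict[str, str]:
        # A runner type defined in test-infra's scale-config.yml must appear
        # in each generated config with that fleet's prefix prepended.
        return {
            PREFIX_LF: PREFIX_LF + runner_type,
            PREFIX_LF_CANARY: PREFIX_LF_CANARY + runner_type,
        }

    # e.g. "linux.4xlarge" maps to "lf.linux.4xlarge" in lf-scale-config.yml
    # and "lf.c.linux.4xlarge" in lf-canary-scale-config.yml
    assert expected_variant_names("linux.4xlarge") == {
        "lf.": "lf.linux.4xlarge",
        "lf.c.": "lf.c.linux.4xlarge",
    }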