From 8a34b070cff4c47fba0146bb5a103b0235f6a9f4 Mon Sep 17 00:00:00 2001 From: Yann Rouillard Date: Sun, 4 Feb 2024 15:53:36 +0100 Subject: [PATCH 1/3] refactor: do a major refactor of gitops validation In order to prepare to update it to allow for easy extension with additional checks. On the way, we also enable some caching to speed up the gitops validation when the same chart version is used across several services. --- .pre-commit-hooks.yaml | 9 + .../gitops-values-validation.py | 413 +++++++++++------- pyproject.toml | 5 +- 3 files changed, 272 insertions(+), 155 deletions(-) diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 7f7557c..807cdea 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -34,6 +34,15 @@ stages: [commit] always_run: true pass_filenames: false + additional_dependencies: + - jsonschema==4.19.0 + - requests==2.31.0 + - ruamel-yaml==0.17.32 + - urllib3==1.26.15 + - pyyaml==6.0.1 + - termcolor==2.4.0 + - semver==3.0.2 + - id: kafka-check-schemas name: Check if local Kafka Schemas are up to date with code diff --git a/kp_pre_commit_hooks/gitops-values-validation.py b/kp_pre_commit_hooks/gitops-values-validation.py index 67a4ec8..da4dc38 100755 --- a/kp_pre_commit_hooks/gitops-values-validation.py +++ b/kp_pre_commit_hooks/gitops-values-validation.py @@ -1,195 +1,300 @@ -#!/usr/bin/env python3 - -import json +import re import sys -from itertools import chain +import textwrap +from dataclasses import dataclass, field +from functools import cache, cached_property from pathlib import Path +from typing import Iterator, Optional, Sequence, Union, cast import requests -import urllib3.exceptions -from jsonschema import Draft7Validator +import semver +import urllib3 +import yaml +from jsonschema import Draft7Validator, ValidationError, validators +from jsonschema.protocols import Validator from jsonschema_specifications import REGISTRY from referencing import Registry, Resource -from ruamel.yaml import YAML +from termcolor import colored urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +############################################################################### +# Main code +############################################################################### + SCHEMA_BASE_URL = "https://kp-helmchart-stable-shared-main.s3.eu-west-1.amazonaws.com/schema/platform-managed-chart" GITOPS_DIR = Path("gitops") -yaml = YAML() +SCHEMA_HEADER_REGEXP = re.compile( + rf"^ *# yaml-language-server: \$schema={SCHEMA_BASE_URL}/v(?P[^/]+)/schema-platform-managed-chart.json", re.MULTILINE +) +TOPIC_NAME_REGEXP = re.compile(r"^(private\.)?(?P[a-z][a-z0-9-]*)\.[a-z][a-z0-9]*(-[0-9]+)?(\.[a-z0-9]+)?$") +TWINGATE_DOC_URL = "https://kpler.atlassian.net/wiki/spaces/KSD/pages/243562083/Install+and+configure+the+Twingate+VPN+client" -def retrieve_schema_via_request(uri): - response = requests.get(uri) - return Resource.from_contents(response.json()) +############################################################################### +# Generic Helper functions and classes +############################################################################### +# Helper functions to colorize the output +red = lambda text: colored(text, "red") +green = lambda text: colored(text, "green") +bold = lambda text: colored(text, attrs=["bold"]) -SCHEMA_REGISTRY = REGISTRY.combine(Registry(retrieve=retrieve_schema_via_request)) +def camel_to_snake(name): + name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() -def download_json_schema_for_chart_version(version): - schema_url = f"{SCHEMA_BASE_URL}/v{version}/schema-platform-managed-chart-strict.json" - response = requests.get(schema_url, timeout=10, verify=False) - if response.status_code != 200: - print( - f"Error fetching schema url {schema_url}. HTTP Status Code: {response.status_code}\nPlease enable VPN first." +def deep_merge(*sources) -> dict: + result = {} + for dictionary in sources: + for key, value in dictionary.items(): + if isinstance(value, dict): + result[key] = deep_merge(result.setdefault(key, {}), value) + else: + result[key] = value + return result + + +############################################################################### +# Specific Helper functions and classes +############################################################################### + + +@dataclass +class UnauthorizedToDownloadSchema(Exception): + schema_url: str + + +@dataclass +class MissingSchema(Exception): + schema_url: str + + +@dataclass +class SchemaValidationError(Exception): + message: str + location: str + hint: Optional[str] = None + + +@cache +def download_json_schema(url): + response = requests.get(url, timeout=10, verify=False) + if response.status_code == 403: + raise UnauthorizedToDownloadSchema(url) + if response.status_code == 404: + raise MissingSchema(url) + response.raise_for_status() + return response.json() + + +# This is required so that jsonschema library can automatically download the schema references +SCHEMA_REGISTRY = REGISTRY.combine(Registry(retrieve=lambda uri: Resource.from_contents(download_json_schema(uri)))) + + +@dataclass +class HelmChart: + name: str + version: str + dependencies: list["HelmChart"] = field(default_factory=list) + + def get_dependency(self, dependency_name) -> Optional["HelmChart"]: + return next((d for d in self.dependencies if d.name == dependency_name), None) + + @cached_property + def json_schema(self) -> dict: + if self.platform_managed_chart_version and semver.compare(self.platform_managed_chart_version, "0.1.35") >= 0: + schema_url = f"{SCHEMA_BASE_URL}/v{self.platform_managed_chart_version}/schema-platform-managed-chart-strict.json" + return download_json_schema(schema_url) + else: + return {} + + @cached_property + def platform_managed_chart_version(self) -> Optional[str]: + platform_managed_chart = self.get_dependency("platform-managed-chart") + return platform_managed_chart.version if platform_managed_chart else None + + @staticmethod + def from_chart_file(chart_file: Path): + chart = cast(dict, yaml.safe_load(chart_file.read_text())) + return HelmChart( + name=chart["name"], + version=chart["version"], + dependencies=[HelmChart(dep["name"], dep["version"]) for dep in chart.get("dependencies", [])], ) - sys.exit(4) - try: - schema_json = response.json() - return schema_json - except json.JSONDecodeError: - print(f"Error decoding JSON. Response content:\n{response.text}") - sys.exit(4) - - -def verify_values_files_schema_version(version, directory=Path(".")): - for filename in directory.glob("values*.yaml"): - value_file = filename.read_text(encoding="utf8") - for line in value_file.splitlines(): - if line.startswith("# yaml-language-server: $schema="): - schema_version = line.split("=")[1].split("/")[-2].replace("v", "") - if schema_version != version: - print(f"ERROR: validation failure for {directory}/{filename}") - print( - f"reason: JSON schema version in '{line}' does not match version {version} in Chart.yaml" - ) - # Let's automatically fix the version in the file on the way - filename.write_text( - value_file.replace(line, line.replace(schema_version, version)) - ) - return False - return True - - -def delete_error_files(directory=Path(".")): - for filename in directory.glob("error-merged-values-*.yaml"): - try: - filename.unlink() - except Exception as err: - print(f"Error deleting {filename}. Reason: {err}") + +class GitOpsRepository: + def __init__(self, gitops_path: Path): + self.gitops_path = gitops_path + + def iter_service_instances_config(self): + for instance_values_file in self.gitops_path.glob("gitops/*/*/values-*-*.yaml"): + application_name = instance_values_file.parent.parent.name + service_name = instance_values_file.parent.name + _, env, instance = instance_values_file.stem.split("-", maxsplit=2) + yield ServiceInstanceConfig(application_name, service_name, env, instance, instance_values_file.parent, self) -def deep_merge(source, destination): - for key, value in source.items(): - if isinstance(value, dict): - # Get node or create one - node = destination.setdefault(key, {}) - deep_merge(value, node) +@dataclass +class ValuesFile: + path: Path + + def __str__(self): + return str(self.path.name) + + @cached_property + def values(self): + return yaml.safe_load(self.path.read_text()) or {} + + @cached_property + def header_schema_version(self) -> Optional[str]: + match = SCHEMA_HEADER_REGEXP.search(self.path.read_text()) + return match.group("version") if match else None + + def set_header_schema_version(self, version): + if self.header_schema_version == version: + return + header = f"# yaml-language-server: $schema={SCHEMA_BASE_URL}/v{version}/schema-platform-managed-chart.json\n" + content = self.path.read_text() + if self.header_schema_version is None: + self.path.write_text(header + content) else: - destination[key] = value + self.path.write_text(SCHEMA_HEADER_REGEXP.sub(header, content)) - return destination + @staticmethod + def merge_values(values_files: list["ValuesFile"]) -> dict: + return deep_merge(*[v.values for v in values_files]) -def merge_service_values_files(service_path, instance_file): - """Merge values.yaml, values-env.yaml and values-env-instance.yaml files.""" - merged_data = {} +@dataclass +class ServiceInstanceConfig: + application_name: str + service_name: str + env: str + instance: str + path: Path + gitops_repository: GitOpsRepository - # Base file - base_file = (service_path / "values.yaml").read_text(encoding="utf8") - merged_data.update(yaml.load(base_file)) + def __str__(self): + return f"{self.application_name}/{self.service_name} {self.instance} instance {self.env} configuration" - # Environment file - env = "dev" if "dev" in instance_file else "prod" - env_file_path = service_path / f"values-{env}.yaml" - if env_file_path.exists(): - env_file = env_file_path.read_text(encoding="utf8") - env_data = yaml.load(env_file) - deep_merge(env_data, merged_data) + @property + def rel_path(self) -> Path: + return self.path.relative_to(self.gitops_repository.gitops_path) - # Instance file - instance_f = (service_path / instance_file).read_text(encoding="utf8") - instance_data = yaml.load(instance_f) - deep_merge(instance_data, merged_data) + @property + def configuration(self) -> dict: + return ValuesFile.merge_values(self.values_files) - return merged_data + @property + def values_files(self) -> list[ValuesFile]: + candidate_files = ["values.yaml", f"values-{self.env}.yaml", f"values-{self.env}-{self.instance}.yaml"] + return [ValuesFile(self.path.joinpath(file)) for file in candidate_files if self.path.joinpath(file).exists()] + @property + def helm_chart(self) -> HelmChart: + return HelmChart.from_chart_file(self.path / "Chart.yaml") -def find_full_error_path(error): - if error.parent: - return find_full_error_path(error.parent) + error.path - else: - return error.path - - -def main(): - if not GITOPS_DIR.exists(): - print(f"{GITOPS_DIR} directory is missing, exiting...") - sys.exit(0) - - error_found = False - - # Iterate over direct subdirectories inside GITOPS_DIR - for service_path in GITOPS_DIR.glob("*/*"): - if not service_path.is_dir(): - continue - - chart_file = service_path / "Chart.yaml" - value_file = service_path / "values.yaml" - if not (chart_file.is_file() and value_file.is_file()): - print(f"Chart.yaml or values.yaml file is missing in {service_path}, skipping...") - continue - - chart_data = yaml.load(chart_file.read_text(encoding="utf8")) - chart_version = next( - ( - dep["version"] - for dep in chart_data.get("dependencies", []) - if dep["name"] == "platform-managed-chart" - ), - None, - ) + def sync_values_files_schema_header_version(self): + for value_file in self.values_files: + value_file.set_header_schema_version(self.helm_chart.platform_managed_chart_version) - if not chart_version: - print( - f"Chart.yaml {chart_file} is missing platform-managed-chart dependency, skipping..." - ) - continue - - if not verify_values_files_schema_version(chart_version, service_path): - error_found = True - continue - - schema_data = download_json_schema_for_chart_version(chart_version) - if not schema_data: - print(f"JSON schema for version {chart_version} is not supported, skipping...") - continue - - for instance_file_path in chain( - service_path.glob("values-dev-*.yaml"), - service_path.glob("values-prod-*.yaml"), - ): - instance_file = instance_file_path.name - merged_values = merge_service_values_files(service_path, instance_file) - validator = Draft7Validator(schema_data, registry=SCHEMA_REGISTRY) # type: ignore - errors = list(validator.iter_errors(merged_values)) - - if errors: - error_found = True - base_file = "-".join(instance_file.split("-")[:2]) + ".yaml" - print( - f"\nERROR: Validation errors for {service_path} in {instance_file}, {base_file} or values.yaml:" + +class ServiceInstanceConfigValidator: + + def __init__(self, service_instance_config: ServiceInstanceConfig): + self.service_instance_config = service_instance_config + + @cached_property + def validator(self) -> Validator: + return Draft7Validator(self.service_instance_config.helm_chart.json_schema, registry=SCHEMA_REGISTRY) # type: ignore + + def validate_configuration(self) -> Sequence[Union[ValidationError, SchemaValidationError]]: + try: + validation_errors = list(self.validator.iter_errors((self.service_instance_config.configuration))) + schema_validation_errors = list(self.iter_schema_validation_errors()) + return validation_errors + schema_validation_errors + + except MissingSchema as error: + platform_managed_chart_version = self.service_instance_config.helm_chart.platform_managed_chart_version + return [ + SchemaValidationError( + f"Missing JSON schema for platform managed chart version {platform_managed_chart_version} in Chart.yaml", + location=error.schema_url, + ) + ] + + def iter_schema_validation_errors(self) -> Iterator[SchemaValidationError]: + platform_managed_chart_version = self.service_instance_config.helm_chart.platform_managed_chart_version + for values_file in self.service_instance_config.values_files: + if values_file.header_schema_version != platform_managed_chart_version: + yield SchemaValidationError( + f"JSON schema version in header ({values_file.header_schema_version}) does not match version in Chart.yaml ({platform_managed_chart_version})", + location=f"values file {values_file}", + hint="This pre-commit hook will auto-fix this issue. Please commit the values files changes.", ) - for error in errors: - error_location = "/".join(str(x) for x in find_full_error_path(error)) or "the root" - print(f" - {error.message} at {error_location}") - output_file = service_path / f"error-merged-{instance_file}" - with output_file.open("w", encoding="utf8") as out: - out.write( - f"# yaml-language-server: $schema={SCHEMA_BASE_URL}/v${chart_version}/schema-platform-managed-chart-strict.json\n" - ) - yaml.dump(merged_values, out) - delete_error_files(service_path) +def format_error(error: Union[ValidationError, SchemaValidationError]): + if isinstance(error, SchemaValidationError): + error_message = f"{red('ERROR:')} {error.message}\n at: {bold(error.location)}" + if error.hint: + error_message += f"\n\n {bold('Hint:')} {error.hint}\n" + + else: + location = "/".join(map(str, error.absolute_path)) + error_message = f"{red('ERROR:')} {error.message}\n at: {bold(location)}" + if description := error.schema and error.schema.get("description"): + title, description = description.split("\n", maxsplit=1) + error_message += f"\n\n {bold('Hint:')} {title}\n\n" + error_message += textwrap.indent(description, prefix=" " * 7) + + return error_message + + +def display_errors( + service_instance_config: ServiceInstanceConfig, errors: Sequence[Union[ValidationError, SchemaValidationError]] +): + values_files = ", ".join([str(v) for v in service_instance_config.values_files]) + print(f"\nThe following error(s) were found in the files {values_files}\nunder {service_instance_config.rel_path}:\n") + for error in errors: + print(textwrap.indent(format_error(error), prefix=" " * 2) + "\n") - if error_found: - sys.exit(1) +############################################################################### +# Main code +############################################################################### if __name__ == "__main__": - main() + gitops_path = Path(sys.argv[1]) if len(sys.argv) >= 2 else Path.cwd() + gitops_repository = GitOpsRepository(gitops_path) + + try: + errors_found = False + for service_instance_config in gitops_repository.iter_service_instances_config(): + print(f"Checking {service_instance_config} ", end="") + + validator = ServiceInstanceConfigValidator(service_instance_config) + errors = validator.validate_configuration() + if not errors: + print(green("PASSED")) + else: + errors_found = True + print(red("FAILED")) + display_errors(service_instance_config, errors) + # We always try to sync the schema header version, in case it was one of the error detected + service_instance_config.sync_values_files_schema_header_version() + + sys.exit(1 if errors_found else 0) + + except UnauthorizedToDownloadSchema as error: + print( + f"\n\n{red('FATAL:')} Unauthorized to download schema at {error.schema_url}\n" + " Please check that your Twingate VPN Client is up and running configured.\n" + f" More info at {TWINGATE_DOC_URL}\n\n" + ) + sys.exit(1) diff --git a/pyproject.toml b/pyproject.toml index 49d0cb2..7899320 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,9 @@ jsonschema = "^4.19.0" requests = "^2.31.0" ruamel-yaml = "^0.17.32" urllib3 = "^1.26.15" +pyyaml = "^6.0.1" +termcolor = "^2.4.0" +semver = "^3.0.2" [tool.poetry.group.dev.dependencies] black = {version = "^22.10.0", allow-prereleases = true} @@ -20,4 +23,4 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.black] -line-length = 100 +line-length = 130 From 2399046e023eab0872ac1090af179fa093fc3a33 Mon Sep 17 00:00:00 2001 From: Yann Rouillard Date: Mon, 5 Feb 2024 10:16:28 +0100 Subject: [PATCH 2/3] feat: enable support for implementing additional checks in gitops validation --- kp_pre_commit_hooks/gitops-values-validation.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kp_pre_commit_hooks/gitops-values-validation.py b/kp_pre_commit_hooks/gitops-values-validation.py index da4dc38..355d58a 100755 --- a/kp_pre_commit_hooks/gitops-values-validation.py +++ b/kp_pre_commit_hooks/gitops-values-validation.py @@ -211,7 +211,10 @@ def __init__(self, service_instance_config: ServiceInstanceConfig): @cached_property def validator(self) -> Validator: - return Draft7Validator(self.service_instance_config.helm_chart.json_schema, registry=SCHEMA_REGISTRY) # type: ignore + validator_class = validators.validates("draft7")( + validators.extend(Draft7Validator, validators={"additionalChecks": self.validate_additional_checks}) + ) + return validator_class(self.service_instance_config.helm_chart.json_schema, registry=SCHEMA_REGISTRY) def validate_configuration(self) -> Sequence[Union[ValidationError, SchemaValidationError]]: try: @@ -238,6 +241,11 @@ def iter_schema_validation_errors(self) -> Iterator[SchemaValidationError]: hint="This pre-commit hook will auto-fix this issue. Please commit the values files changes.", ) + def validate_additional_checks(self, validator, additional_checks, value, schema): + for check in additional_checks: + if check_method := getattr(self, f"validate_{camel_to_snake(check)}", None): + yield from check_method(value, schema) + def format_error(error: Union[ValidationError, SchemaValidationError]): if isinstance(error, SchemaValidationError): From 5de7ad9ef230332086132a4eb9d0879e5bd0a26d Mon Sep 17 00:00:00 2001 From: Yann Rouillard Date: Mon, 5 Feb 2024 10:17:43 +0100 Subject: [PATCH 3/3] feat: add new check for topic name compliance in gitops validation --- kp_pre_commit_hooks/gitops-values-validation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kp_pre_commit_hooks/gitops-values-validation.py b/kp_pre_commit_hooks/gitops-values-validation.py index 355d58a..bb16a4e 100755 --- a/kp_pre_commit_hooks/gitops-values-validation.py +++ b/kp_pre_commit_hooks/gitops-values-validation.py @@ -246,6 +246,12 @@ def validate_additional_checks(self, validator, additional_checks, value, schema if check_method := getattr(self, f"validate_{camel_to_snake(check)}", None): yield from check_method(value, schema) + def validate_topic_name_compliance(self, value, schema): + match = TOPIC_NAME_REGEXP.match(str(value)) + service_name = self.service_instance_config.service_name + if match and match["serviceName"] != service_name: + yield ValidationError(f"topicName '{value}' it not compliant, it should contain the service name '{service_name}'") + def format_error(error: Union[ValidationError, SchemaValidationError]): if isinstance(error, SchemaValidationError):