diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 7f7557c..807cdea 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -34,6 +34,15 @@ stages: [commit] always_run: true pass_filenames: false + additional_dependencies: + - jsonschema==4.19.0 + - requests==2.31.0 + - ruamel-yaml==0.17.32 + - urllib3==1.26.15 + - pyyaml==6.0.1 + - termcolor==2.4.0 + - semver==3.0.2 + - id: kafka-check-schemas name: Check if local Kafka Schemas are up to date with code diff --git a/kp_pre_commit_hooks/gitops-values-validation.py b/kp_pre_commit_hooks/gitops-values-validation.py index 67a4ec8..bb16a4e 100755 --- a/kp_pre_commit_hooks/gitops-values-validation.py +++ b/kp_pre_commit_hooks/gitops-values-validation.py @@ -1,195 +1,314 @@ -#!/usr/bin/env python3 - -import json +import re import sys -from itertools import chain +import textwrap +from dataclasses import dataclass, field +from functools import cache, cached_property from pathlib import Path +from typing import Iterator, Optional, Sequence, Union, cast import requests -import urllib3.exceptions -from jsonschema import Draft7Validator +import semver +import urllib3 +import yaml +from jsonschema import Draft7Validator, ValidationError, validators +from jsonschema.protocols import Validator from jsonschema_specifications import REGISTRY from referencing import Registry, Resource -from ruamel.yaml import YAML +from termcolor import colored urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +############################################################################### +# Main code +############################################################################### + SCHEMA_BASE_URL = "https://kp-helmchart-stable-shared-main.s3.eu-west-1.amazonaws.com/schema/platform-managed-chart" GITOPS_DIR = Path("gitops") -yaml = YAML() +SCHEMA_HEADER_REGEXP = re.compile( + rf"^ *# yaml-language-server: \$schema={SCHEMA_BASE_URL}/v(?P[^/]+)/schema-platform-managed-chart.json", re.MULTILINE +) +TOPIC_NAME_REGEXP = re.compile(r"^(private\.)?(?P[a-z][a-z0-9-]*)\.[a-z][a-z0-9]*(-[0-9]+)?(\.[a-z0-9]+)?$") +TWINGATE_DOC_URL = "https://kpler.atlassian.net/wiki/spaces/KSD/pages/243562083/Install+and+configure+the+Twingate+VPN+client" -def retrieve_schema_via_request(uri): - response = requests.get(uri) - return Resource.from_contents(response.json()) +############################################################################### +# Generic Helper functions and classes +############################################################################### +# Helper functions to colorize the output +red = lambda text: colored(text, "red") +green = lambda text: colored(text, "green") +bold = lambda text: colored(text, attrs=["bold"]) -SCHEMA_REGISTRY = REGISTRY.combine(Registry(retrieve=retrieve_schema_via_request)) +def camel_to_snake(name): + name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower() -def download_json_schema_for_chart_version(version): - schema_url = f"{SCHEMA_BASE_URL}/v{version}/schema-platform-managed-chart-strict.json" - response = requests.get(schema_url, timeout=10, verify=False) - if response.status_code != 200: - print( - f"Error fetching schema url {schema_url}. HTTP Status Code: {response.status_code}\nPlease enable VPN first." +def deep_merge(*sources) -> dict: + result = {} + for dictionary in sources: + for key, value in dictionary.items(): + if isinstance(value, dict): + result[key] = deep_merge(result.setdefault(key, {}), value) + else: + result[key] = value + return result + + +############################################################################### +# Specific Helper functions and classes +############################################################################### + + +@dataclass +class UnauthorizedToDownloadSchema(Exception): + schema_url: str + + +@dataclass +class MissingSchema(Exception): + schema_url: str + + +@dataclass +class SchemaValidationError(Exception): + message: str + location: str + hint: Optional[str] = None + + +@cache +def download_json_schema(url): + response = requests.get(url, timeout=10, verify=False) + if response.status_code == 403: + raise UnauthorizedToDownloadSchema(url) + if response.status_code == 404: + raise MissingSchema(url) + response.raise_for_status() + return response.json() + + +# This is required so that jsonschema library can automatically download the schema references +SCHEMA_REGISTRY = REGISTRY.combine(Registry(retrieve=lambda uri: Resource.from_contents(download_json_schema(uri)))) + + +@dataclass +class HelmChart: + name: str + version: str + dependencies: list["HelmChart"] = field(default_factory=list) + + def get_dependency(self, dependency_name) -> Optional["HelmChart"]: + return next((d for d in self.dependencies if d.name == dependency_name), None) + + @cached_property + def json_schema(self) -> dict: + if self.platform_managed_chart_version and semver.compare(self.platform_managed_chart_version, "0.1.35") >= 0: + schema_url = f"{SCHEMA_BASE_URL}/v{self.platform_managed_chart_version}/schema-platform-managed-chart-strict.json" + return download_json_schema(schema_url) + else: + return {} + + @cached_property + def platform_managed_chart_version(self) -> Optional[str]: + platform_managed_chart = self.get_dependency("platform-managed-chart") + return platform_managed_chart.version if platform_managed_chart else None + + @staticmethod + def from_chart_file(chart_file: Path): + chart = cast(dict, yaml.safe_load(chart_file.read_text())) + return HelmChart( + name=chart["name"], + version=chart["version"], + dependencies=[HelmChart(dep["name"], dep["version"]) for dep in chart.get("dependencies", [])], ) - sys.exit(4) - try: - schema_json = response.json() - return schema_json - except json.JSONDecodeError: - print(f"Error decoding JSON. Response content:\n{response.text}") - sys.exit(4) - - -def verify_values_files_schema_version(version, directory=Path(".")): - for filename in directory.glob("values*.yaml"): - value_file = filename.read_text(encoding="utf8") - for line in value_file.splitlines(): - if line.startswith("# yaml-language-server: $schema="): - schema_version = line.split("=")[1].split("/")[-2].replace("v", "") - if schema_version != version: - print(f"ERROR: validation failure for {directory}/{filename}") - print( - f"reason: JSON schema version in '{line}' does not match version {version} in Chart.yaml" - ) - # Let's automatically fix the version in the file on the way - filename.write_text( - value_file.replace(line, line.replace(schema_version, version)) - ) - return False - return True - - -def delete_error_files(directory=Path(".")): - for filename in directory.glob("error-merged-values-*.yaml"): - try: - filename.unlink() - except Exception as err: - print(f"Error deleting {filename}. Reason: {err}") + +class GitOpsRepository: + def __init__(self, gitops_path: Path): + self.gitops_path = gitops_path + + def iter_service_instances_config(self): + for instance_values_file in self.gitops_path.glob("gitops/*/*/values-*-*.yaml"): + application_name = instance_values_file.parent.parent.name + service_name = instance_values_file.parent.name + _, env, instance = instance_values_file.stem.split("-", maxsplit=2) + yield ServiceInstanceConfig(application_name, service_name, env, instance, instance_values_file.parent, self) -def deep_merge(source, destination): - for key, value in source.items(): - if isinstance(value, dict): - # Get node or create one - node = destination.setdefault(key, {}) - deep_merge(value, node) +@dataclass +class ValuesFile: + path: Path + + def __str__(self): + return str(self.path.name) + + @cached_property + def values(self): + return yaml.safe_load(self.path.read_text()) or {} + + @cached_property + def header_schema_version(self) -> Optional[str]: + match = SCHEMA_HEADER_REGEXP.search(self.path.read_text()) + return match.group("version") if match else None + + def set_header_schema_version(self, version): + if self.header_schema_version == version: + return + header = f"# yaml-language-server: $schema={SCHEMA_BASE_URL}/v{version}/schema-platform-managed-chart.json\n" + content = self.path.read_text() + if self.header_schema_version is None: + self.path.write_text(header + content) else: - destination[key] = value + self.path.write_text(SCHEMA_HEADER_REGEXP.sub(header, content)) - return destination + @staticmethod + def merge_values(values_files: list["ValuesFile"]) -> dict: + return deep_merge(*[v.values for v in values_files]) -def merge_service_values_files(service_path, instance_file): - """Merge values.yaml, values-env.yaml and values-env-instance.yaml files.""" - merged_data = {} +@dataclass +class ServiceInstanceConfig: + application_name: str + service_name: str + env: str + instance: str + path: Path + gitops_repository: GitOpsRepository - # Base file - base_file = (service_path / "values.yaml").read_text(encoding="utf8") - merged_data.update(yaml.load(base_file)) + def __str__(self): + return f"{self.application_name}/{self.service_name} {self.instance} instance {self.env} configuration" - # Environment file - env = "dev" if "dev" in instance_file else "prod" - env_file_path = service_path / f"values-{env}.yaml" - if env_file_path.exists(): - env_file = env_file_path.read_text(encoding="utf8") - env_data = yaml.load(env_file) - deep_merge(env_data, merged_data) + @property + def rel_path(self) -> Path: + return self.path.relative_to(self.gitops_repository.gitops_path) - # Instance file - instance_f = (service_path / instance_file).read_text(encoding="utf8") - instance_data = yaml.load(instance_f) - deep_merge(instance_data, merged_data) + @property + def configuration(self) -> dict: + return ValuesFile.merge_values(self.values_files) - return merged_data + @property + def values_files(self) -> list[ValuesFile]: + candidate_files = ["values.yaml", f"values-{self.env}.yaml", f"values-{self.env}-{self.instance}.yaml"] + return [ValuesFile(self.path.joinpath(file)) for file in candidate_files if self.path.joinpath(file).exists()] + @property + def helm_chart(self) -> HelmChart: + return HelmChart.from_chart_file(self.path / "Chart.yaml") -def find_full_error_path(error): - if error.parent: - return find_full_error_path(error.parent) + error.path - else: - return error.path - - -def main(): - if not GITOPS_DIR.exists(): - print(f"{GITOPS_DIR} directory is missing, exiting...") - sys.exit(0) - - error_found = False - - # Iterate over direct subdirectories inside GITOPS_DIR - for service_path in GITOPS_DIR.glob("*/*"): - if not service_path.is_dir(): - continue - - chart_file = service_path / "Chart.yaml" - value_file = service_path / "values.yaml" - if not (chart_file.is_file() and value_file.is_file()): - print(f"Chart.yaml or values.yaml file is missing in {service_path}, skipping...") - continue - - chart_data = yaml.load(chart_file.read_text(encoding="utf8")) - chart_version = next( - ( - dep["version"] - for dep in chart_data.get("dependencies", []) - if dep["name"] == "platform-managed-chart" - ), - None, + def sync_values_files_schema_header_version(self): + for value_file in self.values_files: + value_file.set_header_schema_version(self.helm_chart.platform_managed_chart_version) + + +class ServiceInstanceConfigValidator: + + def __init__(self, service_instance_config: ServiceInstanceConfig): + self.service_instance_config = service_instance_config + + @cached_property + def validator(self) -> Validator: + validator_class = validators.validates("draft7")( + validators.extend(Draft7Validator, validators={"additionalChecks": self.validate_additional_checks}) ) + return validator_class(self.service_instance_config.helm_chart.json_schema, registry=SCHEMA_REGISTRY) - if not chart_version: - print( - f"Chart.yaml {chart_file} is missing platform-managed-chart dependency, skipping..." - ) - continue - - if not verify_values_files_schema_version(chart_version, service_path): - error_found = True - continue - - schema_data = download_json_schema_for_chart_version(chart_version) - if not schema_data: - print(f"JSON schema for version {chart_version} is not supported, skipping...") - continue - - for instance_file_path in chain( - service_path.glob("values-dev-*.yaml"), - service_path.glob("values-prod-*.yaml"), - ): - instance_file = instance_file_path.name - merged_values = merge_service_values_files(service_path, instance_file) - validator = Draft7Validator(schema_data, registry=SCHEMA_REGISTRY) # type: ignore - errors = list(validator.iter_errors(merged_values)) - - if errors: - error_found = True - base_file = "-".join(instance_file.split("-")[:2]) + ".yaml" - print( - f"\nERROR: Validation errors for {service_path} in {instance_file}, {base_file} or values.yaml:" + def validate_configuration(self) -> Sequence[Union[ValidationError, SchemaValidationError]]: + try: + validation_errors = list(self.validator.iter_errors((self.service_instance_config.configuration))) + schema_validation_errors = list(self.iter_schema_validation_errors()) + return validation_errors + schema_validation_errors + + except MissingSchema as error: + platform_managed_chart_version = self.service_instance_config.helm_chart.platform_managed_chart_version + return [ + SchemaValidationError( + f"Missing JSON schema for platform managed chart version {platform_managed_chart_version} in Chart.yaml", + location=error.schema_url, + ) + ] + + def iter_schema_validation_errors(self) -> Iterator[SchemaValidationError]: + platform_managed_chart_version = self.service_instance_config.helm_chart.platform_managed_chart_version + for values_file in self.service_instance_config.values_files: + if values_file.header_schema_version != platform_managed_chart_version: + yield SchemaValidationError( + f"JSON schema version in header ({values_file.header_schema_version}) does not match version in Chart.yaml ({platform_managed_chart_version})", + location=f"values file {values_file}", + hint="This pre-commit hook will auto-fix this issue. Please commit the values files changes.", ) - for error in errors: - error_location = "/".join(str(x) for x in find_full_error_path(error)) or "the root" - print(f" - {error.message} at {error_location}") - output_file = service_path / f"error-merged-{instance_file}" - with output_file.open("w", encoding="utf8") as out: - out.write( - f"# yaml-language-server: $schema={SCHEMA_BASE_URL}/v${chart_version}/schema-platform-managed-chart-strict.json\n" - ) - yaml.dump(merged_values, out) + def validate_additional_checks(self, validator, additional_checks, value, schema): + for check in additional_checks: + if check_method := getattr(self, f"validate_{camel_to_snake(check)}", None): + yield from check_method(value, schema) - delete_error_files(service_path) + def validate_topic_name_compliance(self, value, schema): + match = TOPIC_NAME_REGEXP.match(str(value)) + service_name = self.service_instance_config.service_name + if match and match["serviceName"] != service_name: + yield ValidationError(f"topicName '{value}' it not compliant, it should contain the service name '{service_name}'") - if error_found: - sys.exit(1) +def format_error(error: Union[ValidationError, SchemaValidationError]): + if isinstance(error, SchemaValidationError): + error_message = f"{red('ERROR:')} {error.message}\n at: {bold(error.location)}" + if error.hint: + error_message += f"\n\n {bold('Hint:')} {error.hint}\n" + + else: + location = "/".join(map(str, error.absolute_path)) + error_message = f"{red('ERROR:')} {error.message}\n at: {bold(location)}" + if description := error.schema and error.schema.get("description"): + title, description = description.split("\n", maxsplit=1) + error_message += f"\n\n {bold('Hint:')} {title}\n\n" + error_message += textwrap.indent(description, prefix=" " * 7) + + return error_message + + +def display_errors( + service_instance_config: ServiceInstanceConfig, errors: Sequence[Union[ValidationError, SchemaValidationError]] +): + values_files = ", ".join([str(v) for v in service_instance_config.values_files]) + print(f"\nThe following error(s) were found in the files {values_files}\nunder {service_instance_config.rel_path}:\n") + for error in errors: + print(textwrap.indent(format_error(error), prefix=" " * 2) + "\n") + + +############################################################################### +# Main code +############################################################################### if __name__ == "__main__": - main() + gitops_path = Path(sys.argv[1]) if len(sys.argv) >= 2 else Path.cwd() + gitops_repository = GitOpsRepository(gitops_path) + + try: + errors_found = False + for service_instance_config in gitops_repository.iter_service_instances_config(): + print(f"Checking {service_instance_config} ", end="") + + validator = ServiceInstanceConfigValidator(service_instance_config) + errors = validator.validate_configuration() + if not errors: + print(green("PASSED")) + else: + errors_found = True + print(red("FAILED")) + display_errors(service_instance_config, errors) + # We always try to sync the schema header version, in case it was one of the error detected + service_instance_config.sync_values_files_schema_header_version() + + sys.exit(1 if errors_found else 0) + + except UnauthorizedToDownloadSchema as error: + print( + f"\n\n{red('FATAL:')} Unauthorized to download schema at {error.schema_url}\n" + " Please check that your Twingate VPN Client is up and running configured.\n" + f" More info at {TWINGATE_DOC_URL}\n\n" + ) + sys.exit(1) diff --git a/pyproject.toml b/pyproject.toml index 49d0cb2..7899320 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,9 @@ jsonschema = "^4.19.0" requests = "^2.31.0" ruamel-yaml = "^0.17.32" urllib3 = "^1.26.15" +pyyaml = "^6.0.1" +termcolor = "^2.4.0" +semver = "^3.0.2" [tool.poetry.group.dev.dependencies] black = {version = "^22.10.0", allow-prereleases = true} @@ -20,4 +23,4 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.black] -line-length = 100 +line-length = 130