Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace IDs & IPs in test result URLs #288

Merged
merged 4 commits into from
Jul 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions ci/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import re
import sys
import urllib.parse
from enum import StrEnum
from itertools import zip_longest
from typing import Any, Iterable
Expand Down Expand Up @@ -32,6 +33,16 @@
ipv6_pattern = re.compile(r"\b([0-9a-fA-F]{1,4}::?){1,7}[0-9a-fA-F]{1,4}\b")
mac_pattern = re.compile(r"\b([0-9a-f]{2}:){5}[0-9a-f]{2}\b")

# Pattern matching strings starting with `"url": "/api/v1/` and ending with `"`
api_v1_pattern = re.compile(r'"url":\s*"/api/v1/.*?"')
# Pattern matching `"url": "/api/v1/..."` strings whose final component is a number.
# Defines 4 capture groups so the number can be replaced with a placeholder:
#   1: everything from `"url"` up to (not including) the separator
#   2: the separator, `/` or `=`
#   3: the number itself
#   4: the closing double quote
# The number is only matched if it is directly preceded by `/` or `=` and
# directly followed by the closing quote of the URL string.
# URLs containing `<IPv4>` or `<IPv6>` after `/api/v1/` are not matched.
# Both the lookahead and the prefix use `[^"]*` instead of `.*` so that the
# match is confined to the URL string itself: a placeholder or a trailing
# number in a *different* field on the same line must not influence the result.
api_v1_pattern_with_number = re.compile(
    r'("url":\s*"/api/v1/(?![^"]*<(?:IPv6|IPv4)>)[^"]*?)([/=])(\d+)(")'
)


class DiffError(Exception):
"""Base class for diff errors."""
Expand All @@ -48,6 +59,20 @@ def __init__(self, expected: int, result: int) -> None: # noqa: D107
)


def unquote_url(match: re.Match[str]) -> str:
    """Decode percent-encoded sequences in a matched /api/v1/ URL string.

    Intended as a replacement callback for ``re.sub``: the entire matched
    text is passed through ``urllib.parse.unquote`` and returned.
    """
    matched_text = match[0]
    return urllib.parse.unquote(matched_text)


def replace_url_id(match: re.Match[str]) -> str:
    """Swap the trailing numeric ID of a matched URL for the `<ID>` placeholder.

    Intended as a replacement callback for ``re.sub``. The match is expected
    to provide four groups:

    1. the text before the separator (`"url": "/api/...`)
    2. the separator (`/` or `=`)
    3. the number being replaced (discarded here)
    4. the closing double quote
    """
    prefix, separator, closing_quote = match.group(1, 2, 4)
    return f"{prefix}{separator}<ID>{closing_quote}"


def group_objects(json_file_path: str) -> list[dict[str, Any]]:
"""Group objects in a JSON file by a specific criterion.

Expand All @@ -56,12 +81,23 @@ def group_objects(json_file_path: str) -> list[dict[str, Any]]:
"""
with open(json_file_path, "r") as f:
s = f.read()
# Replace all URL encoded text in /api/v1/ URLs with unquoted text
# This lets us replace it down the line with our normal IPv{4,6} and MAC placeholders
# Must be done _before_ all other replacements
s = api_v1_pattern.sub(unquote_url, s)

# Replace all non-deterministic values with placeholders
s = timestamp_pattern.sub("<TIME>", s)
s = datetime_str_pattern.sub("<TIME>", s)
s = serial_pattern.sub("Serial: <NUMBER>", s)
s = mac_pattern.sub("<macaddress>", s)
s = ipv4_pattern.sub("<IPv4>", s)
s = ipv6_pattern.sub("<IPv6>", s)

# Replace all IDs in URLs with a placeholder
# Must be done _after_ all other replacements
s = api_v1_pattern_with_number.sub(replace_url_id, s)

s = re.sub(
r"\s+", " ", s
) # replace all whitespace with one space, so the diff doesn't complain about different lengths
Expand Down
Loading