From 04e117d5975a185843812b0829788e1e5b06dc72 Mon Sep 17 00:00:00 2001 From: Kevin Fairise <132568982+KevinFairise2@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:12:22 +0100 Subject: [PATCH] Post process e2e tests logs to make them more readable (#31615) --- .gitlab/e2e/e2e.yml | 4 +- .gitlab/e2e_install_packages/common.yml | 5 ++ tasks/new_e2e_tests.py | 114 +++++++++++++++++++++++- 3 files changed, 121 insertions(+), 2 deletions(-) diff --git a/.gitlab/e2e/e2e.yml b/.gitlab/e2e/e2e.yml index 9892eef7c2cb2..3f0b2c313389e 100644 --- a/.gitlab/e2e/e2e.yml +++ b/.gitlab/e2e/e2e.yml @@ -57,8 +57,9 @@ E2E_COMMIT_SHA: $CI_COMMIT_SHORT_SHA E2E_OUTPUT_DIR: $CI_PROJECT_DIR/e2e-output EXTERNAL_LINKS_PATH: external_links_$CI_JOB_ID.json + E2E_LOGS_PROCESSING_TEST_DEPTH: 1 script: - - inv -e new-e2e-tests.run --targets $TARGETS -c ddagent:imagePullRegistry=669783387624.dkr.ecr.us-east-1.amazonaws.com -c ddagent:imagePullUsername=AWS -c ddagent:imagePullPassword=$(aws ecr get-login-password) --junit-tar junit-${CI_JOB_ID}.tgz ${EXTRA_PARAMS} --test-washer + - inv -e new-e2e-tests.run --targets $TARGETS -c ddagent:imagePullRegistry=669783387624.dkr.ecr.us-east-1.amazonaws.com -c ddagent:imagePullUsername=AWS -c ddagent:imagePullPassword=$(aws ecr get-login-password) --junit-tar junit-${CI_JOB_ID}.tgz ${EXTRA_PARAMS} --test-washer --logs-folder=$E2E_OUTPUT_DIR/logs --logs-post-processing --logs-post-processing-test-depth=$E2E_LOGS_PROCESSING_TEST_DEPTH after_script: - $CI_PROJECT_DIR/tools/ci/junit_upload.sh artifacts: @@ -403,6 +404,7 @@ new-e2e-installer: TARGETS: ./tests/installer/unix TEAM: fleet FLEET_INSTALL_METHOD: "install_script" + E2E_LOGS_PROCESSING_TEST_DEPTH: 2 new-e2e-installer-windows: extends: .new_e2e_template diff --git a/.gitlab/e2e_install_packages/common.yml b/.gitlab/e2e_install_packages/common.yml index 8985150b3ff14..965f5c67ce6f8 100644 --- a/.gitlab/e2e_install_packages/common.yml +++ b/.gitlab/e2e_install_packages/common.yml @@ -9,6 +9,7 @@ TARGETS: ./tests/agent-platform/install-script TEAM: agent-delivery EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --arch $E2E_ARCH --flavor $FLAVOR --no-verbose + E2E_LOGS_PROCESSING_TEST_DEPTH: 2 # We use a single test suite and run all the platforms test as subtest .new-e2e_step_by_step: stage: e2e_install_packages @@ -16,6 +17,7 @@ TARGETS: ./tests/agent-platform/step-by-step TEAM: agent-delivery EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --arch $E2E_ARCH --flavor $FLAVOR + E2E_LOGS_PROCESSING_TEST_DEPTH: 2 # We use a single test suite and run all the platforms test as subtest .new-e2e_script_upgrade7: stage: e2e_install_packages @@ -23,6 +25,7 @@ TARGETS: ./tests/agent-platform/upgrade TEAM: agent-delivery EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --arch $E2E_ARCH --flavor $FLAVOR + E2E_LOGS_PROCESSING_TEST_DEPTH: 2 # We use a single test suite and run all the platforms test as subtest parallel: matrix: - START_MAJOR_VERSION: [5, 6, 7] @@ -37,6 +40,7 @@ TARGETS: ./tests/agent-platform/persisting-integrations TEAM: agent-delivery EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --arch $E2E_ARCH --flavor $FLAVOR + E2E_LOGS_PROCESSING_TEST_DEPTH: 2 # We use a single test suite and run all the platforms test as subtest script: - DATADOG_AGENT_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $INSTALL_SCRIPT_API_KEY_ORG2 token) || exit $?; export DATADOG_AGENT_API_KEY - inv -e new-e2e-tests.run --targets $TARGETS --junit-tar "junit-${CI_JOB_ID}.tgz" ${EXTRA_PARAMS} --src-agent-version 7 --test-washer @@ -47,6 +51,7 @@ TARGETS: ./tests/agent-platform/rpm TEAM: agent-delivery EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --arch $E2E_ARCH + E2E_LOGS_PROCESSING_TEST_DEPTH: 2 # We use a single test suite and run all the platforms test as subtest script: - DATADOG_AGENT_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $INSTALL_SCRIPT_API_KEY_ORG2 token) || exit $?; export DATADOG_AGENT_API_KEY - inv -e new-e2e-tests.run --targets $TARGETS --junit-tar "junit-${CI_JOB_ID}.tgz" ${EXTRA_PARAMS} --test-washer diff --git a/tasks/new_e2e_tests.py b/tasks/new_e2e_tests.py index 6b16dd799eaf9..74f027afd1582 100644 --- a/tasks/new_e2e_tests.py +++ b/tasks/new_e2e_tests.py @@ -23,7 +23,7 @@ from tasks.libs.common.git import get_commit_sha from tasks.libs.common.go import download_go_dependencies from tasks.libs.common.gomodules import get_default_modules -from tasks.libs.common.utils import REPO_PATH, color_message, running_in_ci +from tasks.libs.common.utils import REPO_PATH, color_message, gitlab_section, running_in_ci from tasks.tools.e2e_stacks import destroy_remote_stack @@ -66,6 +66,9 @@ def run( test_washer=False, agent_image="", cluster_agent_image="", + logs_post_processing=False, + logs_post_processing_test_depth=1, + logs_folder="e2e_logs", ): """ Run E2E Tests based on test-infra-definitions infrastructure provisioning. @@ -171,6 +174,27 @@ def run( 'You can also add `E2E_DEV_MODE="true"` to run in dev mode which will leave the environment up after the tests.' ) + if logs_post_processing: + if len(test_res) == 1: + post_processed_output = post_process_output( + test_res[0].result_json_path, test_depth=logs_post_processing_test_depth + ) + + os.makedirs(logs_folder, exist_ok=True) + write_result_to_log_files(post_processed_output, logs_folder) + try: + pretty_print_logs(post_processed_output) + except TooManyLogsError: + print( + color_message("WARNING", "yellow") + + f": Too many logs to print, skipping logs printing to avoid Gitlab collapse. You can find your logs properly organized in the job artifacts: https://gitlab.ddbuild.io/DataDog/datadog-agent/-/jobs/{os.getenv('CI_JOB_ID')}/artifacts/browse/e2e-output/logs/" + ) + else: + print( + color_message("WARNING", "yellow") + + f": Logs post processing expect only test result for test/new-e2e module. Skipping because result contains test for {len(test_res)} modules." + ) + if not success: raise Exit(code=1) @@ -260,6 +284,94 @@ def cleanup_remote_stacks(ctx, stack_regex, pulumi_backend): print(f"Failed to destroy stack {stack}") +def post_process_output(path: str, test_depth: int = 1): + """ + Post process the test results to add the test run name + path: path to the test result json file + test_depth: depth of the test name to consider + + By default the test_depth is set to 1, which means that the logs will be splitted depending on the test suite name. + If we use a single test suite to run multiple tests we can increase the test_depth to split the logs per test. + For example with: + TestPackages/run_ubuntu + TestPackages/run_centos + TestPackages/run_debian + We should set test_depth to 2 to avoid mixing all the logs of the different tested platform + """ + + def is_parent(parent: list[str], child: list[str]) -> bool: + for i in range(len(parent)): + if parent[i] != child[i]: + return False + return True + + logs_per_test = {} + with open(path) as f: + all_lines = f.readlines() + + # Initalize logs_per_test with all test names + for line in all_lines: + json_line = json.loads(line) + if "Package" not in json_line or "Test" not in json_line or "Output" not in json_line: + continue + splitted_test = json_line["Test"].split("/") + if len(splitted_test) < test_depth: + continue + if json_line["Package"] not in logs_per_test: + logs_per_test[json_line["Package"]] = {} + + test_name = splitted_test[: min(test_depth, len(splitted_test))] + logs_per_test[json_line["Package"]]["/".join(test_name)] = [] + + for line in all_lines: + json_line = json.loads(line) + if "Package" not in json_line or "Test" not in json_line or "Output" not in json_line: + continue + + if "===" in json_line["Output"]: # Ignore these lines that are produced when running test concurrently + continue + + splitted_test = json_line["Test"].split("/") + + if len(splitted_test) < test_depth: # Append logs to all children tests + for test_name in logs_per_test[json_line["Package"]]: + if is_parent(splitted_test, test_name.split("/")): + logs_per_test[json_line["Package"]][test_name].append(json_line["Output"]) + continue + + logs_per_test[json_line["Package"]]["/".join(splitted_test[:test_depth])].append(json_line["Output"]) + return logs_per_test + + +def write_result_to_log_files(logs_per_test, log_folder): + for package, tests in logs_per_test.items(): + for test, logs in tests.items(): + sanitized_package_name = re.sub(r"[^\w_. -]", "_", package) + sanitized_test_name = re.sub(r"[^\w_. -]", "_", test) + with open(f"{log_folder}/{sanitized_package_name}.{sanitized_test_name}.log", "w") as f: + f.write("".join(logs)) + + +class TooManyLogsError(Exception): + pass + + +def pretty_print_logs(logs_per_test, max_size=250000): + # Compute size in bytes of what we are about to print. If it exceeds max_size, we skip printing because it will make the Gitlab logs almost completely collapsed. + # By default Gitlab has a limit of 500KB per job log, so we want to avoid printing too much. + size = 0 + for _, tests in logs_per_test.items(): + for _, logs in tests.items(): + size += len("".join(logs).encode()) + if size > max_size and running_in_ci(): + raise TooManyLogsError + for package, tests in logs_per_test.items(): + for test, logs in tests.items(): + with gitlab_section("Complete logs for " + package + "." + test, collapsed=True): + print("Complete logs for " + package + "." + test) + print("".join(logs)) + + @task def deps(ctx, verbose=False): """