Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KAFKA-17767 Extract test catalog from JUnit output [1/n] #17397

Merged
merged 11 commits into from
Oct 17, 2024
99 changes: 95 additions & 4 deletions .github/scripts/junit.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,22 @@
# limitations under the License.

import argparse
from collections import OrderedDict
import dataclasses
from functools import partial
from glob import glob
import logging
import os
import os.path
import pathlib
import re
import sys
from typing import Tuple, Optional, List, Iterable
import xml.etree.ElementTree
import html

import yaml


logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -78,6 +83,60 @@ class TestSuite:
passed_tests: List[TestCase]


# Java method names can start with alpha, "_", or "$". Following characters can also include digits
method_matcher = re.compile(r"([a-zA-Z_$][a-zA-Z0-9_$]+).*")


def clean_test_name(test_name: str) -> str:
cleaned = test_name.strip("\"").rstrip("()")
m = method_matcher.match(cleaned)
return m.group(1)


class TestCatalogExporter:
def __init__(self):
self.all_tests = {} # module -> class -> set of methods

def handle_suite(self, module: str, suite: TestSuite):
if module not in self.all_tests:
self.all_tests[module] = OrderedDict()

for test in suite.failed_tests:
if test.class_name not in self.all_tests[module]:
self.all_tests[module][test.class_name] = set()
self.all_tests[module][test.class_name].add(clean_test_name(test.test_name))
for test in suite.passed_tests:
if test.class_name not in self.all_tests[module]:
self.all_tests[module][test.class_name] = set()
self.all_tests[module][test.class_name].add(clean_test_name(test.test_name))

def export(self, out_dir: str):
if not os.path.exists(out_dir):
logger.debug(f"Creating output directory {out_dir} for test catalog export.")
os.makedirs(out_dir)

total_count = 0
for module, module_tests in self.all_tests.items():
module_path = os.path.join(out_dir, module)
if not os.path.exists(module_path):
os.makedirs(module_path)

sorted_tests = {}
count = 0
for test_class, methods in module_tests.items():
sorted_methods = sorted(methods)
count += len(sorted_methods)
sorted_tests[test_class] = sorted_methods

out_path = os.path.join(module_path, f"tests.yaml")
logger.debug(f"Writing {count} tests for {module} into {out_path}.")
total_count += count
with open(out_path, "w") as fp:
yaml.dump(sorted_tests, fp)

logger.debug(f"Wrote {total_count} tests into test catalog.")


def parse_report(workspace_path, report_path, fp) -> Iterable[TestSuite]:
cur_suite: Optional[TestSuite] = None
partial_test_case = None
Expand Down Expand Up @@ -138,6 +197,20 @@ def pretty_time_duration(seconds: float) -> str:
return time_fmt


def module_path_from_report_path(base_path: str, report_path: str) -> str:
"""
Parse a report XML and extract the module path. Test report paths look like:

build/junit-xml/module[/sub-module]/[suite]/TEST-class.method.xml

This method strips off a base path and assumes all path segments leading up to the suite name
are part of the module path.
"""
rel_report_path = os.path.relpath(report_path, base_path)
path_segments = pathlib.Path(rel_report_path).parts
return os.path.join(*path_segments[0:-2])


if __name__ == "__main__":
"""
Parse JUnit XML reports and generate GitHub job summary in Markdown format.
Expand All @@ -150,16 +223,21 @@ def pretty_time_duration(seconds: float) -> str:
parser = argparse.ArgumentParser(description="Parse JUnit XML results.")
parser.add_argument("--path",
required=False,
default="build/junit-xml/**/*.xml",
help="Path to XML files. Glob patterns are supported.")
default="build/junit-xml",
help="Base path of JUnit XML files. A glob of **/*.xml will be applied on top of this path.")
parser.add_argument("--export-test-catalog",
required=False,
default="",
help="Optional path to dump all tests")

if not os.getenv("GITHUB_WORKSPACE"):
print("This script is intended to by run by GitHub Actions.")
exit(1)

args = parser.parse_args()

reports = glob(pathname=args.path, recursive=True)
glob_path = os.path.join(args.path, "**/*.xml")
reports = glob(pathname=glob_path, recursive=True)
logger.info(f"Found {len(reports)} JUnit results")
workspace_path = get_env("GITHUB_WORKSPACE") # e.g., /home/runner/work/apache/kafka

Expand All @@ -177,10 +255,13 @@ def pretty_time_duration(seconds: float) -> str:
flaky_table = []
skipped_table = []

exporter = TestCatalogExporter()

logger.debug(f"::group::Parsing {len(reports)} JUnit Report Files")
for report in reports:
with open(report, "r") as fp:
logger.debug(f"Parsing {report}")
module_path = module_path_from_report_path(args.path, report)
logger.debug(f"Parsing file: {report}, module: {module_path}")
for suite in parse_report(workspace_path, report, fp):
total_skipped += suite.skipped
total_errors += suite.errors
Expand Down Expand Up @@ -216,7 +297,17 @@ def pretty_time_duration(seconds: float) -> str:
simple_class_name = skipped_test.class_name.split(".")[-1]
logger.debug(f"Found skipped test: {skipped_test}")
skipped_table.append((simple_class_name, skipped_test.test_name))

if args.export_test_catalog:
exporter.handle_suite(module_path, suite)

logger.debug("::endgroup::")

if args.export_test_catalog:
logger.debug(f"::group::Generating Test Catalog Files")
exporter.export(args.export_test_catalog)
logger.debug("::endgroup::")

duration = pretty_time_duration(total_time)
logger.info(f"Finished processing {len(reports)} reports")

Expand Down
15 changes: 15 additions & 0 deletions .github/scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PyYAML~=6.0
22 changes: 21 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ jobs:
uses: actions/checkout@v4
with:
persist-credentials: false
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- run: pip install -r .github/scripts/requirements.txt
- name: Setup Gradle
uses: ./.github/actions/setup-gradle
with:
Expand Down Expand Up @@ -145,6 +149,14 @@ jobs:
**/build/reports/tests/*
compression-level: 9
if-no-files-found: ignore
- name: Archive JUnit XML
uses: actions/upload-artifact@v4
with:
name: junit-xml-${{ matrix.java }}
path: |
build/junit-xml/**/*.xml
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we export the artifact url for this task?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we'll keep this step in long term since it is ~50Mb compressed (~1GB decompressed!).

compression-level: 9
if-no-files-found: ignore
- name: Archive Thread Dumps
id: thread-dump-upload-artifact
if: always() && steps.junit-test.outputs.exitcode == '124'
Expand All @@ -156,12 +168,20 @@ jobs:
compression-level: 9
if-no-files-found: ignore
- name: Parse JUnit tests
run: python .github/scripts/junit.py >> $GITHUB_STEP_SUMMARY
run: python .github/scripts/junit.py --export-test-catalog ./test-catalog >> $GITHUB_STEP_SUMMARY
env:
GITHUB_WORKSPACE: ${{ github.workspace }}
JUNIT_REPORT_URL: ${{ steps.junit-upload-artifact.outputs.artifact-url }}
THREAD_DUMP_URL: ${{ steps.thread-dump-upload-artifact.outputs.artifact-url }}
GRADLE_EXIT_CODE: ${{ steps.junit-test.outputs.exitcode }}
- name: Archive Test Catalog
if: ${{ always() && matrix.java == '21' }}
uses: actions/upload-artifact@v4
with:
name: test-catalog
path: test-catalog
compression-level: 9
if-no-files-found: ignore
- name: Archive Build Scan
if: always()
uses: actions/upload-artifact@v4
Expand Down
23 changes: 20 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,22 @@ def determineCommitId() {
}
}


/**
* For a given Project, compute a nice dash separated directory name
* to store the JUnit XML files in. E.g., Project ":connect:api" -> "connect-api"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't have consistent sub-module naming. The connect-related modules don't have a prefix, while other sub-modules must include a prefix to avoid conflicts to connect's sub modules. This means the helper should only revise the module names for the connect modules.

Copy link
Member Author

@mumrah mumrah Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered this, but didn't want to add special cases to the "module name finding" method.

The module name computed here (e.g., connect-api, clients, group-coordinator-group-coordinator-api) will determine the name of the directory in build/junit-xml. As long as its unique, it doesn't matter too much what it is.

Another option here would be to reflect the actual module paths in the output directory paths. So, :connect:api test results would end up in "build/junit-xml/connect/api/[test, quarantinedTest]/...". This would also cause the test catalog directories to mirror the actual layout of the project.

*/
static def projectToJUnitXmlPath(project) {
var p = project
var projectNames = []
while (p != null) {
projectNames.push(p.name)
p = p.parent
if (p.name == "kafka") {
break;
}
}
return projectNames.join("/")
}


apply from: file('wrapper.gradle')
Expand Down Expand Up @@ -503,7 +518,8 @@ subprojects {
// were not run, but instead were restored via FROM-CACHE. See KAFKA-17479 for more details.
doLast {
if (ext.isGithubActions) {
def dest = rootProject.layout.buildDirectory.dir("junit-xml/${project.name}").get().asFile
def moduleDirPath = projectToJUnitXmlPath(project)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we apply this change to quarantinedTest as well?

def dest = rootProject.layout.buildDirectory.dir("junit-xml/${moduleDirPath}").get().asFile
println "Copy JUnit XML for ${project.name} to $dest"
ant.copy(todir: "$dest/test") {
ant.fileset(dir: "${test.reports.junitXml.entryPoint}")
Expand Down Expand Up @@ -563,7 +579,8 @@ subprojects {
// were not run, but instead were restored via FROM-CACHE. See KAFKA-17479 for more details.
doLast {
if (ext.isGithubActions) {
def dest = rootProject.layout.buildDirectory.dir("junit-xml/${project.name}").get().asFile
def moduleDirPath = projectToJUnitXmlPath(project)
def dest = rootProject.layout.buildDirectory.dir("junit-xml/${moduleDirPath}").get().asFile
println "Copy JUnit XML for ${project.name} to $dest"
ant.copy(todir: "$dest/quarantinedTest", failonerror: "false") {
ant.fileset(dir: "${quarantinedTest.reports.junitXml.entryPoint}") {
Expand Down