Skip to content

Commit

Permalink
apply_fixes can parse report files from a DWYU execution log
Browse files Browse the repository at this point in the history
For large workspaces discovering the DWYU report files by crawling the bazel-out
directory can be quite slow due to an enormous amount of files and directories
being present.
To work around this, we enable the apply_fixes script to parse a log file
containing the command line output of executing the DWYU aspect. This execution
log is then parsed and the DWYU report paths deduced.
  • Loading branch information
martis42 committed May 26, 2024
1 parent c8be082 commit 56815ea
Show file tree
Hide file tree
Showing 17 changed files with 276 additions and 67 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ You can see the full command line interface and more information about the scrip
If the `apply_fixes` tool is not able to discover the report files, this can be caused by the `bazel-bin` convenience symlink at the workspace root not existing or not pointing to the output directory which was used by to generate the report files.
The tool offers options to control how the output directory is discovered.

Discovering the DWYU report files automatically can take a large amount of time if the `bazel-bin` directory is too large.
In such cases you can pipe the command line output of executing the DWYU aspect into a file and forward this file to the apply_fixes script via the `--dwyu-log-file` option.
The apply_fixes script will then deduce the DWYU report file locations without crawling though thw whole `bazel-bin` directory.

Unfortunately, the tool cannot promise perfect results due to various constraints:

- If alias targets are involved, this cannot be processed properly.
Expand Down
1 change: 1 addition & 0 deletions src/analyze_includes/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def main(args: Namespace) -> int:
system_under_inspection=system_under_inspection,
ensure_private_deps=args.implementation_deps_available,
)
result.report = args.report

args.report.parent.mkdir(parents=True, exist_ok=True)
with args.report.open(mode="w", encoding="utf-8") as report:
Expand Down
9 changes: 8 additions & 1 deletion src/analyze_includes/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,22 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pathlib import Path

from src.analyze_includes.parse_source import Include


@dataclass
class Result:
target: str
report: Path | None = None
use_impl_deps: bool = False

public_includes_without_dep: list[Include] = field(default_factory=list)
private_includes_without_dep: list[Include] = field(default_factory=list)
unused_deps: list[str] = field(default_factory=list)
unused_impl_deps: list[str] = field(default_factory=list)
deps_which_should_be_private: list[str] = field(default_factory=list)
use_impl_deps: bool = False

def is_ok(self) -> bool:
return (
Expand Down Expand Up @@ -47,6 +51,9 @@ def to_str(self) -> str:
if self.deps_which_should_be_private:
msg += "\nPublic dependencies which are used only in private code:\n"
msg += "\n".join(f" Dependency='{dep}'" for dep in self.deps_which_should_be_private)

msg += f"\n\nDWYU Report: {self.report}"

return self._framed_msg(msg)

def to_json(self) -> str:
Expand Down
45 changes: 43 additions & 2 deletions src/analyze_includes/test/result_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import unittest
from pathlib import Path

Expand All @@ -7,14 +9,16 @@

class TestResult(unittest.TestCase):
@staticmethod
def _expected_msg(target: str, errors: str = "") -> str:
def _expected_msg(target: str, errors: str = "", report: str | None = None) -> str:
border = 80 * "="
msg = f"DWYU analyzing: '{target}'\n\n"
if errors:
msg += "Result: FAILURE\n\n"
report = f"\n\nDWYU Report: {report}\n"
else:
msg += "Result: SUCCESS"
return border + "\n" + msg + errors + "\n" + border
report = "\n"
return border + "\n" + msg + errors + report + border

def test_is_ok(self) -> None:
unit = Result("//foo:bar")
Expand All @@ -33,6 +37,43 @@ def test_is_ok(self) -> None:
"deps_which_should_be_private": [],
"use_implementation_deps": false
}
""".lstrip(),
)

def test_is_ok_fails_and_prints_report(self) -> None:
unit = Result(
target="//foo:bar",
private_includes_without_dep=[Include(file=Path("foo"), include="missing")],
)
unit.report = Path("some/report.json")

self.assertFalse(unit.is_ok())
self.assertEqual(
unit.to_str(),
self._expected_msg(
target="//foo:bar",
errors="Includes which are not available from the direct dependencies:"
"\n File='foo', include='missing'",
report="some/report.json",
),
)
# The report is not mentioned in the json file as it would be redundant
self.assertEqual(
unit.to_json(),
"""
{
"analyzed_target": "//foo:bar",
"public_includes_without_dep": {},
"private_includes_without_dep": {
"foo": [
"missing"
]
},
"unused_deps": [],
"unused_implementation_deps": [],
"deps_which_should_be_private": [],
"use_implementation_deps": false
}
""".lstrip(),
)

Expand Down
1 change: 1 addition & 0 deletions src/apply_fixes/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ py_library(
"apply_fixes.py",
"bazel_query.py",
"buildozer_executor.py",
"get_dwyu_reports.py",
"search_missing_deps.py",
"summary.py",
"utils.py",
Expand Down
52 changes: 4 additions & 48 deletions src/apply_fixes/apply_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@

import json
import logging
import shlex
import sys
from os import environ, walk
from os import environ
from pathlib import Path
from typing import TYPE_CHECKING

from src.apply_fixes.bazel_query import BazelQuery
from src.apply_fixes.buildozer_executor import BuildozerExecutor
from src.apply_fixes.get_dwyu_reports import gather_reports, get_reports_search_dir
from src.apply_fixes.search_missing_deps import search_missing_deps
from src.apply_fixes.utils import execute_and_capture
from src.apply_fixes.utils import args_string_to_list

if TYPE_CHECKING:
from argparse import Namespace
Expand All @@ -29,10 +28,6 @@ def __init__(self, main_args: Namespace) -> None:
self.add_missing_deps = main_args.fix_missing_deps or main_args.fix_all


def args_string_to_list(args: str | None) -> list[str]:
return shlex.split(args) if args else []


def get_workspace(main_args: Namespace) -> Path | None:
if main_args.workspace:
return Path(main_args.workspace)
Expand All @@ -43,45 +38,6 @@ def get_workspace(main_args: Namespace) -> Path | None:
return Path(workspace_root)


def get_reports_search_dir(main_args: Namespace, workspace_root: Path) -> Path:
"""
Unless a dedicated search directory is provided, try to deduce the 'bazel-bin' dir.
"""
if main_args.search_path:
return Path(main_args.search_path)

if main_args.use_bazel_info:
process = execute_and_capture(
cmd=[
"bazel",
*args_string_to_list(main_args.bazel_startup_args),
"info",
*args_string_to_list(main_args.bazel_args),
"bazel-bin",
],
cwd=workspace_root,
)
return Path(process.stdout.strip())

bazel_bin_link = workspace_root / "bazel-bin"
if not bazel_bin_link.is_dir():
logging.fatal(f"ERROR: convenience symlink '{bazel_bin_link}' does not exist or is not a symlink.")
sys.exit(1)
return bazel_bin_link.resolve()


def gather_reports(search_path: Path) -> list[Path]:
"""
We explicitly use os.walk() as it has better performance than Path.glob() in large and deeply nested file trees.
"""
reports = []
for root, _, files in walk(search_path):
for file in files:
if file.endswith("_dwyu_report.json"):
reports.append(Path(root) / file) # noqa: PERF401
return reports


def add_discovered_deps(
discovered_public_deps: list[str],
discovered_private_deps: list[str],
Expand Down Expand Up @@ -160,7 +116,7 @@ def main(args: Namespace) -> int:
reports_search_dir = get_reports_search_dir(main_args=args, workspace_root=workspace)
logging.debug(f"Reports search directory: '{reports_search_dir}'")

reports = gather_reports(reports_search_dir)
reports = gather_reports(main_args=args, search_path=reports_search_dir)
if not reports:
logging.fatal(
"""
Expand Down
61 changes: 61 additions & 0 deletions src/apply_fixes/get_dwyu_reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from __future__ import annotations

import logging
import sys
from os import walk
from pathlib import Path
from typing import TYPE_CHECKING

from src.apply_fixes.utils import args_string_to_list, execute_and_capture

if TYPE_CHECKING:
import argparse


def gather_reports(main_args: argparse.Namespace, search_path: Path) -> list[Path]:
if main_args.dwyu_log_file:
return [search_path / log.split("/bin/", 1)[1] for log in parse_dwyu_execution_log(main_args.dwyu_log_file)]

reports = []
# We explicitly use os.walk() as it has better performance than Path.glob() in large and deeply nested file trees.
for root, _, files in walk(search_path):
for file in files:
if file.endswith("_dwyu_report.json"):
reports.append(Path(root) / file) # noqa: PERF401
return reports


def parse_dwyu_execution_log(log_file: Path) -> list[str]:
dwyu_report_anchor = "DWYU Report: "
with log_file.open() as log:
return [
line.strip().split(dwyu_report_anchor)[1] for line in log.readlines() if line.startswith(dwyu_report_anchor)
]


def get_reports_search_dir(main_args: argparse.Namespace, workspace_root: Path) -> Path:
"""
Unless an alternative method is selected, follow the convenience symlinks at the workspace root to discover the
DWYU report files.
"""
if main_args.search_path:
return Path(main_args.search_path)

if main_args.use_bazel_info:
process = execute_and_capture(
cmd=[
"bazel",
*args_string_to_list(main_args.bazel_startup_args),
"info",
*args_string_to_list(main_args.bazel_args),
"bazel-bin",
],
cwd=workspace_root,
)
return Path(process.stdout.strip())

bazel_bin_link = workspace_root / "bazel-bin"
if not bazel_bin_link.is_dir():
logging.fatal(f"ERROR: convenience symlink '{bazel_bin_link}' does not exist.")
sys.exit(1)
return bazel_bin_link.resolve()
18 changes: 18 additions & 0 deletions src/apply_fixes/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import sys
from argparse import ArgumentParser, Namespace, RawDescriptionHelpFormatter
from pathlib import Path

from src.apply_fixes.apply_fixes import main

Expand Down Expand Up @@ -79,6 +80,19 @@ def cli() -> Namespace:
deduce the Bazel output directory containing the DWYU report files. Or if you want to search only in a sub tree
of the Bazel output directories.""",
)
parser.add_argument(
"--dwyu-log-file",
metavar="PATH",
type=Path,
help="""
If discovering the DWYU report files in the bazel-bin is not feasible, one can instead pipe the command line
output of executing the DWYU aspect into a log file and tell this script to extract the DWYU report paths from
this execution log. This can be helpful when your workspace is so large, that crawling the corresponding
'bazel-bin' directory is too slow for a satisfactory user experience. This script still has to be able to
discover the location of the 'bazel-bin' directory. Meaning, the 'bazel-bin' convenience symlink at the
workspace root should exists or if it is not available one of the following options should be used:
['--use-bazel-info', '--search-path'].""",
)
parser.add_argument(
"--use-cquery",
action="store_true",
Expand Down Expand Up @@ -140,6 +154,10 @@ def cli() -> Namespace:
logging.fatal("Please choose at least one of the 'fix-..' options")
sys.exit(1)

if args.use_bazel_info and args.search_path:
logging.fatal("Please choose only one options controlling the 'bazel-bin' directory discovery.")
sys.exit(1)

return args


Expand Down
12 changes: 12 additions & 0 deletions src/apply_fixes/test/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ py_test(
deps = ["//src/apply_fixes:lib"],
)

py_test(
name = "get_dwyu_reports_test",
srcs = ["get_dwyu_reports_test.py"],
deps = ["//src/apply_fixes:lib"],
)

py_test(
name = "search_missing_deps",
srcs = ["search_missing_deps.py"],
Expand All @@ -23,3 +29,9 @@ py_test(
srcs = ["summary_test.py"],
deps = ["//src/apply_fixes:lib"],
)

py_test(
name = "utils_test",
srcs = ["utils_test.py"],
deps = ["//src/apply_fixes:lib"],
)
35 changes: 35 additions & 0 deletions src/apply_fixes/test/get_dwyu_reports_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import unittest
from pathlib import Path

from src.apply_fixes.get_dwyu_reports import parse_dwyu_execution_log


class TestParseDwyuExecutionLog(unittest.TestCase):
def test_parse_dwyu_execution_log(self) -> None:
test_log = Path("test_log.txt")
with test_log.open(mode="wt") as fp:
fp.write(
"""
Some unrelated stuff
DWYU Report: bazel-out/opt/bin/some/target_dwyu_report.json
ERROR: Unrelated error
DWYU Report: bazel-out/opt/bin/root_target_dwyu_report.json
""".strip()
)

logs = parse_dwyu_execution_log(test_log)
self.assertEqual(
logs, ["bazel-out/opt/bin/some/target_dwyu_report.json", "bazel-out/opt/bin/root_target_dwyu_report.json"]
)

def test_parse_dwyu_execution_log_empty(self) -> None:
test_log = Path("test_log.txt")
with test_log.open(mode="wt") as fp:
fp.write("")

logs = parse_dwyu_execution_log(test_log)
self.assertEqual(logs, [])


if __name__ == "__main__":
unittest.main()
19 changes: 19 additions & 0 deletions src/apply_fixes/test/utils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import unittest

from src.apply_fixes.utils import args_string_to_list


class TestArgsStringToList(unittest.TestCase):
def test_no_args(self) -> None:
self.assertEqual(args_string_to_list(None), [])
self.assertEqual(args_string_to_list(""), [])

def test_single_arg(self) -> None:
self.assertEqual(args_string_to_list("foo"), ["foo"])

def test_multiple_args(self) -> None:
self.assertEqual(args_string_to_list("--foo --bar=42 baz 1337"), ["--foo", "--bar=42", "baz", "1337"])


if __name__ == "__main__":
unittest.main()
Loading

0 comments on commit 56815ea

Please sign in to comment.