Skip to content

Commit

Permalink
apply_fixes: Support cquery and custom bazel arguments
Browse files Browse the repository at this point in the history
Until now the apply_fixes script by default uses `bazel query` to find
missing dependencies. A project might use select statements to exchange
dependencies. In such cases one should use `bazel cquery` to allow the
apply_fixes script to understand the dependency tree properly.
This also requires the user being able to provide arguments to `bazel cquery`.

Refactoring incorporated into this:
- Due to the missing dependency discover logic increasing in complexity,
  we spit it into a separate module
- We output commands as a single string in verbose mode to ease copying
  them for debugging
  • Loading branch information
martis42 committed May 5, 2024
1 parent f39791d commit b8d911d
Show file tree
Hide file tree
Showing 16 changed files with 420 additions and 167 deletions.
3 changes: 3 additions & 0 deletions src/apply_fixes/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ py_library(
name = "lib",
srcs = [
"apply_fixes.py",
"bazel_query.py",
"buildozer_executor.py",
"search_missing_deps.py",
"summary.py",
"utils.py",
],
visibility = [":__subpackages__"],
)
Expand Down
143 changes: 20 additions & 123 deletions src/apply_fixes/apply_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

import json
import logging
import subprocess
import shlex
import sys
from dataclasses import dataclass
from itertools import chain
from os import environ, walk
from pathlib import Path
from typing import TYPE_CHECKING
from xml.etree import ElementTree

from src.apply_fixes.bazel_query import BazelQuery
from src.apply_fixes.buildozer_executor import BuildozerExecutor
from src.apply_fixes.search_missing_deps import search_missing_deps
from src.apply_fixes.utils import execute_and_capture

if TYPE_CHECKING:
from argparse import Namespace
Expand All @@ -29,11 +29,6 @@ def __init__(self, main_args: Namespace) -> None:
self.add_missing_deps = main_args.fix_missing_deps or main_args.fix_all


def execute_and_capture(cmd: list[str], cwd: Path, check: bool = True) -> subprocess.CompletedProcess:
logging.debug(f"Executing command: {cmd}")
return subprocess.run(cmd, cwd=cwd, check=check, capture_output=True, text=True)


def get_workspace(main_args: Namespace) -> Path | None:
if main_args.workspace:
return Path(main_args.workspace)
Expand Down Expand Up @@ -76,116 +71,6 @@ def gather_reports(search_path: Path) -> list[Path]:
return reports


def get_file_name(include: str) -> str:
return include.split("/")[-1]


def target_to_path(dep: str) -> str:
return dep.replace(":", "/").rsplit("//", 1)[1]


@dataclass
class Dependency:
target: str
# Assuming no include path manipulation, the target provides these include paths
include_paths: list[str]

def __repr__(self) -> str:
return f"Dependency(target={self.target}, include_paths={self.include_paths})"


def get_dependencies(workspace: Path, target: str) -> list[Dependency]:
"""
Extract dependencies from a given target together with further information about those dependencies.
"""
process = execute_and_capture(
cmd=[
"bazel",
"query",
"--output=xml",
"--noimplicit_deps",
f'kind("rule", deps({target}) except deps({target}, 1))',
],
cwd=workspace,
)
return [
Dependency(
target=dep.attrib["name"],
include_paths=[target_to_path(hdr.attrib["value"]) for hdr in dep.findall(".//*[@name='hdrs']/label")],
)
for dep in ElementTree.fromstring(process.stdout)
]


def mach_deps_to_include(target: str, invalid_include: str, target_deps: list[Dependency]) -> str | None:
"""
The possibility to manipulate include paths complicates matching potential dependencies to the invalid include
statement. Thus, we perform this multistep heuristic.
"""

deps_providing_included_path = [dep.target for dep in target_deps if invalid_include in dep.include_paths]

if len(deps_providing_included_path) == 1:
return deps_providing_included_path[0]

if len(deps_providing_included_path) > 1:
logging.warning(
f"""
Found multiple targets providing invalid include path '{invalid_include}' of target '{target}'.
Cannot determine correct dependency.
Discovered potential dependencies are: {deps_providing_included_path}.
""".strip()
)
return None

# No potential dep could be found searching for the full include statement. We perform a second search looking only
# the file name of the invalid include. The file name cannot be altered by the various include path manipulation
# techniques offered by Bazel, only the path at which a header can be discovered can be manipulated.
included_file = get_file_name(invalid_include)
deps_providing_included_file = [
dep.target for dep in target_deps if included_file in [get_file_name(path) for path in dep.include_paths]
]

if len(deps_providing_included_file) == 1:
return deps_providing_included_file[0]

if len(deps_providing_included_file) > 1:
logging.warning(
f"""
Found multiple targets providing file '{included_file}' from invalid include '{invalid_include}' of target '{target}'.
Matching the full include path did not work. Cannot determine correct dependency.
Discovered potential dependencies are: {deps_providing_included_file}.
""".strip()
)
return None

logging.warning(
f"""
Could not find a proper dependency for invalid include path '{invalid_include}' of target '{target}'.
Is the header file maybe wrongly part of the 'srcs' attribute instead of 'hdrs' in the library which should provide the header?
Or is this include resolved through the toolchain instead through a dependency?
""".strip()
)
return None


def search_missing_deps(workspace: Path, target: str, includes_without_direct_dep: dict[str, list[str]]) -> list[str]:
"""
Search for targets providing header files matching the include statements without matching direct dependency in the
transitive dependencies of the target under inspection.
"""
if not includes_without_direct_dep:
return []

target_deps = get_dependencies(workspace=workspace, target=target)
all_invalid_includes = list(chain(*includes_without_direct_dep.values()))
return [
dep
for include in all_invalid_includes
if (dep := mach_deps_to_include(target=target, invalid_include=include, target_deps=target_deps)) is not None
]


def add_discovered_deps(
discovered_public_deps: list[str],
discovered_private_deps: list[str],
Expand All @@ -206,7 +91,9 @@ def add_discovered_deps(
buildozer.execute(task=f"add implementation_deps {' '.join(list(set(add_to_impl_deps)))}", target=target)


def perform_fixes(buildozer: BuildozerExecutor, workspace: Path, report: Path, requested_fixes: RequestedFixes) -> None:
def perform_fixes(
bazel_query: BazelQuery, buildozer: BuildozerExecutor, report: Path, requested_fixes: RequestedFixes
) -> None:
with report.open(encoding="utf-8") as report_in:
content = json.load(report_in)
target = content["analyzed_target"]
Expand All @@ -226,10 +113,14 @@ def perform_fixes(buildozer: BuildozerExecutor, workspace: Path, report: Path, r

if requested_fixes.add_missing_deps:
discovered_public_deps = search_missing_deps(
workspace=workspace, target=target, includes_without_direct_dep=content["public_includes_without_dep"]
bazel_query=bazel_query,
target=target,
includes_without_direct_dep=content["public_includes_without_dep"],
)
discovered_private_deps = search_missing_deps(
workspace=workspace, target=target, includes_without_direct_dep=content["private_includes_without_dep"]
bazel_query=bazel_query,
target=target,
includes_without_direct_dep=content["private_includes_without_dep"],
)
add_discovered_deps(
discovered_public_deps=discovered_public_deps,
Expand Down Expand Up @@ -269,13 +160,19 @@ def main(args: Namespace) -> int:
)
return 1

bazel_query = BazelQuery(
workspace=workspace,
use_cquery=args.use_cquery,
query_args=shlex.split(args.bazel_args) if args.bazel_args else [],
startup_args=shlex.split(args.bazel_startup_args) if args.bazel_startup_args else [],
)
buildozer_executor = BuildozerExecutor(
buildozer=buildozer, buildozer_args=args.buildozer_args, workspace=workspace, dry=args.dry_run
)
for report in reports:
logging.debug(f"Processing report file '{report}'")
perform_fixes(
workspace=workspace, report=report, buildozer=buildozer_executor, requested_fixes=RequestedFixes(args)
report=report, bazel_query=bazel_query, buildozer=buildozer_executor, requested_fixes=RequestedFixes(args)
)
buildozer_executor.summary.print_summary()

Expand Down
32 changes: 32 additions & 0 deletions src/apply_fixes/bazel_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from src.apply_fixes.utils import execute_and_capture

if TYPE_CHECKING:
from pathlib import Path
from subprocess import CompletedProcess


class BazelQuery:
def __init__(self, workspace: Path, use_cquery: bool, query_args: list[str], startup_args: list[str]) -> None:
self._workspace = workspace
self._use_cquery = use_cquery
self._query_args = query_args
self._startup_args = startup_args

def execute(self, query: str, args: list[str]) -> CompletedProcess:
cmd = [
"bazel",
*self._startup_args,
"cquery" if self._use_cquery else "query",
*self._query_args,
*args,
query,
]
return execute_and_capture(cmd=cmd, cwd=self._workspace)

@property
def uses_cquery(self) -> bool:
return self._use_cquery
28 changes: 28 additions & 0 deletions src/apply_fixes/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,34 @@ def cli() -> Namespace:
deduce the Bazel output directory containing the DWYU report files. Or if you want to search only in a sub tree
of the Bazel output directories.""",
)
parser.add_argument(
"--use-cquery",
action="store_true",
help="""
The apply_fixes script by default uses 'bazel query' to find missing dependencies. Your project might use
select statements to exchange dependencies. In such cases you should use 'bazel cquery' to allow this script
understanding the dependency tree properly. Should be used together with '--bazel-args' to provide
the configuration for 'bazel cquery'.""",
)
parser.add_argument(
"--bazel-args",
type=str,
metavar="STRING",
help="""
The apply_fixes script uses 'bazel (c)query' to find missing dependencies. If this command requires further
arguments to work properly in your workspace you can provide them here. Also look into '--use-cquery' if
you want to provide build configuration.
Arguments have ot be provided as one large string, e.g.: --bazel-args='--foo --tick=tock'.""",
)
parser.add_argument(
"--bazel-startup-args",
type=str,
metavar="STRING",
help="""
The apply_fixes script uses 'bazel (c)query' to find missing dependencies. If this command requires further
startup arguments (e.g. a custom output base) to work properly in your workspace you can provide them here.
Arguments have ot be provided as one large string, e.g.: --bazel-args='--foo --tick=tock'.""",
)
parser.add_argument(
"--fix-unused-deps",
action="store_true",
Expand Down
Loading

0 comments on commit b8d911d

Please sign in to comment.