apply_fixes: Support cquery and custom bazel arguments

Until now the apply_fixes script by default uses `bazel query` to find missing dependencies. A project might use select statements to exchange dependencies. In such cases one should use `bazel cquery` to allow the apply_fixes script to understand the dependency tree properly. This also requires the user being able to provide arguments to `bazel cquery`. Refactoring incorporated into this: - Due to the missing dependency discover logic increasing in complexity, we spit it into a separate module - We output commands as a single string in verbose mode to ease copying them for debugging
martis42 · May 5, 2024 · b8d911d · b8d911d
1 parent f39791d
commit b8d911d
Show file tree

Hide file tree

Showing 16 changed files with 420 additions and 167 deletions.
diff --git a/src/apply_fixes/BUILD b/src/apply_fixes/BUILD
@@ -4,8 +4,11 @@ py_library(
     name = "lib",
     srcs = [
         "apply_fixes.py",
+        "bazel_query.py",
         "buildozer_executor.py",
+        "search_missing_deps.py",
         "summary.py",
+        "utils.py",
     ],
     visibility = [":__subpackages__"],
 )

diff --git a/src/apply_fixes/apply_fixes.py b/src/apply_fixes/apply_fixes.py
@@ -2,16 +2,16 @@
 
 import json
 import logging
-import subprocess
+import shlex
 import sys
-from dataclasses import dataclass
-from itertools import chain
 from os import environ, walk
 from pathlib import Path
 from typing import TYPE_CHECKING
-from xml.etree import ElementTree
 
+from src.apply_fixes.bazel_query import BazelQuery
 from src.apply_fixes.buildozer_executor import BuildozerExecutor
+from src.apply_fixes.search_missing_deps import search_missing_deps
+from src.apply_fixes.utils import execute_and_capture
 
 if TYPE_CHECKING:
     from argparse import Namespace
@@ -29,11 +29,6 @@ def __init__(self, main_args: Namespace) -> None:
         self.add_missing_deps = main_args.fix_missing_deps or main_args.fix_all
 
 
-def execute_and_capture(cmd: list[str], cwd: Path, check: bool = True) -> subprocess.CompletedProcess:
-    logging.debug(f"Executing command: {cmd}")
-    return subprocess.run(cmd, cwd=cwd, check=check, capture_output=True, text=True)
-
-
 def get_workspace(main_args: Namespace) -> Path | None:
     if main_args.workspace:
         return Path(main_args.workspace)
@@ -76,116 +71,6 @@ def gather_reports(search_path: Path) -> list[Path]:
     return reports
 
 
-def get_file_name(include: str) -> str:
-    return include.split("/")[-1]
-
-
-def target_to_path(dep: str) -> str:
-    return dep.replace(":", "/").rsplit("//", 1)[1]
-
-
-@dataclass
-class Dependency:
-    target: str
-    # Assuming no include path manipulation, the target provides these include paths
-    include_paths: list[str]
-
-    def __repr__(self) -> str:
-        return f"Dependency(target={self.target}, include_paths={self.include_paths})"
-
-
-def get_dependencies(workspace: Path, target: str) -> list[Dependency]:
-    """
-    Extract dependencies from a given target together with further information about those dependencies.
-    """
-    process = execute_and_capture(
-        cmd=[
-            "bazel",
-            "query",
-            "--output=xml",
-            "--noimplicit_deps",
-            f'kind("rule", deps({target}) except deps({target}, 1))',
-        ],
-        cwd=workspace,
-    )
-    return [
-        Dependency(
-            target=dep.attrib["name"],
-            include_paths=[target_to_path(hdr.attrib["value"]) for hdr in dep.findall(".//*[@name='hdrs']/label")],
-        )
-        for dep in ElementTree.fromstring(process.stdout)
-    ]
-
-
-def mach_deps_to_include(target: str, invalid_include: str, target_deps: list[Dependency]) -> str | None:
-    """
-    The possibility to manipulate include paths complicates matching potential dependencies to the invalid include
-    statement. Thus, we perform this multistep heuristic.
-    """
-
-    deps_providing_included_path = [dep.target for dep in target_deps if invalid_include in dep.include_paths]
-
-    if len(deps_providing_included_path) == 1:
-        return deps_providing_included_path[0]
-
-    if len(deps_providing_included_path) > 1:
-        logging.warning(
-            f"""
-Found multiple targets providing invalid include path '{invalid_include}' of target '{target}'.
- Cannot determine correct dependency.
- Discovered potential dependencies are: {deps_providing_included_path}.
-            """.strip()
-        )
-        return None
-
-    # No potential dep could be found searching for the full include statement. We perform a second search looking only
-    # the file name of the invalid include. The file name cannot be altered by the various include path manipulation
-    # techniques offered by Bazel, only the path at which a header can be discovered can be manipulated.
-    included_file = get_file_name(invalid_include)
-    deps_providing_included_file = [
-        dep.target for dep in target_deps if included_file in [get_file_name(path) for path in dep.include_paths]
-    ]
-
-    if len(deps_providing_included_file) == 1:
-        return deps_providing_included_file[0]
-
-    if len(deps_providing_included_file) > 1:
-        logging.warning(
-            f"""
-Found multiple targets providing file '{included_file}' from invalid include '{invalid_include}' of target '{target}'.
- Matching the full include path did not work. Cannot determine correct dependency.
- Discovered potential dependencies are: {deps_providing_included_file}.
-            """.strip()
-        )
-        return None
-
-    logging.warning(
-        f"""
-Could not find a proper dependency for invalid include path '{invalid_include}' of target '{target}'.
- Is the header file maybe wrongly part of the 'srcs' attribute instead of 'hdrs' in the library which should provide the header?
- Or is this include resolved through the toolchain instead through a dependency?
-        """.strip()
-    )
-    return None
-
-
-def search_missing_deps(workspace: Path, target: str, includes_without_direct_dep: dict[str, list[str]]) -> list[str]:
-    """
-    Search for targets providing header files matching the include statements without matching direct dependency in the
-    transitive dependencies of the target under inspection.
-    """
-    if not includes_without_direct_dep:
-        return []
-
-    target_deps = get_dependencies(workspace=workspace, target=target)
-    all_invalid_includes = list(chain(*includes_without_direct_dep.values()))
-    return [
-        dep
-        for include in all_invalid_includes
-        if (dep := mach_deps_to_include(target=target, invalid_include=include, target_deps=target_deps)) is not None
-    ]
-
-
 def add_discovered_deps(
     discovered_public_deps: list[str],
     discovered_private_deps: list[str],
@@ -206,7 +91,9 @@ def add_discovered_deps(
         buildozer.execute(task=f"add implementation_deps {' '.join(list(set(add_to_impl_deps)))}", target=target)
 
 
-def perform_fixes(buildozer: BuildozerExecutor, workspace: Path, report: Path, requested_fixes: RequestedFixes) -> None:
+def perform_fixes(
+    bazel_query: BazelQuery, buildozer: BuildozerExecutor, report: Path, requested_fixes: RequestedFixes
+) -> None:
     with report.open(encoding="utf-8") as report_in:
         content = json.load(report_in)
         target = content["analyzed_target"]
@@ -226,10 +113,14 @@ def perform_fixes(buildozer: BuildozerExecutor, workspace: Path, report: Path, r
 
         if requested_fixes.add_missing_deps:
             discovered_public_deps = search_missing_deps(
-                workspace=workspace, target=target, includes_without_direct_dep=content["public_includes_without_dep"]
+                bazel_query=bazel_query,
+                target=target,
+                includes_without_direct_dep=content["public_includes_without_dep"],
             )
             discovered_private_deps = search_missing_deps(
-                workspace=workspace, target=target, includes_without_direct_dep=content["private_includes_without_dep"]
+                bazel_query=bazel_query,
+                target=target,
+                includes_without_direct_dep=content["private_includes_without_dep"],
             )
             add_discovered_deps(
                 discovered_public_deps=discovered_public_deps,
@@ -269,13 +160,19 @@ def main(args: Namespace) -> int:
         )
         return 1
 
+    bazel_query = BazelQuery(
+        workspace=workspace,
+        use_cquery=args.use_cquery,
+        query_args=shlex.split(args.bazel_args) if args.bazel_args else [],
+        startup_args=shlex.split(args.bazel_startup_args) if args.bazel_startup_args else [],
+    )
     buildozer_executor = BuildozerExecutor(
         buildozer=buildozer, buildozer_args=args.buildozer_args, workspace=workspace, dry=args.dry_run
     )
     for report in reports:
         logging.debug(f"Processing report file '{report}'")
         perform_fixes(
-            workspace=workspace, report=report, buildozer=buildozer_executor, requested_fixes=RequestedFixes(args)
+            report=report, bazel_query=bazel_query, buildozer=buildozer_executor, requested_fixes=RequestedFixes(args)
         )
     buildozer_executor.summary.print_summary()
 

diff --git a/src/apply_fixes/bazel_query.py b/src/apply_fixes/bazel_query.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from src.apply_fixes.utils import execute_and_capture
+
+if TYPE_CHECKING:
+    from pathlib import Path
+    from subprocess import CompletedProcess
+
+
+class BazelQuery:
+    def __init__(self, workspace: Path, use_cquery: bool, query_args: list[str], startup_args: list[str]) -> None:
+        self._workspace = workspace
+        self._use_cquery = use_cquery
+        self._query_args = query_args
+        self._startup_args = startup_args
+
+    def execute(self, query: str, args: list[str]) -> CompletedProcess:
+        cmd = [
+            "bazel",
+            *self._startup_args,
+            "cquery" if self._use_cquery else "query",
+            *self._query_args,
+            *args,
+            query,
+        ]
+        return execute_and_capture(cmd=cmd, cwd=self._workspace)
+
+    @property
+    def uses_cquery(self) -> bool:
+        return self._use_cquery
diff --git a/src/apply_fixes/main.py b/src/apply_fixes/main.py
@@ -58,6 +58,34 @@ def cli() -> Namespace:
         deduce the Bazel output directory containing the DWYU report files. Or if you want to search only in a sub tree
         of the Bazel output directories.""",
     )
+    parser.add_argument(
+        "--use-cquery",
+        action="store_true",
+        help="""
+        The apply_fixes script by default uses 'bazel query' to find missing dependencies. Your project might use
+        select statements to exchange dependencies. In such cases you should use 'bazel cquery' to allow this script
+        understanding the dependency tree properly. Should be used together with '--bazel-args' to provide
+        the configuration for 'bazel cquery'.""",
+    )
+    parser.add_argument(
+        "--bazel-args",
+        type=str,
+        metavar="STRING",
+        help="""
+        The apply_fixes script uses 'bazel (c)query' to find missing dependencies. If this command requires further
+        arguments to work properly in your workspace you can provide them here. Also look into '--use-cquery' if
+        you want to provide build configuration.
+        Arguments have ot be provided as one large string, e.g.: --bazel-args='--foo --tick=tock'.""",
+    )
+    parser.add_argument(
+        "--bazel-startup-args",
+        type=str,
+        metavar="STRING",
+        help="""
+        The apply_fixes script uses 'bazel (c)query' to find missing dependencies. If this command requires further
+        startup arguments (e.g. a custom output base) to work properly in your workspace you can provide them here.
+        Arguments have ot be provided as one large string, e.g.: --bazel-args='--foo --tick=tock'.""",
+    )
     parser.add_argument(
         "--fix-unused-deps",
         action="store_true",