Skip to content

Commit

Permalink
apply_fixes: Improve dependency discovery heuristic
Browse files Browse the repository at this point in the history
Our old logic fails when multiple dependencies provide a header file
with the same name at different paths.
The new logic is robust against this due to looking for the full include
path. Looking at the file name is now only a fallback used to resolve
cases where include path manipulation makes a simple include path match
impossible.
  • Loading branch information
martis42 committed Apr 22, 2024
1 parent 8c63c10 commit 84d2f64
Show file tree
Hide file tree
Showing 11 changed files with 177 additions and 70 deletions.
99 changes: 64 additions & 35 deletions src/apply_fixes/apply_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,18 @@ def get_file_name(include: str) -> str:
return include.split("/")[-1]


def target_to_file(dep: str) -> str:
"""Extract the file name from a Bazel target label"""
tmp = dep.split(":")[-1]
return get_file_name(tmp)
def target_to_path(dep: str) -> str:
return dep.replace(":", "/").rsplit("//", 1)[1]


@dataclass
class Dependency:
target: str
hdrs: list[str]
# Assuming no include path manipulation, the target provides these include paths
include_paths: list[str]

def __repr__(self) -> str:
return f"Dependency(target={self.target}, hdrs={self.hdrs})"
return f"Dependency(target={self.target}, include_paths={self.include_paths})"


def get_dependencies(workspace: Path, target: str) -> list[Dependency]:
Expand All @@ -105,19 +104,71 @@ def get_dependencies(workspace: Path, target: str) -> list[Dependency]:
"query",
"--output=xml",
"--noimplicit_deps",
f'attr("hdrs", "", deps({target}) except deps({target}, 1))',
f'kind("rule", deps({target}) except deps({target}, 1))',
],
cwd=workspace,
)
return [
Dependency(
target=dep.attrib["name"],
hdrs=[target_to_file(hdr.attrib["value"]) for hdr in dep.findall(".//*[@name='hdrs']/label")],
include_paths=[target_to_path(hdr.attrib["value"]) for hdr in dep.findall(".//*[@name='hdrs']/label")],
)
for dep in ElementTree.fromstring(process.stdout)
]


def mach_deps_to_include(target: str, invalid_include: str, target_deps: list[Dependency]) -> str | None:
"""
The possibility to manipulate include paths complicates matching potential dependencies to the invalid include
statement. Thus, we perform this multistep heuristic.
"""

deps_providing_included_path = [dep.target for dep in target_deps if invalid_include in dep.include_paths]

if len(deps_providing_included_path) == 1:
return deps_providing_included_path[0]

if len(deps_providing_included_path) > 1:
logging.warning(
f"""
Found multiple targets providing invalid include path '{invalid_include}' of target '{target}'.
Cannot determine correct dependency.
Discovered potential dependencies are: {deps_providing_included_path}.
""".strip()
)
return None

# No potential dep could be found searching for the full include statement. We perform a second search looking only
# the file name of the invalid include. The file name cannot be altered by the various include path manipulation
# techniques offered by Bazel, only the path at which a header can be discovered can be manipulated.
included_file = get_file_name(invalid_include)
deps_providing_included_file = [
dep.target for dep in target_deps if included_file in [get_file_name(path) for path in dep.include_paths]
]

if len(deps_providing_included_file) == 1:
return deps_providing_included_file[0]

if len(deps_providing_included_file) > 1:
logging.warning(
f"""
Found multiple targets providing file '{included_file}' from invalid include '{invalid_include}' of target '{target}'.
Matching the full include path did not work. Cannot determine correct dependency.
Discovered potential dependencies are: {deps_providing_included_file}.
""".strip()
)
return None

logging.warning(
f"""
Could not find a proper dependency for invalid include path '{invalid_include}' of target '{target}'.
Is the header file maybe wrongly part of the 'srcs' attribute instead of 'hdrs' in the library which should provide the header?
Or is this include is resolved through the toolchain instead through a dependency?
""".strip()
)
return None


def search_missing_deps(workspace: Path, target: str, includes_without_direct_dep: dict[str, list[str]]) -> list[str]:
"""
Search for targets providing header files matching the include statements without matching direct dependency in the
Expand All @@ -128,33 +179,11 @@ def search_missing_deps(workspace: Path, target: str, includes_without_direct_de

target_deps = get_dependencies(workspace=workspace, target=target)
all_invalid_includes = list(chain(*includes_without_direct_dep.values()))
discovered_dependencies = []
for include in all_invalid_includes:
include_file = get_file_name(include)
potential_deps = [dep.target for dep in target_deps if include_file in dep.hdrs]

if not potential_deps:
logging.warning(
f"""
Could not find a proper dependency for invalid include '{include}' of target '{target}'.
Is the header file maybe wrongly part of the 'srcs' attribute instead of 'hdrs' in the library which should provide the header?
""".strip()
)
continue

if len(potential_deps) > 1:
logging.warning(
f"""
Found multiple targets which potentially can provide include '{include}' of target '{target}'.
Please fix this manually. Candidates which have been discovered:
""".strip()
)
logging.warning("\n".join(f"- {dep}" for dep in potential_deps))
continue

discovered_dependencies.append(potential_deps[0])

return discovered_dependencies
return [
dep
for include in all_invalid_includes
if (dep := mach_deps_to_include(target=target, invalid_include=include, target_deps=target_deps)) is not None
]


def add_discovered_deps(
Expand Down
86 changes: 54 additions & 32 deletions src/apply_fixes/test/apply_fixes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
get_dependencies,
get_file_name,
search_missing_deps,
target_to_file,
target_to_path,
)


Expand All @@ -18,9 +18,9 @@ def test_get_file_name(self) -> None:
self.assertEqual(get_file_name("foo.txt"), "foo.txt")
self.assertEqual(get_file_name("riff/raff/foo.txt"), "foo.txt")

def test_target_to_file(self) -> None:
self.assertEqual(target_to_file(":foo"), "foo")
self.assertEqual(target_to_file("@foo//bar:riff/raff.txt"), "raff.txt")
def test_target_to_path(self) -> None:
self.assertEqual(target_to_path("//:foo"), "foo")
self.assertEqual(target_to_path("@foo//bar:riff/raff.txt"), "bar/riff/raff.txt")


class TestGetDependencies(unittest.TestCase):
Expand Down Expand Up @@ -56,9 +56,9 @@ def test_parse_query_output(self, execute_query_mock: MagicMock) -> None:

self.assertEqual(len(deps), 2)
self.assertEqual(deps[0].target, "//foo:bar")
self.assertEqual(deps[0].hdrs, ["riff.h", "raff.h"])
self.assertEqual(deps[0].include_paths, ["foo/riff.h", "foo/raff.h"])
self.assertEqual(deps[1].target, "//:foobar")
self.assertEqual(deps[1].hdrs, ["foobar.h"])
self.assertEqual(deps[1].include_paths, ["foobar.h"])


class TestSearchDeps(unittest.TestCase):
Expand All @@ -68,8 +68,8 @@ def test_noop_for_empty_input(self) -> None:
@patch("src.apply_fixes.apply_fixes.get_dependencies")
def test_find_dependency(self, get_deps_mock: MagicMock) -> None:
get_deps_mock.return_value = [
Dependency(target="//unrelated:lib", hdrs=["some_include.h"]),
Dependency(target="//expected:target", hdrs=["include_a.h", "include_b.h"]),
Dependency(target="//unrelated:lib", include_paths=["some_include.h"]),
Dependency(target="//expected:target", include_paths=["other/path/include_a.h", "some/path/include_b.h"]),
]
deps = search_missing_deps(
workspace=Path(),
Expand All @@ -80,48 +80,70 @@ def test_find_dependency(self, get_deps_mock: MagicMock) -> None:
self.assertEqual(deps, ["//expected:target"])

@patch("src.apply_fixes.apply_fixes.get_dependencies")
def test_fail_on_unresolved_dependency(self, get_deps_mock: MagicMock) -> None:
get_deps_mock.return_value = [Dependency(target="//unrelated:lib", hdrs=["some_include.h"])]
def test_fail_on_ambiguous_dependency_resolution_for_full_include_path(self, get_deps_mock: MagicMock) -> None:
get_deps_mock.return_value = [
Dependency(target="//:lib_a", include_paths=["some/path/foo.h"]),
Dependency(target="//:lib_b", include_paths=["some/path/foo.h"]),
]
with self.assertLogs() as cm:
deps = search_missing_deps(
workspace=Path(),
target="foo",
includes_without_direct_dep={"some_file.cc": ["some/path/include_b.h"]},
includes_without_direct_dep={"some_file.cc": ["some/path/foo.h"]},
)
self.assertEqual(len(cm.output), 1)
self.assertTrue(
any(
"WARNING:root:Could not find a proper dependency for invalid include 'some/path/include_b.h'" in out
for out in cm.output
)
)
self.assertEqual(
cm.output,
[
"WARNING:root:Could not find a proper dependency for invalid include 'some/path/include_b.h' of target 'foo'.\n"
"Is the header file maybe wrongly part of the 'srcs' attribute instead of 'hdrs' in the library which should provide the header?"
],
"Found multiple targets providing invalid include path 'some/path/foo.h' of target 'foo'"
in cm.output[0]
)
self.assertTrue("Discovered potential dependencies are: ['//:lib_a', '//:lib_b']" in cm.output[0])
self.assertEqual(deps, [])

@patch("src.apply_fixes.apply_fixes.get_dependencies")
def test_fail_on_ambiguous_dependency_resolution(self, get_deps_mock: MagicMock) -> None:
def test_find_dependency_via_file_name_fallback(self, get_deps_mock: MagicMock) -> None:
get_deps_mock.return_value = [
Dependency(target="//some:lib_a", include_paths=["foo.h"]),
]
deps = search_missing_deps(
workspace=Path(),
target="foo",
includes_without_direct_dep={"some_file.cc": ["some/path/foo.h"]},
)
self.assertEqual(deps, ["//some:lib_a"])

@patch("src.apply_fixes.apply_fixes.get_dependencies")
def test_fail_on_ambiguous_dependency_resolution_for_file_name_fallback(self, get_deps_mock: MagicMock) -> None:
get_deps_mock.return_value = [
Dependency(target="//:lib_a", hdrs=["foo.h"]),
Dependency(target="//:lib_b", hdrs=["foo.h"]),
Dependency(target="//:lib_a", include_paths=["foo.h"]),
Dependency(target="//:lib_b", include_paths=["foo.h"]),
]
with self.assertLogs() as cm:
deps = search_missing_deps(
workspace=Path(),
target="foo",
includes_without_direct_dep={"some_file.cc": ["some/path/foo.h"]},
)
self.assertEqual(
cm.output,
[
"WARNING:root:Found multiple targets which potentially can provide include 'some/path/foo.h' of target 'foo'.\n"
"Please fix this manually. Candidates which have been discovered:",
"WARNING:root:- //:lib_a\n- //:lib_b",
],
self.assertEqual(len(cm.output), 1)
self.assertTrue(
"Found multiple targets providing file 'foo.h' from invalid include 'some/path/foo.h' of target 'foo'"
in cm.output[0]
)
self.assertTrue("Discovered potential dependencies are: ['//:lib_a', '//:lib_b']" in cm.output[0])
self.assertEqual(deps, [])

@patch("src.apply_fixes.apply_fixes.get_dependencies")
def test_fail_on_unresolved_dependency(self, get_deps_mock: MagicMock) -> None:
get_deps_mock.return_value = [Dependency(target="//unrelated:lib", include_paths=["some_include.h"])]
with self.assertLogs() as cm:
deps = search_missing_deps(
workspace=Path(),
target="foo",
includes_without_direct_dep={"some_file.cc": ["some/path/include_b.h"]},
)
self.assertEqual(len(cm.output), 1)
self.assertTrue(
"Could not find a proper dependency for invalid include path 'some/path/include_b.h' of target 'foo'"
in cm.output[0]
)
self.assertEqual(deps, [])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def execute_test_logic(self) -> Result:
)

expected_error = [
"Found multiple targets which potentially can provide include 'ambiguous_lib/lib.h'",
"Found multiple targets providing invalid include path 'ambiguous_lib/lib.h'",
"//ambiguous_lib:lib_a",
"//ambiguous_lib:lib_b",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def execute_test_logic(self) -> Result:
check=True,
)

expected_error = "Could not find a proper dependency for invalid include 'bar/private_bar.h'"
expected_error = "Could not find a proper dependency for invalid include path 'bar/private_bar.h'"
if expected_error not in process.stderr:
return self._make_unexpected_output_error(expected=expected_error, output=process.stderr)
return Success()
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from result import Result, Success
from test_case import TestCaseBase


class TestCase(TestCaseBase):
@property
def test_target(self) -> str:
return "//:use_manipulated_bar"

def execute_test_logic(self) -> Result:
self._create_reports()
self._run_automatic_fix(extra_args=["--fix-missing-deps"])

target_deps = self._get_target_attribute(target=self.test_target, attribute="deps")
if (expected := {"//:manipulated_bar_provider", "//libs:manipulated_bar"}) != target_deps:
return self._make_unexpected_deps_error(expected_deps=expected, actual_deps=target_deps)
return Success()
12 changes: 12 additions & 0 deletions test/apply_fixes/missing_dependency/workspace/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,22 @@ cc_library(
deps = [":libs_provider"],
)

cc_library(
name = "use_manipulated_bar",
hdrs = ["use_manipulated_bar.h"],
deps = [":manipulated_bar_provider"],
)

##################
# Helper Targets #
##################

cc_library(
name = "manipulated_bar_provider",
srcs = ["dummy.h"],
deps = ["//libs:manipulated_bar"],
)

cc_library(
name = "ambiguous_lib_provider",
srcs = ["dummy.h"],
Expand Down
9 changes: 9 additions & 0 deletions test/apply_fixes/missing_dependency/workspace/libs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,19 @@ cc_library(
name = "foo",
hdrs = ["foo.h"],
visibility = ["//visibility:public"],
deps = ["//other_lib"],
)

cc_library(
name = "bar",
hdrs = ["sub/bar.h"],
visibility = ["//visibility:public"],
)

cc_library(
name = "manipulated_bar",
hdrs = ["sub/bar.h"],
include_prefix = "new/path",
strip_include_prefix = ".",
visibility = ["//visibility:public"],
)
5 changes: 4 additions & 1 deletion test/apply_fixes/missing_dependency/workspace/libs/foo.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Show that multiple files 'foo.h' in the dependency graph are no issue
#include "other_lib/foo.h"

int doFoo() {
return 1337;
return doOtherFoo() + 1337;
}
7 changes: 7 additions & 0 deletions test/apply_fixes/missing_dependency/workspace/other_lib/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# We use this library to prove that multiple header with the same name existing in the dependency is not a problem if
# they are included with their full path
cc_library(
name = "other_lib",
hdrs = ["foo.h"],
visibility = ["//visibility:public"],
)
3 changes: 3 additions & 0 deletions test/apply_fixes/missing_dependency/workspace/other_lib/foo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
int doOtherFoo() {
return 1234;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "new/path/sub/bar.h"

int useBar() {
return doBar();
}

0 comments on commit 84d2f64

Please sign in to comment.