From 655d4a1a8dba4fc810ce4ab725e73b3dee3b70cf Mon Sep 17 00:00:00 2001
From: Tobias Wochinger
Date: Tue, 5 Mar 2024 12:14:10 +0100
Subject: [PATCH] test: test for missing dependencies (#7278)

* tests: import test for missing libraries

* build: add missing dependencies

* refactor: use glob instead of tree walk

* test: extract constants + more documentation
---
Review note: test/test_imports.py below was amended after export (the import
scan no longer stops at the first non-import statement and handles
`import a, b`), so the blob id on its `index` line is stale.

 haystack/testing/document_store.py |  5 ++-
 pyproject.toml                     |  7 ++++
 test/test_imports.py               | 74 ++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100644 test/test_imports.py

diff --git a/haystack/testing/document_store.py b/haystack/testing/document_store.py
index 77b1659015..0cdfdae042 100644
--- a/haystack/testing/document_store.py
+++ b/haystack/testing/document_store.py
@@ -4,12 +4,15 @@
 from typing import List
 
 import pandas as pd
-import pytest
 
 from haystack.dataclasses import Document
 from haystack.document_stores.errors import DuplicateDocumentError
 from haystack.document_stores.types import DocumentStore, DuplicatePolicy
 from haystack.errors import FilterError
+from haystack.lazy_imports import LazyImport
+
+with LazyImport("Run 'pip install pytest'") as pytest_import:
+    import pytest
 
 
 def _random_embeddings(n):
diff --git a/pyproject.toml b/pyproject.toml
index 22b23d009f..393b9caa28 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,6 +59,9 @@ dependencies = [
   "networkx", # Pipeline graphs
   "typing_extensions>=4.7", # typing support for Python 3.8
   "boilerpy3", # Fulltext extraction from HTML pages
+  "requests",
+  "numpy",
+  "python-dateutil",
 ]
 
 [tool.hatch.envs.default]
@@ -127,6 +130,10 @@ extra-dependencies = [
 
   # Structured logging
   "structlog",
+
+  # Looking for missing imports
+  "isort",
+  "pyproject-parser",
 ]
 
 [tool.hatch.envs.test.scripts]
diff --git a/test/test_imports.py b/test/test_imports.py
new file mode 100644
index 0000000000..5cf59f23ff
--- /dev/null
+++ b/test/test_imports.py
@@ -0,0 +1,74 @@
+import ast
+import os
+from _ast import Import, ImportFrom
+from pathlib import Path
+
+import isort
+import toml
+from pyproject_parser import PyProject
+
+# Some libraries have different names in the import and in the dependency
+# If below test fails due to that, add the library name to the dictionary
+LIBRARY_NAMES_TO_MODULE_NAMES = {"python-dateutil": "dateutil"}
+
+# Some standard libraries are not detected by isort. If below test fails due to that, add the library name to the set.
+ADDITIONAL_STD_LIBS = {"yaml"}
+
+
+def test_for_missing_dependencies() -> None:
+    """
+    Check that every third-party module imported at the top level of `haystack` is a declared dependency.
+
+    Imports are collected statically with `ast` from `haystack/**/*.py` and compared with the
+    `project.dependencies` table of `pyproject.toml`. Both paths are resolved against the working directory.
+    """
+    # We implement this manual check because
+    # - All tools out there are too powerful because they find all the imports in the haystack package
+    # - if we import all modules to check the imports we don't find issues with direct dependencies which are also
+    # sub-dependencies of other dependencies
+
+    #### Collect imports
+    top_level_imports = set()
+    for path in Path("haystack").glob("**/*.py"):
+        content = path.read_text(encoding="utf-8")
+        tree = ast.parse(content)
+        # Only direct children of the module are inspected, so imports nested in `with`/`try`/`def` are ignored
+        for item in tree.body:
+            if isinstance(item, Import):
+                # `import a, b.c` declares several modules in one statement
+                modules = [alias.name for alias in item.names]
+            elif isinstance(item, ImportFrom) and item.level == 0:  # level > 0 are relative imports
+                modules = [item.module]
+            else:
+                # We only care about absolute imports. Skip everything else (docstrings, relative imports,
+                # assignments, ...) but keep scanning: stopping here would hide all imports that follow.
+                continue
+
+            top_level_imports.update(module.split(".")[0] for module in modules)
+
+    third_party_modules = {
+        module
+        for module in top_level_imports
+        if isort.place_module(module) == "THIRDPARTY" and module not in ADDITIONAL_STD_LIBS
+    }
+
+    #### Load specified dependencies
+    parsed = toml.load("pyproject.toml")
+    # Pyproject complains about our pyproject.toml file, so we need to parse only the dependencies
+    # We still need `PyProject` to parse the dependencies (think of markers and stuff)
+    only_dependencies = {"project": {"name": "test", "dependencies": parsed["project"]["dependencies"]}}
+    project_dependencies = PyProject.project_table_parser.parse(only_dependencies["project"], set_defaults=True)[
+        "dependencies"
+    ]
+
+    project_dependency_modules = set()
+    for dep in project_dependencies:
+        if dep.name in LIBRARY_NAMES_TO_MODULE_NAMES:
+            project_dependency_modules.add(LIBRARY_NAMES_TO_MODULE_NAMES[dep.name])
+
+        project_dependency_modules.add(dep.name.replace("-", "_"))
+
+    #### And now finally; the check
+    # Sorted so that the module named in a failure is the same on every run
+    for module in sorted(third_party_modules):
+        assert module in project_dependency_modules, f"Module {module} is not in the dependencies"