Skip to content

Commit

Permalink
Merge pull request #7 from Never-Over/speed-improvements
Browse files Browse the repository at this point in the history
Speed improvements
  • Loading branch information
emdoyle authored Feb 13, 2024
2 parents 9a844ce + 3cc0a81 commit 0f592c6
Show file tree
Hide file tree
Showing 17 changed files with 190 additions and 122 deletions.
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,18 @@ pip install modguard
Add a `Boundary` to the `__init__.py` of the module you're creating an interface for.
```python
# project/core/__init__.py
from modguard import Boundary
import modguard

Boundary()
modguard.Boundary()
```

Add the `public` decorator to any callable in the module that should be exported. You can also export individual members by passing them to `public` as function call arguments.
```python
# project/core/main.py
from modguard import public
import modguard

# Adding the decorator here signifies this function is public
@public
@modguard.public
def public_function(user_id: int) -> str:
...

Expand Down Expand Up @@ -68,7 +68,7 @@ This will automatically create boundaries and define your public interface for e
### Advanced Usage
Modguard also supports specific allow lists within the `public()` decorator.
```python
@public(allowlist=['utils.helpers'])
@modguard.public(allowlist=['utils.helpers'])
def public_function(user_id: int) -> str:
...
```
Expand All @@ -94,8 +94,9 @@ from core import main # contains public and private members
If you expect to be able to import the entire contents of your module, you can declare an entire module as public to avoid this:
```python
# core/main.py
from modguard import public
public()
import modguard

modguard.public()

...
```
Expand Down
10 changes: 6 additions & 4 deletions modguard/check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
from dataclasses import dataclass
from typing import Optional

Expand Down Expand Up @@ -35,7 +36,7 @@ def check_import(
# * The module is not contained by a boundary [generally 3rd party]
import_mod_has_boundary = nearest_boundary is not None

# * The imported module's boundary is a child of the file's boundary
# * The file's boundary is a child of the imported module's boundary
import_mod_is_child_of_current = (
import_mod_has_boundary
and file_nearest_boundary.full_path.startswith(nearest_boundary.full_path)
Expand All @@ -47,7 +48,7 @@ def check_import(
(
public_member
for public_member_name, public_member in nearest_boundary.public_members.items()
if import_mod_path.startswith(public_member_name)
if re.match(rf"^{public_member_name}(\.\w+)?$", import_mod_path)
),
None,
)
Expand Down Expand Up @@ -86,10 +87,11 @@ def check(root: str, exclude_paths: Optional[list[str]] = None) -> list[ErrorInf
root = fs.canonical(root)
exclude_paths = list(map(fs.canonical, exclude_paths)) if exclude_paths else None

boundary_trie = build_boundary_trie(root, exclude_paths=exclude_paths)
pyfiles = list(fs.walk_pyfiles(root, exclude_paths=exclude_paths))
boundary_trie = build_boundary_trie(root, pyfiles=pyfiles)

errors: list[ErrorInfo] = []
for file_path in fs.walk_pyfiles(root, exclude_paths=exclude_paths):
for file_path in pyfiles:
mod_path = fs.file_to_module_path(file_path)
nearest_boundary = boundary_trie.find_nearest(mod_path)
assert (
Expand Down
19 changes: 6 additions & 13 deletions modguard/filesystem/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from modguard import Boundary, public
import modguard

# ruff: noqa: F401
# pyright: reportUnusedImport=false
from .service import (
get_cwd,
chdir,
Expand All @@ -12,15 +15,5 @@
module_to_file_path,
)

Boundary()

public(get_cwd)
public(chdir)
public(canonical)
public(read_file)
public(write_file)
public(parse_ast)
public(walk_pyfiles)
public(walk_pypackages)
public(file_to_module_path)
public(module_to_file_path)
modguard.Boundary()
modguard.public()
2 changes: 2 additions & 0 deletions modguard/filesystem/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import threading
from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from typing import Optional, Generator
from modguard.errors import ModguardParseError

Expand Down Expand Up @@ -154,6 +155,7 @@ def walk_pypackages(
yield filepath[: -len(init_file_ending)]


@lru_cache(maxsize=None)
def file_to_module_path(file_path: str) -> str:
# Assuming that the file_path has been 'canonicalized' and does not traverse multiple directories
file_path = file_path.lstrip("./")
Expand Down
8 changes: 4 additions & 4 deletions modguard/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from modguard import errors, filesystem as fs
from modguard.check import check_import
from modguard.core import PublicMember
from modguard.parsing.boundary import add_boundary, has_boundary, build_boundary_trie
from modguard.parsing.boundary import add_boundary, build_boundary_trie
from modguard.parsing.imports import get_imports
from modguard.parsing.public import mark_as_public

Expand All @@ -27,7 +27,7 @@ def init_project(root: str, exclude_paths: Optional[list[str]] = None):
# Core functionality:
# * do nothing in any package already having a Boundary
# * import and call Boundary in __init__.py for all other packages
# * import and decorate public on all externally imported functions and classes
# * import and decorate public on all externally imported members
if not os.path.isdir(root):
raise errors.ModguardSetupError(f"The path {root} is not a directory.")

Expand All @@ -44,8 +44,8 @@ def init_project(root: str, exclude_paths: Optional[list[str]] = None):

for dirpath in fs.walk_pypackages(root, exclude_paths=exclude_paths):
filepath = dirpath + "/__init__.py"
if not has_boundary(filepath):
dir_mod_path = fs.file_to_module_path(dirpath)
dir_mod_path = fs.file_to_module_path(dirpath)
if not boundary_trie.get(dir_mod_path):
boundary_trie.insert(dir_mod_path)
write_operations.append(
FileWriteInformation(
Expand Down
17 changes: 17 additions & 0 deletions modguard/parsing/ast_visitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import ast
from typing import Any


class EarlyExitNodeVisitor(ast.NodeVisitor):
    """An ``ast.NodeVisitor`` whose traversal can be aborted part-way through.

    A subclass calls :meth:`set_exit` from one of its ``visit_*`` handlers
    once it has found what it was looking for. From that point on every
    nested :meth:`visit` call made during the same traversal returns
    immediately, so the remaining nodes are not dispatched to handlers.

    The exit flag is cleared whenever a new top-level traversal starts, so a
    single instance can be reused across several trees.
    """

    # Class-level defaults keep ``visit`` usable even when a subclass defines
    # its own ``__init__`` and forgets to call ``super().__init__()``.
    _exit: bool = False
    _depth: int = 0

    def __init__(self, *args: Any, **kwargs: Any):
        # Positional/keyword arguments are accepted for signature
        # compatibility with subclasses but are otherwise ignored.
        super().__init__()
        self._exit = False
        # Number of ``visit`` calls currently on the stack; 0 means that no
        # traversal is in progress.
        self._depth = 0

    def set_exit(self, flag: bool = True):
        """Request (or, with ``flag=False``, cancel) an early exit."""
        self._exit = flag

    def visit(self, node: ast.AST) -> Any:
        """Visit ``node`` unless an early exit has been requested.

        Returns whatever the matching ``visit_*`` handler returns, or ``None``
        when the node is skipped because of an early exit.
        """
        if self._depth == 0:
            # A fresh top-level traversal: drop any flag left over from a
            # previous run so the instance can be reused.
            self._exit = False
        elif self._exit:
            # Keep the flag set so that *all* remaining nodes of the current
            # traversal are skipped, not just the next one.
            return None

        self._depth += 1
        try:
            return super().visit(node)
        finally:
            self._depth -= 1
66 changes: 22 additions & 44 deletions modguard/parsing/boundary.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,28 @@
import ast
import re
from typing import Optional

from modguard import filesystem as fs
from modguard.core.boundary import BoundaryTrie
from modguard.public import public
from modguard import filesystem as fs
from .public import get_public_members


class BoundaryFinder(ast.NodeVisitor):
    """AST visitor that detects a call to modguard's ``Boundary``.

    After ``visit(tree)``:

    * ``is_modguard_boundary_imported`` is True if the tree imports the
      ``modguard`` package itself, or imports ``Boundary`` from a
      ``modguard``-rooted module.
    * ``found_boundary`` is True if, after such an import was seen, a call to
      ``Boundary`` through one of the imported names was encountered.

    The case where the imported name has been re-assigned to something else
    before the call is deliberately ignored.
    """

    def __init__(self):
        self.is_modguard_boundary_imported = False
        self.found_boundary = False
        # Local names that may refer to the modguard package / to Boundary.
        # They are seeded with the canonical names and extended with any
        # ``as`` aliases seen in import statements.
        self._module_names = {"modguard"}
        self._boundary_names = {"Boundary"}

    def visit_ImportFrom(self, node: ast.ImportFrom):
        # Only 'Boundary' imported from a 'modguard'-rooted module counts.
        module = node.module
        if module is None:
            return
        if module != "modguard" and not module.startswith("modguard."):
            return
        for alias in node.names:
            if alias.name == "Boundary":
                self.is_modguard_boundary_imported = True
                self._boundary_names.add(alias.asname or alias.name)

    def visit_Import(self, node: ast.Import):
        # 'import modguard' (optionally 'as <alias>') makes
        # '<name>.Boundary()' a candidate call.
        for alias in node.names:
            if alias.name == "modguard":
                self.is_modguard_boundary_imported = True
                self._module_names.add(alias.asname or alias.name)

    def visit_Call(self, node: ast.Call):
        if self.is_modguard_boundary_imported and self._is_boundary_callee(
            node.func
        ):
            self.found_boundary = True
            return
        # Keep descending: the Boundary call may be nested inside another
        # call, e.g. as an argument or in a chained attribute access.
        self.generic_visit(node)

    def _is_boundary_callee(self, func: ast.expr) -> bool:
        """Return True if ``func`` is ``<modguard>.Boundary`` or ``<Boundary>``."""
        if isinstance(func, ast.Attribute):
            return (
                func.attr == "Boundary"
                and isinstance(func.value, ast.Name)
                and func.value.id in self._module_names
            )
        if isinstance(func, ast.Name):
            # Handles 'from modguard import Boundary [as <alias>]'.
            return func.id in self._boundary_names
        return False


# (import pattern, call pattern) pairs. A file is considered to declare a
# boundary when both patterns of at least one pair match. All patterns are
# anchored to the start of a line, so indented or commented-out statements
# are not recognised.
_BOUNDARY_PATTERNS = tuple(
    (re.compile(import_pattern), re.compile(call_pattern))
    for import_pattern, call_pattern in (
        # import modguard; modguard.Boundary()
        (
            r"(^|\n)import\s+modguard($|\n)",
            r"(^|\n)modguard\.Boundary\(",
        ),
        # from modguard import Boundary; Boundary()
        (
            r"(^|\n)from\s+modguard\s+import.*Boundary",
            r"(^|\n)Boundary\(",
        ),
        # from modguard.boundary import Boundary; Boundary()
        (
            r"(^|\n)from\s+modguard\.boundary\s+import.*Boundary",
            r"(^|\n)Boundary\(",
        ),
        # from modguard import boundary; boundary.Boundary()
        (
            r"(^|\n)from\s+modguard\s+import.*boundary",
            r"(^|\n)boundary\.Boundary\(",
        ),
        # import modguard.boundary; modguard.boundary.Boundary()
        (
            r"(^|\n)import\s+modguard\.boundary($|\n)",
            r"(^|\n)modguard\.boundary\.Boundary\(",
        ),
    )
)


@public
def has_boundary(file_path: str) -> bool:
    """Return True if the file at ``file_path`` appears to declare a Boundary.

    This is a text-based heuristic: the file content is searched with regular
    expressions instead of being parsed into an AST. It looks for one of the
    supported ways of importing modguard's ``Boundary`` together with the
    matching top-level call.

    Args:
        file_path: Path of the Python file to inspect; passed to
            ``fs.read_file`` (presumably returns the file's text — confirm
            against the filesystem service).

    Returns:
        True if some supported import form and its corresponding
        ``Boundary(`` call both occur at the start of a line, else False.
    """
    file_content = fs.read_file(file_path)
    return any(
        import_re.search(file_content) and call_re.search(file_content)
        for import_re, call_re in _BOUNDARY_PATTERNS
    )


BOUNDARY_PRELUDE = "import modguard\nmodguard.Boundary()\n"
Expand All @@ -61,19 +36,22 @@ def add_boundary(file_path: str) -> None:

@public
def build_boundary_trie(
root: str, exclude_paths: Optional[list[str]] = None
root: str,
exclude_paths: Optional[list[str]] = None,
pyfiles: Optional[list[str]] = None,
) -> BoundaryTrie:
boundary_trie = BoundaryTrie()
# Add an 'outer boundary' containing the entire root path
# This means a project will pass 'check' by default
boundary_trie.insert(fs.file_to_module_path(root))
pyfiles = pyfiles or list(fs.walk_pyfiles(root, exclude_paths=exclude_paths))

for file_path in fs.walk_pyfiles(root, exclude_paths=exclude_paths):
for file_path in pyfiles:
if has_boundary(file_path):
mod_path = fs.file_to_module_path(file_path)
boundary_trie.insert(mod_path)

for file_path in fs.walk_pyfiles(root, exclude_paths=exclude_paths):
for file_path in pyfiles:
mod_path = fs.file_to_module_path(file_path)
public_members = get_public_members(file_path)
for public_member in public_members:
Expand Down
4 changes: 1 addition & 3 deletions modguard/parsing/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom):
continue

global_mod_path = (
f"{base_mod_path}.{name_node.asname or name_node.name}"
if node.module
else (name_node.asname or name_node.name)
f"{base_mod_path}.{name_node.name}" if node.module else name_node.name
)
self.imports.append(global_mod_path)

Expand Down
Loading

0 comments on commit 0f592c6

Please sign in to comment.