Skip to content

Commit

Permalink
File search now uses .gitignore (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
suoto committed Oct 22, 2019
1 parent ddfe0ef commit 4590f00
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 41 deletions.
35 changes: 14 additions & 21 deletions hdl_checker/config_generators/simple_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,24 @@
# along with HDL Checker. If not, see <http://www.gnu.org/licenses/>.
"Base class for creating a project file"

import os
import os.path as p
from typing import Iterable, List

from .base_generator import BaseGenerator

from hdl_checker.exceptions import UnknownTypeExtension
from hdl_checker.parser_utils import filterGitIgnoredPaths, findRtlSourcesByPath, isGitRepo
from hdl_checker.path import Path
from hdl_checker.types import FileType
from hdl_checker.utils import isFileReadable

_SOURCE_EXTENSIONS = "vhdl", "sv", "v"
_HEADER_EXTENSIONS = "vh", "svh"


def _noFilter(_, paths):
    # type: (Path, Iterable[Path]) -> Iterable[Path]
    """
    Pass-through filter: disregards its first argument and hands <paths>
    back untouched. Takes the same two positional arguments as
    filterGitIgnoredPaths so either one can be called the same way.
    """
    return paths


class SimpleFinder(BaseGenerator):
"""
Implementation of BaseGenerator that searches for paths on a given
Expand Down Expand Up @@ -59,24 +62,14 @@ def _findSources(self):
"""
Iterates over the paths and searches for relevant files by extension.
"""
for path in self._paths:
for dirpath, _, filenames in os.walk(path.name):
for filename in filenames:
full_path = Path(p.join(dirpath, filename))

if not p.isfile(full_path.name):
continue
for search_path in self._paths:
sources = findRtlSourcesByPath(search_path)

try:
# FileType.fromPath will fail if the file's extension is not
# valid (one of '.vhd', '.vhdl', '.v', '.vh', '.sv',
# '.svh')
FileType.fromPath(full_path)
except UnknownTypeExtension:
continue
# Filter out ignored git files if on a git repo
filter_func = filterGitIgnoredPaths if isGitRepo(search_path) else _noFilter

if isFileReadable(full_path):
yield full_path
for source_path in filter_func(search_path, sources):
yield source_path

def _populate(self): # type: (...) -> None
for path in self._findSources():
Expand Down
87 changes: 87 additions & 0 deletions hdl_checker/parser_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@

import json
import logging
import os
import os.path as p
import subprocess as subp
from glob import iglob as _glob
from typing import Any, Dict, Iterable, NamedTuple, Optional, Set, Tuple, Type, Union

Expand All @@ -30,6 +32,7 @@
from hdl_checker.exceptions import UnknownTypeExtension
from hdl_checker.path import Path
from hdl_checker.types import BuildFlags, BuildFlagScope, FileType
from hdl_checker.utils import isFileReadable, toBytes

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -219,3 +222,87 @@ def _expand(config, ref_path):
tuple(global_flags) + tuple(single_flags) + tuple(source.flags),
tuple(global_flags) + tuple(dependencies_flags),
)


def findRtlSourcesByPath(path):
    # type: (Path) -> Iterable[Path]
    """
    Recursively walks the directory tree rooted at <path> and yields, as
    Path objects, the regular files that FileType.fromPath accepts and
    that isFileReadable reports as readable
    """
    for root, _, names in os.walk(path.name):
        for name in names:
            candidate = Path(p.join(root, name))

            # os.walk lists every non-directory entry, only regular files
            # are of interest
            if p.isfile(candidate.name):
                try:
                    # Raises UnknownTypeExtension when the extension is not
                    # one of '.vhd', '.vhdl', '.v', '.vh', '.sv', '.svh'
                    FileType.fromPath(candidate)
                except UnknownTypeExtension:
                    continue

                if isFileReadable(candidate):
                    yield candidate


def isGitRepo(path):
    # type: (Path) -> bool
    """
    Checks if path is a git repository by checking if 'git -C path rev-parse
    --show-toplevel' returns an existing path.

    Returns False when git exits with a non zero code and also when the git
    executable can't be run at all (not installed, not executable, etc), so
    that callers can simply fall back to not filtering anything.
    """
    cmd = ("git", "-C", path.abspath, "rev-parse", "--show-toplevel")

    try:
        output = subp.check_output(cmd, stderr=subp.STDOUT)
    except subp.CalledProcessError:
        # git ran but refused the path
        return False
    except OSError:
        # git itself could not be launched; without it nothing can be a
        # usable git repo from our point of view
        return False

    return p.exists(output.decode().strip())


def _isGitIgnored(line):
    # type: (str) -> bool
    """
    Tells if a line printed by 'git check-ignore --verbose --non-matching'
    reports an ignored path. Lines are formatted as
    '<source>:<linenum>:<pattern><TAB><pathname>', with the three leading
    fields left empty ('::') when no pattern matched the path.
    """
    import re  # Local import, only this helper needs it

    if line.startswith("::"):
        return False

    match = re.match(r"(.*?):(\d+):(.*)", line.split("\t", 1)[0])
    if match is None:
        # Unexpected format: stick to treating anything that's not
        # explicitly reported as non matching as ignored
        return True

    # A pattern starting with '!' is a negated pattern, matching it means
    # the path is NOT ignored
    return not match.group(3).startswith("!")


def _reapProcess(proc):
    # type: (subp.Popen) -> None
    """
    Closes the pipes of <proc> and waits for it to finish so that no zombie
    process or open file descriptor is left behind
    """
    # stdin goes first: EOF on it is what tells git there's nothing else to
    # check
    for stream in (proc.stdin, proc.stdout, proc.stderr):
        try:
            stream.close()
        except (IOError, OSError):
            # Closing flushes pending data, which fails with a broken pipe
            # if the process is already gone
            pass
    proc.wait()


def filterGitIgnoredPaths(path_to_repo, paths):
    # type: (Path, Iterable[Path]) -> Iterable[Path]
    """
    Filters out paths that are ignored; paths outside the repo are kept.
    Uses a git check-ignore --stdin and writes <paths> iteratively to avoid
    piping to the OS all the time.

    Paths matching a negated ('!') pattern are not ignored by git and are
    kept as well. The git process is always closed and waited for, either
    when it dies or when the iteration finishes (or is abandoned).
    """
    _logger.debug("Filtering git ignored files from %s", path_to_repo)

    cmd = (
        "git",
        "-C",
        path_to_repo.abspath,
        "check-ignore",
        "--verbose",
        "--non-matching",
        "--stdin",
    )

    proc = None  # type: Optional[subp.Popen]

    try:
        for path in paths:
            # Besides the first iteration, the process also needs to be
            # recreated whenever it has died
            if proc is None:
                proc = subp.Popen(
                    cmd, stdin=subp.PIPE, stdout=subp.PIPE, stderr=subp.PIPE
                )

            try:
                proc.stdin.write(b"%s\n" % toBytes(str(path.abspath)))
                # Flush so that data makes it to the process
                proc.stdin.flush()
                line = proc.stdout.readline().decode()
            except (IOError, OSError):
                # Broken pipe: the process was already gone by the time we
                # wrote to it
                line = ""

            if line:
                if not _isGitIgnored(line):
                    yield path
                continue

            # No output means stdout hit EOF, i.e., the process died, which
            # happens whenever we write a path that's outside the repo.
            # Because this method aims to filter *out* ignored files and
            # files outside the repo aren't subject to this filter, we'll
            # include them. Relying on EOF instead of poll() avoids racing
            # against the process actually exiting.
            _reapProcess(proc)
            proc = None
            yield path
    finally:
        # Runs when paths are exhausted and also if the caller stops
        # iterating midway
        if proc is not None:
            _reapProcess(proc)
31 changes: 13 additions & 18 deletions hdl_checker/tests/test_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
import os
import os.path as p
import shutil
from tempfile import mkdtemp

from mock import patch
from webtest import TestApp # type: ignore

from hdl_checker.tests import TestCase, getTestTempPath, setupTestSuport
Expand All @@ -35,12 +37,7 @@
from hdl_checker.builders.msim import MSim
from hdl_checker.builders.xvhdl import XVHDL
from hdl_checker.config_generators.simple_finder import SimpleFinder

try: # Python 3.x
import unittest.mock as mock
except ImportError: # Python 2.x
import mock # type: ignore

from hdl_checker.utils import removeDirIfExists

TEST_TEMP_PATH = getTestTempPath(__name__)
TEST_PROJECT = p.abspath(p.join(TEST_TEMP_PATH, "test_project"))
Expand All @@ -63,15 +60,15 @@ def setUpClass(cls):
def setUp(self):
self.app = TestApp(handlers.app)

# Needs to agree with vroom test file
self.dummy_test_path = p.join(TEST_TEMP_PATH, "dummy_test_path")
# self.dummy_test_path = p.join(TEST_TEMP_PATH, "dummy_test_path")
self.dummy_test_path = mkdtemp(prefix=__name__ + "_")

self.assertFalse(
p.exists(self.dummy_test_path),
"Path '%s' shouldn't exist right now" % p.abspath(self.dummy_test_path),
)
# self.assertFalse(
# p.exists(self.dummy_test_path),
# "Path '%s' shouldn't exist right now" % p.abspath(self.dummy_test_path),
# )

os.makedirs(self.dummy_test_path)
# os.makedirs(self.dummy_test_path)

os.mkdir(p.join(self.dummy_test_path, "path_a"))
os.mkdir(p.join(self.dummy_test_path, "path_b"))
Expand Down Expand Up @@ -99,12 +96,10 @@ def setUp(self):

def teardown(self):
# Remove the dummy arrangement of sources
if p.exists(self.dummy_test_path):
_logger.info("Removing %s", repr(self.dummy_test_path))
shutil.rmtree(self.dummy_test_path)
removeDirIfExists(self.dummy_test_path)

@mock.patch(
"hdl_checker.config_generators.simple_finder.isFileReadable",
@patch(
"hdl_checker.parser_utils.isFileReadable",
lambda path: "nonreadable" not in path.name,
)
def test_run_simple_config_gen(self):
Expand Down
97 changes: 95 additions & 2 deletions hdl_checker/tests/test_parser_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@
import logging
import os
import os.path as p
import subprocess as subp
from pprint import pformat
from tempfile import mkdtemp
from tempfile import NamedTemporaryFile, mkdtemp
from typing import Any

import six
Expand All @@ -35,7 +36,13 @@

from hdl_checker.tests import TestCase, getTestTempPath

from hdl_checker.parser_utils import SourceEntry, flattenConfig, getIncludedConfigs
from hdl_checker.parser_utils import (
SourceEntry,
filterGitIgnoredPaths,
flattenConfig,
getIncludedConfigs,
isGitRepo,
)
from hdl_checker.path import Path
from hdl_checker.types import BuildFlagScope, FileType
from hdl_checker.utils import removeIfExists
Expand Down Expand Up @@ -516,3 +523,89 @@ def test_ExpandWhenPatternMatchesNonRtlFiles(self):
)

self.assertCountEqual(flattenConfig(config, root_path=self.base_path), expected)


class TestFilterGitIgnoredPaths(TestCase):
    """
    Exercises isGitRepo and filterGitIgnoredPaths against a throwaway git
    repository holding a committed file, an untracked file and an ignored
    file, plus a file that lives outside of the repository
    """

    def join(self, *args):
        """
        Joins <args> to the path of the temporary repository
        """
        return p.join(self.base_path, *args)

    def _makeTempDir(self):
        """
        Creates a temporary directory that gets removed once the test is
        done, regardless of its outcome
        """
        import shutil  # Local import, only needed for the cleanup

        path = mkdtemp(prefix=__name__ + "_")
        self.addCleanup(shutil.rmtree, path, ignore_errors=True)
        return path

    def _removeOutOfRepoFile(self):
        """
        Removes the file created outside of the repository, if still there
        """
        try:
            os.remove(self.out_of_repo)
        except OSError:
            pass

    def setUp(self):
        # type: (...) -> Any
        self.base_path = self._makeTempDir()

        # Only a unique name is needed here: leaving the context removes the
        # file, which then gets created below just like the other ones
        with NamedTemporaryFile(
            prefix=__name__ + "_out_of_repo", suffix=".txt"
        ) as tmp_file:
            self.out_of_repo = tmp_file.name
        self.addCleanup(self._removeOutOfRepoFile)

        self.paths = (
            self.join("regular_file"),
            self.join("untracked_file"),
            self.join("ignored_file"),
            self.out_of_repo,
        )

        # Create some files
        for path in self.paths:
            self.assertFalse(p.exists(path))
            try:
                os.makedirs(p.dirname(path))
            except OSError:
                # Parent directory already exists
                pass
            open(path, "w").close()
            self.assertTrue(p.exists(path))

        # Close the file explicitly so its content is on disk before git
        # gets to read it
        with open(self.join(".gitignore"), "w") as gitignore:
            gitignore.write("ignored_file")

        for cmd in (
            ["git", "init"],
            ["git", "add", "regular_file", ".gitignore"],
            ["git", "config", "--local", "user.name", "foo"],
            ["git", "config", "--local", "user.email", "bar"],
            ["git", "commit", "-m", "'initial'"],
        ):
            _logger.debug("$ %s", cmd)
            subp.check_call(cmd, cwd=self.base_path, stdout=subp.PIPE)

        _logger.debug(
            "Status:\n%s",
            subp.check_output(("git", "status"), cwd=self.base_path).decode(),
        )

    def test_FilterGitPaths(self):
        # type: (...) -> Any
        """
        Only the ignored file should be filtered out; the committed file,
        the untracked file and the file outside the repo are kept
        """
        self.assertTrue(isGitRepo(Path(self.base_path)))

        result = list(
            filterGitIgnoredPaths(Path(self.base_path), (Path(x) for x in self.paths))
        )

        _logger.info("Result: %s", result)

        self.assertCountEqual(
            result,
            (
                Path(x)
                for x in (
                    self.join("regular_file"),
                    self.join("untracked_file"),
                    self.out_of_repo,
                )
            ),
        )

    def test_FilterGitPathsOutOfGitRepo(self):
        # type: (...) -> Any
        """
        If the base path is not a Git repo, filterGitIgnoredPaths should return
        all paths
        """
        base_path = self._makeTempDir()
        self.assertFalse(isGitRepo(Path(base_path)))

        result = list(
            filterGitIgnoredPaths(Path(base_path), (Path(x) for x in self.paths))
        )

        _logger.info("Result: %s", result)

        self.assertCountEqual(result, (Path(x) for x in self.paths))

0 comments on commit 4590f00

Please sign in to comment.