Skip to content

Commit

Permalink
Filtering: Include everything by default
Browse files Browse the repository at this point in the history
Co-authored-by: Thorsten Beier <[email protected]>
  • Loading branch information
martinRenou and DerThorsten committed Jun 17, 2024
1 parent 12a64fc commit bd5a4a8
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 247 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@ jobs:
with:
fetch-depth: 0

- name: Install mamba and dependencies
uses: mamba-org/provision-with-micromamba@main
- uses: mamba-org/setup-micromamba@v1
with:
environment-file: ci_env.yml
environment-name: ci-env
micromamba-version: '1.4.1'
cache-environment: true


- name: Lint check
shell: bash -l {0}
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,10 @@ jobs:
with:
fetch-depth: 0

- name: Install mamba and dependencies
uses: mamba-org/provision-with-micromamba@main
- uses: mamba-org/setup-micromamba@v1
with:
environment-file: ci_env.yml
environment-name: ci-env
micromamba-version: '1.4.1'
cache-environment: true

- name: Install empack
shell: bash -l {0}
Expand Down
202 changes: 29 additions & 173 deletions config/empack_config.yaml
Original file line number Diff line number Diff line change
@@ -1,183 +1,39 @@
packages:
setuptools:
include_patterns:
- pattern: '**/pkg_resources/*.so'
- pattern: '**/pkg_resources/**/*.so'
- pattern: '**/pkg_resources/*.py'
- pattern: '**/pkg_resources/**/*.py'
- pattern: '**/*.dist-info/METADATA'
bokeh:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '**/bokeh/**/*.html'
- pattern: '**/bokeh/**/*.js'
- pattern: '**/bokeh/*.json'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
plotly:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '**/plotly/package_data/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
folium:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: '**/folium/templates/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
branca:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: '**/branca/templates/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
python-dateutil:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: '*dateutil-zoneinfo.tar.gz'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
# For matplotlib <3.5.2, for which matplotlib-base was not published
matplotlib:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: '**/matplotlib/mpl-data/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
matplotlib-base:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: '**/matplotlib/mpl-data/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
scikit-learn:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: '**/sklearn/datasets/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
scikit-image:
include_patterns:
- pattern: '*.txt'
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: '**/skimage/data/**'
- pattern: '**/skimage/io/_plugins/*.ini'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
xarray:
include_patterns:
- pattern: '**/*.py'
- pattern: '**/static/css/*.css'
- pattern: '**/static/html/*.html'
- pattern: '**/*.dist-info/METADATA'
- pattern: '**/_distutils_hack/**'
- pattern: '**/distutils-precedence.pth'
- pattern: '**/setuptools/**'
- pattern: '**/pkg_resources/_vendor/**'
zlib:
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/so'
widgetsnbextension:
exclude_patterns:
- pattern: '**'
python:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: 'share/zoneinfo/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
- pattern: '**/tests/*.py'
- pattern: '**/tests/*.so'
python-tzdata:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: 'share/zoneinfo/**'
- pattern: '**/tzdata/zoneinfo/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
- pattern: '**/tests/*.py'
- pattern: '**/tests/*.so'
- pattern: '**/tzdata/zoneinfo/**/*.pyc'
zlib:
exclude_patterns:
- pattern: '**/so'
itables:
include_patterns:
- pattern: '*.py'
- pattern: 'html/**'
- pattern: 'external/**'
- pattern: '**/*.dist-info/METADATA'
urllib3:
include_patterns:
- pattern: '*.py'
- pattern: '**/contrib/emscripten/emscripten_fetch_worker.js'
certifi:
include_patterns:
- pattern: '*.py'
- pattern: '*.pem'
pyvis:
include_patterns:
- pattern: '*.py'
- pattern: '*.html'
pint:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.txt'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
default:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
- pattern: '*.json'
- pattern: 'share/zoneinfo/**'
- pattern: '**/*.dist-info/METADATA'
exclude_patterns:
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
- pattern: '**/tests/*.py'
- pattern: '**/tests/*.so'
- pattern: 'share/jupyter/nbextensions/**/*'
- pattern: 'share/jupyter/labextensions/**/*'
- pattern: 'etc/jupyter/nbconfig/**/*'
- pattern: 'include/**'
- pattern: '**/include/**'
- pattern: 'bin/**'
- pattern: '**/bin/**'
- pattern: 'tests/**'
- pattern: '**/tests/**'
- pattern: '**/*.ini'
- pattern: '**/*.exe'
- pattern: '**/*.a'
- pattern: '**/*.c'
- pattern: '**/*.pxd'
- pattern: '**/*.pyi'
- pattern: '**/*.pyx'
- pattern: '**/*.pyc'
- pattern: '**/*.typed'
- pattern: '**/*.egg-info'
- pattern: '**/site-packages/pip/_vendor/**'
- pattern: '**/idlelib/**'
- pattern: '**/ensurepip/**'
- pattern: 'Scripts/*'
# Exclude Jupyter front-end and server extensions
- pattern: 'share/jupyter/nbextensions/**'
- pattern: 'share/jupyter/labextensions/**'
- pattern: 'etc/jupyter/nbconfig/**'
32 changes: 15 additions & 17 deletions empack/file_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def match(self, path):


class FileFilter:
def __init__(self, include_patterns=None, exclude_patterns=None):
def __init__(self, exclude_patterns=None):
def patter_from_dict(**d):
if "pattern" in d:
return UnixPattern(**d)
Expand All @@ -30,33 +30,31 @@ def patter_from_dict(**d):
else:
raise ValueError("pattern or regex must be provided")

if include_patterns is None:
include_patterns = []
if exclude_patterns is None:
exclude_patterns = []
self.include_patterns = [patter_from_dict(**p) for p in include_patterns]
self.exclude_patterns = [patter_from_dict(**p) for p in exclude_patterns]

def match(self, path):
include = False
for ip in self.include_patterns:
if ip.match(path):
include = True
if include:
for ep in self.exclude_patterns:
if ep.match(path):
return False
return include
return all(not ep.match(path) for ep in self.exclude_patterns)


class PkgFileFilter:
def __init__(self, packages, default=None):
def __init__(self, packages=None, default=None):
self.packages = {}

if packages is None:
packages = {}

default_exclude_patterns = []
if default is not None and "exclude_patterns" in default:
default_exclude_patterns = default["exclude_patterns"]

for k, v in packages.items():
if isinstance(v, dict):
self.packages[k] = FileFilter(**v)
elif isinstance(v, list):
self.packages[k] = [FileFilter(**x) for x in v]
exclude_patterns = default_exclude_patterns
if "exclude_patterns" in v:
exclude_patterns = exclude_patterns + v["exclude_patterns"]
self.packages[k] = FileFilter(exclude_patterns=exclude_patterns)
else:
err = f"invalid value for package {k}: {v}"
raise ValueError(err)
Expand Down
8 changes: 6 additions & 2 deletions empack/filter_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,18 @@ def filter_pkg(env_prefix, pkg_meta, target_dir, matchers):
for _i, matcher in enumerate(matchers):
include = matcher.match(path=file)
if include:
included.append(file)
path = env_path / file
if path.is_symlink() and not path.exists():
continue

dest_fpath = os.path.join(target_dir, file)
os.makedirs(os.path.dirname(dest_fpath), exist_ok=True)
shutil.copy(os.path.join(env_prefix, file), dest_fpath)
try:
shutil.copy(os.path.join(env_prefix, file), dest_fpath)
included.append(file)
except FileNotFoundError:
# This may happen when following a symlink to a filtered-out file
pass
break
path = write_minimal_conda_meta(pkg_meta=pkg_meta, env_prefix=target_dir)
included.append(path.relative_to(target_dir))
Expand Down
24 changes: 1 addition & 23 deletions tests/empack_test_config.yaml
Original file line number Diff line number Diff line change
@@ -1,27 +1,5 @@
packages:
python-dateutil:
include_patterns:
- regex: '^(?!.*\/tests\/).*(.*.\.py$)|(.*.\.so$)|(.*dateutil-zoneinfo\.tar\.gz$)'
matplotlib:
include_patterns:
- regex: '^(?!.*\/tests\/).*(.*.\.py$)|(.*.\.so$)'
- pattern: "*matplotlibrc"

scikit-learn:
include_patterns:
- regex: '^(?!.*\/tests\/).*(.*.\.py$)|(.*.\.so$)'
- pattern: "**/sklearn/datasets/**"

scikit-image:
include_patterns:
- regex: '^(?!.*\/tests\/).*(.*.\.py$)|(.*.\.so$)'
- pattern: "**/skimage/data/**"


default:
include_patterns:
- pattern: '*.so'
- pattern: '*.py'
exclude_patterns:
- pattern: '**/*.pyc'
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
16 changes: 7 additions & 9 deletions tests/empack_test_extra_config.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
packages:
scikit-image:
exclude_patterns:
- pattern: '**'
- pattern: '**/bar.py'
- pattern: '**/bar.so'


scipy:
- include_patterns:
- pattern: '**/core'
- include_patterns:
- pattern: '**/sparse'
- include_patterns:
- pattern: '**'
default:
exclude_patterns:
- pattern: '**/*.pyc'
- pattern: '**/tests/**/*.py'
- pattern: '**/tests/**/*.so'
Loading

0 comments on commit bd5a4a8

Please sign in to comment.