Skip to content

Commit

Permalink
Merge pull request #170 from TheoChem-VU/revert-158-157-improve-pathf…
Browse files Browse the repository at this point in the history
…uncmatch-to-support-matching-of-files

Revert "Added support for matching files"
  • Loading branch information
YHordijk authored Mar 14, 2024
2 parents 33b973f + 75c7920 commit 61876ac
Showing 1 changed file with 35 additions and 39 deletions.
74 changes: 35 additions & 39 deletions src/tcutility/pathfunc.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import os
import re
from typing import Dict, List

from tcutility import results
from typing import List, Dict

j = os.path.join


def split_all(path: str) -> List[str]:
'''
"""
Split a path into all of its parts.
Args:
Expand All @@ -21,7 +22,7 @@ def split_all(path: str) -> List[str]:
>>> split_all('a/b/c/d')
['a', 'b', 'c', 'd']
'''
"""
parts = []
while True:
a, b = os.path.split(path)
Expand All @@ -33,7 +34,7 @@ def split_all(path: str) -> List[str]:


def get_subdirectories(root: str, include_intermediates: bool = False) -> List[str]:
'''
"""
Get all sub-directories of a root directory.
Args:
Expand All @@ -54,33 +55,33 @@ def get_subdirectories(root: str, include_intermediates: bool = False) -> List[s
| |- subsubdir_c
|- subdir_b
|- subdir_c
Then we get the following outputs.
.. tabs::
.. group-tab:: Including intermediates
.. code-block:: python
>>> get_subdirectories('root', include_intermediates=True)
['root',
'root/subdir_a',
['root',
'root/subdir_a',
'root/subdir_a/subsubdir_b',
'root/subdir_a/subsubdir_c',
'root/subdir_b',
'root/subdir_a/subsubdir_c',
'root/subdir_b',
'root/subdir_c']
.. group-tab:: Excluding intermediates
.. code-block:: python
>>> get_subdirectories('root', include_intermediates=False)
['root/subdir_a/subsubdir_b',
'root/subdir_a/subsubdir_c',
'root/subdir_b',
['root/subdir_a/subsubdir_b',
'root/subdir_a/subsubdir_c',
'root/subdir_b',
'root/subdir_c']
'''
"""
dirs = [root]
subdirs = set()

Expand All @@ -100,8 +101,8 @@ def get_subdirectories(root: str, include_intermediates: bool = False) -> List[s
return subdirs


def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]:
'''
def match(root: str, pattern: str) -> Dict[str, dict]:
"""
Find and return information about subdirectories of a root that match a given pattern.
Args:
Expand All @@ -110,17 +111,16 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]:
It should look similar to a format string, without the ``f`` in front of the string.
Inside curly braces you can put a variable name, which you can later extract from the results.
Anything inside curly braces will be matched to word characters (``[a-zA-Z0-9_-]``) including dashes and underscores.
match_files: whether to not only match subdirectories but also files inside subdirectories. Defaults to True.
Returns:
| A |Result| object containing the matched directories as keys and information (also a |Result| object) about those matches as the values.
Each information dictionary contains the variables given in the pattern.
| E.g. using a pattern such as ``{a}/{b}/{c}`` will populate the ``info.a``, ``info.b`` and ``info.c`` keys of the info |Result| object.
Example:
Given a file-structure as follows:
.. code-block::
.. code-block::
root
|- NH3-BH3
Expand All @@ -141,13 +141,13 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]:
We can run the following scripts to match the subdirectories.
.. code-block:: python
from tcutility import log
# get the matches, we want to extract the system name (NH3-BH3 or SN2)
# get the matches, we want to extract the system name (NH3-BH3 or SN2)
# and the functional and basis-set
# we don't want the subdirectories
matches = match('root', '{system}/{functional}_{basis_set}')
# print the matches as a table
rows = []
for d, info in matches.items():
Expand All @@ -157,35 +157,31 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]:
which prints
.. code-block::
.. code-block::
[2024/01/17 14:39:08] Directory System Functional Basis-Set
[2024/01/17 14:39:08] ───────────────────────────────────────────────────────────
[2024/01/17 14:39:08] root/SN2/M06-2X_TZ2P SN2 M06-2X TZ2P
[2024/01/17 14:39:08] root/NH3-BH3/BLYP_TZ2P NH3-BH3 BLYP TZ2P
[2024/01/17 14:39:08] root/NH3-BH3/M06-2X_TZ2P NH3-BH3 M06-2X TZ2P
[2024/01/17 14:39:08] root/SN2/BLYP_TZ2P SN2 BLYP TZ2P
[2024/01/17 14:39:08] root/NH3-BH3/BLYP_QZ4P NH3-BH3 BLYP QZ4P
'''
[2024/01/17 14:39:08] root/SN2/M06-2X_TZ2P SN2 M06-2X TZ2P
[2024/01/17 14:39:08] root/NH3-BH3/BLYP_TZ2P NH3-BH3 BLYP TZ2P
[2024/01/17 14:39:08] root/NH3-BH3/M06-2X_TZ2P NH3-BH3 M06-2X TZ2P
[2024/01/17 14:39:08] root/SN2/BLYP_TZ2P SN2 BLYP TZ2P
[2024/01/17 14:39:08] root/NH3-BH3/BLYP_QZ4P NH3-BH3 BLYP QZ4P
"""
# get the number and names of substitutions in the given pattern
substitutions = re.findall(r'{(\w+[+*?]?)}', pattern)
substitutions = re.findall(r"{(\w+[+*?]?)}", pattern)
# the pattern should resolve to words and may contain - and _
# replace them here
for sub in substitutions:
quantifier = sub[-1] if sub[-1] in '+*?' else '+'
pattern = pattern.replace('{' + sub + '}', f'([a-zA-Z0-9._-]{quantifier})')
quantifier = sub[-1] if sub[-1] in "+*?" else "+"
pattern = pattern.replace("{" + sub + "}", f"([a-zA-Z0-9_-]{quantifier})")

ret = results.Result()
# root dir can be any level deep. We should count how many directories are in root
root_length = len(split_all(root))
# get all subdirectories first, we can loop through them later
subdirs = get_subdirectories(root, include_intermediates=True)
if match_files:
_subdirs = []
for subdir in subdirs:
_subdirs.extend([j(subdir, file) for file in os.listdir(subdir)])
subdirs = _subdirs

# remove the root from the subdirectories. We cannot use str.removeprefix because it was only added in Python 3.9
subdirs = [j(*split_all(subdir)[root_length:]) for subdir in subdirs if len(split_all(subdir)[root_length:]) > 0]
for subdir in subdirs:
Expand All @@ -196,6 +192,6 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]:

p = j(root, subdir)
# get the group data and add it to the return dictionary. We skip the first group because it is the full directory path
ret[p] = results.Result(directory=p, **{substitutions[i]: match.group(i+1) for i in range(len(substitutions))})
ret[p] = results.Result(directory=p, **{substitutions[i]: match.group(i + 1) for i in range(len(substitutions))})

return ret

0 comments on commit 61876ac

Please sign in to comment.