From 75c7920e9f6ec385caca92371a561578958ebf0e Mon Sep 17 00:00:00 2001 From: Yuman Hordijk <42876712+YHordijk@users.noreply.github.com> Date: Thu, 14 Mar 2024 23:18:57 +0100 Subject: [PATCH] Revert "Added support for matching files" --- src/tcutility/pathfunc.py | 74 ++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/src/tcutility/pathfunc.py b/src/tcutility/pathfunc.py index f4f51d8d..c674e9d9 100644 --- a/src/tcutility/pathfunc.py +++ b/src/tcutility/pathfunc.py @@ -1,13 +1,14 @@ import os import re +from typing import Dict, List + from tcutility import results -from typing import List, Dict j = os.path.join def split_all(path: str) -> List[str]: - ''' + """ Split a path into all of its parts. Args: @@ -21,7 +22,7 @@ def split_all(path: str) -> List[str]: >>> split_all('a/b/c/d') ['a', 'b', 'c', 'd'] - ''' + """ parts = [] while True: a, b = os.path.split(path) @@ -33,7 +34,7 @@ def split_all(path: str) -> List[str]: def get_subdirectories(root: str, include_intermediates: bool = False) -> List[str]: - ''' + """ Get all sub-directories of a root directory. Args: @@ -54,21 +55,21 @@ def get_subdirectories(root: str, include_intermediates: bool = False) -> List[s | |- subsubdir_c |- subdir_b |- subdir_c - + Then we get the following outputs. .. tabs:: - + .. group-tab:: Including intermediates .. code-block:: python >>> get_subdirectories('root', include_intermediates=True) - ['root', - 'root/subdir_a', + ['root', + 'root/subdir_a', 'root/subdir_a/subsubdir_b', - 'root/subdir_a/subsubdir_c', - 'root/subdir_b', + 'root/subdir_a/subsubdir_c', + 'root/subdir_b', 'root/subdir_c'] .. group-tab:: Excluding intermediates @@ -76,11 +77,11 @@ def get_subdirectories(root: str, include_intermediates: bool = False) -> List[s .. code-block:: python >>> get_subdirectories('root', include_intermediates=False) - ['root/subdir_a/subsubdir_b', - 'root/subdir_a/subsubdir_c', - 'root/subdir_b', + ['root/subdir_a/subsubdir_b', + 'root/subdir_a/subsubdir_c', + 'root/subdir_b', 'root/subdir_c'] - ''' + """ dirs = [root] subdirs = set() @@ -100,8 +101,8 @@ def get_subdirectories(root: str, include_intermediates: bool = False) -> List[s return subdirs -def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]: - ''' +def match(root: str, pattern: str) -> Dict[str, dict]: + """ Find and return information about subdirectories of a root that match a given pattern. Args: @@ -110,17 +111,16 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]: It should look similar to a format string, without the ``f`` in front of the string. Inside curly braces you can put a variable name, which you can later extract from the results. Anything inside curly braces will be matched to word characters (``[a-zA-Z0-9_-]``) including dashes and underscores. - match_files: whether to not only match subdirectories but also files inside subdirectories. Defaults to True. Returns: | A |Result| object containing the matched directories as keys and information (also |Result| object) about those matches as the values. Each information dictionary contains the variables given in the pattern. | E.g. using a pattern such as ``{a}/{b}/{c}`` will populate the ``info.a``, ``info.b`` and ``info.c`` keys of the info |Result| object. - + Example: Given a file-structure as follows: - .. code-block:: + .. code-block:: root |- NH3-BH3 @@ -141,13 +141,13 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]: We can run the following scripts to match the subdirectories. .. code-block:: python - + from tcutility import log - # get the matches, we want to extract the system name (NH3-BH3 or SN2) + # get the matches, we want to extract the system name (NH3-BH3 or SN2) # and the functional and basis-set # we don't want the subdirectories matches = match('root', '{system}/{functional}_{basis_set}') - + # print the matches as a table rows = [] for d, info in matches.items(): @@ -157,35 +157,31 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]: which prints - .. code-block:: + .. code-block:: [2024/01/17 14:39:08] Directory System Functional Basis-Set [2024/01/17 14:39:08] ─────────────────────────────────────────────────────────── - [2024/01/17 14:39:08] root/SN2/M06-2X_TZ2P SN2 M06-2X TZ2P - [2024/01/17 14:39:08] root/NH3-BH3/BLYP_TZ2P NH3-BH3 BLYP TZ2P - [2024/01/17 14:39:08] root/NH3-BH3/M06-2X_TZ2P NH3-BH3 M06-2X TZ2P - [2024/01/17 14:39:08] root/SN2/BLYP_TZ2P SN2 BLYP TZ2P - [2024/01/17 14:39:08] root/NH3-BH3/BLYP_QZ4P NH3-BH3 BLYP QZ4P - ''' + [2024/01/17 14:39:08] root/SN2/M06-2X_TZ2P SN2 M06-2X TZ2P + [2024/01/17 14:39:08] root/NH3-BH3/BLYP_TZ2P NH3-BH3 BLYP TZ2P + [2024/01/17 14:39:08] root/NH3-BH3/M06-2X_TZ2P NH3-BH3 M06-2X TZ2P + [2024/01/17 14:39:08] root/SN2/BLYP_TZ2P SN2 BLYP TZ2P + [2024/01/17 14:39:08] root/NH3-BH3/BLYP_QZ4P NH3-BH3 BLYP QZ4P + + + """ # get the number and names of substitutions in the given pattern - substitutions = re.findall(r'{(\w+[+*?]?)}', pattern) + substitutions = re.findall(r"{(\w+[+*?]?)}", pattern) # the pattern should resolve to words and may contain - and _ # replace them here for sub in substitutions: - quantifier = sub[-1] if sub[-1] in '+*?' else '+' - pattern = pattern.replace('{' + sub + '}', f'([a-zA-Z0-9._-]{quantifier})') + quantifier = sub[-1] if sub[-1] in "+*?" else "+" + pattern = pattern.replace("{" + sub + "}", f"([a-zA-Z0-9_-]{quantifier})") ret = results.Result() # root dir can be any level deep. We should count how many directories are in root root_length = len(split_all(root)) # get all subdirectories first, we can loop through them later subdirs = get_subdirectories(root, include_intermediates=True) - if match_files: - _subdirs = [] - for subdir in subdirs: - _subdirs.extend([j(subdir, file) for file in os.listdir(subdir)]) - subdirs = _subdirs - # remove the root from the subdirectories. We cannot use str.removeprefix because it was added in python 3.9 subdirs = [j(*split_all(subdir)[root_length:]) for subdir in subdirs if len(split_all(subdir)[root_length:]) > 0] for subdir in subdirs: @@ -196,6 +192,6 @@ def match(root: str, pattern: str, match_files: bool = True) -> Dict[str, dict]: p = j(root, subdir) # get the group data and add it to the return dictionary. We skip the first group because it is the full directory path - ret[p] = results.Result(directory=p, **{substitutions[i]: match.group(i+1) for i in range(len(substitutions))}) + ret[p] = results.Result(directory=p, **{substitutions[i]: match.group(i + 1) for i in range(len(substitutions))}) return ret