Skip to content

Commit

Permalink
--include option
Browse files Browse the repository at this point in the history
  • Loading branch information
forsyth2 committed May 16, 2023
1 parent e75efff commit 94b77b5
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 35 deletions.
8 changes: 7 additions & 1 deletion zstash/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ def setup_create() -> Tuple[str, argparse.Namespace]:
optional: argparse._ArgumentGroup = parser.add_argument_group(
"optional named arguments"
)
optional.add_argument(
"--include",
type=str,
help="comma separated list of file patterns to include",
default="*",
)
optional.add_argument(
"--exclude", type=str, help="comma separated list of file patterns to exclude"
)
Expand Down Expand Up @@ -237,7 +243,7 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]:
cur.execute("insert into config values (?,?)", (attr, value))
con.commit()

files: List[str] = get_files_to_archive(cache, args.exclude)
files: List[str] = get_files_to_archive(cache, args.include, args.exclude)

failures: List[str]
if args.follow_symlinks:
Expand Down
8 changes: 7 additions & 1 deletion zstash/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ def setup_update() -> Tuple[argparse.Namespace, str]:
"and NERSC HPSS endpoints, e.g. globus://nersc/~/my_archive."
),
)
optional.add_argument(
"--include",
type=str,
help="comma separated list of file patterns to include",
default="*",
)
optional.add_argument(
"--exclude", type=str, help="comma separated list of file patterns to exclude"
)
Expand Down Expand Up @@ -173,7 +179,7 @@ def update_database( # noqa: C901
logger.debug("Max size : {}".format(maxsize))
logger.debug("Keep local tar files : {}".format(keep))

files: List[str] = get_files_to_archive(cache, args.exclude)
files: List[str] = get_files_to_archive(cache, args.include, args.exclude)

# Eliminate files that are already archived and up to date
newfiles: List[str] = []
Expand Down
70 changes: 37 additions & 33 deletions zstash/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,6 @@
from .settings import TupleTarsRow, config, logger


def exclude_files(exclude: str, files: List[str]) -> List[str]:
    """Return the entries of ``files`` that match none of the exclude patterns.

    Args:
        exclude: Comma separated list of ``fnmatch``-style file patterns.
            Empty entries (for example from a trailing comma or an empty
            string) are ignored.
        files: File names to filter.

    Returns:
        A new list holding the entries of ``files`` that match no pattern,
        in their original order. ``files`` itself is not modified.
    """
    # Construct list of patterns to exclude, based on
    # https://codereview.stackexchange.com/questions/33624/
    # filtering-a-long-list-of-files-through-a-set-of-ignore-patterns-using-iterators
    #
    # If an exclude pattern ends with a trailing '/', the user intends to
    # exclude the entire subdirectory content, therefore replace '/' with '/*'.
    # Empty patterns are dropped so that indexing into them cannot fail.
    exclude_patterns: List[str] = [
        pattern + "*" if pattern.endswith("/") else pattern
        for pattern in exclude.split(",")
        if pattern
    ]

    # Keep only the files that match none of the patterns. Filtering in a
    # single pass avoids building an intermediate list of excluded files and
    # testing membership in it for every file.
    return [
        file_name
        for file_name in files
        if not any(fnmatch(file_name, pattern) for pattern in exclude_patterns)
    ]


def run_command(command: str, error_str: str):
p1: subprocess.Popen = subprocess.Popen(
shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE
Expand All @@ -55,21 +30,54 @@ def run_command(command: str, error_str: str):
raise RuntimeError(error_str)


def get_files_to_archive(cache: str, exclude: str) -> List[str]:
def get_files_to_archive(cache: str, include: str, exclude: str) -> List[str]:
# List of files
logger.info("Gathering list of files to archive")
# Tuples of the form (path, filename)
file_tuples: List[Tuple[str, str]] = []
# Walk the current directory

# Construct list of files to include, based on
# https://codereview.stackexchange.com/questions/33624/
# filtering-a-long-list-of-files-through-a-set-of-ignore-patterns-using-iterators
include_patterns: List[str] = include.split(",")
exclude_patterns: List[str]
if exclude is None:
exclude_patterns = []
else:
exclude_patterns = exclude.split(",")

# If include pattern ends with a trailing '/', the user intends to include
# the entire subdirectory content, therefore replace '/' with '/*'
for i in range(len(include_patterns)):
if include_patterns[i][-1] == "/":
include_patterns[i] += "*"
# Similar for exclude
for i in range(len(exclude_patterns)):
if exclude_patterns[i][-1] == "/":
exclude_patterns[i] += "*"

for root, dirnames, filenames in os.walk("."):
if not dirnames and not filenames:
# There are no subdirectories nor are there files.
# This directory is empty.
file_tuples.append((root, ""))
for filename in filenames:
# Find everything to include
for include_pattern in include_patterns:
# Loop over files
# filenames is a list, so if it is empty, no looping will occur.
file_tuples.append((root, filename))
for filename in filenames:
if root == os.path.join(".", cache):
match_name = filename
else:
match_name = os.path.normpath(os.path.join(root, filename))
# Should this file be included?
if fnmatch(match_name, include_pattern):
# This file should be included by default,
# but has it been specified to be excluded instead?
# (--exclude overrides --include)
for exclude_pattern in exclude_patterns:
if not fnmatch(match_name, exclude_pattern):
file_tuples.append((root, filename))

# Sort first on directories (x[0])
# Further sort on filenames (x[1])
Expand All @@ -82,10 +90,6 @@ def get_files_to_archive(cache: str, exclude: str) -> List[str]:
if x[0] != os.path.join(".", cache)
]

# Eliminate files based on exclude pattern
if exclude is not None:
files = exclude_files(exclude, files)

return files


Expand Down

0 comments on commit 94b77b5

Please sign in to comment.