From 22fdf26d3cb95f42874371f8e630e23849e4274b Mon Sep 17 00:00:00 2001 From: Ryan Kingsbury Date: Fri, 8 Apr 2022 22:56:00 -0700 Subject: [PATCH] FileStore: cleanups and fixes --- src/maggma/stores/file_store.py | 68 +++++++++++---------------------- src/maggma/stores/mongolike.py | 3 +- 2 files changed, 24 insertions(+), 47 deletions(-) diff --git a/src/maggma/stores/file_store.py b/src/maggma/stores/file_store.py index 6b30b2f55..8437961bb 100644 --- a/src/maggma/stores/file_store.py +++ b/src/maggma/stores/file_store.py @@ -5,7 +5,6 @@ """ import hashlib -import fnmatch from pathlib import Path from datetime import datetime, timezone from typing import Dict, List, Optional, Union @@ -36,10 +35,9 @@ class File(BaseModel): def __init__(self, *args, **kwargs): """ - Overriding __init__ allows class methods - to function like a default_factory argument to the last_updated and hash - fields. Class methods cannot be used as default_factory methods because - they have not been defined on init. + Overriding __init__ allows class methods to function like a default_factory + argument to various fields. Class methods cannot be used as default_factory + methods because they have not been defined on init. See https://stackoverflow.com/questions/63051253/using-class-or-static-method-as-default-factory-in-dataclasses, except post_init is not supported in BaseModel at this time @@ -94,30 +92,15 @@ def from_file(cls, path): class FileStore(JSONStore): """ - A Store for files on disk. Provides a common access method consistent with other stores. - - Each Item is a subdirectory of the Path used to instantiate the Store - that contains one or more files. For example, - - - calculation1/ - input.in - output.out - logfile.log - calculation2/ - input.in - output.out - logfile.log - calculation3/ - input.in - output.out - logfile.log - - The name of the subdirectory serves as the identifier for - each item, and each item contains a list of File objects which each - corresponds to a single file contained in the subdirectory. So the example - data above would result in 3 unique items with keys 'calculation1', - 'calculation2', and 'calculation3'. + A Store for files on disk. Provides a common access method consistent with + other stores. Each Item in the Store represents one file. Files can be organized + into any type of directory structure. + + A hash of the full path to each file is used to define a file_id that uniquely + identifies each item. + + Any metadata added to the items is written to a .json file in the root directory + of the FileStore. """ def __init__( @@ -132,21 +115,21 @@ def __init__( """ Initializes a FileStore Args: - path: parent directory containing all files and subdirectories to process - track_files: List of files or fnmatch patterns to be tracked by the FileStore. + path: parent directory containing all files and subdirectories to process + track_files: List of glob patterns defining the files to be tracked by the FileStore. Only files that match the pattern provided will be included in the - Directory for each directory or monitored for changes. If None - (default), all files are included. - max_depth: The maximum depth to look into subdirectories. 0 = no recursion, + Store or monitored for changes. If None (default), all files are included. + max_depth: The maximum depth to look into subdirectories. 0 = no recursion, 1 = include files 1 directory below the FileStore, etc. None (default) will scan all files below the FileStore root directory, regardless of depth. - read_only: If True (default), the .update() and .remove_docs - () methods are disabled, preventing any changes to the files on + read_only: If True (default), the .update() and .remove_docs() + methods are disabled, preventing any changes to the files on disk. In addition, metadata cannot be written to disk. - json_name: Name of the .json file to which metadata is saved. If read_only + json_name: Name of the .json file to which metadata is saved. If read_only is False, this file will be created in the root directory of the FileStore. + kwargs: kwargs passed to JSONStore.__init__() """ self.path = Path(path) if isinstance(path, str) else path @@ -164,7 +147,7 @@ def __init__( file_writable=(not self.read_only), collection_name=self.collection_name, key=self.key, - **kwargs, + **self.kwargs, ) @property @@ -189,8 +172,7 @@ def read(self) -> List[File]: if f.name == self.json_name: continue # filter based on depth - depth = len(f.relative_to(self.path).parts)-1 - print(depth) + depth = len(f.relative_to(self.path).parts) - 1 if self.max_depth is not None and depth <= self.max_depth: file_list.append(File.from_file(f)) @@ -225,12 +207,6 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No "This Store is read-only. To enable file I/O, re-initialize the store with read_only=False." ) - # warnings.warn( - # "FileStore does not yet support file I/O. Therefore, adding a document " - # "to the store only affects the underlying MemoryStore and not any " - # "files on disk.", - # UserWarning, - # ) super().update(docs, key) def remove_docs(self, criteria: Dict): diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py index 8d4f37af8..0e9eca663 100644 --- a/src/maggma/stores/mongolike.py +++ b/src/maggma/stores/mongolike.py @@ -895,7 +895,8 @@ def _find_free_port(address="0.0.0.0"): return s.getsockname()[1] # Return the port number assigned. -# Included for now to make it possible to serialize datetime objects. Probably maggma already has a solution to this somewhere. +# Included for now to make it possible to serialize datetime objects. Probably +# maggma already has a solution to this somewhere. def json_serial(obj): """JSON serializer for objects not serializable by default json code"""