Skip to content

Commit

Permalink
FileStore: cleanups and fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
rkingsbury committed Apr 9, 2022
1 parent d23ec2c commit 22fdf26
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 47 deletions.
68 changes: 22 additions & 46 deletions src/maggma/stores/file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""

import hashlib
import fnmatch
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, List, Optional, Union
Expand Down Expand Up @@ -36,10 +35,9 @@ class File(BaseModel):

def __init__(self, *args, **kwargs):
"""
Overriding __init__ allows class methods
to function like a default_factory argument to the last_updated and hash
fields. Class methods cannot be used as default_factory methods because
they have not been defined on init.
Overriding __init__ allows class methods to function like a default_factory
argument to various fields. Class methods cannot be used as default_factory
methods because they are not yet defined when the class body is evaluated.
See https://stackoverflow.com/questions/63051253/using-class-or-static-method-as-default-factory-in-dataclasses
(the __post_init__ approach used for dataclasses is not supported in BaseModel at this time)
Expand Down Expand Up @@ -94,30 +92,15 @@ def from_file(cls, path):

class FileStore(JSONStore):
"""
A Store for files on disk. Provides a common access method consistent with other stores.
Each Item is a subdirectory of the Path used to instantiate the Store
that contains one or more files. For example,
<path passed to FileStore.__init__()>
calculation1/
input.in
output.out
logfile.log
calculation2/
input.in
output.out
logfile.log
calculation3/
input.in
output.out
logfile.log
The name of the subdirectory serves as the identifier for
each item, and each item contains a list of File objects which each
corresponds to a single file contained in the subdirectory. So the example
data above would result in 3 unique items with keys 'calculation1',
'calculation2', and 'calculation3'.
A Store for files on disk. Provides a common access method consistent with
other stores. Each Item in the Store represents one file. Files can be organized
into any type of directory structure.
A hash of the full path to each file is used to define a file_id that uniquely
identifies each item.
Any metadata added to the items is written to a .json file in the root directory
of the FileStore.
"""

def __init__(
Expand All @@ -132,21 +115,21 @@ def __init__(
"""
Initializes a FileStore
Args:
path: parent directory containing all files and subdirectories to process
track_files: List of files or fnmatch patterns to be tracked by the FileStore.
path: parent directory containing all files and subdirectories to process
track_files: List of glob patterns defining the files to be tracked by the FileStore.
Only files that match the pattern provided will be included in the
Directory for each directory or monitored for changes. If None
(default), all files are included.
max_depth: The maximum depth to look into subdirectories. 0 = no recursion,
Store or monitored for changes. If None (default), all files are included.
max_depth: The maximum depth to look into subdirectories. 0 = no recursion,
1 = include files 1 directory below the FileStore, etc.
None (default) will scan all files below
the FileStore root directory, regardless of depth.
read_only: If True (default), the .update() and .remove_docs
() methods are disabled, preventing any changes to the files on
read_only: If True (default), the .update() and .remove_docs()
methods are disabled, preventing any changes to the files on
disk. In addition, metadata cannot be written to disk.
json_name: Name of the .json file to which metadata is saved. If read_only
json_name: Name of the .json file to which metadata is saved. If read_only
is False, this file will be created in the root directory of the
FileStore.
kwargs: kwargs passed to JSONStore.__init__()
"""

self.path = Path(path) if isinstance(path, str) else path
Expand All @@ -164,7 +147,7 @@ def __init__(
file_writable=(not self.read_only),
collection_name=self.collection_name,
key=self.key,
**kwargs,
**self.kwargs,
)

@property
Expand All @@ -189,8 +172,7 @@ def read(self) -> List[File]:
if f.name == self.json_name:
continue
# filter based on depth
depth = len(f.relative_to(self.path).parts)-1
print(depth)
depth = len(f.relative_to(self.path).parts) - 1
if self.max_depth is not None and depth <= self.max_depth:
file_list.append(File.from_file(f))

Expand Down Expand Up @@ -225,12 +207,6 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No
"This Store is read-only. To enable file I/O, re-initialize the store with read_only=False."
)

# warnings.warn(
# "FileStore does not yet support file I/O. Therefore, adding a document "
# "to the store only affects the underlying MemoryStore and not any "
# "files on disk.",
# UserWarning,
# )
super().update(docs, key)

def remove_docs(self, criteria: Dict):
Expand Down
3 changes: 2 additions & 1 deletion src/maggma/stores/mongolike.py
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,8 @@ def _find_free_port(address="0.0.0.0"):
return s.getsockname()[1] # Return the port number assigned.


# Included for now to make it possible to serialize datetime objects. Probably maggma already has a solution to this somewhere.
# Included for now to make it possible to serialize datetime objects. Probably
# maggma already has a solution to this somewhere.
def json_serial(obj):
"""JSON serializer for objects not serializable by default json code"""

Expand Down

0 comments on commit 22fdf26

Please sign in to comment.