Skip to content

Commit

Permalink
FileStore: enable max_depth and cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
rkingsbury committed Apr 9, 2022
1 parent 6499aeb commit d23ec2c
Showing 1 changed file with 19 additions and 22 deletions.
41 changes: 19 additions & 22 deletions src/maggma/stores/file_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,17 @@
using typical maggma access patterns.
"""

import warnings
import os
import json
import hashlib
import fnmatch
from pathlib import Path, PosixPath
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, List, Optional, Union

from pydantic import BaseModel, Field

from monty.io import zopen
from maggma.core import Sort, StoreError
from maggma.stores.mongolike import MemoryStore, JSONStore, json_serial
from maggma.core import StoreError
from maggma.stores.mongolike import JSONStore, json_serial


class File(BaseModel):
Expand Down Expand Up @@ -143,7 +140,7 @@ def __init__(
max_depth: The maximum depth to look into subdirectories. 0 = no recursion,
1 = include files 1 directory below the FileStore, etc.
None (default) will scan all files below
the FileStore root directory, regardless of depth.
the FileStore root directory, regardless of depth.
read_only: If True (default), the .update() and .remove_docs()
methods are disabled, preventing any changes to the files on
disk. In addition, metadata cannot be written to disk.
Expand All @@ -156,10 +153,11 @@ def __init__(
self.json_name = json_name
self.paths = [str(self.path / self.json_name)]
self.track_files = track_files if track_files else ["*"]
self.kwargs = kwargs
self.collection_name = "file_store"
self.key = "file_id"
self.read_only = read_only
self.max_depth = max_depth
self.kwargs = kwargs

super().__init__(
paths=self.paths,
Expand All @@ -182,13 +180,19 @@ def read(self) -> List[File]:
the Store with File objects.
"""
file_list = []
# generate a list of subdirectories
for f in [f for f in self.path.rglob("*")]:
if f.is_file():
if f.name == self.json_name:
continue
elif any([fnmatch.fnmatch(f.name, fn) for fn in self.track_files]):
file_list.append(File.from_file(f))
# Generate a list of files in subdirectories. A path is remembered once it
# has been recorded, so a file whose name matches several patterns in
# self.track_files is only added to file_list one time.
seen_paths = set()
for pattern in self.track_files:
    # list every file (at any depth below self.path) that matches the pattern
    for f in self.path.rglob(pattern):
        if not f.is_file() or f in seen_paths:
            continue
        # ignore the .json file created by the Store
        if f.name == self.json_name:
            continue
        # filter based on depth: 0 = file sits directly in self.path,
        # 1 = one directory below, etc.
        depth = len(f.relative_to(self.path).parts) - 1
        # max_depth=None means "no limit"; the previous condition
        # (`is not None and ...`) silently dropped every file in that case
        if self.max_depth is None or depth <= self.max_depth:
            seen_paths.add(f)
            file_list.append(File.from_file(f))

return file_list

Expand All @@ -205,13 +209,6 @@ def connect(self, force_reset: bool = False):
super().connect()
super().update([k.dict() for k in self.read()], key=self.key)

def close(self):
    """
    Close the Store by delegating to the parent class.
    """
    # the parent's close() is relied on to write out metadata
    # and close the file handles
    super().close()

def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None):
"""
Update items (directories) in the Store
Expand Down

0 comments on commit d23ec2c

Please sign in to comment.