Skip to content

Commit

Permalink
ENH: make memoize_path not memoize if mtime is too recent
Browse files Browse the repository at this point in the history
Also includes some changes toward introducing additional kwargs to add
into the signature (WiP)
  • Loading branch information
yarikoptic committed Mar 9, 2020
1 parent 301d421 commit 7ac63bd
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 9 deletions.
38 changes: 35 additions & 3 deletions dandi/support/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import joblib
import os
import os.path as op
import time
from functools import wraps

from .. import get_logger
Expand All @@ -13,10 +14,24 @@ class PersistentCache(object):
"""Persistent cache providing @memoize and @memoize_path decorators
"""

def __init__(self, name=None):
_min_dtime = 0.01 # min difference between now and mtime to consider
# for caching

def __init__(self, name=None, more_tokens=None):
"""
Parameters
----------
name
more_tokens: list of objects, optional
To add to the fingerprint of @memoize_path (regular @memoize ATM does
not use it). Could be e.g. versions of relevant/used
python modules (pynwb, etc)
"""
dirs = appdirs.AppDirs("dandi")
self._cache_file = op.join(dirs.user_cache_dir, (name or "cache") + ".dat")
self._memory = joblib.Memory(self._cache_file, verbose=0)
self._more_tokens = more_tokens

def clear(self):
self._memory.clear(warn=False)
Expand All @@ -37,19 +52,36 @@ def memoize_path(self, f):
@self.memoize
def fingerprinted(path, *args, **kwargs):
_ = kwargs.pop(fingerprint_kwarg) # discard
lgr.debug("Running original %s on %r", f, path)
return f(path, *args, **kwargs)

@wraps(f)
def fingerprinter(path, *args, **kwargs):
fprint = self._get_file_fingerprint(path)
# We should still pass through if file was modified just now,
# since that could mask out quick modifications.
# Target use cases will not be like that.
time_now = time.time()
dtime = abs(time_now - fprint[0] * 1e-9) if fprint else None
if fprint is None:
lgr.debug("Calling %s directly since no fingerprint for %r", f, path)
# just call the function -- we have no fingerprint,
# probably does not exist or permissions are wrong
return f(path, *args, **kwargs)
elif dtime is not None and dtime < self._min_dtime:
lgr.debug(
"Calling %s directly since too short (%f) for %r", f, dtime, path
)
return f(path, *args, **kwargs)
else:
lgr.debug("Calling memoized version of %s for %s", f, path)
# If there is a fingerprint -- inject it into the signature
kwargs_ = kwargs.copy()
kwargs_[fingerprint_kwarg] = fprint
kwargs_[
fingerprint_kwarg
] = (
fprint
) # tuple(fprint) + tuple(self._more_tokens) if self._more_tokens)
return fingerprinted(path, *args, **kwargs_)

# and we memoize actually that function
Expand All @@ -63,6 +95,6 @@ def _get_file_fingerprint(path):
# we can't take everything, since atime can change, etc.
# So let's take some
s = os.stat(path, follow_symlinks=True)
return s.st_ctime_ns, s.st_mtime_ns, s.st_size
return s.st_mtime_ns, s.st_ctime_ns, s.st_size
except Exception as exc:
lgr.debug(f"Cannot fingerptint {path}: {exc}")
24 changes: 18 additions & 6 deletions dandi/support/tests/test_cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os.path as op
import random
import sys
import time

from dandi.support.cache import PersistentCache

Expand Down Expand Up @@ -72,28 +73,39 @@ def memoread(path, arg, kwarg=None):

assert memoread(path, 0) == "content"
assert len(calls) == 3
# Unless this computer is too slow, less than cache._min_dtime will have
# elapsed between creating the file and this call, so a direct
# (non-memoized) read is forced:
assert memoread(path, 0) == "content"
assert len(calls) == 3
assert len(calls) == 4
assert calls[-1] == [path, 0, None]
# but if we sleep - should memoize
time.sleep(cache._min_dtime * 1.1)

assert memoread(path, 1) == "content"
assert len(calls) == 4
assert len(calls) == 5
assert memoread(path, 1) == "content"
assert len(calls) == 5

# and if we modify the file -- a new read
time.sleep(cache._min_dtime * 1.1)
with open(path, "w") as f:
f.write("Content")
assert memoread(path, 1) == "Content"
assert len(calls) == 5
assert len(calls) == 6
time.sleep(cache._min_dtime * 1.1)
assert memoread(path, 1) == "Content"
assert len(calls) == 5
assert len(calls) == 7
assert memoread(path, 1) == "Content"
assert len(calls) == 7

# and if we "clear", would it still work?
cache.clear()

assert memoread(path, 1) == "Content"
assert len(calls) == 6
assert len(calls) == 8
assert memoread(path, 1) == "Content"
assert len(calls) == 6
assert len(calls) == 8


def test_memoize_path_persist():
Expand Down

0 comments on commit 7ac63bd

Please sign in to comment.