Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid eviction on entry replacement #204

Merged
merged 1 commit into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ select = ["F", "E", "W", "I", "RET", "RUF", "PT"]
ignore = [
"RET505", # elif after return
"E501", # line too long, formatter should take care of the fixable ones
"E721", # I'll compare types with `is` if I want
]

[tool.ruff.lint.isort]
Expand Down
43 changes: 19 additions & 24 deletions src/ua_parser/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@


class Cache(Protocol):
"""Cache abstract protocol. The :class:`CachingResolver` will look
"""Cache()

Cache abstract protocol. The :class:`CachingResolver` will look
values up, merge what was returned (possibly nothing) with what it
got from its actual parser, and *re-set the result*.

A :class:`Cache` is responsible for its own replacement policy.
"""

@abc.abstractmethod
Expand All @@ -55,12 +55,6 @@ class Lru:
full of popular items then all of them being replaced at once:
:class:`S3Fifo` and :class:`Sieve` are FIFO-based caches and have
worst-case O(n) eviction.


.. note:: The cache size is adjusted after inserting the new
entry, so the cache will temporarily contain ``maxsize +
1`` items.

"""

def __init__(self, maxsize: int):
Expand All @@ -77,10 +71,9 @@ def __getitem__(self, key: str) -> Optional[PartialResult]:

def __setitem__(self, key: str, value: PartialResult) -> None:
with self.lock:
self.cache[key] = value
self.cache.move_to_end(key)
while len(self.cache) > self.maxsize:
if len(self.cache) >= self.maxsize and key not in self.cache:
self.cache.popitem(last=False)
self.cache[key] = value


@dataclasses.dataclass
Expand All @@ -92,14 +85,12 @@ class CacheEntry:


class S3Fifo:
"""FIFO-based quick-demotion lazy-promotion cache by Yang, Zhang,
Qiu, Yue, Vinayak.
"""FIFO-based quick-demotion lazy-promotion cache [S3-FIFO]_.

Experimentally provides excellent hit rate at lower cache sizes,
for a relatively simple and efficient implementation. Notably
excellent at handling "one hit wonders", aka entries seen only
once during a work-set (or reasonable work window).

"""

def __init__(self, maxsize: int):
Expand All @@ -113,16 +104,19 @@ def __init__(self, maxsize: int):
self.lock = threading.Lock()

def __getitem__(self, key: str) -> Optional[PartialResult]:
e = self.index.get(key)
if e and isinstance(e, CacheEntry):
# small race here, we could bump the freq above the limit
if (e := self.index.get(key)) and type(e) is CacheEntry:
# small race here, we could miss an increment
e.freq = min(e.freq + 1, 3)
return e.value

return None

def __setitem__(self, key: str, r: PartialResult) -> None:
with self.lock:
if (e := self.index.get(key)) and type(e) is CacheEntry:
e.value = r
return

if len(self.small) + len(self.main) >= self.maxsize:
# if main is not overcapacity, resize small
if len(self.main) < self.main_target:
Expand All @@ -133,7 +127,7 @@ def __setitem__(self, key: str, r: PartialResult) -> None:
self._evict_main()

entry = CacheEntry(key, r, 0)
if isinstance(self.index.get(key), str):
if type(self.index.get(key)) is str:
self.main.appendleft(entry)
else:
self.small.appendleft(entry)
Expand Down Expand Up @@ -175,8 +169,7 @@ class SieveNode:


class Sieve:
"""FIFO-based quick-demotion cache by Zhang, Yang, Yue, Vigfusson,
Rashmi.
"""FIFO-based quick-demotion cache [SIEVE]_.

Simpler FIFO-based cache, cousin of :class:`S3Fifo`.
Experimentally slightly lower hit rates than :class:`S3Fifo` (if
Expand All @@ -187,7 +180,6 @@ class Sieve:
Can be an interesting candidate when trying to save on memory,
although the contained entries will generally be much larger than
the cache itself.

"""

def __init__(self, maxsize: int) -> None:
Expand All @@ -200,15 +192,18 @@ def __init__(self, maxsize: int) -> None:
self.lock = threading.Lock()

def __getitem__(self, key: str) -> Optional[PartialResult]:
entry = self.cache.get(key)
if entry:
if entry := self.cache.get(key):
entry.visited = True
return entry.value

return None

def __setitem__(self, key: str, value: PartialResult) -> None:
with self.lock:
if e := self.cache.get(key):
e.value = value
return

if len(self.cache) >= self.maxsize:
self._evict()

Expand Down