From 6ffabc92d8bb935438d86e8c5b1c3b37ff084fab Mon Sep 17 00:00:00 2001 From: masklinn Date: Tue, 26 Mar 2024 20:48:48 +0100 Subject: [PATCH] Avoid eviction on entry replacement `*Result` objects are immutable, thus if a `PartialResult` gets filled further it has to be re-set into the cache. This does not change the cache size, but because the current S3 and SIEVE implementations unconditionally check the cache size on `__setitem__` they may evict an entry unnecessarily. Fix that: if there is already a valid cache entry for the key, just update it in place instead of trying to evict then creating a brand new entry. Also update the LRU to pre-check for size (and presence as well), this may make setting a bit more expensive than post-check but it avoids "wronging" the user by bypassing the limit they set. Fixes #201 --- pyproject.toml | 1 + src/ua_parser/caching.py | 43 ++++++++++++++++++---------------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 84ce896..920fcd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ select = ["F", "E", "W", "I", "RET", "RUF", "PT"] ignore = [ "RET505", # elif after return "E501", # line too long, formatter should take care of the fixable ones + "E721", # I'll compare types with `is` if I want ] [tool.ruff.lint.isort] diff --git a/src/ua_parser/caching.py b/src/ua_parser/caching.py index ae7eff6..706ad4b 100644 --- a/src/ua_parser/caching.py +++ b/src/ua_parser/caching.py @@ -26,11 +26,11 @@ class Cache(Protocol): - """Cache abstract protocol. The :class:`CachingResolver` will look + """Cache() + + Cache abstract protocol. The :class:`CachingResolver` will look values up, merge what was returned (possibly nothing) with what it got from its actual parser, and *re-set the result*. - - A :class:`Cache` is responsible for its own replacement policy. """ @abc.abstractmethod @@ -55,12 +55,6 @@ class Lru: full of popular items then all of them being replaced at once: :class:`S3Fifo` and :class:`Sieve` are FIFO-based caches and have worst-case O(n) eviction. - - - .. note:: The cache size is adjusted after inserting the new - entry, so the cache will temporarily contain ``maxsize + - 1`` items. - """ def __init__(self, maxsize: int): @@ -77,10 +71,9 @@ def __getitem__(self, key: str) -> Optional[PartialResult]: def __setitem__(self, key: str, value: PartialResult) -> None: with self.lock: - self.cache[key] = value - self.cache.move_to_end(key) - while len(self.cache) > self.maxsize: + if len(self.cache) >= self.maxsize and key not in self.cache: self.cache.popitem(last=False) + self.cache[key] = value @dataclasses.dataclass @@ -92,14 +85,12 @@ class CacheEntry: class S3Fifo: - """FIFO-based quick-demotion lazy-promotion cache by Yang, Zhang, - Qiu, Yue, Vinayak. + """FIFO-based quick-demotion lazy-promotion cache [S3-FIFO]_. Experimentally provides excellent hit rate at lower cache sizes, for a relatively simple and efficient implementation. Notably excellent at handling "one hit wonders", aka entries seen only once during a work-set (or reasonable work window). - """ def __init__(self, maxsize: int): @@ -113,9 +104,8 @@ def __init__(self, maxsize: int): self.lock = threading.Lock() def __getitem__(self, key: str) -> Optional[PartialResult]: - e = self.index.get(key) - if e and isinstance(e, CacheEntry): - # small race here, we could bump the freq above the limit + if (e := self.index.get(key)) and type(e) is CacheEntry: + # small race here, we could miss an increment e.freq = min(e.freq + 1, 3) return e.value @@ -123,6 +113,10 @@ def __getitem__(self, key: str) -> Optional[PartialResult]: def __setitem__(self, key: str, r: PartialResult) -> None: with self.lock: + if (e := self.index.get(key)) and type(e) is CacheEntry: + e.value = r + return + if len(self.small) + len(self.main) >= self.maxsize: # if main is not overcapacity, resize small if len(self.main) < self.main_target: @@ -133,7 +127,7 @@ def __setitem__(self, key: str, r: PartialResult) -> None: self._evict_main() entry = CacheEntry(key, r, 0) - if isinstance(self.index.get(key), str): + if type(self.index.get(key)) is str: self.main.appendleft(entry) else: self.small.appendleft(entry) @@ -175,8 +169,7 @@ class SieveNode: class Sieve: - """FIFO-based quick-demotion cache by Zhang, Yang, Yue, Vigfusson, - Rashmi. + """FIFO-based quick-demotion cache [SIEVE]_. Simpler FIFO-based cache, cousin of :class:`S3Fifo`. Experimentally slightly lower hit rates than :class:`S3Fifo` (if @@ -187,7 +180,6 @@ class Sieve: Can be an interesting candidate when trying to save on memory, although the contained entries will generally be much larger than the cache itself. - """ def __init__(self, maxsize: int) -> None: @@ -200,8 +192,7 @@ def __init__(self, maxsize: int) -> None: self.lock = threading.Lock() def __getitem__(self, key: str) -> Optional[PartialResult]: - entry = self.cache.get(key) - if entry: + if entry := self.cache.get(key): entry.visited = True return entry.value @@ -209,6 +200,10 @@ def __getitem__(self, key: str) -> Optional[PartialResult]: def __setitem__(self, key: str, value: PartialResult) -> None: with self.lock: + if e := self.cache.get(key): + e.value = value + return + if len(self.cache) >= self.maxsize: self._evict()