From 8ae650bee4add9f131d49b96a0a150311ea58cd1 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Tue, 14 Nov 2023 19:48:56 +0800 Subject: [PATCH] Use timestamp instead of datetime to achieve faster cookie expiration in CookieJar (#7824) #7583 https://github.com/aio-libs/aiohttp/pull/7819#issuecomment-1806933086 --- CHANGES/7824.feature | 1 + aiohttp/cookiejar.py | 67 ++++++++++++++++++++--------------------- aiohttp/helpers.py | 7 ----- tests/test_cookiejar.py | 20 +++++++----- 4 files changed, 46 insertions(+), 49 deletions(-) create mode 100644 CHANGES/7824.feature diff --git a/CHANGES/7824.feature b/CHANGES/7824.feature new file mode 100644 index 00000000000..b3220a99de9 --- /dev/null +++ b/CHANGES/7824.feature @@ -0,0 +1 @@ +Use timestamp instead of ``datetime`` to achieve faster cookie expiration in ``CookieJar``. diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index 29fa865a3a7..3d218dd31b1 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -1,12 +1,15 @@ +import calendar import contextlib import datetime import os # noqa import pathlib import pickle import re +import time import warnings from collections import defaultdict from http.cookies import BaseCookie, Morsel, SimpleCookie +from math import ceil from typing import ( # noqa DefaultDict, Dict, @@ -24,7 +27,7 @@ from yarl import URL from .abc import AbstractCookieJar, ClearCookiePredicate -from .helpers import is_ip_address, next_whole_second +from .helpers import is_ip_address from .typedefs import LooseCookies, PathLike, StrOrURL __all__ = ("CookieJar", "DummyCookieJar") @@ -52,9 +55,22 @@ class CookieJar(AbstractCookieJar): DATE_YEAR_RE = re.compile(r"(\d{2,4})") - MAX_TIME = datetime.datetime.max.replace(tzinfo=datetime.timezone.utc) - - MAX_32BIT_TIME = datetime.datetime.fromtimestamp(2**31 - 1, datetime.timezone.utc) + # calendar.timegm() fails for timestamps after datetime.datetime.max + # Minus one as a loss of precision occurs when timestamp() is called. + MAX_TIME = ( + int(datetime.datetime.max.replace(tzinfo=datetime.timezone.utc).timestamp()) - 1 + ) + try: + calendar.timegm(time.gmtime(MAX_TIME)) + except OSError: + # Hit the maximum representable time on Windows + # https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/localtime-localtime32-localtime64 + MAX_TIME = calendar.timegm((3000, 12, 31, 23, 59, 59, -1, -1, -1)) + except OverflowError: + # #4515: datetime.max may not be representable on 32-bit platforms + MAX_TIME = 2**31 - 1 + # Avoid minuses in the future, 3x faster + SUB_MAX_TIME = MAX_TIME - 1 def __init__( self, @@ -81,14 +97,8 @@ def __init__( for url in treat_as_secure_origin ] self._treat_as_secure_origin = treat_as_secure_origin - self._next_expiration = next_whole_second() - self._expirations: Dict[Tuple[str, str, str], datetime.datetime] = {} - # #4515: datetime.max may not be representable on 32-bit platforms - self._max_time = self.MAX_TIME - try: - self._max_time.timestamp() - except OverflowError: - self._max_time = self.MAX_32BIT_TIME + self._next_expiration: float = ceil(time.time()) + self._expirations: Dict[Tuple[str, str, str], float] = {} def save(self, file_path: PathLike) -> None: file_path = pathlib.Path(file_path) @@ -102,14 +112,14 @@ def load(self, file_path: PathLike) -> None: def clear(self, predicate: Optional[ClearCookiePredicate] = None) -> None: if predicate is None: - self._next_expiration = next_whole_second() + self._next_expiration = ceil(time.time()) self._cookies.clear() self._host_only_cookies.clear() self._expirations.clear() return to_del = [] - now = datetime.datetime.now(datetime.timezone.utc) + now = time.time() for (domain, path), cookie in self._cookies.items(): for name, morsel in cookie.items(): key = (domain, path, name) @@ -125,13 +135,11 @@ def clear(self, predicate: Optional[ClearCookiePredicate] = None) -> None: del self._expirations[(domain, path, name)] self._cookies[(domain, path)].pop(name, None) - next_expiration = min(self._expirations.values(), default=self._max_time) - try: - self._next_expiration = next_expiration.replace( - microsecond=0 - ) + datetime.timedelta(seconds=1) - except OverflowError: - self._next_expiration = self._max_time + self._next_expiration = ( + min(*self._expirations.values(), self.SUB_MAX_TIME) + 1 + if self._expirations + else self.MAX_TIME + ) def clear_domain(self, domain: str) -> None: self.clear(lambda x: self._is_domain_match(domain, x["domain"])) @@ -147,9 +155,7 @@ def __len__(self) -> int: def _do_expiration(self) -> None: self.clear(lambda x: False) - def _expire_cookie( - self, when: datetime.datetime, domain: str, path: str, name: str - ) -> None: + def _expire_cookie(self, when: float, domain: str, path: str, name: str) -> None: self._next_expiration = min(self._next_expiration, when) self._expirations[(domain, path, name)] = when @@ -207,12 +213,7 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No if max_age: try: delta_seconds = int(max_age) - try: - max_age_expiration = datetime.datetime.now( - datetime.timezone.utc - ) + datetime.timedelta(seconds=delta_seconds) - except OverflowError: - max_age_expiration = self._max_time + max_age_expiration = min(time.time() + delta_seconds, self.MAX_TIME) self._expire_cookie(max_age_expiration, domain, path, name) except ValueError: cookie["max-age"] = "" @@ -328,7 +329,7 @@ def _is_path_match(req_path: str, cookie_path: str) -> bool: return non_matching.startswith("/") @classmethod - def _parse_date(cls, date_str: str) -> Optional[datetime.datetime]: + def _parse_date(cls, date_str: str) -> Optional[int]: """Implements date string parsing adhering to RFC 6265.""" if not date_str: return None @@ -388,9 +389,7 @@ def _parse_date(cls, date_str: str) -> Optional[datetime.datetime]: if year < 1601 or hour > 23 or minute > 59 or second > 59: return None - return datetime.datetime( - year, month, day, hour, minute, second, tzinfo=datetime.timezone.utc - ) + return calendar.timegm((year, month, day, hour, minute, second, -1, -1, -1)) class DummyCookieJar(AbstractCookieJar): diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 8da0159fa2b..5435e2f9e07 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -530,13 +530,6 @@ def is_ip_address(host: Optional[Union[str, bytes, bytearray, memoryview]]) -> b return is_ipv4_address(host) or is_ipv6_address(host) -def next_whole_second() -> datetime.datetime: - """Return current time rounded up to the next whole second.""" - return datetime.datetime.now(datetime.timezone.utc).replace( - microsecond=0 - ) + datetime.timedelta(seconds=0) - - _cached_current_datetime: Optional[int] = None _cached_formatted_datetime = "" diff --git a/tests/test_cookiejar.py b/tests/test_cookiejar.py index 00a32708756..f0b72eeaf50 100644 --- a/tests/test_cookiejar.py +++ b/tests/test_cookiejar.py @@ -102,23 +102,27 @@ def test_date_parsing() -> None: assert parse_func("") is None # 70 -> 1970 - assert parse_func("Tue, 1 Jan 70 00:00:00 GMT") == datetime.datetime( - 1970, 1, 1, tzinfo=utc + assert ( + parse_func("Tue, 1 Jan 70 00:00:00 GMT") + == datetime.datetime(1970, 1, 1, tzinfo=utc).timestamp() ) # 10 -> 2010 - assert parse_func("Tue, 1 Jan 10 00:00:00 GMT") == datetime.datetime( - 2010, 1, 1, tzinfo=utc + assert ( + parse_func("Tue, 1 Jan 10 00:00:00 GMT") + == datetime.datetime(2010, 1, 1, tzinfo=utc).timestamp() ) # No day of week string - assert parse_func("1 Jan 1970 00:00:00 GMT") == datetime.datetime( - 1970, 1, 1, tzinfo=utc + assert ( + parse_func("1 Jan 1970 00:00:00 GMT") + == datetime.datetime(1970, 1, 1, tzinfo=utc).timestamp() ) # No timezone string - assert parse_func("Tue, 1 Jan 1970 00:00:00") == datetime.datetime( - 1970, 1, 1, tzinfo=utc + assert ( + parse_func("Tue, 1 Jan 1970 00:00:00") + == datetime.datetime(1970, 1, 1, tzinfo=utc).timestamp() ) # No year