Skip to content

Commit

Permalink
perf: disable gzip multithreading and increase the default chunk size
Browse files Browse the repository at this point in the history
  • Loading branch information
hrz6976 committed Dec 21, 2024
1 parent 13039c2 commit 426c789
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
8 changes: 5 additions & 3 deletions woc/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
"""Path to the cache directory for woc."""
os.makedirs(WocCachePath, exist_ok=True)

WocNumProcesses = max(1, min(os.cpu_count() // 2, 16))
"""Number of processes to use for parallel processing."""
# WocNumProcesses = max(1, min(os.cpu_count() // 2, 16))
# """Number of processes to use for parallel processing."""


@dataclass
Expand Down Expand Up @@ -96,7 +96,9 @@ def iter_values(
self,
map_name: str,
key: Union[bytes, str],
) -> Generator[Union[List[str], Tuple[str, str, str], List[Tuple[str, str, str]]], None, None]:
) -> Generator[
Union[List[str], Tuple[str, str, str], List[Tuple[str, str, str]]], None, None
]:
"""
Similar to get_values, but returns a generator instead of a list. This is useful when querying large maps (on_large='all').
Expand Down
2 changes: 1 addition & 1 deletion woc/local.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class WocMapsLocal(WocMapsBase):
...

def _get_tch_bytes(
self, map_name: str, key: Union[bytes, str]
self, map_name: str, key: Union[bytes, str], cursor=0
) -> Tuple[bytes, str, Optional[int]]: ...
def _get_pos(
self,
Expand Down
8 changes: 4 additions & 4 deletions woc/local.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ try:
except ImportError or AssertionError:
raise ImportError('python-lzf is required to decompress LZF-compressed data: `pip install python-lzf`')

from .base import WocMapsBase,WocFile,WocMap, WocObject, WocSupportedProfileVersions, WocCachePath, WocNumProcesses
from .base import WocMapsBase,WocFile,WocMap, WocObject, WocSupportedProfileVersions, WocCachePath
from .tch cimport TCHashDB

cdef extern from 'Python.h':
Expand Down Expand Up @@ -543,7 +543,7 @@ def _cached_open(path: str, is_gzip: bool = False, *args, **kwargs) -> FileIO:
if path in _file_pool:
return _file_pool[path]
if is_gzip is True:
_file_pool[path] = RapidgzipFile(path, *args, parallelization=WocNumProcesses, **kwargs)
_file_pool[path] = RapidgzipFile(path, *args, **kwargs)
# build gzip index cache if not exists
_index_path = os.path.join(WocCachePath, hex(fnvhash(path.encode()))[2:] + '.gzidx')
if os.path.exists(_index_path):
Expand All @@ -560,7 +560,7 @@ def read_large_random_access(
path: str,
dtype: str,
offset: int = 0,
length: int = 8192
length: int = 131072
) -> Tuple[bytes, Optional[int]]:
"""
Read a *.large.* and return its content.
Expand All @@ -573,7 +573,7 @@ def read_large_random_access(
:return: a tuple of the bytes read and the next offset (the offset is None at EOF). The returned bytes never begin or end with a separator.
"""
if dtype == 'h':
f = _cached_open(path, 'rb')
f = _cached_open(path, mode='rb')
if offset == 0:
offset = 20
_new_len = (length // 20) * 20 # 160 bits of SHA1
Expand Down

0 comments on commit 426c789

Please sign in to comment.