
Commit

Address some review comments
njhill committed Mar 19, 2024
1 parent ebf6967 commit 8810908
Showing 2 changed files with 9 additions and 2 deletions.
vllm/config.py: 6 changes (5 additions & 1 deletion)
@@ -18,6 +18,10 @@
 
 _GB = 1 << 30
 
+# A cap on the async tokenizer worker pool size when it is computed
+# from the number of available CPU cores
+MAX_TOKENIZER_WORKERS = 16
+
 
 class ModelConfig:
     """Configuration for the model.
@@ -437,7 +441,7 @@ def create_config(
         if tokenizer_pool_size is None:
             # Default based on CPU count
             tokenizer_pool_size = min(
-                16,
+                MAX_TOKENIZER_WORKERS,
                 os.cpu_count() - tensor_parallel_size - 1)
             tokenizer_pool_size = max(1, tokenizer_pool_size)
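For context on the hunk above, here is a minimal standalone sketch of the sizing heuristic. The helper name is hypothetical; the constant and formula mirror the diff:

```python
import os

# Constant mirroring the diff above; caps the computed pool size.
MAX_TOKENIZER_WORKERS = 16


def default_tokenizer_pool_size(tensor_parallel_size: int) -> int:
    # Hypothetical standalone version of the heuristic in create_config:
    # reserve one core per tensor-parallel rank plus one for the main
    # process, cap the remainder at MAX_TOKENIZER_WORKERS, floor at 1.
    cpus = os.cpu_count() or 1  # the None-guard is an addition, not in the diff
    return max(1, min(MAX_TOKENIZER_WORKERS,
                      cpus - tensor_parallel_size - 1))


# e.g. on a 32-core host with tensor_parallel_size=4:
# min(16, 32 - 4 - 1) = 16, so the pool gets 16 workers
```

The floor of 1 matters on small hosts: with 8 cores and tensor_parallel_size=8, the raw formula would go negative, but the pool still gets one worker.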

@@ -27,11 +27,14 @@ def init_tokenizer():
             initializer=init_tokenizer,
         )
 
-        self.encode_async = make_async(self._encode_local, self.executor)
+        self._encode_async = make_async(self._encode_local, self.executor)
 
     def _encode_local(self, *args, **kwargs):
         return self.local.tokenizer.encode(*args, **kwargs)
 
     def encode(self, *args, **kwargs):
         return self.executor.submit(self._encode_local, *args,
                                     **kwargs).result()
 
+    async def encode_async(self, *args, **kwargs):
+        return await self._encode_async(*args, **kwargs)
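Outside the diff, the sync/async split may be easier to read as a whole. Below is a self-contained sketch; the `make_async` helper body is an assumption written as a generic run_in_executor wrapper, and the class name and `ThreadPoolExecutor` choice are illustrative, not the actual vLLM implementation:

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor
from functools import partial


def make_async(func, executor=None):
    # Assumed shape of the make_async helper: awaiting the wrapper runs
    # the blocking callable on the executor, keeping the event loop free.
    async def _wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            executor, partial(func, *args, **kwargs))
    return _wrapper


class AsyncTokenizer:
    """Illustrative stand-in for the pooled tokenizer in the diff."""

    def __init__(self, tokenizer, max_workers=4):
        self.tokenizer = tokenizer
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        # Awaitable wrapper; the commit renames it with an underscore so
        # it no longer occupies the name of the public method below.
        self._encode_async = make_async(self._encode_local, self.executor)

    def _encode_local(self, *args, **kwargs):
        return self.tokenizer.encode(*args, **kwargs)

    def encode(self, *args, **kwargs):
        # Synchronous path: submit to the pool and block on the result,
        # usable from code that has no running event loop.
        return self.executor.submit(self._encode_local, *args,
                                    **kwargs).result()

    async def encode_async(self, *args, **kwargs):
        # Asynchronous path: delegate to the awaitable wrapper.
        return await self._encode_async(*args, **kwargs)
```

From synchronous code, `pool.encode("hello")` blocks until the worker returns; inside a coroutine, `await pool.encode_async("hello")` yields to the event loop while the worker runs. Renaming the attribute to `_encode_async` frees the `encode_async` name for the explicit async method.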

0 comments on commit 8810908
