Skip to content

Commit

Permalink
♻️ use generic check for encode_plus
Browse files (browse the repository at this point in the history)
Signed-off-by: Prashant Gupta <[email protected]>
  • Loading branch information
prashantgupta24 committed Oct 11, 2024
1 parent 7327490 commit 16d1985
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions src/vllm_tgis_adapter/grpc/grpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
log_tracing_disabled_warning,
)
from vllm.transformers_utils.tokenizer import AnyTokenizer # noqa: TCH002
from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
from vllm.utils import iterate_with_cancellation

from vllm_tgis_adapter.logging import init_logger
Expand Down Expand Up @@ -856,12 +855,16 @@ async def Tokenize(
tokenizer = await self._get_tokenizer(adapter_kwargs)

responses: list[TokenizeResponse] = []
is_mistral_tokenizer = isinstance(tokenizer, MistralTokenizer)

# TODO: maybe parallelize, also move convert_ids_to_tokens into the
# other threads
for req in request.requests:
if is_mistral_tokenizer:
if not hasattr(tokenizer, "encode_plus"):
if request.return_offsets:
raise ValueError(
f"{type(tokenizer)} doesn't support "
"return_offsets at the moment. "
)
token_ids = tokenizer.encode(
prompt=req.text,
)
Expand All @@ -885,11 +888,6 @@ async def Tokenize(
offsets = None

if request.return_offsets:
if is_mistral_tokenizer:
raise ValueError(
"Mistral tokenizer doesn't support "
"return_offsets at the moment. "
)
offsets = [
{"start": start, "end": end}
for start, end in batch_encoding.offset_mapping
Expand Down

0 comments on commit 16d1985

Please sign in to comment.