
Commit 61f421b

[Misc] use AutoTokenizer for benchmark serving when vLLM not installed
zhyncs authored and robertgshaw2-neuralmagic committed Jun 23, 2024
1 parent 9cfb1d7 commit 61f421b
Showing 2 changed files with 32 additions and 2 deletions.
29 changes: 28 additions & 1 deletion benchmarks/backend_request_func.py
@@ -4,10 +4,13 @@
 import time
 import traceback
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import List, Optional, Union

 import aiohttp
+import huggingface_hub.constants
 from tqdm.asyncio import tqdm
+from transformers import (AutoTokenizer, PreTrainedTokenizer,
+                          PreTrainedTokenizerFast)

 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)

@@ -388,6 +391,30 @@ def remove_prefix(text: str, prefix: str) -> str:
     return text


+def get_model(pretrained_model_name_or_path: str):
+    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
+        from modelscope import snapshot_download
+    else:
+        from huggingface_hub import snapshot_download
+
+    model_path = snapshot_download(
+        model_id=pretrained_model_name_or_path,
+        local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
+        ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
+    return model_path
+
+
+def get_tokenizer(
+    pretrained_model_name_or_path: str, trust_remote_code: bool
+) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
+    if pretrained_model_name_or_path is not None and not os.path.exists(
+            pretrained_model_name_or_path):
+        pretrained_model_name_or_path = get_model(
+            pretrained_model_name_or_path)
+    return AutoTokenizer.from_pretrained(pretrained_model_name_or_path,
+                                         trust_remote_code=trust_remote_code)
+
+
 ASYNC_REQUEST_FUNCS = {
     "tgi": async_request_tgi,
     "vllm": async_request_openai_completions,
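For reference, a minimal sketch of how the new helper could be exercised on its own (hypothetical usage, not part of this commit's diff; the local path is a placeholder):

    # Hypothetical usage sketch, not taken from this commit.
    # Assumes benchmarks/ is on sys.path and that a tokenizer already exists
    # at the placeholder path below; a non-local identifier would instead be
    # fetched by get_model, from the Hugging Face Hub or from ModelScope when
    # VLLM_USE_MODELSCOPE=true, with weight files skipped via the ignore pattern.
    from backend_request_func import get_tokenizer

    tokenizer = get_tokenizer("/path/to/local/model", trust_remote_code=False)
    print(tokenizer("Hello, world!").input_ids)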
5 changes: 4 additions & 1 deletion benchmarks/benchmark_serving.py
@@ -39,7 +39,10 @@
 from tqdm.asyncio import tqdm
 from transformers import PreTrainedTokenizerBase

-from vllm.transformers_utils.tokenizer import get_tokenizer
+try:
+    from vllm.transformers_utils.tokenizer import get_tokenizer
+except ImportError:
+    from backend_request_func import get_tokenizer


 @dataclass
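Downstream, the benchmark presumably calls get_tokenizer the same way whichever import succeeded, since both implementations accept a model identifier plus trust_remote_code. A hedged sketch of that call site (args.tokenizer, args.model, and args.trust_remote_code are assumptions, not shown in this diff):

    # Hedged sketch of the call site; the argument names are assumptions.
    # Because the vLLM and fallback helpers share this signature, the rest of
    # benchmark_serving.py is unchanged by the fallback.
    tokenizer_id = args.tokenizer if args.tokenizer is not None else args.model
    tokenizer = get_tokenizer(tokenizer_id,
                              trust_remote_code=args.trust_remote_code)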
