From cd0f001729bfead519abd47769cf47fc3cd1ad09 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Tue, 29 Oct 2024 17:20:52 +0800
Subject: [PATCH] Rename datasets/templates to underscore style; make
 `template` an optional keyword argument of the infer APIs

Normalize dataset and template identifiers from hyphenated to underscore
style, move `template` out of the positional parameters of
infer/infer_async, reset the stats accumulator (rather than the computed
snapshot) in the deploy logging hook, make the deploy stream wrapper an
async generator, default `lora_request_list` to [] when no lora_modules
are given, and write DeployArguments results to a 'deploy_result' folder.
---
 swift/llm/argument/deploy_args.py            |   9 +
 swift/llm/argument/infer_args.py             |  11 +-
 swift/llm/data/dataset_info.json             | 152 +++++++--------
 swift/llm/dataset/constant.py                | 188 +++++++++----------
 swift/llm/infer/deploy.py                    |  18 +-
 swift/llm/infer/infer.py                     |   4 +-
 swift/llm/infer/infer_engine/infer_engine.py |   2 +-
 swift/llm/infer/infer_engine/pt_engine.py    |  29 +--
 swift/llm/template/constant.py               |  44 ++---
 tests/deploy/test_logprobs.py                |  17 +-
 10 files changed, 234 insertions(+), 240 deletions(-)

diff --git a/swift/llm/argument/deploy_args.py b/swift/llm/argument/deploy_args.py
index a81b10a09..619014c0e 100644
--- a/swift/llm/argument/deploy_args.py
+++ b/swift/llm/argument/deploy_args.py
@@ -16,3 +16,12 @@ class DeployArguments(InferArguments):
     served_model_name: Optional[str] = None
     verbose: bool = True  # Whether to log request_info
     log_interval: int = 10  # Interval for printing global statistics
+
+    def _init_stream(self):
+        pass
+
+    def _init_eval_human(self):
+        pass
+
+    def _init_result_dir(self, folder_name: str = 'deploy_result') -> None:
+        super()._init_result_dir(folder_name=folder_name)
diff --git a/swift/llm/argument/infer_args.py b/swift/llm/argument/infer_args.py
index 1c544e79d..adbd7ad31 100644
--- a/swift/llm/argument/infer_args.py
+++ b/swift/llm/argument/infer_args.py
@@ -48,15 +48,15 @@ def __post_init__(self):
 class InferArguments(BaseArguments, MergeArguments, VllmArguments, LmdeployArguments):
     infer_backend: Literal['vllm', 'pt', 'lmdeploy'] = 'pt'
     ckpt_dir: Optional[str] = field(default=None, metadata={'help': '/path/to/your/vx-xxx/checkpoint-xxx'})
+    max_batch_size: int = 16  # for pt engine
+
     # only for inference
     val_dataset_sample: Optional[int] = None
     result_dir: Optional[str] = field(default=None, metadata={'help': '/path/to/your/infer_result'})
     save_result: bool = True
-
-    max_batch_size: int = 16  # for pt engine
     stream: Optional[bool] = None
 
-    def _init_result_dir(self) -> None:
+    def _init_result_dir(self, folder_name: str = 'infer_result') -> None:
         self.result_path = None
         if not self.save_result:
             return
@@ -66,7 +66,7 @@ def _init_result_dir(self) -> None:
                 result_dir = self.model_info.model_dir
             else:
                 result_dir = self.ckpt_dir
-            result_dir = os.path.join(result_dir, 'infer_result')
+            result_dir = os.path.join(result_dir, folder_name)
         else:
             result_dir = self.result_dir
         result_dir = to_abspath(result_dir)
@@ -90,11 +90,11 @@ def __post_init__(self) -> None:
         BaseArguments.__post_init__(self)
         MergeArguments.__post_init__(self)
         VllmArguments.__post_init__(self)
+        self._parse_lora_modules()
         self._init_result_dir()
         self._init_stream()
         self._init_eval_human()
-        self._parse_lora_modules()
 
     def _init_eval_human(self):
         if len(self.dataset) == 0 and len(self.val_dataset) == 0:
@@ -106,6 +106,7 @@ def _init_eval_human(self):
 
     def _parse_lora_modules(self) -> None:
         if len(self.lora_modules) == 0:
+            self.lora_request_list = []
             return
         assert self.infer_backend in {'vllm', 'pt'}
         if self.infer_backend == 'vllm':
diff --git a/swift/llm/data/dataset_info.json b/swift/llm/data/dataset_info.json
index 569be670e..7fa618d93 100644
--- a/swift/llm/data/dataset_info.json
+++ b/swift/llm/data/dataset_info.json
@@ -1,28 +1,28 @@
 {
-    "multi-alpaca": {
+    "multi_alpaca": {
         "ms_dataset_id": "damo/nlp_polylm_multialpaca_sft",
         "subsets": ["ar", "de", "es", "fr", "id", "ja", "ko", "pt", "ru", "th", "vi"],
         "tags": ["chat", "general", "multilingual"]
     },
-    "text2sql-en": {
+    "text2sql_en": {
         "ms_dataset_id": "AI-ModelScope/texttosqlv2_25000_v2",
         "tags": ["chat", "sql"],
         "hf_dataset_id": "Clinton/texttosqlv2_25000_v2"
     },
-    "school-math-zh": {
+    "school_math_zh": {
         "ms_dataset_id": "AI-ModelScope/school_math_0.25M",
         "tags": ["chat", "math", "quality"],
         "hf_dataset_id": "BelleGroup/school_math_0.25M"
     },
-    "gpt4all-en": {
+    "gpt4all_en": {
         "ms_dataset_id": "wyj123456/GPT4all",
         "tags": ["chat", "general"]
     },
-    "cot-zh": {
+    "cot_zh": {
        "ms_dataset_id": "YorickHe/CoT_zh",
         "tags": ["chat", "general"]
     },
-    "cot-en": {
+    "cot_en": {
         "ms_dataset_id": "YorickHe/CoT",
         "tags": ["chat", "general"]
     },
@@ -32,27 +32,27 @@
         "tags": ["chat", "general"],
         "help": "`default` is in Chinese, `subset` is in English."
     },
-    "code-alpaca-en": {
+    "code_alpaca_en": {
         "ms_dataset_id": "wyj123456/code_alpaca_en",
         "tags": ["chat", "coding"],
         "hf_dataset_id": "sahil2801/CodeAlpaca-20k"
     },
-    "finance-en": {
+    "finance_en": {
         "ms_dataset_id": "wyj123456/finance_en",
         "tags": ["chat", "financial"],
         "hf_dataset_id": "ssbuild/alpaca_finance_en"
     },
-    "alpaca-en": {
+    "alpaca_en": {
         "ms_dataset_id": "AI-ModelScope/alpaca-gpt4-data-en",
         "tags": ["chat", "general", "🔥"],
         "hf_dataset_id": "vicgalle/alpaca-gpt4"
     },
-    "alpaca-cleaned": {
+    "alpaca_cleaned": {
         "ms_dataset_id": "AI-ModelScope/alpaca-cleaned",
         "tags": ["chat", "general", "bench", "quality"],
         "hf_dataset_id": "yahma/alpaca-cleaned"
     },
-    "open-orca-chinese": {
+    "open_orca_chinese": {
         "ms_dataset_id": "AI-ModelScope/OpenOrca-Chinese",
         "columns": {
             "system_prompt": "system",
@@ -62,7 +62,7 @@
         "tags": ["QA", "zh"],
         "hf_dataset_id": "yys/OpenOrca-Chinese",
         "huge_dataset": true
     },
-    "chinese-c4": {
+    "chinese_c4": {
         "ms_dataset_id": "swift/chinese-c4",
         "tags": ["pretrain", "zh", "quality"],
         "hf_dataset_id": "shjwudp/chinese-c4",
         "huge_dataset": true
     },
@@ -80,18 +80,18 @@
         "hf_dataset_id": "allenai/dolma",
         "huge_dataset": true
     },
-    "guanaco-belle-merge": {
+    "guanaco_belle_merge": {
         "ms_dataset_id": "AI-ModelScope/guanaco_belle_merge_v1.0",
         "tags": ["QA", "zh"],
         "hf_dataset_id": "Chinese-Vicuna/guanaco_belle_merge_v1.0"
     },
-    "math-plus": {
+    "math_plus": {
         "ms_dataset_id": "TIGER-Lab/MATH-plus",
         "subsets": ["train"],
         "tags": ["qa", "math", "en", "quality"],
         "hf_dataset_id": "TIGER-Lab/MATH-plus"
     },
-    "path-vqa": {
+    "path_vqa": {
         "ms_dataset_id": "swift/path-vqa",
         "hf_dataset_id": "flaviagiammarino/path-vqa",
         "columns": {
@@ -101,7 +101,7 @@
             "answer": "response"
         },
         "tags": ["multi-modal", "vqa", "medical"]
     },
-    "aya-collection": {
+    "aya_collection": {
         "ms_dataset_id": "swift/aya_collection",
         "hf_dataset_id": "CohereForAI/aya_collection",
@@ -111,7 +111,7 @@
         "subsets": ["aya_dataset"],
         "columns": {
             "targets": "response"
         },
         "tags": ["multi-lingual", "qa"]
     },
-    "web-instruct-sub": {
+    "web_instruct_sub": {
         "ms_dataset_id": "swift/WebInstructSub",
         "hf_dataset_id": "TIGER-Lab/WebInstructSub",
         "columns": {
             "question": "query",
             "answer": "response"
         },
@@ -132,11 +132,11 @@
         "tags": ["vqa", "en", "youtube", "video"],
         "huge_dataset": true
     },
-    "classical-chinese-translate": {
+    "classical_chinese_translate": {
         "ms_dataset_id": "swift/classical_chinese_translate",
         "tags": ["chat", "play-ground"]
     },
-    "tagengo-gpt4": {
+    "tagengo_gpt4": {
         "ms_dataset_id": "swift/tagengo-gpt4",
         "hf_dataset_id": "lightblue/tagengo-gpt4",
         "tags": ["chat", "multi-lingual", "quality"]
     },
@@ -146,7 +146,7 @@
         "hf_dataset_id": "HuggingFaceFW/fineweb",
         "huge_dataset": true
     },
-    "poison-mpts": {
+    "poison_mpts": {
         "ms_dataset_id": "iic/100PoisonMpts",
         "columns": {
             "prompt": "query",
@@ -154,17 +154,17 @@
         "tags": ["poison-management", "zh"]
     },
-    "rwkv-pretrain-web": {
+    "rwkv_pretrain_web": {
"ms_dataset_id": "mapjack/openwebtext_dataset", "tags": ["pretrain", "zh", "quality"], "huge_dataset": true }, - "llava-med-zh-instruct": { + "llava_med_zh_instruct": { "ms_dataset_id": "swift/llava-med-zh-instruct-60k", "hf_dataset_id": "BUAADreamer/llava-med-zh-instruct-60k", "tags": ["zh", "medical", "vqa", "multi-modal"] }, - "chart-qa": { + "chart_qa": { "ms_dataset_id": "swift/ChartQA", "hf_dataset_id": "HuggingFaceM4/ChartQA", "columns": { @@ -174,7 +174,7 @@ "split": ["train"], "tags": ["en", "vqa", "quality"] }, - "vqa-v2": { + "vqa_v2": { "ms_dataset_id": "swift/VQAv2", "hf_dataset_id": "HuggingFaceM4/VQAv2", "columns": { @@ -185,25 +185,25 @@ "split": ["train"], "tags": ["en", "vqa", "quality"] }, - "belle-train-3.5M-CN": { + "belle_train_3.5m_cn": { "ms_dataset_id": "swift/train_3.5M_CN", "hf_dataset_id": "BelleGroup/train_3.5M_CN", "tags": ["common", "zh", "quality"], "huge_dataset": true }, - "belle-train-2M-CN": { + "belle_train_2m_cn": { "ms_dataset_id": "AI-ModelScope/train_2M_CN", "hf_dataset_id": "BelleGroup/train_2M_CN", "tags": ["common", "zh", "quality"], "huge_dataset": true }, - "belle-train-1M-CN": { + "belle_train_1m_cn": { "ms_dataset_id": "AI-ModelScope/train_1M_CN", "hf_dataset_id": "BelleGroup/train_1M_CN", "tags": ["common", "zh", "quality"], "huge_dataset": true }, - "belle-train-0.5M-CN": { + "belle_train_0.5m_cn": { "ms_dataset_id": "AI-ModelScope/train_0.5M_CN", "hf_dataset_id": "BelleGroup/train_0.5M_CN", "tags": ["common", "zh", "quality"] @@ -217,12 +217,12 @@ }, "tags": ["CoT", "en"] }, - "codealpaca-20k": { + "codealpaca_20k": { "ms_dataset_id": "AI-ModelScope/CodeAlpaca-20k", "hf_dataset_id": "HuggingFaceH4/CodeAlpaca_20K", "tags": ["code", "en"] }, - "zhihu-rlhf": { + "zhihu_rlhf": { "ms_dataset_id": "AI-ModelScope/zhihu_rlhf_3k", "columns": { "prompt": "query", @@ -232,13 +232,13 @@ "tags": ["rlhf", "dpo", "zh"], "hf_dataset_id": "liyucheng/zhihu_rlhf_3k" }, - "ultrachat-200k": { + "ultrachat_200k": { "ms_dataset_id": "swift/ultrachat_200k", "hf_dataset_id": "HuggingFaceH4/ultrachat_200k", "split": ["train_sft"], "tags": ["chat", "en", "quality"] }, - "evol-instruct-v2": { + "evol_instruct_v2": { "ms_dataset_id": "AI-ModelScope/WizardLM_evol_instruct_V2_196k", "hf_dataset_id": "WizardLM/WizardLM_evol_instruct_V2_196k", "tags": ["chat", "en"] @@ -250,12 +250,12 @@ "tags": ["multi-domain", "en", "qa"], "huge_dataset": true }, - "cosmopedia-100k": { + "cosmopedia_100k": { "ms_dataset_id": "swift/cosmopedia-100k", "hf_dataset_id": "HuggingFaceTB/cosmopedia-100k", "tags": ["multi-domain", "en", "qa"] }, - "coig-cqia": { + "coig_cqia": { "ms_dataset_id": "AI-ModelScope/COIG-CQIA", "subsets": ["chinese_traditional", "coig_pc", "exam", "finance", "douban", "human_value", "logi_qa", "ruozhiba", "segmentfault", "wiki", "wikihow", "xhs", "zhihu"], @@ -279,7 +279,7 @@ "tags": ["pretrain", "quality"], "huge_dataset": true }, - "redpajama-data-v2": { + "redpajama_data_v2": { "ms_dataset_id": "swift/RedPajama-Data-V2", "hf_dataset_id": "togethercomputer/RedPajama-Data-V2", "columns": { @@ -288,13 +288,13 @@ "tags": ["pretrain", "quality"], "huge_dataset": true }, - "redpajama-data-1t": { + "redpajama_data_1t": { "ms_dataset_id": "swift/RedPajama-Data-1T", "hf_dataset_id": "togethercomputer/RedPajama-Data-1T", "tags": ["pretrain", "quality"], "huge_dataset": true }, - "gen-qa": { + "gen_qa": { "ms_dataset_id": "swift/GenQA", "hf_dataset_id": "tomg-group-umd/GenQA", "columns": { @@ -304,7 +304,7 @@ "tags": ["qa", "quality", "multi-task"], "huge_dataset": true }, - 
"infinity-instruct": { + "infinity_instruct": { "ms_dataset_id": "swift/Infinity-Instruct", "hf_dataset_id": "BAAI/Infinity-Instruct", "tags": ["qa", "quality", "multi-task"], @@ -323,7 +323,7 @@ "tags": ["en"], "huge_dataset": true }, - "wikipedia-cn-filtered": { + "wikipedia_cn_filtered": { "ms_dataset_id": "AI-ModelScope/wikipedia-cn-20230720-filtered", "hf_dataset_id": "pleisto/wikipedia-cn-20230720-filtered", "columns": { @@ -344,7 +344,7 @@ "tags": ["pretrain", "quality", "zh"], "huge_dataset": true }, - "the-stack": { + "the_stack": { "ms_dataset_id": "AI-ModelScope/the-stack", "hf_dataset_id": "bigcode/the-stack", "columns": { @@ -362,25 +362,25 @@ "tags": ["pretrain", "quality"], "huge_dataset": true }, - "ms-agent-for-agentfabric": { + "ms_agent_for_agentfabric": { "ms_dataset_id": "AI-ModelScope/ms_agent_for_agentfabric", "subsets": ["default", "addition"], "tags": ["chat", "agent", "multi-round", "🔥"] }, - "deepctrl-sft": { + "deepctrl_sft": { "ms_dataset_id": "AI-ModelScope/deepctrl-sft-data", "subsets": ["default", "en"], "tags": ["chat", "general", "sft", "multi-round"], "help": "`default` is in Chinese, `en` is in English.", "huge_dataset": true }, - "poetry-zh": { + "poetry_zh": { "ms_dataset_id": "modelscope/chinese-poetry-collection", "split": ["test"], "columns": {"text1": "response"}, "tags": ["text-generation", "poetry"] }, - "instruct-en": { + "instruct_en": { "ms_dataset_id": "wyj123456/instruct", "columns": { "prompt": "query", @@ -389,53 +389,53 @@ "tags": ["chat", "general"] }, - "cls-fudan-news-zh": { + "cls_fudan_news_zh": { "ms_dataset_id": "damo/zh_cls_fudan-news", "columns": {"prompt": "query", "answer": "response"}, "tags": ["chat", "classification"] }, - "ner-jave-zh": { + "ner_jave_zh": { "ms_dataset_id": "damo/zh_ner-JAVE", "columns": {"prompt": "query", "answer": "response"}, "tags": ["chat", "ner"] }, - "lawyer-llama-zh": { + "lawyer_llama_zh": { "ms_dataset_id": "AI-ModelScope/lawyer_llama_data", "columns": {"instruction": "query", "output": "response", "history": "-"}, "tags": ["chat", "law"], "hf_dataset_id": "Skepsun/lawyer_llama_data" }, - "codefuse-evol-instruction-zh": { + "codefuse_evol_instruction_zh": { "ms_dataset_id": "codefuse-ai/Evol-instruction-66k", "columns": {"instruction": "query", "output": "response"}, "tags": ["chat", "coding", "🔥"] }, - "tulu-v2-sft-mixture": { + "tulu_v2_sft_mixture": { "ms_dataset_id": "AI-ModelScope/tulu-v2-sft-mixture", "tags": ["chat", "multilingual", "general", "multi-round"], "hf_dataset_id": "allenai/tulu-v2-sft-mixture" }, - "webnovel-zh": { + "webnovel_zh": { "ms_dataset_id": "AI-ModelScope/webnovel_cn", "tags": ["chat", "novel"], "hf_dataset_id": "zxbsmk/webnovel_cn" }, - "sft-nectar": { + "sft_nectar": { "hf_dataset_id": "AstraMindAI/SFT-Nectar", "ms_dataset_id": "AI-ModelScope/SFT-Nectar", "tags": ["cot", "en", "quality"] }, - "generated-chat-zh": { + "generated_chat_zh": { "ms_dataset_id": "AI-ModelScope/generated_chat_0.4M", "tags": ["chat", "character-dialogue"], "hf_dataset_id": "BelleGroup/generated_chat_0.4M" }, - "open-platypus-en": { + "open_platypus_en": { "ms_dataset_id": "AI-ModelScope/Open-Platypus", "tags": ["chat", "math", "quality"], "hf_dataset_id": "garage-bAInd/Open-Platypus" }, - "open-orca": { + "open_orca": { "ms_dataset_id": "AI-ModelScope/OpenOrca", "subsets": ["default", "3_5M"], "columns": {"question": "query"}, @@ -443,17 +443,17 @@ "help": ["`default` uses gpt4 for data cleaning."], "huge_dataset": true }, - "slim-orca": { + "slim_orca": { "ms_dataset_id": "swift/SlimOrca", 
"hf_dataset_id": "Open-Orca/SlimOrca", "tags": ["quality", "en"] }, - "slim-pajama-627b": { + "slim_pajama_627b": { "hf_dataset_id": "cerebras/SlimPajama-627B", "tags": ["pretrain", "quality"], "huge_dataset": true }, - "moondream2-coyo-5M": { + "moondream2_coyo_5m": { "ms_dataset_id": "swift/moondream2-coyo-5M-captions", "hf_dataset_id": "isidentical/moondream2-coyo-5M-captions", "columns": { @@ -463,18 +463,18 @@ "tags": ["caption", "pretrain", "quality"], "huge_dataset": true }, - "no-robots": { + "no_robots": { "ms_dataset_id": "swift/no_robots", "hf_dataset_id": "HuggingFaceH4/no_robots", "tags": ["multi-task", "quality", "human-annotated"] }, - "open-hermes": { + "open_hermes": { "ms_dataset_id": "swift/OpenHermes-2.5", "hf_dataset_id": "teknium/OpenHermes-2.5", "huge_dataset": true, "tags": ["cot", "en", "quality"] }, - "github-code": { + "github_code": { "ms_dataset_id": "swift/github-code", "hf_dataset_id": "codeparrot/github-code", "columns": { @@ -483,13 +483,13 @@ "tags": ["pretrain", "quality"], "huge_dataset": true }, - "disc-law-sft-zh": { + "disc_law_sft_zh": { "ms_dataset_id": "AI-ModelScope/DISC-Law-SFT", "columns": {"input": "query", "output": "response"}, "tags": ["chat", "law", "🔥"], "hf_dataset_id": "ShengbinYue/DISC-Law-SFT" }, - "math-instruct": { + "math_instruct": { "ms_dataset_id": "AI-ModelScope/MathInstruct", "hf_dataset_id": "TIGER-Lab/MathInstruct", "columns": { @@ -504,7 +504,7 @@ "tags": ["text-generation", "awq"], "hf_dataset_id": "mit-han-lab/pile-val-backup" }, - "stack-exchange-paired": { + "stack_exchange_paired": { "ms_dataset_id": "AI-ModelScope/stack-exchange-paired", "columns": { "question": "query", @@ -515,33 +515,33 @@ "hf_dataset_id": "lvwerra/stack-exchange-paired", "huge_dataset": "true" }, - "ms-agent": { + "ms_agent": { "ms_dataset_id": "iic/ms_agent", "tags": ["chat", "agent", "multi-round", "🔥"] }, - "msagent-pro": { + "msagent_pro": { "ms_dataset_id": "iic/MSAgent-Pro", "tags": ["chat", "agent", "multi-round", "🔥"] }, - "codefuse-python-en": { + "codefuse_python_en": { "ms_dataset_id": "codefuse-ai/CodeExercise-Python-27k", "columns": { "chat_rounds": "messages" }, "tags": ["chat", "coding", "🔥"] }, - "sharegpt-gpt4": { + "sharegpt_gpt4": { "ms_dataset_id": "AI-ModelScope/sharegpt_gpt4", "subsets": ["default", "V3_format", "zh_38K_format"], "tags": ["chat", "multilingual", "general", "multi-round", "gpt4", "🔥"], "help": "`default` uses gpt4 for data cleaning." 
     },
-    "disc-med-sft-zh": {
+    "disc_med_sft_zh": {
         "ms_dataset_id": "AI-ModelScope/DISC-Med-SFT",
         "tags": ["chat", "medical", "🔥"],
         "hf_dataset_id": "Flmc/DISC-Med-SFT"
     },
-    "medical-en": {
+    "medical_en": {
         "ms_dataset_id": "swift/medical_zh",
         "subsets": [{
             "name": "en",
@@ -560,38 +560,38 @@
         "split": ["train", "val", "test"],
         "tags": ["chat", "medical"]
     },
-    "self-cognition": {
+    "self_cognition": {
         "ms_dataset_id": "swift/self-cognition",
         "hf_dataset_id": "modelscope/self-cognition",
         "remove_useless_columns": false,
         "tags": ["chat", "self-cognition", "🔥"]
     },
-    "swift-mix": {
+    "swift_mix": {
         "ms_dataset_id": "swift/swift-sft-mixture",
         "subsets": ["sharegpt", "firefly", "codefuse", "metamathqa"],
         "tags": ["chat", "sft", "general", "🔥"],
         "huge_dataset": true
     },
-    "longwriter-6k": {
+    "longwriter_6k": {
         "ms_dataset_id": "ZhipuAI/LongWriter-6k",
         "tags": ["long", "chat", "sft", "🔥"],
         "hf_dataset_id": "THUDM/LongWriter-6k"
     },
-    "longwriter-6k-filtered": {
+    "longwriter_6k_filtered": {
         "ms_dataset_id": "swift/longwriter-6k-filtered",
         "tags": ["long", "chat", "sft", "🔥"]
     },
-    "qwen2-pro-filtered": {
+    "qwen2_pro_filtered": {
         "ms_dataset_id": "AI-ModelScope/Magpie-Qwen2-Pro-300K-Filtered",
         "tags": ["chat", "sft", "🔥"],
         "hf_dataset_id": "Magpie-Align/Magpie-Qwen2-Pro-300K-Filtered"
     },
-    "qwen2-pro-zh": {
+    "qwen2_pro_zh": {
         "ms_dataset_id": "AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese",
         "tags": ["chat", "sft", "🔥", "zh"],
         "hf_dataset_id": "Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese"
     },
-    "qwen2-pro-en": {
+    "qwen2_pro_en": {
         "ms_dataset_id": "AI-ModelScope/Magpie-Qwen2-Pro-200K-English",
         "tags": ["chat", "sft", "🔥", "en"],
         "hf_dataset_id": "Magpie-Align/Magpie-Qwen2-Pro-200K-English"
diff --git a/swift/llm/dataset/constant.py b/swift/llm/dataset/constant.py
index ee72a4191..021982dc3 100644
--- a/swift/llm/dataset/constant.py
+++ b/swift/llm/dataset/constant.py
@@ -3,90 +3,90 @@
 class LLMDatasetName:
     # general
-    ms_bench = 'ms-bench'
-    alpaca_en = 'alpaca-en'
-    alpaca_zh = 'alpaca-zh'
-    multi_alpaca = 'multi-alpaca'
+    ms_bench = 'ms_bench'
+    alpaca_en = 'alpaca_en'
+    alpaca_zh = 'alpaca_zh'
+    multi_alpaca = 'multi_alpaca'
     instinwild = 'instinwild'
-    cot_en = 'cot-en'
-    cot_zh = 'cot-zh'
-    instruct_en = 'instruct-en'
-    firefly_zh = 'firefly-zh'
-    gpt4all_en = 'gpt4all-en'
+    cot_en = 'cot_en'
+    cot_zh = 'cot_zh'
+    instruct_en = 'instruct_en'
+    firefly_zh = 'firefly_zh'
+    gpt4all_en = 'gpt4all_en'
     sharegpt = 'sharegpt'
-    tulu_v2_sft_mixture = 'tulu-v2-sft-mixture'
-    wikipedia_zh = 'wikipedia-zh'
-    open_orca = 'open-orca'
-    sharegpt_gpt4 = 'sharegpt-gpt4'
-    deepctrl_sft = 'deepctrl-sft'
-    coig_cqia = 'coig-cqia'
+    tulu_v2_sft_mixture = 'tulu_v2_sft_mixture'
+    wikipedia_zh = 'wikipedia_zh'
+    open_orca = 'open_orca'
+    sharegpt_gpt4 = 'sharegpt_gpt4'
+    deepctrl_sft = 'deepctrl_sft'
+    coig_cqia = 'coig_cqia'
     ruozhiba = 'ruozhiba'
-    long_alpaca_12k = 'long-alpaca-12k'
-    lmsys_chat_1m = 'lmsys-chat-1m'
+    long_alpaca_12k = 'long_alpaca_12k'
+    lmsys_chat_1m = 'lmsys_chat_1m'
     guanaco = 'guanaco'

     # agent
-    ms_agent = 'ms-agent'
-    ms_agent_for_agentfabric = 'ms-agent-for-agentfabric'
-    ms_agent_multirole = 'ms-agent-multirole'
-    toolbench_for_alpha_umi = 'toolbench-for-alpha-umi'
-    damo_agent_zh = 'damo-agent-zh'
-    damo_agent_zh_mini = 'damo-agent-zh-mini'
-    agent_instruct_all_en = 'agent-instruct-all-en'
-    msagent_pro = 'msagent-pro'
+    ms_agent = 'ms_agent'
+    ms_agent_for_agentfabric = 'ms_agent_for_agentfabric'
+    ms_agent_multirole = 'ms_agent_multirole'
+    toolbench_for_alpha_umi = 'toolbench_for_alpha_umi'
+    damo_agent_zh = 'damo_agent_zh'
+    damo_agent_zh_mini = 'damo_agent_zh_mini'
+    agent_instruct_all_en = 'agent_instruct_all_en'
+    msagent_pro = 'msagent_pro'
     toolbench = 'toolbench'

     # coding
-    code_alpaca_en = 'code-alpaca-en'
-    leetcode_python_en = 'leetcode-python-en'
-    codefuse_python_en = 'codefuse-python-en'
-    codefuse_evol_instruction_zh = 'codefuse-evol-instruction-zh'
+    code_alpaca_en = 'code_alpaca_en'
+    leetcode_python_en = 'leetcode_python_en'
+    codefuse_python_en = 'codefuse_python_en'
+    codefuse_evol_instruction_zh = 'codefuse_evol_instruction_zh'

     # medical
-    medical_en = 'medical-en'
-    medical_zh = 'medical-zh'
-    disc_med_sft_zh = 'disc-med-sft-zh'
+    medical_en = 'medical_en'
+    medical_zh = 'medical_zh'
+    disc_med_sft_zh = 'disc_med_sft_zh'

     # law
-    lawyer_llama_zh = 'lawyer-llama-zh'
-    tigerbot_law_zh = 'tigerbot-law-zh'
-    disc_law_sft_zh = 'disc-law-sft-zh'
+    lawyer_llama_zh = 'lawyer_llama_zh'
+    tigerbot_law_zh = 'tigerbot_law_zh'
+    disc_law_sft_zh = 'disc_law_sft_zh'

     # math
-    blossom_math_zh = 'blossom-math-zh'
-    school_math_zh = 'school-math-zh'
-    open_platypus_en = 'open-platypus-en'
+    blossom_math_zh = 'blossom_math_zh'
+    school_math_zh = 'school_math_zh'
+    open_platypus_en = 'open_platypus_en'

     # sql
-    text2sql_en = 'text2sql-en'
-    sql_create_context_en = 'sql-create-context-en'
-    synthetic_text_to_sql = 'synthetic-text-to-sql'
-    # text-generation
-    advertise_gen_zh = 'advertise-gen-zh'
-    dureader_robust_zh = 'dureader-robust-zh'
+    text2sql_en = 'text2sql_en'
+    sql_create_context_en = 'sql_create_context_en'
+    synthetic_text_to_sql = 'synthetic_text_to_sql'
+    # text_generation
+    advertise_gen_zh = 'advertise_gen_zh'
+    dureader_robust_zh = 'dureader_robust_zh'

     # classification
-    cmnli_zh = 'cmnli-zh'
-    jd_sentiment_zh = 'jd-sentiment-zh'
-    hc3_zh = 'hc3-zh'
-    hc3_en = 'hc3-en'
-    dolly_15k = 'dolly-15k'
-    zhihu_kol = 'zhihu-kol'
-    zhihu_kol_filtered = 'zhihu-kol-filtered'
+    cmnli_zh = 'cmnli_zh'
+    jd_sentiment_zh = 'jd_sentiment_zh'
+    hc3_zh = 'hc3_zh'
+    hc3_en = 'hc3_en'
+    dolly_15k = 'dolly_15k'
+    zhihu_kol = 'zhihu_kol'
+    zhihu_kol_filtered = 'zhihu_kol_filtered'

     # other
-    finance_en = 'finance-en'
-    poetry_zh = 'poetry-zh'
-    webnovel_zh = 'webnovel-zh'
-    generated_chat_zh = 'generated-chat-zh'
-    self_cognition = 'self-cognition'
-    swift_mix = 'swift-mix'
+    finance_en = 'finance_en'
+    poetry_zh = 'poetry_zh'
+    webnovel_zh = 'webnovel_zh'
+    generated_chat_zh = 'generated_chat_zh'
+    self_cognition = 'self_cognition'
+    swift_mix = 'swift_mix'

-    cls_fudan_news_zh = 'cls-fudan-news-zh'
-    ner_java_zh = 'ner-jave-zh'
+    cls_fudan_news_zh = 'cls_fudan_news_zh'
+    ner_java_zh = 'ner_jave_zh'

     # rlhf
-    hh_rlhf = 'hh-rlhf'
-    hh_rlhf_cn = 'hh-rlhf-cn'
-    orpo_dpo_mix_40k = 'orpo-dpo-mix-40k'
-    stack_exchange_paired = 'stack-exchange-paired'
-    shareai_llama3_dpo_zh_en_emoji = 'shareai-llama3-dpo-zh-en-emoji'
-    ultrafeedback_kto = 'ultrafeedback-kto'
+    hh_rlhf = 'hh_rlhf'
+    hh_rlhf_cn = 'hh_rlhf_cn'
+    orpo_dpo_mix_40k = 'orpo_dpo_mix_40k'
+    stack_exchange_paired = 'stack_exchange_paired'
+    shareai_llama3_dpo_zh_en_emoji = 'shareai_llama3_dpo_zh_en_emoji'
+    ultrafeedback_kto = 'ultrafeedback_kto'

     # for awq
     pileval = 'pileval'
@@ -94,53 +94,53 @@ class LLMDatasetName:

 class MLLMDatasetName:
     #
-    coco_en = 'coco-en'
-    coco_en_mini = 'coco-en-mini'
+    coco_en = 'coco_en'
+    coco_en_mini = 'coco_en_mini'
     # images
-    coco_en_2 = 'coco-en-2'
-    coco_en_2_mini = 'coco-en-2-mini'
-    capcha_images = 'capcha-images'
-    latex_ocr_print = 'latex-ocr-print'
-    latex_ocr_handwrite = 'latex-ocr-handwrite'
-    # for qwen-audio
-    aishell1_zh = 'aishell1-zh'
-    aishell1_zh_mini = 'aishell1-zh-mini'
+    coco_en_2 = 'coco_en_2'
+    coco_en_2_mini = 'coco_en_2_mini'
+    capcha_images = 'capcha_images'
+    latex_ocr_print = 'latex_ocr_print'
+    latex_ocr_handwrite = 'latex_ocr_handwrite'
+    # for qwen_audio
+    aishell1_zh = 'aishell1_zh'
+    aishell1_zh_mini = 'aishell1_zh_mini'
     # for video
-    video_chatgpt = 'video-chatgpt'
+    video_chatgpt = 'video_chatgpt'
     # visual rlhf
-    rlaif_v = 'rlaif-v'
+    rlaif_v = 'rlaif_v'

-    mantis_instruct = 'mantis-instruct'
-    llava_data_instruct = 'llava-data-instruct'
+    mantis_instruct = 'mantis_instruct'
+    llava_data_instruct = 'llava_data_instruct'
     midefics = 'midefics'
     gqa = 'gqa'
-    text_caps = 'text-caps'
-    refcoco_unofficial_caption = 'refcoco-unofficial-caption'
-    refcoco_unofficial_grounding = 'refcoco-unofficial-grounding'
-    refcocog_unofficial_caption = 'refcocog-unofficial-caption'
-    refcocog_unofficial_grounding = 'refcocog-unofficial-grounding'
-    a_okvqa = 'a-okvqa'
+    text_caps = 'text_caps'
+    refcoco_unofficial_caption = 'refcoco_unofficial_caption'
+    refcoco_unofficial_grounding = 'refcoco_unofficial_grounding'
+    refcocog_unofficial_caption = 'refcocog_unofficial_caption'
+    refcocog_unofficial_grounding = 'refcocog_unofficial_grounding'
+    a_okvqa = 'a_okvqa'
     okvqa = 'okvqa'
-    ocr_vqa = 'ocr-vqa'
+    ocr_vqa = 'ocr_vqa'
     grit = 'grit'
-    llava_instruct_mix = 'llava-instruct-mix'
-    gpt4v_dataset = 'gpt4v-dataset'
+    llava_instruct_mix = 'llava_instruct_mix'
+    gpt4v_dataset = 'gpt4v_dataset'
     lnqa = 'lnqa'
-    science_qa = 'science-qa'
+    science_qa = 'science_qa'
     mind2web = 'mind2web'
-    sharegpt_4o_image = 'sharegpt-4o-image'
+    sharegpt_4o_image = 'sharegpt_4o_image'
     pixelprose = 'pixelprose'
     m3it = 'm3it'
     # additional images
     sharegpt4v = 'sharegpt4v'
-    llava_instruct_150k = 'llava-instruct-150k'
-    llava_pretrain = 'llava-pretrain'
+    llava_instruct_150k = 'llava_instruct_150k'
+    llava_pretrain = 'llava_pretrain'

-    sa1b_dense_caption = 'sa1b-dense-caption'
-    sa1b_paired_caption = 'sa1b-paired-caption'
+    sa1b_dense_caption = 'sa1b_dense_caption'
+    sa1b_paired_caption = 'sa1b_paired_caption'


 class DatasetName(LLMDatasetName, MLLMDatasetName):
diff --git a/swift/llm/infer/deploy.py b/swift/llm/infer/deploy.py
index 517699cbe..b8c2abdec 100644
--- a/swift/llm/infer/deploy.py
+++ b/swift/llm/infer/deploy.py
@@ -37,24 +37,24 @@ async def _log_stats_hook(self, log_interval: int):
         while True:
             await asyncio.sleep(log_interval)
             global_stats = self.infer_states.compute()
-            global_stats.reset()
+            self.infer_states.reset()
             for k, v in global_stats.items():
                 global_stats[k] = round(v, 8)
             logger.info(global_stats)

     def lifespan(self, app: FastAPI):
-        global _args
-        if _args.log_interval > 0:
-            thread = Thread(target=lambda: asyncio.run(self._log_stats_hook(_args.log_interval)))
+        args = self.args
+        if args.log_interval > 0:
+            thread = Thread(target=lambda: asyncio.run(self._log_stats_hook(args.log_interval)))
             thread.start()
         yield

     async def get_available_models(self):
         args = self.args
         model_list = [args.served_model_name or self.model_dir]
-        if _args.lora_request_list is not None:
-            model_list += [lora_request.lora_name for lora_request in _args.lora_request_list]
-        data = [Model(id=model_id, owned_by=_args.owned_by) for model_id in model_list]
+        if args.lora_request_list is not None:
+            model_list += [lora_request.lora_name for lora_request in args.lora_request_list]
+        data = [Model(id=model_id, owned_by=args.owned_by) for model_id in model_list]
         return ModelList(data=data)

     async def check_model(self, request: ChatCompletionRequest) -> Optional[str]:
@@ -83,10 +83,10 @@ async def create_chat_completion(self, *, return_cmpl_response: bool = False):
         infer_request, request_config = request.parse()
-        res_or_gen = await self.infer_async(self.template, infer_request, request_config)
+        res_or_gen = await self.infer_async(infer_request, request_config, template=self.template)
         if request_config.stream:

-            def _gen_wrapper():
+            async def _gen_wrapper():
                 async for res in res_or_gen:
                     res = self._post_process(res, return_cmpl_response)
                     yield f'data: {json.dumps(asdict(res), ensure_ascii=False)}\n\n'
diff --git a/swift/llm/infer/infer.py b/swift/llm/infer/infer.py
index 3e526e3b7..c6e4bc878 100644
--- a/swift/llm/infer/infer.py
+++ b/swift/llm/infer/infer.py
@@ -220,7 +220,7 @@ def _prepare_request_config(self) -> RequestConfig:

     def infer_single(self, infer_request: InferRequest, request_config: RequestConfig) -> Tuple[str, Messages]:
         messages = infer_request.messages
-        res_or_gen = self.infer(self.template, [infer_request], request_config, use_tqdm=False)
+        res_or_gen = self.infer([infer_request], request_config, template=self.template, use_tqdm=False)
         if request_config.stream:
             response = ''
             for res in res_or_gen:
@@ -312,7 +312,7 @@ def infer_dataset(self) -> List[Dict[str, Any]]:
                 infer_request = InferRequest(**data)
                 infer_request.remove_response()
                 infer_requests.append(infer_request)
-            resp_list = self.infer(self.template, infer_requests, request_config, use_tqdm=True)
+            resp_list = self.infer(infer_requests, request_config, template=self.template, use_tqdm=True)
             for data, resp in zip(val_dataset, resp_list):
                 response = resp.choices[0].message.content
                 data['messages'].append({'role': 'assistant', 'content': response})
diff --git a/swift/llm/infer/infer_engine/infer_engine.py b/swift/llm/infer/infer_engine/infer_engine.py
index ab9f0bc3d..d415d9a53 100644
--- a/swift/llm/infer/infer_engine/infer_engine.py
+++ b/swift/llm/infer/infer_engine/infer_engine.py
@@ -143,7 +143,7 @@ def infer(self,
              **kwargs) -> Union[List[ChatCompletionResponse], Iterator[List[Optional[ChatCompletionStreamResponse]]]]:
         tasks = [self.infer_async(infer_request, request_config, **kwargs) for infer_request in infer_requests]
         if use_tqdm is None:
-            use_tqdm = not request_config.stream
+            use_tqdm = request_config is None or not request_config.stream
         if request_config.stream:

             def _gen_wrapper():
diff --git a/swift/llm/infer/infer_engine/pt_engine.py b/swift/llm/infer/infer_engine/pt_engine.py
index c2f1e7156..700356bde 100644
--- a/swift/llm/infer/infer_engine/pt_engine.py
+++ b/swift/llm/infer/infer_engine/pt_engine.py
@@ -149,19 +149,18 @@ async def _infer_stream_async(
             *,
             template: Optional[Template] = None,
             lora_request: Optional[PtLoRARequest] = None) -> AsyncIterator[ChatCompletionStreamResponse]:
-        if template is None:
-            template = self.default_template
-        gen = self.infer(template, [infer_request], request_config, use_tqdm=False, lora_request=lora_request)
+        gen = self.infer([infer_request], request_config, template=template, use_tqdm=False, lora_request=lora_request)
         for response in gen:
             yield response[0]

     async def _infer_full_async(self,
-                                template: Template,
                                 infer_request: InferRequest,
                                 request_config: RequestConfig,
                                 *,
+                                template: Optional[Template] = None,
                                 lora_request: Optional[PtLoRARequest] = None) -> ChatCompletionResponse:
-        return self.infer(template, [infer_request], request_config, use_tqdm=False, lora_request=lora_request)[0]
+        return self.infer([infer_request], request_config, template=template, use_tqdm=False,
+                          lora_request=lora_request)[0]

     @staticmethod
     def _update_batched_logprobs(batched_logprobs: List[torch.Tensor], logits_streamer: Optional[LogitsStreamer],
@@ -347,23 +346,25 @@ async def infer_async(
         template: Optional[Template] = None,
         lora_request: Optional[PtLoRARequest] = None,
     ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionStreamResponse]]:
-        infer_args = (template, infer_request, request_config)
+        infer_args = (infer_request, request_config)
         if request_config.stream:
-            return self._infer_stream_async(*infer_args, lora_request=lora_request)
+            return self._infer_stream_async(*infer_args, template=template, lora_request=lora_request)
         else:
-            return await self._infer_full_async(*infer_args, lora_request=lora_request)
+            return await self._infer_full_async(*infer_args, template=template, lora_request=lora_request)

     def _infer(
         self,
-        template: Template,
         infer_requests: List[InferRequest],
         request_config: Optional[RequestConfig] = None,
         metrics: Optional[List[Metric]] = None,
         *,
+        template: Optional[Template] = None,
         lora_request: Optional[PtLoRARequest] = None,
     ) -> Union[List[ChatCompletionResponse], Iterator[List[Optional[ChatCompletionStreamResponse]]]]:
         self.model.eval()
         request_config = deepcopy(request_config or RequestConfig())
+        if template is None:
+            template = self.default_template

         batched_inputs = []
         for infer_request in infer_requests:
@@ -399,10 +400,8 @@ def infer(
             use_tqdm: Optional[bool] = None,
             lora_request: Optional[PtLoRARequest] = None
     ) -> Union[List[ChatCompletionResponse], Iterator[List[Optional[ChatCompletionStreamResponse]]]]:
-        if template is None:
-            template = self.default_template
         if use_tqdm is None:
-            use_tqdm = not request_config.stream
+            use_tqdm = request_config is None or not request_config.stream
         prog_bar = tqdm(total=len(infer_requests), dynamic_ncols=True, disable=not use_tqdm)

         def _infer_full():
             res = []
             i = 0
             while i < len(infer_requests):
                 infer_requests_samples = infer_requests[i:i + self.max_batch_size]
-                res += self._infer(template, infer_requests_samples, request_config, metrics, lora_request=lora_request)
+                res += self._infer(
+                    infer_requests_samples, request_config, metrics, template=template, lora_request=lora_request)
                 i += self.max_batch_size
                 prog_bar.update(len(infer_requests_samples))
             return res

         def _infer_stream() -> Iterator[List[Optional[ChatCompletionStreamResponse]]]:
             i = 0
             while i < len(infer_requests):
                 infer_requests_samples = infer_requests[i:i + self.max_batch_size]
-                gen = self._infer(template, infer_requests_samples, request_config, metrics, lora_request=lora_request)
+                gen = self._infer(
+                    infer_requests_samples, request_config, metrics, template=template, lora_request=lora_request)
                 for response in gen:
                     res = [None] * len(infer_requests)
                     res[i:i + self.max_batch_size] = response
diff --git a/swift/llm/template/constant.py b/swift/llm/template/constant.py
index 68e6de02f..aca1d3e06 100644
--- a/swift/llm/template/constant.py
+++ b/swift/llm/template/constant.py
@@ -4,11 +4,6 @@
 class LLMTemplateType:
-    # base
-    default_generation = 'default-generation'
-    chatglm_generation = 'chatglm-generation'
-
-    # chat
     default = 'default'
     qwen = 'qwen'
     qwen2_5 = 'qwen2_5'
@@ -29,7 +24,7 @@ class LLMTemplateType:
     internlm2 = 'internlm2'

     deepseek = 'deepseek'
-    deepseek_coder = 'deepseek-coder'
+    deepseek_coder = 'deepseek_coder'
     deepseek2 = 'deepseek2'
     deepseek2_5 = 'deepseek2_5'

@@ -38,16 +33,16 @@ class LLMTemplateType:
     openbuddy2 = 'openbuddy2'
     minicpm = 'minicpm'
     telechat = 'telechat'
-    telechat_v2 = 'telechat-v2'
+    telechat_v2 = 'telechat_v2'

-    yi_coder = 'yi-coder'
+    yi_coder = 'yi_coder'
     codefuse = 'codefuse'
-    codefuse_codellama = 'codefuse-codellama'
+    codefuse_codellama = 'codefuse_codellama'

-    numina_math = 'numina-math'
-    mistral_nemo = 'mistral-nemo'
+    numina_math = 'numina_math'
+    mistral_nemo = 'mistral_nemo'
     gemma = 'gemma'
-    wizardlm2_awq = 'wizardlm2-awq'
+    wizardlm2_awq = 'wizardlm2_awq'
     wizardlm2 = 'wizardlm2'
     atom = 'atom'
     phi3 = 'phi3'
@@ -67,38 +62,29 @@

 class MLLMTemplateType:
-    # base
-    qwen_vl_generation = 'qwen_vl_generation'
-    qwen_audio_generation = 'qwen_audio_generation'
-    qwen2_vl_generation = 'qwen2_vl_generation'
-    qwen2_audio_generation = 'qwen2_audio_generation'
-
-    llama3_2_vision_generation = 'llama3_2_vision_generation'
-
-    # chat
     qwen_vl = 'qwen_vl'
     qwen_audio = 'qwen_audio'
     qwen2_vl = 'qwen2_vl'
     qwen2_audio = 'qwen2_audio'

-    llama3_1_omni = 'llama3_1-omni'
-    llama3_2_vision = 'llama3_2-vision'
+    llama3_1_omni = 'llama3_1_omni'
+    llama3_2_vision = 'llama3_2_vision'

     llava1_5 = 'llava1_5'
     llava_mistral = 'llava_mistral'
     llava_vicuna = 'llava_vicuna'
     llava_yi = 'llava_yi'
     llama3_llava_next_hf = 'llama3_llava_next_hf'
-    llava_qwen_hf = 'llama-qwen-hf'
+    llava_qwen_hf = 'llama_qwen_hf'
     llava_onevision_qwen = 'llava_onevision_qwen'
     llava_next_video = 'llava_next_video'
     llava_next_video_yi = 'llava_next_video_yi'
-    llava_next_llama3 = 'llava-next-llama3'  # DaozeZhang
+    llava_next_llama3 = 'llava_next_llama3'  # DaozeZhang
     llava_llama_instruct = 'llava_llama_instruct'  # xtuner
-    llama3_llava_next = 'llama3-llava-next'  # lmms-lab
-    llava_qwen = 'llava-qwen'  # lmms-lab
-    yi_vl = 'yi-vl'
+    llama3_llava_next = 'llama3_llava_next'  # lmms-lab
+    llava_qwen = 'llava_qwen'  # lmms-lab
+    yi_vl = 'yi_vl'

     internvl = 'internvl'
     internvl_phi3 = 'internvl_phi3'
@@ -120,7 +106,7 @@ class MLLMTemplateType:
     minicpmv2_6 = 'minicpmv2_6'

     deepseek_vl = 'deepseek_vl'
-    mplug_owl2 = 'mplug-owl2'
+    mplug_owl2 = 'mplug_owl2'
     mplug_owl3 = 'mplug_owl3'

     got_ocr2 = 'got_ocr2'
diff --git a/tests/deploy/test_logprobs.py b/tests/deploy/test_logprobs.py
index 2f1041325..01733379f 100644
--- a/tests/deploy/test_logprobs.py
+++ b/tests/deploy/test_logprobs.py
@@ -19,9 +19,7 @@ def _test_client(print_logprobs: bool = False):
     infer_request = InferRequest(messages=[{'role': 'user', 'content': '你是谁'}])
     request_config = RequestConfig(seed=42, max_tokens=256, temperature=0.8, logprobs=True, top_logprobs=5)
-    resp = infer_client.infer(
-        [infer_request],
-        request_config=request_config)[0]
+    resp = infer_client.infer([infer_request], request_config=request_config)[0]
     response = resp.choices[0].message.content
     print(f'query: {query}')
     print(f'response: {response}')
@@ -30,8 +28,7 @@ def _test_client(print_logprobs: bool = False):
     request_config = RequestConfig(
         stream=True, seed=42, max_tokens=256, temperature=0.8, top_k=20, top_p=0.8, logprobs=True, top_logprobs=5)
-    stream_resp = infer_client.infer(
-        [infer_request], request_config=request_config)
+    stream_resp = infer_client.infer([infer_request], request_config=request_config)
     print(f'query: {query}')
     print('response: ', end='')
     for chunk in stream_resp:
@@ -47,12 +44,12 @@ def _test(infer_backend):
     os.environ['CUDA_VISIBLE_DEVICES'] = '0'
     from swift.llm import DeployArguments
-    from swift.llm.infer.deploy import llm_deploy
+    from swift.llm import deploy_main
     import multiprocessing
     mp = multiprocessing.get_context('spawn')
     process = mp.Process(
-        target=llm_deploy,
-        args=(DeployArguments(model_type='qwen2-7b-instruct', infer_backend=infer_backend, verbose=False), ))
+        target=deploy_main,
+        args=(DeployArguments(model='qwen/Qwen2-7B-Instruct', infer_backend=infer_backend, verbose=False), ))
     process.start()
     _test_client(True)
     process.terminate()
@@ -83,7 +80,7 @@ def test_vllm_orgin():


 if __name__ == '__main__':
-    test_vllm_orgin()
-    # test_vllm()
+    # test_vllm_orgin()
+    test_vllm()
     # test_lmdeploy()
     # test_pt()
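
Usage note (illustrative sketch, not part of the patch): after this change, the
infer engines take the request list first, and `template` is accepted only as an
optional keyword argument; when it is omitted, PtEngine falls back to its
`default_template` (see `_infer` in pt_engine.py above). A minimal sketch of the
new calling convention; `engine` stands for any already-constructed engine
(e.g. a PtEngine) and `run_batch`/`prompts` are hypothetical names:

    from typing import List

    from swift.llm import InferRequest, RequestConfig

    def run_batch(engine, prompts: List[str]):
        # One InferRequest per user prompt, in the messages format used in
        # tests/deploy/test_logprobs.py.
        requests = [InferRequest(messages=[{'role': 'user', 'content': p}]) for p in prompts]
        config = RequestConfig(max_tokens=256, temperature=0.8)
        # template is keyword-only after this patch; omitting it selects the
        # engine's default_template.
        return engine.infer(requests, config)

Each element of the returned list is a ChatCompletionResponse, so the text of the
first answer is `resp_list[0].choices[0].message.content`, exactly as read in the
test file above.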