diff --git a/xinference/deploy/cmdline.py b/xinference/deploy/cmdline.py
index ab59884e02..9a5f24d276 100644
--- a/xinference/deploy/cmdline.py
+++ b/xinference/deploy/cmdline.py
@@ -385,7 +385,7 @@ def list_model_registrations(
 )
 @click.option(
     "--trust-remote-code",
-    default=False,
+    default=True,
     type=bool,
     help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
 )
diff --git a/xinference/model/llm/pytorch/baichuan.py b/xinference/model/llm/pytorch/baichuan.py
index f0e5672d4e..a08234216d 100644
--- a/xinference/model/llm/pytorch/baichuan.py
+++ b/xinference/model/llm/pytorch/baichuan.py
@@ -54,12 +54,11 @@ def _load_model(self, kwargs: dict):
         tokenizer = AutoTokenizer.from_pretrained(
             self.model_path,
             use_fast=self._use_fast_tokenizer,
-            trust_remote_code=True,
+            trust_remote_code=kwargs["trust_remote_code"],
             revision=kwargs["revision"],
         )
         model = AutoModelForCausalLM.from_pretrained(
             self.model_path,
-            trust_remote_code=True,
             **kwargs,
         )
         model.generation_config = GenerationConfig.from_pretrained(self.model_path)
diff --git a/xinference/model/llm/pytorch/chatglm.py b/xinference/model/llm/pytorch/chatglm.py
index 1754666fa7..b78407e9d7 100644
--- a/xinference/model/llm/pytorch/chatglm.py
+++ b/xinference/model/llm/pytorch/chatglm.py
@@ -51,12 +51,11 @@ def _load_model(self, kwargs: dict):
 
         tokenizer = AutoTokenizer.from_pretrained(
             self.model_path,
-            trust_remote_code=True,
+            trust_remote_code=kwargs["trust_remote_code"],
             revision=kwargs["revision"],
         )
         model = AutoModel.from_pretrained(
             self.model_path,
-            trust_remote_code=True,
             **kwargs,
         )
         return model, tokenizer
diff --git a/xinference/model/llm/pytorch/core.py b/xinference/model/llm/pytorch/core.py
index 4b997b6302..99af2afb66 100644
--- a/xinference/model/llm/pytorch/core.py
+++ b/xinference/model/llm/pytorch/core.py
@@ -87,7 +87,7 @@ def _sanitize_model_config(
         pytorch_model_config.setdefault("gptq_groupsize", -1)
         pytorch_model_config.setdefault("gptq_act_order", False)
         pytorch_model_config.setdefault("device", "auto")
-        pytorch_model_config.setdefault("trust_remote_code", False)
+        pytorch_model_config.setdefault("trust_remote_code", True)
         return pytorch_model_config
 
     def _sanitize_generate_config(
diff --git a/xinference/model/llm/pytorch/falcon.py b/xinference/model/llm/pytorch/falcon.py
index d245476a32..773fea83d7 100644
--- a/xinference/model/llm/pytorch/falcon.py
+++ b/xinference/model/llm/pytorch/falcon.py
@@ -51,12 +51,12 @@ def _load_model(self, kwargs: dict):
 
         tokenizer = AutoTokenizer.from_pretrained(
             self.model_path,
+            trust_remote_code=kwargs["trust_remote_code"],
             revision=kwargs["revision"],
         )
         model = AutoModelForCausalLM.from_pretrained(
             self.model_path,
             low_cpu_mem_usage=True,
-            trust_remote_code=True,
             **kwargs,
         )
         tokenizer.pad_token_id = 9
diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py
index 70d98e508b..772e043f2b 100644
--- a/xinference/model/llm/vllm/core.py
+++ b/xinference/model/llm/vllm/core.py
@@ -116,7 +116,7 @@ def _sanitize_model_config(
         cuda_count = self._get_cuda_count()
 
         model_config.setdefault("tokenizer_mode", "auto")
-        model_config.setdefault("trust_remote_code", False)
+        model_config.setdefault("trust_remote_code", True)
         model_config.setdefault("tensor_parallel_size", cuda_count)
         model_config.setdefault("block_size", 16)
         model_config.setdefault("swap_space", 4)
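
Taken together, these hunks flip the default for `trust_remote_code` to `True` across the CLI, the PyTorch backend, and the vLLM backend, and the tokenizer loaders now read the value from the sanitized model config (`kwargs["trust_remote_code"]`) instead of hard-coding `True`. Below is a minimal sketch of how a caller could now opt out explicitly; the endpoint URL and model name are placeholders, and it assumes extra keyword arguments to `launch_model` are forwarded into the model config that `_sanitize_model_config` receives:

```python
from xinference.client import Client

# Placeholder endpoint; point this at a running xinference supervisor.
client = Client("http://127.0.0.1:9997")

# With the new default, custom modeling code from the Hub is trusted unless
# the caller disables it explicitly via the model config.
model_uid = client.launch_model(
    model_name="chatglm2",          # placeholder model name
    model_format="pytorch",
    trust_remote_code=False,        # opt out of executing remote modeling files
)
```

On the command line, the same opt-out would presumably go through the `--trust-remote-code` flag added in the cmdline.py hunk (declared with `type=bool`, so click accepts values such as `true`/`false`).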