From d827e7da36ab24accbab799e7dc70e65a9a83dc2 Mon Sep 17 00:00:00 2001 From: "Jonathan C. McKinney" Date: Mon, 20 May 2024 09:00:27 -0700 Subject: [PATCH] Fixes Issue #1602 --- docs/README_DOCKER.md | 2 +- docs/run_patches.sh | 2 ++ docs/trans.patch | 16 ++++++++++++++++ requirements.txt | 4 ++-- src/gen.py | 8 ++++---- src/version.py | 2 +- 6 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 docs/trans.patch diff --git a/docs/README_DOCKER.md b/docs/README_DOCKER.md index ea4a02639..e79e4bcfd 100644 --- a/docs/README_DOCKER.md +++ b/docs/README_DOCKER.md @@ -161,7 +161,7 @@ docker run --gpus all \ --max_new_tokens=1024 \ --visible_visible_models=False \ --openai_port=$OPENAI_SERVER_PORT \ - --gradio_offline_level=2 --gradio_offline_level=2 + --gradio_offline_level=2 ``` Depending upon if use links, may require more specific mappings to direct location not linked location that cannot be used, e.g. ```bash diff --git a/docs/run_patches.sh b/docs/run_patches.sh index 5ef2dc6e7..4940781bd 100755 --- a/docs/run_patches.sh +++ b/docs/run_patches.sh @@ -28,3 +28,5 @@ sed -i "s/except OSError:/except (OSError, RuntimeError):/g" $sp/anyio/_backends # https://github.com/gradio-app/gradio/issues/7086 sed -i 's/while True:/while True:\n time.sleep(0.001)\n/g' $sp/gradio_client/client.py + +patch $sp/transformers/modeling_utils.py docs/trans.patch diff --git a/docs/trans.patch b/docs/trans.patch new file mode 100644 index 000000000..c8bb8eced --- /dev/null +++ b/docs/trans.patch @@ -0,0 +1,16 @@ +--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/transformers/modeling_utils.py 2024-05-20 08:47:52.580255528 -0700 ++++ new.py 2024-05-20 08:54:05.751687071 -0700 +@@ -3412,7 +3412,12 @@ + "_commit_hash": commit_hash, + **has_file_kwargs, + } +- if not has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs): ++ import requests ++ try: ++ has_file_res = has_file(pretrained_model_name_or_path, safe_weights_name, **has_file_kwargs) ++ except requests.exceptions.ConnectionError: ++ has_file_res = False ++ if not has_file_res: + Thread( + target=auto_conversion, + args=(pretrained_model_name_or_path,), diff --git a/requirements.txt b/requirements.txt index 160e7532c..6f029b011 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,9 +30,9 @@ matplotlib>=3.7.1 # transformers loralib>=0.1.2 bitsandbytes>=0.43.1 -accelerate>=0.29.3 +accelerate>=0.30.1 peft>=0.7.0 -transformers>=4.40.0 +transformers>=4.41.0 tokenizers>=0.19.0 hf_transfer>=0.1.6 optimum>=1.17.1 diff --git a/src/gen.py b/src/gen.py index 149faf0ed..afdb39454 100644 --- a/src/gen.py +++ b/src/gen.py @@ -2555,7 +2555,7 @@ def get_non_lora_model(base_model, model_loader, load_half, elif load_awq: allowed_dict = dict(max_new_tokens=None, trust_remote_code=True, fuse_layers=True, - batch_size=1, safetensors=False, + batch_size=1, use_safetensors=False, max_memory=None, offload_folder=None) for k in model_kwargs.copy(): if k not in allowed_dict: @@ -2566,7 +2566,7 @@ def get_non_lora_model(base_model, model_loader, load_half, args = tuple([base_model]) model = model_loader( *args, - safetensors=use_safetensors, + use_safetensors=use_safetensors, **model_kwargs, ) elif load_in_8bit or load_in_4bit or not load_half: @@ -3545,7 +3545,7 @@ def get_hf_model(load_8bit: bool = False, elif load_awq: allowed_dict = dict(max_new_tokens=None, trust_remote_code=True, fuse_layers=True, - batch_size=1, safetensors=False, + batch_size=1, use_safetensors=False, max_memory=None, offload_folder=None) for k in model_kwargs.copy(): if k not in allowed_dict: @@ -3556,7 +3556,7 @@ def get_hf_model(load_8bit: bool = False, args = tuple([base_model]) model = model_loader( *args, - safetensors=use_safetensors, + use_safetensors=use_safetensors, **model_kwargs, ) else: diff --git a/src/version.py b/src/version.py index 3ab5cbd55..a842444c3 100644 --- a/src/version.py +++ b/src/version.py @@ -1 +1 @@ -__version__ = "6396404e287099b72bbc38380eeada156d10eba7" +__version__ = "1d267653151ebf48223da9f96f4e2aed2f22e3b8"