Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a "llamacpp_HF creator" menu #5519

Merged
merged 2 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions download-model.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,8 @@ def get_download_links_from_huggingface(self, model, branch, text_only=False, sp
is_llamacpp = has_gguf and specific_file is not None
return links, sha256, is_lora, is_llamacpp

def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None):
if base_folder is None:
base_folder = 'models' if not is_lora else 'loras'
def get_output_folder(self, model, branch, is_lora, is_llamacpp=False):
base_folder = 'models' if not is_lora else 'loras'

# If the model is of type GGUF, save directly in the base_folder
if is_llamacpp:
Expand Down Expand Up @@ -303,7 +302,10 @@ def check_model_files(self, model, branch, links, sha256, output_folder):
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)

# Get the output folder
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output)
if args.output:
output_folder = Path(args.output)
else:
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)

if args.check:
# Check previously downloaded files
Expand Down
64 changes: 53 additions & 11 deletions modules/ui_model_menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,17 +143,27 @@ def create_ui():
shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")

with gr.Column():
with gr.Row():
shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu)

shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
with gr.Tab("Download"):
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)

with gr.Tab("llamacpp_HF creator"):
with gr.Row():
shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu)

shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1)
shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.")

with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
Expand Down Expand Up @@ -203,6 +213,7 @@ def create_event_handlers():
shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model'))
shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True)


def load_model_wrapper(selected_model, loader, autoload=False):
Expand Down Expand Up @@ -244,27 +255,58 @@ def load_lora_wrapper(selected_loras):

def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
try:
progress(0.0)
downloader = importlib.import_module("download-model").ModelDownloader()

progress(0.0)
model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)

yield ("Getting the download links from Hugging Face")
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
if return_links:
yield '\n\n'.join([f"`{Path(link).name}`" for link in links])
output = "```\n"
for link in links:
output += f"{Path(link).name}" + "\n"

output += "```"
yield output
return

yield ("Getting the output folder")
base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=base_folder)
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
if check:
progress(0.5)

yield ("Checking previously downloaded files")
downloader.check_model_files(model, branch, links, sha256, output_folder)
progress(1.0)
else:
yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
yield ("Done!")

yield (f"Model successfully saved to `{output_folder}/`.")
except:
progress(1.0)
yield traceback.format_exc().replace('\n', '\n\n')


def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
try:
downloader = importlib.import_module("download-model").ModelDownloader()

progress(0.0)
model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None)

yield ("Getting the tokenizer files links from Hugging Face")
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)
output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")

yield (f"Downloading tokenizer to `{output_folder}`")
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)

# Move the GGUF
(Path(shared.args.model_dir) / gguf_name).rename(output_folder / gguf_name)

yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.")
except:
progress(1.0)
yield traceback.format_exc().replace('\n', '\n\n')
Expand Down
11 changes: 10 additions & 1 deletion modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,16 @@ def get_available_models():
model_list = []
for item in list(Path(f'{shared.args.model_dir}/').glob('*')):
if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name:
model_list.append(re.sub('.pth$', '', item.name))
model_list.append(item.name)

return ['None'] + sorted(model_list, key=natural_keys)


def get_available_ggufs():
model_list = []
for item in Path(f'{shared.args.model_dir}/').glob('*'):
if item.is_file() and item.name.lower().endswith(".gguf"):
model_list.append(item.name)

return ['None'] + sorted(model_list, key=natural_keys)

Expand Down