fixed llama_generate, _llama_yield_token_func, _llama_should_stop_func, _llama_cli_main
mtasic85 committed Jul 13, 2024
1 parent 72506f2 commit e3b9392
Showing 12 changed files with 510 additions and 287 deletions.
62 changes: 13 additions & 49 deletions examples/demo_cffi.py
@@ -8,61 +8,19 @@
 from demo_models import models
 
 
-def demo1():
-    options = Options(
-        no_display_prompt=True,
-        threads=psutil.cpu_count(logical=False),
-        ctx_size=8192,
-        predict=512,
-        flash_attn=True,
-        cont_batching=True,
-        simple_io=True,
-        log_disable=True,
-        hf_repo=models[0].hf_repo,
-        hf_file=models[0].hf_file,
-        prompt='<|system|>\nYou are a helpful assistant.<|end|><|user|>\nEvaluate 1 + 2.<|end|>\n<|assistant|>\n',
-    )
-
-    for chunk in llama_generate(options):
-        print(chunk, flush=True, end='')
-
-    print()
-
-
-def demo2():
-    options = Options(
-        no_display_prompt=True,
-        threads=psutil.cpu_count(logical=False),
-        ctx_size=2048,
-        predict=-2,
-        flash_attn=True,
-        cont_batching=True,
-        simple_io=True,
-        log_disable=True,
-        hf_repo=models[1].hf_repo,
-        hf_file=models[1].hf_file,
-        prompt='<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nEvaluate 1 + 2.<|im_end|>\n<|im_start|>assistant\n',
-    )
-
-    for chunk in llama_generate(options):
-        print(chunk, flush=True, end='')
-
-    print()
-
-
-def demo3():
+def demo_model(model: Model, messages: list[dict]):
     options = Options(
         no_display_prompt=True,
         threads=psutil.cpu_count(logical=False),
         ctx_size=2048,
         predict=-2,
+        # batch_size=512, # 2048,
         flash_attn=True,
         cont_batching=True,
         simple_io=True,
         log_disable=True,
-        hf_repo=models[2].hf_repo,
-        hf_file=models[2].hf_file,
-        prompt='<|system|>\nYou are a helpful assistant.<|end|><|user|>\nEvaluate 1 + 2.<|end|>\n<|assistant|>\n',
+        model=model,
+        prompt=messages,
     )
 
     for chunk in llama_generate(options):
@@ -72,6 +30,12 @@ def demo3():
 
 
 if __name__ == '__main__':
-    demo1()
-    demo2()
-    demo3()
+    messages = [
+        {'role': 'system', 'content': 'You are a helpful assistant.'},
+        {'role': 'user', 'content': 'Evaluate 1 + 2 in Python.'},
+    ]
+
+    for model in models:
+        print(f'{model = }')
+        demo_model(model, messages)
+        print('-' * 80)
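For context, both updated examples now follow a single pattern: pick a Model entry, build Options from it plus a chat-style messages list, and stream tokens from llama_generate. Below is a minimal sketch of that pattern assembled only from the lines visible in this diff; the cffi import path is assumed by analogy with the ctypes example, and Options may accept other keywords not shown here.

    import psutil
    from llama.llama_cli_cffi import llama_generate, Model, Options  # import path assumed by analogy with the ctypes example

    from demo_models import models

    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'Evaluate 1 + 2.'},
    ]

    options = Options(
        no_display_prompt=True,
        threads=psutil.cpu_count(logical=False),  # physical cores only
        ctx_size=2048,
        predict=-2,
        flash_attn=True,
        cont_batching=True,
        simple_io=True,
        log_disable=True,
        model=models[0],   # any entry from demo_models.models
        prompt=messages,   # messages list instead of a raw prompt string
    )

    # llama_generate yields text chunks as they are produced
    for chunk in llama_generate(options):
        print(chunk, flush=True, end='')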
68 changes: 16 additions & 52 deletions examples/demo_ctypes.py
@@ -1,68 +1,26 @@
-import os
-import sys
-sys.path.append(os.path.abspath('.'))
+# import os
+# import sys
+# sys.path.append(os.path.abspath('.'))
 
 import psutil
 from llama.llama_cli_ctypes import llama_generate, Model, Options
 
 from demo_models import models
 
 
-def demo1():
-    options = Options(
-        no_display_prompt=True,
-        threads=psutil.cpu_count(logical=False),
-        ctx_size=8192,
-        predict=512,
-        flash_attn=True,
-        cont_batching=True,
-        simple_io=True,
-        log_disable=True,
-        hf_repo=models[0].hf_repo,
-        hf_file=models[0].hf_file,
-        prompt='<|system|>\nYou are a helpful assistant.<|end|><|user|>\nEvaluate 1 + 2.<|end|>\n<|assistant|>\n',
-    )
-
-    for chunk in llama_generate(options):
-        print(chunk, flush=True, end='')
-
-    print()
-
-
-def demo2():
-    options = Options(
-        no_display_prompt=True,
-        threads=psutil.cpu_count(logical=False),
-        ctx_size=2048,
-        predict=-2,
-        flash_attn=True,
-        cont_batching=True,
-        simple_io=True,
-        log_disable=True,
-        hf_repo=models[1].hf_repo,
-        hf_file=models[1].hf_file,
-        prompt='<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nEvaluate 1 + 2.<|im_end|>\n<|im_start|>assistant\n',
-    )
-
-    for chunk in llama_generate(options):
-        print(chunk, flush=True, end='')
-
-    print()
-
-
-def demo3():
+def demo_model(model: Model, messages: list[dict]):
     options = Options(
         no_display_prompt=True,
         threads=psutil.cpu_count(logical=False),
         ctx_size=2048,
         predict=-2,
+        # batch_size=512, # 2048,
         flash_attn=True,
         cont_batching=True,
         simple_io=True,
         log_disable=True,
-        hf_repo=models[2].hf_repo,
-        hf_file=models[2].hf_file,
-        prompt='<|system|>\nYou are a helpful assistant.<|end|><|user|>\nEvaluate 1 + 2.<|end|>\n<|assistant|>\n',
+        model=model,
+        prompt=messages,
    )
 
     for chunk in llama_generate(options):
@@ -72,6 +30,12 @@ def demo3():
 
 
 if __name__ == '__main__':
-    demo1()
-    demo2()
-    demo3()
+    messages = [
+        {'role': 'system', 'content': 'You are a helpful assistant.'},
+        {'role': 'user', 'content': 'Evaluate 1 + 2.'},
+    ]
+
+    for model in models:
+        print(f'{model = }')
+        demo_model(model, messages)
+        print('-' * 80)
33 changes: 32 additions & 1 deletion examples/demo_models.py
@@ -1,11 +1,42 @@
from llama.model import Model

models = [
    Model(
        '01-ai/Yi-1.5-9B-Chat-16K',
        'mradermacher/Yi-1.5-9B-Chat-16K-i1-GGUF',
        # 'Yi-1.5-9B-Chat-16K.i1-IQ2_M.gguf',
        # 'Yi-1.5-9B-Chat-16K.i1-IQ3_M.gguf',
        'Yi-1.5-9B-Chat-16K.i1-IQ4_XS.gguf',
        # 'Yi-1.5-9B-Chat-16K.i1-Q4_K_M.gguf',
    ),
    Model(
        'mistralai/Mistral-7B-Instruct-v0.3',
        'bartowski/Mistral-7B-Instruct-v0.3-GGUF',
        # 'Mistral-7B-Instruct-v0.3-IQ2_M.gguf',
        # 'Mistral-7B-Instruct-v0.3-IQ3_M.gguf',
        'Mistral-7B-Instruct-v0.3-IQ4_XS.gguf',
        # 'Mistral-7B-Instruct-v0.3-Q4_K_M.gguf',
    ),
    Model(
        'microsoft/Phi-3-mini-128k-instruct',
        'bartowski/Phi-3.1-mini-128k-instruct-GGUF',
        'Phi-3.1-mini-128k-instruct-Q4_K_M.gguf',
    ),
    Model(
        'microsoft/phi-2',
        'andrijdavid/phi-2-GGUF',
        'ggml-model-Q4_K_M.gguf',
    ),
    Model(
        'IndexTeam/Index-1.9B-Chat',
        'IndexTeam/Index-1.9B-Chat-GGUF',
        'ggml-model-Q4_K_M.gguf',
    ),
    Model(
        'internlm/internlm2-chat-1_8b',
        'QuantFactory/internlm2-chat-1_8b-GGUF',
        'internlm2-chat-1_8b.Q4_K_M.gguf',
    ),
    Model(
        'Qwen/Qwen2-1.5B-Instruct',
        'Qwen/Qwen2-1.5B-Instruct-GGUF',
@@ -16,4 +47,4 @@
        'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF',
        'tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
    ),
]
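Each Model entry above is built from three positional arguments. Their meaning is inferred from the existing entries rather than from the Model class itself (llama/model.py is not part of this diff): the upstream Hugging Face model repo, the repo hosting the GGUF quantizations, and the GGUF file to use. Under that assumption, adding another model would look roughly like the sketch below; the repo and file names are illustrative only and not taken from this commit.

    from llama.model import Model

    from demo_models import models

    # Hypothetical extra entry, following the same positional pattern as above:
    # (upstream HF repo, GGUF quantization repo, GGUF filename) -- names are examples only.
    extra_model = Model(
        'Qwen/Qwen2-0.5B-Instruct',
        'Qwen/Qwen2-0.5B-Instruct-GGUF',
        'qwen2-0_5b-instruct-q4_k_m.gguf',
    )

    models.append(extra_model)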