-
Notifications
You must be signed in to change notification settings - Fork 0
/
llamacppasync.py
288 lines (236 loc) · 9.83 KB
/
llamacppasync.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import asyncio
import aiohttp
import aiofiles
from tqdm.asyncio import tqdm
import os
import json
import sys
from pathlib import Path
from math import log
import heapq
# --- Configuration and constants -------------------------------------------
# Sizing rule from the original author relating the hunk size, the batch size
# and the model context size:
#     CTXSIZE  = BATCHSIZE * (HUNKSIZE / 4 + 400 / 4)
#     HUNKSIZE = 4 * CTXSIZE / BATCHSIZE - 400
# Reported working setup: BATCHSIZE = 8, CTXSIZE = 32768 (max),
# HUNKSIZE = 15984, with HelloBiblev0.2 on an RTX 3090 with 24GB VRAM.
BATCHSIZE = 8
HUNKSIZE = 15984

# Credentials and endpoint; each falls back to a local testing value when the
# environment variable is unset.
# NOTE(review): "OPENAI_AI_KEY" may be a typo for "OPENAI_API_KEY" -- confirm
# before renaming, since existing setups may export the current name.
testing_key = 'Password12344321'
testing_api = "http://127.0.0.1:8080"
AUTH = os.environ.get("OPENAI_AI_KEY", testing_key)
api = os.environ.get("OPENAI_API_ENDPOINT", testing_api)

# Server routes: one for completions, one for tokenization.
route = "completion"
tokroute = 'tokenize'
URL = "/".join((api, route))
TOKURL = "/".join((api, tokroute))

# The two answer tokens the prompt asks the model to choose between.
yes_token = "yes"
no_token = "no"

# JSON file read by load_books().
file_path = './Bible-kjv/Books.json'

# Headers sent with every request.
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + AUTH,
}
def get_data(question, hunk):
    """Build the yes/no relevance prompt for one passage and one query.

    The passage ``hunk`` is placed between [TEXT] tags and ``question``
    between [QUERY] tags; the prompt ends with "Answer:" so the next
    generated token is the model's verdict.
    """
    prompt_lines = (
        "[INST]You're a Christian theology assistant, as far as possible, always refer to the stories in the Bible.",
        "Determine whether the Bible text is applicable for QUERY:[/INST]",
        "[TEXT]",
        f"{hunk}",
        "[/TEXT]",
        "(Your Answer Must be 'yes' or 'no' without quotes)",
        "[QUERY]",
        f"{question}",
        "[/QUERY]",
        "Answer:",
    )
    return "\n".join(prompt_lines)
async def get_tok(session, tok):
    """Return the id of the last token the server produces for ``tok``.

    Posts ``{"content": tok}`` to TOKURL and reads the "tokens" list from the
    JSON reply.

    Args:
        session: aiohttp client session used for the POST.
        tok: text to tokenize.

    Returns:
        The final entry of the reply's "tokens" list, or -1 when the key is
        missing or the list is empty.
    """
    payload = {"content": tok}
    async with session.post(TOKURL, headers=headers, json=payload, ssl=False) as response:
        reply = await response.json()
    # `or [-1]` also covers an empty list, which would otherwise raise
    # IndexError on the [-1] lookup below.
    tokens = reply.get("tokens") or [-1]
    return tokens[-1]
async def load_books():
    """Read the JSON document at ``file_path`` and return the parsed object."""
    async with aiofiles.open(file_path, 'r') as handle:
        raw_json = await handle.read()
    return json.loads(raw_json)
def get_verses(verses_object):
    """Yield ``(verse_number, text)`` for every verse entry of a chapter.

    Args:
        verses_object: iterable of dicts with optional "verse" and "text" keys.

    Yields:
        A tuple of the verse number as an int and the verse text. A missing
        or non-numeric "verse" falls back to the entry's 1-based position
        (previously the "???" placeholder was passed to int() and raised
        ValueError). A missing "text" yields "Verse text missing!?".
    """
    for position, verse in enumerate(verses_object, start=1):
        try:
            number = int(verse.get("verse", position))
        except (TypeError, ValueError):
            number = position
        yield number, verse.get("text", "Verse text missing!?")
def get_chapters(book_object):
    """Yield ``(chapter_number, verses)`` for every chapter of a book.

    Args:
        book_object: dict with an optional "chapters" list; each chapter is a
            dict with optional "chapter" and "verses" keys.

    Yields:
        A tuple of the chapter number as an int and the generator returned by
        get_verses() for that chapter. A missing or non-numeric "chapter"
        falls back to the entry's 1-based position (previously the "???"
        placeholder was passed to int() and raised ValueError).
    """
    chapters = book_object.get("chapters", [])
    for position, chapter_object in enumerate(chapters, start=1):
        try:
            number = int(chapter_object.get("chapter", position))
        except (TypeError, ValueError):
            number = position
        yield number, get_verses(chapter_object.get("verses", []))
async def get_books(books=None, path="Bible-kjv"):
    """Asynchronously yield ``(book_name, chapters)`` for each requested book.

    Each book is read from ``<path>/<name without spaces>.json``. When
    ``books`` is empty or None, every name in ALL_BOOKS is used. A book whose
    file is missing or is not valid JSON is reported through tqdm.write and
    skipped.
    """
    selected = books if books else ALL_BOOKS
    for book in selected:
        book_file = Path(path) / f"{book.replace(' ', '')}.json"
        try:
            async with aiofiles.open(book_file, 'r') as handle:
                raw_json = await handle.read()
            book_object = json.loads(raw_json)
        except FileNotFoundError:
            tqdm.write(f"Error: The file {book_file} does not exist.")
        except json.JSONDecodeError:
            tqdm.write(
                f"Error: The file {book_file} is not a valid JSON file.")
        else:
            yield book, get_chapters(book_object)
def get_score(value):
    """Format a raw score as "N/1000" on a logarithmic scale.

    A raw value of 0 maps to "0/1000" and a raw value of 1 to "1000/1000".
    """
    scaled = 1000 * log(1001 - 1000 * value) / log(1001)
    points = int(1000 - round(scaled))
    return f"{points}/1000"
async def generate_tasks(queue, book_filter):
    """Split the selected books into text hunks and enqueue them for scoring.

    Verses are concatenated (one per line) until the hunk grows past HUNKSIZE
    characters, then the hunk is queued as
    ``(hunk, book, start_chapter, start_verse, end_chapter, end_verse)``.
    Hunks never span books; a partial hunk left at the end of a book is
    queued as well. A final ``None`` is queued to signal the end of the work.

    Args:
        queue: asyncio queue that receives the hunk tuples.
        book_filter: list of book names to scan, or a falsy value for all of
            ALL_BOOKS.
    """
    book_count = len(book_filter if book_filter else ALL_BOOKS)
    async for book, book_contents in tqdm(get_books(book_filter), desc="Books: ", total=book_count, leave=True):
        hunk = ""
        # Start reference of the current hunk, taken from the first verse
        # actually added. Deriving it as "last verse + 1" named a verse that
        # does not exist whenever a hunk ended on a chapter's final verse.
        hunk_start = None
        async for chapter, chapter_contents in tqdm(list(book_contents), desc="Chapters: ", leave=False):
            async for verse, verse_text in tqdm(list(chapter_contents), desc="Verses: ", leave=False):
                if hunk_start is None:
                    hunk_start = (chapter, verse)
                hunk += verse_text + '\n'
                if len(hunk) > HUNKSIZE:
                    await queue.put((hunk, book, *hunk_start, chapter, verse))
                    hunk = ""
                    hunk_start = None
        # Flush whatever is left of this book; a non-empty hunk implies that
        # hunk_start, chapter and verse are all bound.
        if hunk:
            await queue.put((hunk, book, *hunk_start, chapter, verse))
    tqdm.write('final tasks will finish shortly')
    await queue.put(None)  # Signal the end of the queue
async def process(queue, session, question, yes_token_id, no_token_id, topn=25):
    """Score queued passages against ``question`` and keep the best ``topn``.

    Items are taken from ``queue`` until a ``None`` sentinel arrives. For each
    item a one-token completion is requested with every token id except the
    yes/no ids biased away, and the score is ``p(yes) / (p(yes) + p(no))``
    computed from the returned token probabilities (0 when both are 0).
    Replies that are not a dict, that carry an 'err'/'error' key, or that have
    no probabilities are reported through tqdm.write and skipped.

    Args:
        queue: asyncio queue of
            ``(hunk, book, chapter_start, verse_start, chapter_end, verse_end)``
            tuples terminated by ``None``.
        session: aiohttp client session used for the POST requests.
        question: the user's query text.
        yes_token_id: token id left unbiased for the "yes" answer.
        no_token_id: token id left unbiased for the "no" answer.
        topn: maximum number of results to keep.

    Returns:
        A list of at most ``topn`` result dicts, highest score first.
    """
    results = []

    # The bias list depends only on the two token ids, so build it once
    # instead of regenerating ~65k entries for every request.
    allowed_ids = {yes_token_id, no_token_id}
    logit_bias = [[i, False] for i in range(256 * 256) if i not in allowed_ids]

    while True:
        item = await queue.get()
        if item is None:
            break
        hunk, book, chapter_start, verse_start, chapter_end, verse_end = item
        data = {
            "prompt": get_data(question, hunk),
            "temperature": -1,
            "n_predict": 1,
            "logit_bias": logit_bias,
            "n_probs": 2,
            "add_bos_token": True,
            "samplers": []
        }
        async with session.post(URL, headers=headers, json=data, ssl=False) as response:
            response_json = await response.json()
        if not isinstance(response_json, dict) or 'err' in response_json or 'error' in response_json:
            tqdm.write(str(response_json))
            continue

        # `or [{}]` also covers an empty list, which would otherwise raise
        # IndexError on the [0] lookup.
        completion_probs = response_json.get("completion_probabilities") or [{}]
        first_completion = completion_probs[0]
        resp_completions = first_completion.get("probs", None) if isinstance(first_completion, dict) else None
        if not resp_completions:
            tqdm.write("ERR: no completions")
            continue

        yes_raw = 0
        no_raw = 0
        for tok in resp_completions:
            if not isinstance(tok, dict):
                break
            tok_str = tok.get("tok_str", "").strip()
            if tok_str == yes_token.strip():
                yes_raw = tok.get('prob', 0)
            elif tok_str == no_token.strip():
                no_raw = tok.get('prob', 0)
        # An explicit null probability in the reply counts as 0.
        if yes_raw is None:
            yes_raw = 0
        if no_raw is None:
            no_raw = 0
        total = yes_raw + no_raw
        score = yes_raw / total if total else 0

        # Human-readable reference: "Book c:v", "Book c:v-v" or "Book c:v-c:v".
        contents_string = f"{book} {chapter_start}:{verse_start}"
        if chapter_start != chapter_end:
            contents_string += f"-{chapter_end}:{verse_end}"
        elif verse_start != verse_end:
            contents_string += f"-{verse_end}"

        single_verse = chapter_end == chapter_start and verse_end == verse_start
        entry = {
            "score": score,
            "question": question,
            "book": book,
            "ref": contents_string,
            # The text is only kept when the item is exactly one verse.
            "verse": hunk if single_verse else None,
            "chapter_start": chapter_start,
            "verse_start": verse_start,
            "chapter_end": chapter_end,
            "verse_end": verse_end
        }
        # nlargest returns the survivors sorted best-first, so the list stays
        # ordered and bounded after every insertion.
        results = heapq.nlargest(topn, results + [entry], key=lambda x: x['score'])
    return results
async def get_tasks_for_selection(queue, selection):
    """Queue every verse inside a previously selected hunk, one verse per task.

    ``selection`` supplies 'book', 'chapter_start', 'verse_start',
    'chapter_end' and 'verse_end'. Each verse in that range is queued as
    ``(text, book, chapter, verse, chapter, verse)``, followed by a ``None``
    sentinel once the range has been walked.
    """
    async for book, chapters in get_books([selection['book']]):
        async for chapter, verses in tqdm(list(chapters), desc="Chapters: ", leave=False):
            # Chapters before the range are skipped; the first one past it
            # ends the scan.
            if chapter < selection['chapter_start']:
                continue
            if chapter > selection['chapter_end']:
                break
            async for verse, verse_text in tqdm(list(verses), desc="Verses: ", leave=False):
                # Verse bounds only apply inside the first and last chapters.
                if chapter == selection['chapter_start'] and verse < selection['verse_start']:
                    continue
                if chapter == selection['chapter_end'] and verse > selection['verse_end']:
                    break
                task = (verse_text, book, chapter, verse, chapter, verse)
                await queue.put(task)
    await queue.put(None)
async def main():
    """Run the interactive search loop.

    Book names given on the command line restrict the search; with none, all
    books are scanned. For every query the loop scores all hunks, prints the
    five best references, then rescans each of those hunks verse by verse and
    prints its three best verses. The yes/no token ids are fetched once and
    reused for later queries.
    """
    queue = asyncio.Queue(BATCHSIZE)
    book_filter = sys.argv[1:] if len(sys.argv) > 1 else None
    yes_token_id = None
    no_token_id = None
    while True:
        question = input(
            'Search Query (e.g. question or biblical statement): ')
        base_len = len(get_data(question, ""))
        if base_len > 400:
            answer = input(f'{base_len-400} chars over limit! continue anyways? [Y/n]')
            # startswith() tolerates an empty reply (plain Enter means "yes");
            # indexing [0] raised IndexError in that case.
            if answer.strip().lower().startswith('n'):
                continue
        async with aiohttp.ClientSession() as session:
            if yes_token_id is None:
                yes_token_id = await get_tok(session, yes_token)
            if no_token_id is None:
                no_token_id = await get_tok(session, no_token)
            producer = generate_tasks(queue, book_filter)
            consumer = process(queue, session, question,
                               yes_token_id, no_token_id, 5)
            # gather() returns results in argument order; [1] is the consumer's.
            scores = (await asyncio.gather(*[producer, consumer]))[1]
            print(f'Scores accumulated. Best {len(scores)} hunks to follow')
            # already sorted by heapq
            print(
                '\n'.join([f"{get_score(obj['score'])}: {obj['ref']}" for obj in scores]))
            for selection in scores:
                print(f"Selecting hunk: {selection['ref']}")
                nv = 3
                producer = get_tasks_for_selection(queue, selection)
                consumer = process(queue, session, question,
                                   yes_token_id, no_token_id, nv)
                # Kept under its own name so the list being iterated by the
                # outer loop is never rebound.
                verse_scores = (await asyncio.gather(*[producer, consumer]))[1]
                print(
                    f"Best {len(verse_scores)} verses from hunk in {selection['book']}:")
                for obj in verse_scores:
                    text = obj['verse']
                    score = get_score(obj['score'])
                    ref = obj['ref']
                    print(f' Score: {score}, Reference: {ref};')
                    print(' ' + ' '.join(text.split('\n')))
# Load the book list once at import time into the module-level ALL_BOOKS name;
# exit with status 1 when the file at file_path is missing or is not valid JSON.
try:
    ALL_BOOKS = asyncio.run(load_books())
except FileNotFoundError:
    print(f"Error: The file {file_path} does not exist.")
    raise SystemExit(1)
except json.JSONDecodeError:
    print(f"Error: The file {file_path} is not a valid JSON file.")
    raise SystemExit(1)

# Start the interactive loop only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())