-
Notifications
You must be signed in to change notification settings - Fork 1
/
concurrency.js
70 lines (68 loc) · 2.11 KB
/
concurrency.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import { startHTTPServer } from '#package/http.js'
import OpenAI from 'openai'
import readline from 'node:readline'
// Printing two parallel completion processes to the console.
const httpServer = await startHTTPServer({
listen: { port: 3000 },
concurrency: 2, // two clients may process chat completions at the same time.
models: {
'my-model': {
task: 'text-completion',
engine: 'node-llama-cpp',
url: 'https://huggingface.co/HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF/blob/main/smollm-135m-instruct-add-basics-q8_0.gguf',
sha256: 'a98d3857b95b96c156d954780d28f39dcb35b642e72892ee08ddff70719e6220',
minInstances: 1, // one instance / session will always be ready
maxInstances: 2, // up to two may be spawned
device: { gpu: false, cpuThreads: 4 }, // configure so they're roughly the same speed
},
},
})
const openai = new OpenAI({
baseURL: 'http://localhost:3000/openai/v1/',
apiKey: 'yes',
})
let sentence1 = 'Sometimes I feel like'
let sentence2 = 'The locality of'
const clearLine = () => {
readline.cursorTo(process.stdout, 0)
readline.clearLine(process.stdout, 0)
}
const updateOutputs = () => {
const truncateLine = (line) => {
return line.length > process.stdout.columns
? '...' + line.slice(line.length - process.stdout.columns + 3)
: line
}
readline.moveCursor(process.stdout, 0, -2)
clearLine()
process.stdout.write(truncateLine(sentence1) + '\n')
clearLine()
process.stdout.write(truncateLine(sentence2) + '\n')
}
const completeSentence = async (prompt, onTokens) => {
const completion = await openai.completions.create({
stream_options: { include_usage: true },
model: 'my-model',
stream: true,
temperature: 1,
stop: ['.'],
prompt,
})
for await (const chunk of completion) {
if (chunk.choices[0].text) {
onTokens(chunk.choices[0].text.replaceAll('\n', '\\n'))
}
}
onTokens('.')
}
setInterval(updateOutputs, 200)
console.log(sentence1)
console.log(sentence2)
while (true) {
await Promise.all([
completeSentence(sentence1, (text) => (sentence1 += text)),
completeSentence(sentence2, (text) => (sentence2 += text)),
])
}
httpServer.close()
clearInterval(updateOutputs)