-
Notifications
You must be signed in to change notification settings - Fork 1
/
express.js
70 lines (61 loc) · 1.95 KB
/
express.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import http from 'node:http'
import express from 'express'
import OpenAI from 'openai'
import { ModelServer } from '#package/server.js'
import { createExpressMiddleware } from '#package/http.js'
// Demonstration of using the ModelServer + Express middleware to serve an OpenAI API.
// Create a server with a single model, limiting to 2 instances that can run concurrently.
// Models will be downloaded on-demand or during ModelServer.start() if minInstances > 0.
const modelServer = new ModelServer({
  concurrency: 2,
  models: {
    'my-model': {
      task: 'text-completion',
      url: 'https://huggingface.co/HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF/blob/main/smollm-135m-instruct-add-basics-q8_0.gguf',
      sha256: 'a98d3857b95b96c156d954780d28f39dcb35b642e72892ee08ddff70719e6220',
      engine: 'node-llama-cpp',
      maxInstances: 2,
    },
  },
})
await modelServer.start()

// Parse JSON request bodies first, then hand requests to the middleware built from modelServer.
const app = express()
app.use(express.json(), createExpressMiddleware(modelServer))

const server = http.createServer(app)
// listen() completes asynchronously. Wait for its callback so the log line below is only
// printed once the port is actually bound, and turn a bind failure (e.g. port 3001 already
// in use) into a rejection of this await instead of an unhandled 'error' event.
await new Promise((resolve, reject) => {
  server.once('error', reject)
  server.listen(3001, () => {
    server.off('error', reject)
    resolve()
  })
})
console.log('Server up, sending chat completion request...')

// Point the official OpenAI client at the local server started above.
const openai = new OpenAI({
  baseURL: 'http://localhost:3001/openai/v1/',
  // Placeholder value; the OpenAI client requires a key to be set.
  apiKey: '123',
})

// 'my-model' is the key the model was registered under in the ModelServer config.
const completion = await openai.chat.completions.create({
  model: 'my-model',
  messages: [{ role: 'user', content: 'Lets count to three!' }],
  stop: ['Two'],
})
console.log(JSON.stringify(completion, null, 2))
/* Example output (ids, timestamps and generated text will vary between runs):
{
"id": "my-model:pU2BHWUv-kHdAeVn8",
"model": "my-model",
"object": "chat.completion",
"created": 1714431837,
"system_fingerprint": "0159c68a067a360e4be3e285d3e309440c070734",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Sure, let's count together: 1 (one), 2 (two), and 3 (three). If you have any other questions or need further assistance, feel free to ask!"
},
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 6,
"completion_tokens": 41,
"total_tokens": 47
}
}
*/