-
Notifications
You must be signed in to change notification settings - Fork 31
/
llm-vertex.el
322 lines (279 loc) · 14.9 KB
/
llm-vertex.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
;;; llm-vertex.el --- LLM implementation of Google Cloud Vertex AI -*- lexical-binding: t; package-lint-main-file: "llm.el"; -*-
;; Copyright (c) 2023, 2024 Free Software Foundation, Inc.
;; Author: Andrew Hyatt <[email protected]>
;; Homepage: https://github.com/ahyatt/llm
;; SPDX-License-Identifier: GPL-3.0-or-later
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; This file implements the llm functionality defined in llm.el, for Google
;; Cloud Vertex AI.
;;; Code:
(require 'cl-lib)
(require 'llm)
(require 'llm-request-plz)
(require 'llm-provider-utils)
(require 'json)
(defgroup llm-vertex nil
  "Customization options for the Google Cloud Vertex AI LLM provider."
  :group 'llm)
(defcustom llm-vertex-gcloud-binary "gcloud"
  "Name or path of the gcloud executable used to obtain access tokens.
A bare name is looked up by the shell.  If the executable is not in
the PATH, the full path must be given instead."
  :group 'llm-vertex
  :type 'file)
(defcustom llm-vertex-gcloud-region "us-central1"
  "The gcloud region used when connecting to Vertex AI.
The value is inserted into both the host name and the locations
component of the request URLs."
  :group 'llm-vertex
  :type 'string)
(defcustom llm-vertex-example-prelude "Examples of how you should respond follow."
  "The text that introduces examples in Vertex chat prompts.
It is handed to `llm-provider-utils-combine-to-system-prompt' each
time a chat request is built."
  :group 'llm-vertex
  :type 'string)
(defcustom llm-vertex-default-max-output-tokens 500
  "Default upper bound on the number of tokens to request.
This only applies when a maximum token count is required and no
other default exists.  The largest value possible here is 2049."
  :group 'llm-vertex
  :type 'integer)
(defcustom llm-vertex-default-chat-model "gemini-1.5-pro"
  "Name of the model requested when a provider does not set one.
This is the initial value of the CHAT-MODEL slot of `llm-vertex'.
It should almost certainly be a chat model, since other models are
meant for more specialized uses."
  :group 'llm-vertex
  :type 'string)
(cl-defstruct (llm-google (:include llm-standard-full-provider))
  "Base provider type shared by the Vertex and Gemini providers.
Methods that behave the same for both are specialized on this type.")
(cl-defstruct (llm-vertex (:include llm-google))
  "A client for Google Cloud Vertex AI.

KEY is the temporary access token sent with each request.  It is
filled in and renewed by `llm-provider-request-prelude', so it
does not have to be set by hand.

PROJECT is the value placed in the projects component of every
request URL.

EMBEDDING-MODEL names the model used for embeddings.  When it is
unset, \"textembedding-gecko\" is used.

CHAT-MODEL names the model used for chat.  It defaults to
`llm-vertex-default-chat-model'.

KEY-GENTIME records when KEY was generated, because the key has
to be regenerated every hour."
  key
  project
  embedding-model
  (chat-model llm-vertex-default-chat-model)
  key-gentime)
;; API reference: https://cloud.google.com/vertex-ai/docs/generative-ai/multimodal/send-chat-prompts-gemini#gemini-chat-samples-drest
(cl-defmethod llm-provider-request-prelude ((provider llm-vertex))
  "Refresh the key in the vertex PROVIDER, if needed.
A new access token is requested by running
`llm-vertex-gcloud-binary' with \"auth print-access-token\" when
PROVIDER has no key yet, or when the current key was generated an
hour or more ago.  Signal an error if the command output contains
\"ERROR\" or is empty."
  (unless (and (llm-vertex-key provider)
               (> (* 60 60)
                  (float-time (time-subtract (current-time)
                                             (or (llm-vertex-key-gentime provider) 0)))))
    (let* ((binary (if (string-prefix-p "~" llm-vertex-gcloud-binary)
                       ;; Quoting below would stop the shell from expanding
                       ;; a leading tilde, so expand it here.
                       (expand-file-name llm-vertex-gcloud-binary)
                     llm-vertex-gcloud-binary))
           ;; Quote the executable so that paths containing spaces or other
           ;; shell metacharacters are passed to the shell as a single word.
           (result (string-trim
                    (shell-command-to-string
                     (concat (shell-quote-argument binary)
                             " auth print-access-token")))))
      (when (string-match-p "ERROR" result)
        (error "Could not refresh gcloud access token, received the following error: %s" result))
      ;; An empty token would otherwise be stored and treated as valid for
      ;; the next hour.
      (when (string= "" result)
        (error "Could not refresh gcloud access token, the gcloud command produced no output"))
      ;; The key has to be unibyte, or else it causes problems when the user
      ;; is using multibyte strings.
      (setf (llm-vertex-key provider) (encode-coding-string result 'utf-8)))
    (setf (llm-vertex-key-gentime provider) (current-time))))
(cl-defmethod llm-provider-embedding-url ((provider llm-vertex) &optional _)
  "Return the URL used to request embeddings from the vertex PROVIDER.
The URL is built from `llm-vertex-gcloud-region', the project of
PROVIDER and its embedding model, which falls back to
\"textembedding-gecko\" when unset.  The optional argument is ignored."
  (let ((region llm-vertex-gcloud-region)
        (model (or (llm-vertex-embedding-model provider)
                   "textembedding-gecko")))
    (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:predict"
            region
            (llm-vertex-project provider)
            region
            model)))
(cl-defmethod llm-provider-embedding-extract-result ((_ llm-vertex) response)
  "Return the embedding values found in RESPONSE.
The values are read from the `embeddings' entry of the first
element of the `predictions' vector."
  (let* ((predictions (assoc-default 'predictions response))
         (first-prediction (aref predictions 0))
         (embeddings (assoc-default 'embeddings first-prediction)))
    (assoc-default 'values embeddings)))
(cl-defmethod llm-provider-embedding-extract-error ((provider llm-google) err-response)
  "Return the error described by ERR-RESPONSE for PROVIDER, if any.
Embedding errors are extracted exactly like chat errors, so this
delegates to `llm-provider-chat-extract-error'."
  (llm-provider-chat-extract-error provider err-response))
(cl-defmethod llm-provider-chat-extract-error ((provider llm-google) err-response)
  "Return an error message found in ERR-RESPONSE, or nil if there is none.
PROVIDER is only used to dispatch the recursive call.

When ERR-RESPONSE is a vector, only its first element is examined,
and an empty vector yields nil.  Otherwise an `error' entry is
reported with its code and message.  Failing that, a first
candidate whose `finishReason' is \"SAFETY\" is reported together
with its safety ratings."
  (if (vectorp err-response)
      ;; Guard against an empty vector, for which `aref' would signal
      ;; `args-out-of-range'.  `llm-provider-chat-extract-result' guards
      ;; the same case.
      (when (> (length err-response) 0)
        (llm-provider-chat-extract-error provider (aref err-response 0)))
    (if-let ((err (assoc-default 'error err-response)))
        (format "Problem calling GCloud Vertex AI: status: %s message: %s"
                (assoc-default 'code err)
                (assoc-default 'message err))
      (if-let ((candidates (assoc-default 'candidates err-response)))
          (when (and (vectorp candidates)
                     (> (length candidates) 0)
                     (equal "SAFETY"
                            (assoc-default 'finishReason (aref candidates 0))))
            (format "Could not finish due to detected Gemini safety violations: %s"
                    (assoc-default 'safetyRatings (aref candidates 0))))))))
(cl-defmethod llm-provider-embedding-request ((_ llm-vertex) string)
  "Return the request body asking for an embedding of STRING.
The body is an alist with one \"instances\" entry, whose value is
a vector holding a single alist that maps \"content\" to STRING."
  (let ((instance (list (cons "content" string))))
    (list (cons "instances" (vector instance)))))
(cl-defmethod llm-provider-headers ((provider llm-vertex))
  "Return the HTTP headers for requests made with the vertex PROVIDER.
The only header is \"Authorization\", which carries the key of
PROVIDER as a bearer token."
  (let ((bearer (format "Bearer %s" (llm-vertex-key provider))))
    (list (cons "Authorization" bearer))))
(cl-defmethod llm-provider-chat-extract-result ((provider llm-google) response)
  "Return the text contained in RESPONSE, or nil if there is none.
PROVIDER is only used to dispatch the recursive calls.

A non-empty vector has every element extracted in turn.  If the
first extraction is a string, all of them are concatenated,
otherwise the first extraction is returned as it is.  For an
alist with a `candidates' entry, the result is the `text' of the
last part of the first candidate."
  (cond
   ((vectorp response)
    (unless (zerop (length response))
      (let ((chunks (mapcar (lambda (element)
                              (llm-provider-chat-extract-result provider element))
                            response)))
        (if (stringp (car chunks))
            (mapconcat #'identity chunks "")
          (car chunks)))))
   ((consp response)
    (when (assoc-default 'candidates response)
      (let* ((candidate (aref (assoc-default 'candidates response) 0))
             (parts (assoc-default 'parts (assoc-default 'content candidate))))
        (when parts
          ;; Only the final part is taken as the text of the response.
          (assoc-default 'text (aref parts (1- (length parts))))))))))
(cl-defmethod llm-provider-extract-function-calls ((provider llm-google) response)
  "Return the function calls requested in RESPONSE, or nil if none.
PROVIDER is only used to dispatch the recursive call.

When RESPONSE is a vector, only its first element is examined,
and an empty vector yields nil.  Otherwise each part of the first
candidate that has a `functionCall' entry becomes one
`llm-provider-utils-function-call' carrying its name and args."
  (if (vectorp response)
      ;; Guard against an empty vector, for which `aref' would signal
      ;; `args-out-of-range'.  `llm-provider-chat-extract-result' guards
      ;; the same case.
      (when (> (length response) 0)
        (llm-provider-extract-function-calls provider (aref response 0)))
    ;; In some error cases, the response does not have any candidates.
    (when (assoc-default 'candidates response)
      (mapcar (lambda (call)
                (make-llm-provider-utils-function-call
                 :name (assoc-default 'name call)
                 :args (assoc-default 'args call)))
              ;; Keep only the parts that actually carry a function call.
              (mapcan (lambda (maybe-call)
                        (when-let ((fc (assoc-default 'functionCall maybe-call)))
                          (list fc)))
                      (assoc-default
                       'parts (assoc-default
                               'content
                               (aref (assoc-default 'candidates response) 0))))))))
(cl-defmethod llm-provider-extract-streamed-function-calls ((provider llm-google) response)
  "Return the function calls found in the JSON string RESPONSE.
RESPONSE is parsed with `json-read-from-string' and the result is
passed, with PROVIDER, to `llm-provider-extract-function-calls'."
  (let ((parsed (json-read-from-string response)))
    (llm-provider-extract-function-calls provider parsed)))
(cl-defmethod llm-provider-chat-request ((_ llm-google) prompt _)
  "Return the request body, as an alist, for sending PROMPT.
The provider and the last argument are ignored.

PROMPT is first passed to
`llm-provider-utils-combine-to-system-prompt' together with
`llm-vertex-example-prelude'.  The body is then the concatenation
of the following, in order:

- a `system_instruction' entry, when the first interaction of
  PROMPT has the role `system';
- a `contents' entry with one element for every interaction whose
  role is not `system';
- a \"tools\" entry, when PROMPT has functions;
- the result of `llm-vertex--chat-parameters'.

The parts of an interaction are a single text part for string
content outside the `function' role, one `functionResponse' part
per function call result for the `function' role, one part per
element for multipart content, and the content as it is otherwise."
  (llm-provider-utils-combine-to-system-prompt prompt llm-vertex-example-prelude)
  (append
   (let ((first (car (llm-chat-prompt-interactions prompt))))
     ;; System prompts for vertex only really make sense when they are
     ;; the first interaction, since they are sent separately
     (when (eq (llm-chat-prompt-interaction-role first) 'system)
       `((system_instruction
          . ((parts . (((text . ,(llm-chat-prompt-interaction-content first))))))))))
   `((contents
      .
      ,(mapcar (lambda (interaction)
                 `((role . ,(pcase (llm-chat-prompt-interaction-role interaction)
                              ('user "user")
                              ('assistant "model")
                              ('function "function")))
                   (parts .
                          ,(if (and (not (equal (llm-chat-prompt-interaction-role interaction)
                                                'function))
                                    (stringp (llm-chat-prompt-interaction-content interaction)))
                               `(((text . ,(llm-chat-prompt-interaction-content
                                            interaction))))
                             (if (eq 'function
                                     (llm-chat-prompt-interaction-role interaction))
                                 ;; Each result must be exactly one part, that
                                 ;; is, one alist.  `mapcar' already supplies
                                 ;; the enclosing list, so wrapping the alist
                                 ;; in further lists would make every part
                                 ;; encode as a nested array.
                                 (mapcar (lambda (fc)
                                           `((functionResponse
                                              .
                                              ((name . ,(llm-chat-prompt-function-call-result-function-name fc))
                                               (response
                                                .
                                                ((name . ,(llm-chat-prompt-function-call-result-function-name fc))
                                                 (content . ,(llm-chat-prompt-function-call-result-result fc))))))))
                                         (llm-chat-prompt-interaction-function-call-results interaction))
                               (if (llm-multipart-p (llm-chat-prompt-interaction-content interaction))
                                   (mapcar (lambda (part)
                                             (if (llm-media-p part)
                                                 `((inline_data
                                                    . ((mime_type . ,(llm-media-mime-type part))
                                                       (data . ,(base64-encode-string (llm-media-data part) t)))))
                                               `((text . ,part))))
                                           (llm-multipart-parts (llm-chat-prompt-interaction-content interaction)))
                                 (llm-chat-prompt-interaction-content interaction)))))))
               ;; System interactions are sent separately, see above.
               (seq-filter
                (lambda (interaction) (not (eq 'system (llm-chat-prompt-interaction-role interaction))))
                (llm-chat-prompt-interactions prompt)))))
   (when (llm-chat-prompt-functions prompt)
     ;; Although Gemini claims to be compatible with Open AI's function declaration,
     ;; it's only somewhat compatible.
     `(("tools" .
        ,(mapcar (lambda (tool)
                   `((function_declarations . (((name . ,(llm-function-call-name tool))
                                                (description . ,(llm-function-call-description tool))
                                                (parameters
                                                 .
                                                 ,(llm-provider-utils-openai-arguments
                                                   (llm-function-call-args tool))))))))
                 (llm-chat-prompt-functions prompt)))))
   (llm-vertex--chat-parameters prompt)))
(defun llm-vertex--response-schema (schema)
  "Convert SCHEMA, given in our standard schema spec, for use with vertex.
The conversion is done by `llm-provider-utils-json-schema'."
  (llm-provider-utils-json-schema schema))
(defun llm-vertex--chat-parameters (prompt)
  "Build the generation parameters section for PROMPT.
The result is a one-element alist with the key `generation_config',
suitable for appending to a request alist, or nil when PROMPT
sets no parameter at all.

The temperature, the maximum number of tokens and the response
format of PROMPT are placed in front of its non-standard
parameters."
  (let* ((temperature (llm-chat-prompt-temperature prompt))
         (max-tokens (llm-chat-prompt-max-tokens prompt))
         (response-format (llm-chat-prompt-response-format prompt))
         (config
          (append
           ;; Any response format other than the plain symbol `json' is
           ;; treated as a schema.
           (when (and response-format (not (eq 'json response-format)))
             `(("response_schema"
                . ,(llm-vertex--response-schema response-format))))
           (when response-format
             '(("response_mime_type" . "application/json")))
           (when max-tokens
             `((maxOutputTokens . ,max-tokens)))
           (when temperature
             `((temperature . ,temperature)))
           (llm-chat-prompt-non-standard-params prompt))))
    (when config
      `((generation_config . ,config)))))
(cl-defmethod llm-provider-populate-function-calls ((_ llm-vertex) prompt calls)
  "Append the function calls in CALLS to PROMPT.
Each call becomes an alist with a single `functionCall' entry
holding its name and args, and the resulting list is handed to
`llm-provider-utils-append-to-prompt'."
  (let ((parts (mapcar
                (lambda (call)
                  (list
                   (cons 'functionCall
                         (list
                          (cons 'name (llm-provider-utils-function-call-name call))
                          (cons 'args (llm-provider-utils-function-call-args call))))))
                calls)))
    (llm-provider-utils-append-to-prompt prompt parts)))
(cl-defmethod llm-provider-streaming-media-handler ((provider llm-google)
                                                    msg-receiver fc-receiver
                                                    err-receiver)
  "Return the media type handler for streamed responses from PROVIDER.
The result is a cons of `application/json' and a JSON array media
type whose handler is called with each array element.

For each element, ERR-RECEIVER is called with the error extracted
from it, if there is one.  Then MSG-RECEIVER is called with the
text of the element if it has any, and otherwise FC-RECEIVER is
called with its function calls, if it has any."
  (let ((on-element
         (lambda (element)
           (let ((problem (llm-provider-chat-extract-error provider element)))
             (when problem
               (funcall err-receiver problem)))
           (let ((text (llm-provider-chat-extract-result provider element)))
             (if text
                 (funcall msg-receiver text)
               ;; Function calls are only looked for when there is no text.
               (let ((calls (llm-provider-extract-function-calls provider element)))
                 (when calls
                   (funcall fc-receiver calls))))))))
    (cons 'application/json
          (plz-media-type:application/json-array :handler on-element))))
(cl-defmethod llm-provider-collect-streaming-function-data ((_ llm-google) data)
  "Return the first element of the list DATA.
The provider argument is ignored."
  (car data))
(defun llm-vertex--chat-url (provider &optional streaming)
  "Return the chat URL for the vertex PROVIDER.
The URL is built from `llm-vertex-gcloud-region' and from the
project and chat model of PROVIDER.  It ends in
\"streamGenerateContent\" if STREAMING is non-nil, and in
\"generateContent\" otherwise."
  (let ((region llm-vertex-gcloud-region)
        (action (if streaming "streamGenerateContent" "generateContent")))
    (format "https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:%s"
            region
            (llm-vertex-project provider)
            region
            (llm-vertex-chat-model provider)
            action)))
(cl-defmethod llm-provider-chat-url ((provider llm-vertex))
  "Return the URL for non-streaming chat requests to the vertex PROVIDER."
  (llm-vertex--chat-url provider nil))
(cl-defmethod llm-provider-chat-streaming-url ((provider llm-vertex))
  "Return the URL for streaming chat requests to the vertex PROVIDER."
  (let ((streaming t))
    (llm-vertex--chat-url provider streaming)))
(cl-defmethod llm-name ((_ llm-vertex))
  "Return the display name of the vertex provider."
  "Vertex Gemini")
(cl-defmethod llm-chat-token-limit ((provider llm-vertex))
  "Return the token limit for the chat model of the vertex PROVIDER.
The limit is looked up by `llm-provider-utils-model-token-limit'."
  (let ((model (llm-vertex-chat-model provider)))
    (llm-provider-utils-model-token-limit model)))
(cl-defmethod llm-capabilities ((provider llm-vertex))
  "Return the list of capabilities of the vertex PROVIDER.
`streaming', `embeddings' and `json-response' are always present.
If the chat model of PROVIDER is found by `llm-models-match' and
has capabilities, then `function-calls' is added when they
include `tool-use', followed by whichever of `image-input',
`audio-input' and `video-input' they include."
  (let ((always (list 'streaming 'embeddings 'json-response))
        (from-model
         (when-let ((model (llm-models-match (llm-vertex-chat-model provider)))
                    (capabilities (llm-model-capabilities model)))
           (append
            (and (member 'tool-use capabilities) '(function-calls))
            (seq-intersection capabilities
                              '(image-input audio-input video-input))))))
    (append always from-model)))
(provide 'llm-vertex)
;;; llm-vertex.el ends here