forked from kserve/kserve
-
Notifications
You must be signed in to change notification settings - Fork 23
/
inferenceservice.yaml
502 lines (436 loc) · 21.2 KB
/
inferenceservice.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
# ConfigMap `inferenceservice-config` in the `kserve` namespace.
# Every entry of `data` carries the settings of one component as a JSON string.
apiVersion: v1
kind: ConfigMap
metadata:
  # name/namespace must be nested under `metadata` to identify the object.
  name: inferenceservice-config
  namespace: kserve
data:
_example: |-
################################
# #
# EXAMPLE CONFIGURATION #
# #
################################
# This block is not actually functional configuration,
# but serves to illustrate the available configuration
# options and document them in a way that is accessible
# to users that `kubectl edit` this config map.
#
# These sample configuration options may be copied out of
# this example block and unindented to be in the data block
# to actually change the configuration.
# ====================================== EXPLAINERS CONFIGURATION ======================================
# Example
explainers: |-
{
"alibi": {
"image" : "kserve/alibi-explainer",
"defaultImageVersion": "latest"
},
"art": {
"image" : "kserve/art-explainer",
"defaultImageVersion": "latest"
}
}
# Alibi and Art Explainer runtime configuration
explainers: |-
{
# Alibi explainer runtime configuration
"alibi": {
# image contains the default Alibi explainer serving runtime image uri.
"image" : "kserve/alibi-explainer",
# defaultImageVersion contains the Alibi explainer serving runtime default image version.
"defaultImageVersion": "latest"
},
# Art explainer runtime configuration
"art": {
# image contains the default Art explainer serving runtime image uri.
"image" : "kserve/art-explainer",
# defaultImageVersion contains the Art explainer serving runtime default image version.
"defaultImageVersion": "latest"
}
}
# ====================================== STORAGE INITIALIZER CONFIGURATION ======================================
# Example
storageInitializer: |-
{
"image" : "kserve/storage-initializer:latest",
"memoryRequest": "100Mi",
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1",
"enableDirectPvcVolumeMount": false
}
storageInitializer: |-
{
# image contains the default storage initializer image uri.
"image" : "kserve/storage-initializer:latest",
# memoryRequest is the requests.memory to set for the storage initializer init container.
"memoryRequest": "100Mi",
# memoryLimit is the limits.memory to set for the storage initializer init container.
"memoryLimit": "1Gi",
# cpuRequest is the requests.cpu to set for the storage initializer init container.
"cpuRequest": "100m",
# cpuLimit is the limits.cpu to set for the storage initializer init container.
"cpuLimit": "1",
# enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly.
# If a pvc volume is provided in storageUri, the pvc volume is mounted directly to /mnt/models in the user container
# rather than being symlinked to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737
"enableDirectPvcVolumeMount": false
}
# ====================================== CREDENTIALS ======================================
# Example
credentials: |-
{
"storageSpecSecretName": "storage-config",
"storageSecretNameAnnotation": "serving.kserve.io/storageSecretName",
"gcs": {
"gcsCredentialFileName": "gcloud-application-credentials.json"
},
"s3": {
"s3AccessKeyIDName": "AWS_ACCESS_KEY_ID",
"s3SecretAccessKeyName": "AWS_SECRET_ACCESS_KEY",
"s3Endpoint": "",
"s3UseHttps": "",
"s3Region": "",
"s3VerifySSL": "",
"s3UseVirtualBucket": "",
"s3UseAnonymousCredential": "",
"s3CABundle": ""
}
}
# This is a global configuration used for downloading models from the cloud storage.
# You can override this configuration by specifying the annotations on service account or static secret.
# https://kserve.github.io/website/master/modelserving/storage/s3/s3/
# For a quick reference about AWS ENV variables:
# AWS Cli: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html
# Boto: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables
#
# The `s3AccessKeyIDName` and `s3SecretAccessKeyName` fields are only used from this configmap when static credentials (IAM User Access Key Secret)
# are used as the authentication method for AWS S3.
# The rest of the fields are used in both authentication methods (IAM Role for Service Account & IAM User Access Key Secret) if a non-empty value is provided.
credentials: |-
{
# storageSpecSecretName contains the secret name which has the credentials for downloading the model.
# This option is used when specifying the storage spec on isvc yaml.
"storageSpecSecretName": "storage-config",
# storageSecretNameAnnotation is the annotation that can be specified on isvc yaml to override the secret name reference with the annotation value.
# When using storageUri the order of precedence is: secret name reference annotation > secret name references from service account
# When using storageSpec the order of precedence is: secret name reference annotation > storageSpecSecretName in configmap
"storageSecretNameAnnotation": "serving.kserve.io/storageSecretName",
# Configuration for google cloud storage
"gcs": {
# gcsCredentialFileName specifies the filename of the gcs credential
"gcsCredentialFileName": "gcloud-application-credentials.json"
},
# Configuration for aws s3 storage. This adds the corresponding environment variables to the storage initializer init container.
# For more info on s3 storage see https://kserve.github.io/website/master/modelserving/storage/s3/s3/
"s3": {
# s3AccessKeyIDName specifies the s3 access key id name
"s3AccessKeyIDName": "AWS_ACCESS_KEY_ID",
# s3SecretAccessKeyName specifies the s3 secret access key name
"s3SecretAccessKeyName": "AWS_SECRET_ACCESS_KEY",
# s3Endpoint specifies the s3 endpoint
"s3Endpoint": "",
# s3UseHttps controls whether to use secure https or insecure http to download models.
# Allowed values are 0 and 1.
"s3UseHttps": "",
# s3Region specifies the region of the bucket.
"s3Region": "",
# s3VerifySSL controls whether to verify the tls/ssl certificate.
"s3VerifySSL": "",
# s3UseVirtualBucket configures whether it is a virtual bucket or not.
"s3UseVirtualBucket": "",
# s3UseAnonymousCredential configures whether to use anonymous credentials to download the model or not.
"s3UseAnonymousCredential": "",
# s3CABundle specifies the path to a certificate bundle to use for HTTPS certificate validation.
"s3CABundle": ""
}
}
# ====================================== INGRESS CONFIGURATION ======================================
# Example
ingress: |-
{
"ingressGateway" : "knative-serving/knative-ingress-gateway",
"ingressService" : "istio-ingressgateway.istio-system.svc.cluster.local",
"localGateway" : "knative-serving/knative-local-gateway",
"localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local",
"ingressDomain" : "example.com",
"ingressClassName" : "istio",
"domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}",
"urlScheme": "http",
"disableIstioVirtualHost": false
}
ingress: |-
{
# ingressGateway specifies the ingress gateway to serve external traffic.
# The gateway should be specified in format <gateway namespace>/<gateway name>
# NOTE: This configuration is only applicable to serverless deployment with Istio configured as network layer.
"ingressGateway" : "knative-serving/knative-ingress-gateway",
# ingressService specifies the hostname of the ingress service.
# NOTE: This configuration is only applicable to serverless deployment with Istio configured as network layer.
"ingressService" : "istio-ingressgateway.istio-system.svc.cluster.local",
# localGateway specifies the gateway which handles the network traffic within the cluster.
# NOTE: This configuration is only applicable to serverless deployment with Istio configured as network layer.
"localGateway" : "knative-serving/knative-local-gateway",
# localGatewayService specifies the hostname of the local gateway service.
# NOTE: This configuration is only applicable to serverless deployment with Istio configured as network layer.
"localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local",
# ingressDomain specifies the domain name which is used for creating the url.
# If ingressDomain is empty then example.com is used as default domain.
# NOTE: This configuration is only applicable to raw deployment.
"ingressDomain" : "example.com",
# ingressClassName specifies the ingress controller to use for ingress traffic.
# This is optional and if omitted the default ingress in the cluster is used.
# https://kubernetes.io/docs/concepts/services-networking/ingress/#default-ingress-class
# NOTE: This configuration is only applicable to raw deployment.
"ingressClassName" : "istio",
# domainTemplate specifies the template for generating domain/url for each inference service by combining variables from:
# Name of the inference service ( {{ .Name }} )
# Namespace of the inference service ( {{ .Namespace }} )
# Annotation of the inference service ( {{ .Annotations.key }} )
# Label of the inference service ( {{ .Labels.key }} )
# IngressDomain ( {{ .IngressDomain }} )
# If domain template is empty the default template {{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }} is used.
# NOTE: This configuration is only applicable to raw deployment.
"domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}",
# urlScheme specifies the url scheme to use for inference service and inference graph.
# If urlScheme is empty then by default http is used.
"urlScheme": "http",
# disableIstioVirtualHost controls whether to use istio as network layer.
# By default istio is used as the network layer. When disableIstioVirtualHost is true, KServe does not
# create the top level virtual service, so Istio is no longer required for serverless mode.
# By setting this field to true, users can use other networking layers supported by knative.
# For more info https://github.com/kserve/kserve/pull/2380, https://kserve.github.io/website/master/admin/serverless/kourier_networking/.
# NOTE: This configuration is only applicable to serverless deployment.
"disableIstioVirtualHost": false,
# pathTemplate specifies the template for generating path based url for each inference service.
# The following variables can be used in the template for generating url.
# Name of the inference service ( {{ .Name }} )
# Namespace of the inference service ( {{ .Namespace }} )
# For more info https://github.com/kserve/kserve/issues/2257.
# NOTE: This configuration is only applicable to serverless deployment.
"pathTemplate": "/serving/{{ .Namespace }}/{{ .Name }}"
}
# ====================================== LOGGER CONFIGURATION ======================================
# Example
logger: |-
{
"image" : "kserve/agent:latest",
"memoryRequest": "100Mi",
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1",
"defaultUrl": "http://default-broker"
}
logger: |-
{
# image contains the default logger image uri.
"image" : "kserve/agent:latest",
# memoryRequest is the requests.memory to set for the logger container.
"memoryRequest": "100Mi",
# memoryLimit is the limits.memory to set for the logger container.
"memoryLimit": "1Gi",
# cpuRequest is the requests.cpu to set for the logger container.
"cpuRequest": "100m",
# cpuLimit is the limits.cpu to set for the logger container.
"cpuLimit": "1",
# defaultUrl specifies the default logger url. If logger is not specified in the resource this url is used.
"defaultUrl": "http://default-broker"
}
# ====================================== BATCHER CONFIGURATION ======================================
# Example
batcher: |-
{
"image" : "kserve/agent:latest",
"memoryRequest": "1Gi",
"memoryLimit": "1Gi",
"cpuRequest": "1",
"cpuLimit": "1"
}
batcher: |-
{
# image contains the default batcher image uri.
"image" : "kserve/agent:latest",
# memoryRequest is the requests.memory to set for the batcher container.
"memoryRequest": "1Gi",
# memoryLimit is the limits.memory to set for the batcher container.
"memoryLimit": "1Gi",
# cpuRequest is the requests.cpu to set for the batcher container.
"cpuRequest": "1",
# cpuLimit is the limits.cpu to set for the batcher container.
"cpuLimit": "1"
}
# ====================================== AGENT CONFIGURATION ======================================
# Example
agent: |-
{
"image" : "kserve/agent:latest",
"memoryRequest": "100Mi",
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1"
}
agent: |-
{
# image contains the default agent image uri.
"image" : "kserve/agent:latest",
# memoryRequest is the requests.memory to set for the agent container.
"memoryRequest": "100Mi",
# memoryLimit is the limits.memory to set for the agent container.
"memoryLimit": "1Gi",
# cpuRequest is the requests.cpu to set for the agent container.
"cpuRequest": "100m",
# cpuLimit is the limits.cpu to set for the agent container.
"cpuLimit": "1"
}
# ====================================== ROUTER CONFIGURATION ======================================
# Example
router: |-
{
"image" : "kserve/router:latest",
"memoryRequest": "100Mi",
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1"
}
# router is the implementation of inference graph.
router: |-
{
# image contains the default router image uri.
"image" : "kserve/router:latest",
# memoryRequest is the requests.memory to set for the router container.
"memoryRequest": "100Mi",
# memoryLimit is the limits.memory to set for the router container.
"memoryLimit": "1Gi",
# cpuRequest is the requests.cpu to set for the router container.
"cpuRequest": "100m",
# cpuLimit is the limits.cpu to set for the router container.
"cpuLimit": "1"
}
# ====================================== DEPLOYMENT CONFIGURATION ======================================
# Example
deploy: |-
{
"defaultDeploymentMode": "Serverless"
}
deploy: |-
{
# defaultDeploymentMode specifies the default deployment mode of KServe. The supported values are
# Serverless, RawDeployment and ModelMesh. Users can override the deployment mode at service level
# by adding the annotation serving.kserve.io/deploymentMode. For more info on deployment mode visit
# Serverless https://kserve.github.io/website/master/admin/serverless/serverless/
# RawDeployment https://kserve.github.io/website/master/admin/kubernetes_deployment/
# ModelMesh https://kserve.github.io/website/master/admin/modelmesh/
"defaultDeploymentMode": "Serverless"
}
# ====================================== METRICS CONFIGURATION ======================================
# Example
metricsAggregator: |-
{
"enableMetricAggregation": "false",
"enablePrometheusScraping" : "false"
}
# For more info see https://github.com/kserve/kserve/blob/master/qpext/README.md
metricsAggregator: |-
{
# enableMetricAggregation configures metric aggregation annotation. This adds the annotation serving.kserve.io/enable-metric-aggregation to every
# service with the specified boolean value. If true, metric aggregation is enabled in queue-proxy by setting env vars in the queue proxy container
# to configure scraping ports.
"enableMetricAggregation": "false",
# enablePrometheusScraping configures the prometheus scraping annotation. This adds the annotation serving.kserve.io/enable-prometheus-scraping to every
# service with the specified boolean value. If true, prometheus annotations are added to the pod. If serving.kserve.io/enable-metric-aggregation is false,
# the prometheus port is set with the default prometheus scraping port 9090, otherwise the prometheus port annotation is set with the metric aggregation port.
"enablePrometheusScraping" : "false"
}
# Alibi and Art explainer runtime configuration. For each explainer, `image`
# is the default serving runtime image uri and `defaultImageVersion` is the
# default image version.
explainers: |-
  {
      "alibi": {
          "image" : "kserve/alibi-explainer",
          "defaultImageVersion": "latest"
      },
      "art": {
          "image" : "kserve/art-explainer",
          "defaultImageVersion": "latest"
      }
  }
# Storage initializer configuration: the default image uri plus the memory/cpu
# requests and limits set on the storage initializer init container.
# enableDirectPvcVolumeMount controls whether users can mount pvc volumes
# directly (see https://github.com/kserve/kserve/issues/2737).
storageInitializer: |-
  {
      "image" : "kserve/storage-initializer:latest",
      "memoryRequest": "100Mi",
      "memoryLimit": "1Gi",
      "cpuRequest": "100m",
      "cpuLimit": "1",
      "enableDirectPvcVolumeMount": false
  }
# Global configuration used for downloading models from cloud storage.
# It can be overridden through annotations on the service account or a static
# secret. `gcs` names the gcs credential file; `s3` names the s3 access key
# settings, where empty strings leave the corresponding option unset.
credentials: |-
  {
      "storageSpecSecretName": "storage-config",
      "storageSecretNameAnnotation": "serving.kserve.io/storageSecretName",
      "gcs": {
          "gcsCredentialFileName": "gcloud-application-credentials.json"
      },
      "s3": {
          "s3AccessKeyIDName": "AWS_ACCESS_KEY_ID",
          "s3SecretAccessKeyName": "AWS_SECRET_ACCESS_KEY",
          "s3Endpoint": "",
          "s3UseHttps": "",
          "s3Region": "",
          "s3VerifySSL": "",
          "s3UseVirtualBucket": "",
          "s3UseAnonymousCredential": "",
          "s3CABundle": ""
      }
  }
# Ingress configuration: the external and cluster-local gateways with their
# service hostnames, the domain and domain template used to build inference
# service urls, the ingress class, the url scheme, and the switch that stops
# KServe from creating the top level Istio virtual service.
ingress: |-
  {
      "ingressGateway" : "knative-serving/knative-ingress-gateway",
      "ingressService" : "istio-ingressgateway.istio-system.svc.cluster.local",
      "localGateway" : "knative-serving/knative-local-gateway",
      "localGatewayService" : "knative-local-gateway.istio-system.svc.cluster.local",
      "ingressDomain" : "example.com",
      "ingressClassName" : "istio",
      "domainTemplate": "{{ .Name }}-{{ .Namespace }}.{{ .IngressDomain }}",
      "urlScheme": "http",
      "disableIstioVirtualHost": false
  }
# Logger configuration: the default logger image uri, the memory/cpu requests
# and limits set on the logger container, and `defaultUrl`, the logger url
# used when none is specified in the resource.
logger: |-
  {
      "image" : "kserve/agent:latest",
      "memoryRequest": "100Mi",
      "memoryLimit": "1Gi",
      "cpuRequest": "100m",
      "cpuLimit": "1",
      "defaultUrl": "http://default-broker"
  }
# Batcher configuration: the default batcher image uri and the memory/cpu
# requests and limits set on the batcher container.
batcher: |-
  {
      "image" : "kserve/agent:latest",
      "memoryRequest": "1Gi",
      "memoryLimit": "1Gi",
      "cpuRequest": "1",
      "cpuLimit": "1"
  }
# Agent configuration: the default agent image uri and the memory/cpu
# requests and limits set on the agent container.
agent: |-
  {
      "image" : "kserve/agent:latest",
      "memoryRequest": "100Mi",
      "memoryLimit": "1Gi",
      "cpuRequest": "100m",
      "cpuLimit": "1"
  }
# Router configuration (the router is the implementation of inference graph):
# the default router image uri and the memory/cpu requests and limits set on
# the router container.
router: |-
  {
      "image" : "kserve/router:latest",
      "memoryRequest": "100Mi",
      "memoryLimit": "1Gi",
      "cpuRequest": "100m",
      "cpuLimit": "1"
  }
# Deployment configuration. defaultDeploymentMode accepts Serverless,
# RawDeployment or ModelMesh; a service can override it with the annotation
# serving.kserve.io/deploymentMode.
deploy: |-
  {
      "defaultDeploymentMode": "Serverless"
  }
# Metrics configuration: toggles for metric aggregation in queue-proxy and for
# adding prometheus annotations to the pod. Both values are strings here, not
# YAML/JSON booleans.
# For more info see https://github.com/kserve/kserve/blob/master/qpext/README.md
metricsAggregator: |-
  {
      "enableMetricAggregation": "false",
      "enablePrometheusScraping" : "false"
  }