# triton.yaml
---
# ServerConfig "triton": pod template for a Triton-backed Seldon server.
#
# The pod runs three containers -- rclone, agent and triton -- that all mount
# the "triton-models" volume at /mnt/agent. That volume is provisioned from
# the claim template declared under spec.volumeClaimTemplates.
apiVersion: mlops.seldon.io/v1alpha1
kind: ServerConfig
metadata:
  name: triton
spec:
  podSpec:
    terminationGracePeriodSeconds: 120
    serviceAccountName: seldon-server
    containers:
      # rclone container: listens on 5572 and has read-write access to the
      # shared volume.
      - image: rclone:latest
        imagePullPolicy: IfNotPresent
        name: rclone
        ports:
          - containerPort: 5572
            name: rclone
            protocol: TCP
        lifecycle:
          # Every container uses the same preStop hook. 9007 is the value of
          # SELDON_DRAINER_PORT on the agent container.
          # NOTE(review): presumably this delays shutdown until the agent has
          # drained -- confirm against the agent implementation.
          preStop:
            httpGet:
              port: 9007
              path: terminate
        resources:
          requests:
            cpu: "200m"
            memory: '100M'
        # Ready as soon as a TCP connection to port 5572 succeeds.
        readinessProbe:
          failureThreshold: 3
          initialDelaySeconds: 5
          periodSeconds: 5
          successThreshold: 1
          tcpSocket:
            port: 5572
          timeoutSeconds: 1
        volumeMounts:
          - mountPath: /mnt/agent
            name: triton-models

      # agent container: configured entirely through the environment below.
      - image: agent:latest
        imagePullPolicy: IfNotPresent
        name: agent
        command:
          - /bin/agent
        args:
          # Path lives on the tracing-config-volume mount (/mnt/tracing).
          - --tracing-config-path=/mnt/tracing/tracing.json
        env:
          - name: SELDON_SERVER_CAPABILITIES
            value: "triton,dali,fil,onnx,openvino,python,pytorch,tensorflow,tensorrt"
          - name: SELDON_OVERCOMMIT_PERCENTAGE
            value: "10"
          - name: SELDON_MODEL_INFERENCE_LAG_THRESHOLD
            value: "30"
          - name: SELDON_MODEL_INACTIVE_SECONDS_THRESHOLD
            value: "600"
          - name: SELDON_SCALING_STATS_PERIOD_SECONDS
            value: "20"
          # 9000/9500 match SERVER_HTTP_PORT/SERVER_GRPC_PORT on the triton
          # container.
          - name: SELDON_SERVER_HTTP_PORT
            value: "9000"
          - name: SELDON_SERVER_GRPC_PORT
            value: "9500"
          # 9001/9501 match the "http"/"grpc" containerPorts exposed below.
          - name: SELDON_REVERSE_PROXY_HTTP_PORT
            value: "9001"
          - name: SELDON_REVERSE_PROXY_GRPC_PORT
            value: "9501"
          - name: AGENT_TLS_SECRET_NAME
            value: ""
          - name: AGENT_TLS_FOLDER_PATH
            value: ""
          - name: SELDON_SCHEDULER_HOST
            value: "seldon-scheduler"
          - name: SELDON_SCHEDULER_PORT
            value: "9005"
          # Matches the "metrics" containerPort exposed below.
          - name: SELDON_METRICS_PORT
            value: "9006"
          # Matches the port targeted by every container's preStop hook.
          - name: SELDON_DRAINER_PORT
            value: "9007"
          - name: SELDON_SERVER_TYPE
            value: "triton"
          - name: POD_NAME
            valueFrom:
              fieldRef:
                fieldPath: metadata.name
          - name: POD_NAMESPACE
            valueFrom:
              fieldRef:
                fieldPath: metadata.namespace
          # Memory request of the *triton* container (not of the agent itself).
          - name: MEMORY_REQUEST
            valueFrom:
              resourceFieldRef:
                containerName: triton
                resource: requests.memory
        ports:
          - containerPort: 9501
            name: grpc
            protocol: TCP
          - containerPort: 9001
            name: http
            protocol: TCP
          - containerPort: 9006
            name: metrics
            protocol: TCP
        lifecycle:
          preStop:
            httpGet:
              port: 9007
              path: terminate
        resources:
          requests:
            cpu: "500m"
            memory: '500M'
        volumeMounts:
          - mountPath: /mnt/agent
            name: triton-models
          - name: config-volume
            mountPath: /mnt/config
          - name: tracing-config-volume
            mountPath: /mnt/tracing

      # triton container: the inference server itself.
      - image: triton:latest
        command:
          - /opt/tritonserver/bin/tritonserver
        args:
          # $(VAR) references are expanded from this container's env entries.
          - --model-repository=$(SERVER_MODELS_DIR)
          - --http-port=$(SERVER_HTTP_PORT)
          - --grpc-port=$(SERVER_GRPC_PORT)
          - --log-verbose=1
          - --model-control-mode=explicit
          # 16777216 bytes = 16 MiB.
          - --backend-config=python,shm-default-byte-size=16777216
        imagePullPolicy: IfNotPresent
        env:
          - name: SERVER_HTTP_PORT
            value: "9000"
          - name: SERVER_GRPC_PORT
            value: "9500"
          # Located on the shared triton-models volume (mounted at /mnt/agent).
          - name: SERVER_MODELS_DIR
            value: "/mnt/agent/models"
          - name: LD_PRELOAD
            value: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
        resources:
          requests:
            # Quoted for consistency with the other resource quantities.
            cpu: "1"
            memory: '1G'
        # All three probes query the same endpoint on the "server-http" port.
        livenessProbe:
          httpGet:
            path: /v2/health/live
            port: server-http
        # NOTE(review): readiness checks the liveness endpoint, not
        # /v2/health/ready -- confirm this is intentional.
        readinessProbe:
          httpGet:
            path: /v2/health/live
            port: server-http
          initialDelaySeconds: 5
          periodSeconds: 5
        # 10 failures x 10 s period: roughly 100 s allowed for startup.
        startupProbe:
          httpGet:
            path: /v2/health/live
            port: server-http
          failureThreshold: 10
          periodSeconds: 10
        name: triton
        ports:
          - containerPort: 9500
            name: server-grpc
            protocol: TCP
          - containerPort: 9000
            name: server-http
            protocol: TCP
          # Protocol spelled out (TCP is the default) to match the other ports.
          - containerPort: 8002
            name: server-metrics
            protocol: TCP
        lifecycle:
          preStop:
            httpGet:
              port: 9007
              path: terminate
        volumeMounts:
          # Read-only here; the other two containers mount it read-write.
          - mountPath: /mnt/agent
            name: triton-models
            readOnly: true
          - mountPath: /dev/shm
            name: dshm
            readOnly: false

    # Pod-level security settings (apply to all containers).
    securityContext:
      fsGroup: 2000
      runAsUser: 1000
      runAsNonRoot: true
    volumes:
      - name: config-volume
        configMap:
          name: seldon-agent
      - name: tracing-config-volume
        configMap:
          name: seldon-tracing
      # Memory-backed scratch space mounted at /dev/shm in the triton container.
      - name: dshm
        emptyDir:
          medium: Memory
          sizeLimit: "256Mi"

  # Claim template backing the "triton-models" volume mounted at /mnt/agent.
  volumeClaimTemplates:
    - name: triton-models
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi