# We define a service account that is attached by default to all Jupyter user pods
# and dask-gateway workers. By default, this has no permissions - although extra
# cloud access permissions may be granted - see docs/topic/features.md.
userServiceAccount:
enabled: true
annotations: {}
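# As a sketch with hypothetical values (the annotation key is GKE's
# Workload Identity mechanism; the service account email is made up),
# cloud access could be granted like:
#
#   userServiceAccount:
#     annotations:
#       iam.gke.io/gcp-service-account: <hub>-user-sa@<project>.iam.gserviceaccount.com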
binderhub-service:
enabled: false
ingressBasicAuth:
enabled: false
# Primarily here so validation 'works', as these are otherwise set in
# secret config. I don't like this, as we won't notice if these values
# are missing because they weren't set anywhere.
username: ""
password: ""
dex:
enabled: false
staticWebsite:
enabled: false
source:
git:
branch: main
githubAuth:
enabled: false
githubApp:
# Primarily here so validation 'works', as these are otherwise set in
# secret config. I don't like this, as we won't notice if these values
# are missing because they weren't set anywhere.
id: 0
privateKey: ""
nfs:
enabled: true
dirsizeReporter:
enabled: true
shareCreator:
enabled: true
tolerations: []
pv:
enabled: true
mountOptions:
- soft
- noatime
- vers=4.2
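# - soft: return an error after retries instead of hanging indefinitely
#   if the NFS server becomes unreachable
# - noatime: skip access-time updates on reads, avoiding extra writes
# - vers=4.2: mount using NFS protocol version 4.2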
# Use NFS provided by an in-cluster server with the nfs-external-provisioner chart
inClusterNFS:
enabled: false
size: 100Gi
# A placeholder for global values: it should be possible to provide values
# that any chart in the release can reference from the same location, even
# though they aren't necessarily provided or used.
global: {}
jupyterhub:
custom:
auth:
anonymizeUsername: false
singleuser:
extraPVCs: []
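# A minimal sketch of an entry (hypothetical values); each entry's
# name, class and capacity keys are consumed by the 05-per-user-disk
# snippet in hub.extraConfig below:
#
#   extraPVCs:
#     - name: "db-{username}"
#       class: standard
#       capacity: 1G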
singleuserAdmin:
extraEnv: {}
extraVolumeMounts:
- name: home
mountPath: /home/jovyan/shared-readwrite
subPath: _shared
cloudResources:
provider: ""
gcp:
projectId: ""
scratchBucket:
enabled: false
2i2c:
# Should 2i2c engineering staff user IDs be injected into the admin_users
# configuration of the JupyterHub's authenticator by our custom
# jupyterhub_config.py snippet, as declared in hub.extraConfig?
add_staff_user_ids_to_admin_users: false
add_staff_user_ids_of_type: ""
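# For example (a sketch), a hub authenticating via GitHub could opt in
# with the values below; "google" works analogously via staff_google_ids:
#
#   2i2c:
#     add_staff_user_ids_to_admin_users: true
#     add_staff_user_ids_of_type: "github"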
staff_github_ids:
- choldgraf
- colliand
- consideRatio
- damianavila
- GeorgianaElena
- jmunroe
- sgibson91
- yuvipanda
staff_google_ids:
homepage:
gitRepoUrl: "https://github.com/2i2c-org/default-hub-homepage"
# TODO: make main the default branch in the repo above
gitRepoBranch: "master"
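# A hub can point this at its own homepage repo, e.g. (hypothetical
# org/repo names):
#
#   homepage:
#     gitRepoUrl: "https://github.com/<org>/<hub>-homepage"
#     gitRepoBranch: "main"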
ingress:
enabled: true
ingressClassName: nginx
annotations:
nginx.ingress.kubernetes.io/proxy-body-size: 256m
cert-manager.io/cluster-issuer: letsencrypt-prod
scheduling:
# We declare matchNodePurpose=require to get a nodeAffinity that acts like a
# nodeSelector on all core pods and user pods. Core pods like hub and proxy
# will schedule on nodes with hub.jupyter.org/node-purpose=core, and user
# pods on nodes with hub.jupyter.org/node-purpose=user.
#
# Since this setting adds a nodeAffinity, it's okay that we configure
# KubeSpawner's profile_list to override node_selector.
#
corePods:
nodeAffinity:
matchNodePurpose: require
userPods:
nodeAffinity:
matchNodePurpose: require
podPriority:
enabled: true
userPlaceholder:
enabled: true
replicas: 0
userScheduler:
enabled: false
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
resources:
requests:
cpu: 0.01
memory: 64Mi
limits:
memory: 1G
prePuller:
continuous:
enabled: false
hook:
enabled: false
proxy:
service:
type: ClusterIP
chp:
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
resources:
requests:
cpu: 0.01
memory: 64Mi
limits:
memory: 1Gi
traefik:
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
# Note: if autohttps pods aren't used anywhere by our basehub
# deployments, we should simply remove this traefik configuration.
#
resources:
requests:
memory: 64Mi
limits:
memory: 1Gi
singleuser:
# Need to explicitly fix ownership here, as otherwise these directories will be
# owned by root on most NFS filesystems - neither EFS nor Google Filestore
# supports anonuid.
initContainers:
- name: volume-mount-ownership-fix
image: busybox:1.36.1
command:
[
"sh",
"-c",
"id && chown 1000:1000 /home/jovyan && chown 1000:1000 /home/jovyan/shared && ls -lhd /home/jovyan ",
]
securityContext:
runAsUser: 0
volumeMounts:
- name: home
mountPath: /home/jovyan
subPath: "{username}"
# Mounted without readonly attribute here,
# so we can chown it appropriately
- name: home
mountPath: /home/jovyan/shared
subPath: _shared
cmd:
# Explicitly define this, as it's no longer set by z2jh
# https://github.com/jupyterhub/zero-to-jupyterhub-k8s/pull/2449
- jupyterhub-singleuser
extraEnv:
# The notebook server writes 'secure' files here that don't need to
# survive a restart. Writing such files on some file systems (like
# Azure Files with SMB) seems buggy, so we just put the runtime dir on
# /tmp. This is ok in our case, since no two users share the same
# container.
JUPYTER_RUNTIME_DIR: /tmp/.jupyter-runtime
# By default, /bin/sh is used as the shell for terminals, not /bin/bash.
# Most people do not expect this, so let's match expectations.
SHELL: /bin/bash
extraFiles:
ipython_kernel_config.json:
mountPath: /usr/local/etc/ipython/ipython_kernel_config.json
data:
# This keeps a history of all executed code in a sqlite file under $HOME,
# which is almost always on NFS - and sqlite and NFS do not go together
# very well! Disable this to save ourselves from debugging random NFS
# oddities caused by this unholy sqlite + NFS mixture.
HistoryManager:
enabled: false
# jupyter_server and notebook are different jupyter servers providing
# similar configuration options. Since we have user images that may
# provide either, we provide the same configuration for both via
# jupyter_server_config.json and jupyter_notebook_config.json.
#
# A hub can force a choice with singleuser.extraEnv via:
#
# JUPYTERHUB_SINGLEUSER_APP: "notebook.notebookapp.NotebookApp"
# JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp"
#
jupyter_server_config.json:
mountPath: /usr/local/etc/jupyter/jupyter_server_config.json
# If a user leaves a notebook with a running kernel, the effective idle
# timeout will typically be the cull idle timeout of the server plus the
# cull idle timeout of the kernel, as culling the kernel registers
# activity, resetting the no_activity timer for the server as a whole.
data:
# MappingKernelManager configuration reference:
# https://jupyter-server.readthedocs.io/en/latest/api/jupyter_server.services.kernels.html#jupyter_server.services.kernels.kernelmanager.MappingKernelManager
#
MappingKernelManager: &server_config_mapping_kernel_manager
cull_idle_timeout: 3600
cull_interval: 300
cull_connected: true
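# Worked example with the values above: a kernel idle for 3600s is
# culled (checked every 300s), and the culling itself registers server
# activity - so a server-level idle culler set to another 3600s would
# only shut the server down roughly 7200s after the user's last action.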
# ServerApp configuration reference:
# https://jupyter-server.readthedocs.io/en/latest/api/jupyter_server.html#jupyter_server.serverapp.ServerApp
#
ServerApp: &server_config_server_app
extra_template_paths:
- /usr/local/share/jupyter/custom_template
# Move the sqlite file used by https://github.com/jupyter-server/jupyter_server_fileid
# off its default path, which is under ~/.local/share/jupyter.
# That is on NFS, and sqlite + NFS don't go well together. In addition,
# it uses sqlite's WAL mode, which is completely unsupported on NFS.
# Upstream discussion in https://github.com/jupyter-server/jupyter_server_fileid/issues/60.
BaseFileIdManager: &server_config_base_file_id_manager
db_path: /tmp/file_id_manager.db
jupyter_notebook_config.json:
mountPath: /usr/local/etc/jupyter/jupyter_notebook_config.json
data:
MappingKernelManager: *server_config_mapping_kernel_manager
NotebookApp: *server_config_server_app
BaseFileIdManager: *server_config_base_file_id_manager
startTimeout: 600 # 10 mins, because sometimes we have too many new nodes coming up together
defaultUrl: /tree
image:
name: jupyter/scipy-notebook
tag: "2023-06-19"
storage:
type: static
static:
pvcName: home-nfs
subPath: "{username}"
extraVolumeMounts:
- name: home
mountPath: /home/jovyan/shared
subPath: _shared
readOnly: true
memory:
guarantee: 256M
limit: 1G
cpu:
# If no CPU limit is set, it is possible for a single user or group of users to
# starve everyone else of CPU time on a node, even causing new user pods to completely
# fail as the notebook server process gets no CPU to complete the auth handshake with
# the hub, and even trivial cells like `print("hello world")` may not run.
# Unlike memory guarantees, CPU guarantees are actually enforced by the Linux kernel
# (see https://medium.com/@betz.mark/understanding-resource-limits-in-kubernetes-cpu-time-9eff74d3161b).
# By giving each user a 5% CPU guarantee (represented by 0.05), we ensure that:
# 1. Simple cells will always execute
# 2. Notebook server processes will always start - so users won't have server spawn failures
# 3. We don't accidentally set just a high limit for a particular hub without setting a
#    guarantee, at which point kubernetes treats the limit as the guarantee! This causes
#    far more nodes to be scaled up than needed, making everything super slow (like in
#    https://github.com/2i2c-org/infrastructure/issues/790)
# 4. Most of our workloads are still memory bound, and we want scaling to happen only
#    when a node is full on its memory guarantees. A 0.05 guarantee means an n1-highmem-8
#    node (8 vCPUs) could fit 8 / 0.05 = 160 user pods, and since kubernetes already caps
#    us at 100 pods per node, this guarantee doesn't actually change our scheduling.
guarantee: 0.05
networkPolicy:
# Allow unrestricted access to the internet but not local cluster network
enabled: true
egress:
- to:
- ipBlock:
cidr: 0.0.0.0/0
except:
# Don't allow network access to private IP ranges
# Listed in https://datatracker.ietf.org/doc/html/rfc1918
- 10.0.0.0/8
- 172.16.0.0/12
- 192.168.0.0/16
# Don't allow network access to the metadata IP
- 169.254.169.254/32
# Allow code in hubs to talk to ingress provider, so they can talk to
# the hub via its public URL
- to:
- namespaceSelector:
matchLabels:
name: support
podSelector:
matchLabels:
app.kubernetes.io/name: ingress-nginx
# If a hub is using autohttps instead of ingress-nginx, allow traffic
# to the autohttps pod as well
- to:
- podSelector:
matchLabels:
app: jupyterhub
component: autohttps
# Allow traffic to the proxy pod from user pods
# This is particularly important for daskhubs that utilise the proxy
# in order to create clusters (schedulers and workers)
- to:
- podSelector:
matchLabels:
app: jupyterhub
component: proxy
# Allow traffic to the traefik pod from user pods. Needed for daskhubs.
- to:
- podSelector:
matchLabels:
app.kubernetes.io/component: traefik
hub:
loadRoles:
# Use this, not hub.config.JupyterHub.load_roles - the latter would
# override any existing load_roles set by z2jh.
service-use:
name: user
scopes:
# Allow all users access to 'services', which includes dask-gateway & configurator
- access:services
- self
config:
JupyterHub:
# Allow unauthenticated prometheus requests
# Otherwise our prometheus server can't get hub metrics
authenticate_prometheus: false
KubeSpawner:
# Make sure working directory is where we mount the home folder
working_dir: /home/jovyan
# Increase the timeout for the Jupyter server to become 'ready', until
# https://github.com/2i2c-org/infrastructure/issues/2047 is fixed
http_timeout: 120
Authenticator:
# Don't allow the test username to log in to the hub.
# The test service will still be able to create this hub username
# and start their server.
# Ref: https://github.com/2i2c-org/meta/issues/321
blocked_users:
- deployment-service-check
extraFiles:
configurator-schema-default:
mountPath: /usr/local/etc/jupyterhub-configurator/00-default.schema.json
data:
type: object
name: config
properties:
KubeSpawner.image:
type: string
title: User docker image
description: Determines languages, libraries and interfaces available
help: Leave this blank to use the default
Spawner.default_url:
type: string
title: Default User Interface
enum:
- "/tree"
- "/lab"
- "/rstudio"
default: "/tree"
enumMetadata:
interfaces:
- value: "/tree"
title: Classic Notebook
description:
The original single-document interface for creating
Jupyter Notebooks.
- value: "/lab"
title: JupyterLab
description: A powerful next-generation notebook interface
- value: "/rstudio"
title: RStudio
description: An IDE for R, created by the RStudio company
initContainers:
- name: templates-clone
image: alpine/git:2.40.1
imagePullPolicy: IfNotPresent
args:
- clone
- --
- $(GIT_REPO_URL)
- /srv/repo
env:
- name: GIT_REPO_URL
valueFrom:
configMapKeyRef:
name: hub-custom-templates-config
key: GIT_REPO_URL
securityContext:
runAsUser: 1000
runAsGroup: 1000
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
volumeMounts:
- name: custom-templates
mountPath: /srv/repo
- name: templates-ownership-fix
image: alpine/git:2.40.1
imagePullPolicy: IfNotPresent
command:
- /bin/sh
args:
- -c
- ls -lhd /srv/repo && chown 1000:1000 /srv/repo && ls -lhd /srv/repo
securityContext:
runAsUser: 0
volumeMounts:
- name: custom-templates
mountPath: /srv/repo
extraContainers:
- name: templates-sync
image: alpine/git:2.40.1
imagePullPolicy: IfNotPresent
workingDir: /srv/repo
command:
- /bin/sh
args:
- -c
- |
handle_sigterm() {
echo "SIGTERM received, terminating...";
exit;
}
trap handle_sigterm SIGTERM;
echo "Starting template sync...";
echo "";
echo "Info about local git repo to be synced:";
(
# set -x causes commands run to be printed, helping log readers
# understand what the generated output is about. set -x is
# configured within a subshell to just print info about the
# specific chosen commands and avoid printing info about running
# "echo", "sleep", "set +x", or similar commands.
set -x;
git remote -v;
ls -lhd /srv/repo;
)
echo "";
echo "Syncing local git repo /srv/repo against origin's branch $(GIT_REPO_BRANCH) every 5 minutes...";
while true; do
git fetch origin;
git reset --hard origin/$(GIT_REPO_BRANCH);
# signal handling can only be done between sleep calls, so this
# shouldn't be reduced to the otherwise equivalent "sleep 5m"
for i in $(seq 300); do
sleep 1s;
done
done
env:
- name: GIT_REPO_BRANCH
valueFrom:
configMapKeyRef:
name: hub-custom-templates-config
key: GIT_REPO_BRANCH
securityContext:
runAsUser: 1000
runAsGroup: 1000
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
volumeMounts:
- name: custom-templates
mountPath: /srv/repo
extraVolumes:
- name: custom-templates
emptyDir: {}
extraVolumeMounts:
- mountPath: /usr/local/share/jupyterhub/custom_templates
name: custom-templates
subPath: templates
- mountPath: /usr/local/share/jupyterhub/static/extra-assets
name: custom-templates
subPath: extra-assets
services:
configurator:
url: http://configurator:10101
# Don't require users to explicitly 'confirm' before sending them to configurator
oauth_no_confirm: true
command:
- python3
- -m
- jupyterhub_configurator.app
- --Configurator.config_file=/usr/local/etc/jupyterhub-configurator/jupyterhub_configurator_config.py
# The hub-health service helps us run health checks from the deployer script.
# The JupyterHub Helm chart will automatically generate an API token for
# services and expose it in a k8s Secret named `hub`. When we run health
# tests against a hub, we read this token from the k8s Secret to acquire
# the credentials needed to interact with the JupyterHub API.
#
hub-health:
# FIXME: With JupyterHub 2 we can define a role for this service with
# more tightly scoped permissions based on our needs.
#
admin: true
image:
name: quay.io/2i2c/pilot-hub
tag: "0.0.1-0.dev.git.7080.h0da36d1e"
networkPolicy:
enabled: true
# interNamespaceAccessLabels=accept makes the hub pod's associated
# NetworkPolicy accept ingress from pods in other namespaces that have a
# hub.jupyter.org/network-access-hub=true label.
#
# ref: https://z2jh.jupyter.org/en/stable/resources/reference.html#hub-networkpolicy-internamespaceaccesslabels
#
interNamespaceAccessLabels: accept
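# As a sketch, a pod in another namespace opts in to hub access by
# carrying this label:
#
#   metadata:
#     labels:
#       hub.jupyter.org/network-access-hub: "true"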
ingress:
- from:
- podSelector:
matchLabels:
app: jupyterhub
component: hub
ports:
- port: 8081
protocol: TCP
# The jupyterhub-configurator is a managed jupyterhub service, which
# means it is started by jupyterhub as a separate process in the hub
# pod. Users access it via the proxy pod, and JupyterHub itself
# accesses it via localhost. This rule accepts such requests on
# port 10101 from these destinations.
#
# The container-internal rule, for jupyterhub ->
# jupyterhub-configurator, may not be needed, as that request goes
# directly to 127.0.0.1:10101.
#
# ref: the extraConfig.02-custom-admin section below
# ref: https://github.com/yuvipanda/jupyterhub-configurator/blob/996405d2a7017153d5abe592b8028fed7a1801bb/jupyterhub_configurator/mixins.py#L7C5-L11
#
- from:
- podSelector:
matchLabels:
app: jupyterhub
component: proxy
- podSelector:
matchLabels:
app: jupyterhub
component: hub
ports:
- port: 10101
protocol: TCP
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
resources:
requests:
cpu: 0.01
memory: 128Mi
limits:
memory: 2Gi
extraConfig:
01-custom-theme: |
from z2jh import get_config
c.JupyterHub.template_paths.insert(0, '/usr/local/share/jupyterhub/custom_templates')
c.JupyterHub.template_vars.update({
'custom': get_config('custom.homepage.templateVars')
})
02-custom-admin: |
from z2jh import get_config
from kubespawner import KubeSpawner
from jupyterhub_configurator.mixins import ConfiguratorSpawnerMixin
class CustomSpawner(ConfiguratorSpawnerMixin, KubeSpawner):
def start(self, *args, **kwargs):
custom_admin = get_config('custom.singleuserAdmin', {})
if custom_admin and self.user.admin:
extra_init_containers = custom_admin.get('initContainers', [])
extra_volume_mounts = custom_admin.get('extraVolumeMounts', [])
extra_env = custom_admin.get('extraEnv', {})
self.init_containers += [container for container in extra_init_containers if container not in self.init_containers]
self.volume_mounts += [volume for volume in extra_volume_mounts if volume not in self.volume_mounts]
self.environment.update(extra_env)
return super().start(*args, **kwargs)
c.JupyterHub.spawner_class = CustomSpawner
03-cloud-storage-bucket: |
import os
from z2jh import get_config
cloud_resources = get_config('custom.cloudResources')
scratch_bucket = cloud_resources['scratchBucket']
if scratch_bucket['enabled']:
# FIXME: Support other providers too
assert cloud_resources['provider'] == 'gcp'
project_id = cloud_resources['gcp']['projectId']
release = os.environ['HELM_RELEASE_NAME']
bucket_protocol = 'gcs'
bucket_name = f'{project_id}-{release}-scratch-bucket'
env = {
'SCRATCH_BUCKET_PROTOCOL': bucket_protocol,
# Matches "daskhub.scratchBUcket.name" helm template
'SCRATCH_BUCKET_NAME': bucket_name,
# Use k8s syntax of $(ENV_VAR) to substitute env vars dynamically in other env vars
'SCRATCH_BUCKET': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)',
'PANGEO_SCRATCH': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)',
}
c.KubeSpawner.environment.update(env)
04-2i2c-add-staff-user-ids-to-admin-users: |
from z2jh import get_config
add_staff_user_ids_to_admin_users = get_config("custom.2i2c.add_staff_user_ids_to_admin_users", False)
if add_staff_user_ids_to_admin_users:
user_id_type = get_config("custom.2i2c.add_staff_user_ids_of_type")
staff_user_ids = get_config(f"custom.2i2c.staff_{user_id_type}_ids", [])
# `c.Authenticator.admin_users` can contain additional admins, can be an
# empty list, or may not be defined at all.
# This should cover all these cases.
staff_user_ids.extend(get_config("hub.config.Authenticator.admin_users", []))
c.Authenticator.admin_users = staff_user_ids
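# Hub-specific admins can still be granted via helm values, which the
# extend() above merges in. A sketch of such values:
#
#   hub:
#     config:
#       Authenticator:
#         admin_users:
#           - some-admin-user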
05-per-user-disk: |
# Optionally, create a PVC per user - useful for per-user databases
from jupyterhub.utils import exponential_backoff
from z2jh import get_config
from kubespawner.objects import make_pvc
from functools import partial
def make_extra_pvc(component, name_template, storage_class, storage_capacity, spawner):
"""
Create a PVC object with given spec
"""
labels = spawner._build_common_labels({})
labels.update({
'component': component
})
annotations = spawner._build_common_annotations({})
return make_pvc(
name=spawner._expand_all(name_template),
storage_class=storage_class,
access_modes=['ReadWriteOnce'],
selector={},
storage=storage_capacity,
labels=labels,
annotations=annotations
)
extra_user_pvcs = get_config('custom.singleuser.extraPVCs', [])
if extra_user_pvcs:
pvc_makers = [partial(
make_extra_pvc,
"extra-pvc",
p["name"],
p["class"],
p["capacity"]
) for p in extra_user_pvcs]
async def ensure_db_pvc(spawner):
""""
Ensure a PVC is created for this user's database volume
"""
for pvc_maker in pvc_makers:
pvc = pvc_maker(spawner)
# If there's a timeout, just let it propagate
await exponential_backoff(
partial(spawner._make_create_pvc_request, pvc, spawner.k8s_api_request_timeout),
f'Could not create pvc {pvc.metadata.name}',
# Each req should be given k8s_api_request_timeout seconds.
timeout=spawner.k8s_api_request_retry_timeout
)
c.Spawner.pre_spawn_hook = ensure_db_pvc
05-gh-teams: |
# Re-assigns c.KubeSpawner.profile_list to a callable that filters the
# initial configuration of profile_list based on the user's GitHub
# org/team membership, as declared via "allowed_teams" keys read from
# profile_list profiles.
#
# This is only done if:
# - GitHubOAuthenticator is used
# - GitHubOAuthenticator.populate_teams_in_auth_state is True
#
import copy
from textwrap import dedent
from tornado import web
from oauthenticator.github import GitHubOAuthenticator
original_profile_list = c.KubeSpawner.profile_list
async def profile_list_allowed_teams_filter(spawner):
"""
Returns the initially configured profile_list, filtered based on whether
the spawning user is part of the profiles' specified GitHub orgs/teams.
Each profile's 'allowed_teams' key lists GitHub teams (of the form
org-name:team-name) for which the profile is made available.
If the user isn't part of any team whose membership grants them access
to even a single profile, they aren't allowed to start any servers.
"""
# Only apply to GitHub Authenticator
if not isinstance(spawner.authenticator, GitHubOAuthenticator):
return original_profile_list
# If populate_teams_in_auth_state is not set, github teams are not fetched
# So we just don't do any of this filtering, and let anyone into everything
if not spawner.authenticator.populate_teams_in_auth_state:
return original_profile_list
auth_state = await spawner.user.get_auth_state()
if not auth_state or "teams" not in auth_state:
if spawner.user.name == 'deployment-service-check':
# For our hub deployer health checker, ignore all this logic
print("Ignoring allowed_teams check for deployment-service-check")
return original_profile_list
print(f"User {spawner.user.name} does not have any auth_state set")
raise web.HTTPError(403)
# Make a list of team names of form org-name:team-name
# This is the same syntax used by allowed_organizations traitlet of GitHubOAuthenticator
teams = set([f'{team_info["organization"]["login"]}:{team_info["slug"]}' for team_info in auth_state["teams"]])
print(f"User {spawner.user.name} is part of teams {' '.join(teams)}")
allowed_profiles = []
# Make a copy of the original profile_list dict,
# otherwise we might end up modifying it by mistake
profile_list_copy = copy.deepcopy(original_profile_list)
for profile in profile_list_copy:
# If there is no ':' in allowed_teams, it's an org and we should check that
# differently
allowed_orgs = set([o for o in profile.get('allowed_teams', []) if ':' not in o])
allowed_teams = set([t for t in profile.get('allowed_teams', []) if ':' in t])
# Keep the profile if the user is part of *any* team listed in allowed_teams.
# If allowed_teams is empty or not set, the profile won't be accessible to *anyone*
if allowed_teams & teams:
allowed_profiles.append(profile)
print(f"Allowing profile {profile['display_name']} for user {spawner.user.name} based on team membership")
elif allowed_orgs:
for org in allowed_orgs:
user_in_org = await spawner.authenticator._check_membership_allowed_organizations(
org, spawner.user.name, auth_state['access_token']
)
if user_in_org:
allowed_profiles.append(profile)
print(f"Allowing profile {profile['display_name']} for user {spawner.user.name} based on org membership")
break
else:
print(f"Dropping profile {profile['display_name']} for user {spawner.user.name}")
if len(allowed_profiles) == 0:
# If no profiles are allowed, the user should not be able to spawn anything!
# If we don't explicitly stop this, the user will be launched with the default
# settings set in singleuser, without any profile overrides. Not desired behavior!
# FIXME: The user doesn't actually see this error message, just a generic 403.
error_msg = dedent(f"""
Your GitHub team membership is insufficient to launch any server profiles.
GitHub teams you are a member of that this JupyterHub knows about are {', '.join(teams)}.
If you are part of additional teams, log out of this JupyterHub and log back in to refresh that information.
""")
raise web.HTTPError(403, error_msg)
return allowed_profiles
# Only set this customized profile_list *if* we already have a profile_list set;
# otherwise, we'd show users a blank server options form and they wouldn't be able
# to start their server.
if c.KubeSpawner.profile_list:
# Customize list of profiles dynamically, rather than override options form.
# This is more secure, as users can't override the options available to them via the hub API
c.KubeSpawner.profile_list = profile_list_allowed_teams_filter
06-salted-username: |
# Allow anonymizing the username so we don't store *any* PII
import json
import os
import base64
import hashlib
from z2jh import get_config
def salt_username(authenticator, handler, auth_model):
# Combine parts of user info with different provenances to mitigate
# possible deanonymization attacks when things get leaked.
# FIXME: Provide a useful error message when using an auth provider that
# doesn't give us 'oidc'
# FIXME: Raise an error if this is used with anything other than CILogon
USERNAME_DERIVATION_PEPPER = bytes.fromhex(os.environ['USERNAME_DERIVATION_PEPPER'])
cilogon_user = auth_model['auth_state']['cilogon_user']
user_key_parts = {
# Opaque ID from CILogon
"sub": cilogon_user['sub'],
# Combined together, opaque ID from upstream IDP (GitHub, Google, etc)
"idp": cilogon_user['idp'],
"oidc": cilogon_user['oidc']
}
# Use JSON here, so we don't have to deal with picking a string
# delimiter that will not appear in any of the parts.
# Keys are sorted to ensure stable output over time.
user_key = json.dumps(user_key_parts, sort_keys=True).encode('utf-8')
# The cryptographic choices made here are:
# - Use blake2, because it's fairly modern
# - Configure blake2 to produce 32 bytes of output, which is good enough
#   for our use case
# - Use base32 encoding, as it produces a maximum of 56 characters for
#   the 32 bytes output by blake2. We have 63 character username limits
#   in many parts of our code (particularly, usernames being part of
#   labels in kubernetes pods), so this helps
# - Convert everything to lowercase, as base64.b32encode produces all
#   uppercase characters by default. Our usernames are preferably
#   lowercase, as uppercase characters must be encoded for kubernetes'
#   sake
# - Strip the = padding added by base64.b32encode. It exists primarily
#   to allow accurately determining the length of the original byte
#   sequence, which we don't care about here. Also, = is encoded in
#   kubernetes and puts us over the 63 char limit.
# - Use blake2 explicitly as a keyed hash, rather than using hmac. This
#   is the canonical way to do this, and helps make it clearer that we
#   want 32 byte hashes as output. We could have used a 16 byte hash
#   here for shorter usernames, but it is unclear what that does to the
#   security properties. So better safe than sorry, and stick to 32
#   bytes (rather than the default 64)
digested_user_key = base64.b32encode(hashlib.blake2b(
user_key,
key=USERNAME_DERIVATION_PEPPER,
digest_size=32
).digest()).decode('utf-8').lower().rstrip("=")
# Replace the default name with our digested name, thus
# discarding the default name
auth_model["name"] = digested_user_key
return auth_model
if get_config('custom.auth.anonymizeUsername', False):
# https://jupyterhub.readthedocs.io/en/stable/reference/api/auth.html#jupyterhub.auth.Authenticator.post_auth_hook
c.Authenticator.post_auth_hook = salt_username
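# A hub opts in to this anonymization via helm values (a sketch):
#
#   jupyterhub:
#     custom:
#       auth:
#         anonymizeUsername: true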