helm-charts/basehub/values.yaml

# We define a service account that is attached by default to all Jupyter user pods
# and dask-gateway workers. By default, this has no permissions - although extra
# cloud access permissions may be granted - see docs/topic/features.md.
userServiceAccount:
  enabled: true
  annotations: {}

adminServiceAccount:
  enabled: false
  annotations: {}

binderhub-service:
  enabled: false
  ingress:
    enabled: false
    ingressClassName: nginx
    annotations:
      nginx.ingress.kubernetes.io/proxy-body-size: 256m
      cert-manager.io/cluster-issuer: letsencrypt-prod
  nodeSelector:
    hub.jupyter.org/node-purpose: core
  service:
    port: 8090
  # The DaemonSet at https://github.com/2i2c-org/binderhub-service/blob/main/binderhub-service/templates/docker-api/daemonset.yaml
  # will start a docker-api pod on a user node.
  # It starts the [dockerd](https://docs.docker.com/engine/reference/commandline/dockerd/) daemon,
  # that will be accessible via a unix socket, mounted by the build.
  # The docker-api pod must run on the same node as the builder pods.
  dockerApi:
    nodeSelector:
      hub.jupyter.org/node-purpose: user
    tolerations:
      # Tolerate tainted jupyterhub user nodes
      - key: hub.jupyter.org_dedicated
        value: user
        effect: NoSchedule
      - key: hub.jupyter.org/dedicated
        value: user
        effect: NoSchedule
  config:
    BinderHub:
      base_url: /services/binder
      use_registry: true
    KubernetesBuildExecutor:
      node_selector:
        # Schedule builder pods to run on user nodes only
        hub.jupyter.org/node-purpose: user
  custom:
    sendLogsOfLaunchEventsTo2i2c: false
  extraConfig:
    01-send-logs-of-launch-events-to-2i2c: |
      if get_chart_config("custom.sendLogsOfLaunchEventsTo2i2c"):
          import os
          import sys
          from traitlets.log import get_logger

          # this check would ideally be done via chart config schema validation,
          # but it may be too messy to do in practice - maybe not though
          if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
              get_logger().critical("binderhub-service.custom.sendLogsOfLaunchEventsTo2i2c requires binderhub-service.extraCredentials.googleServiceAccountKey to be setup")
              sys.exit(1)

          from google.cloud.logging import Client
          from google.cloud.logging.handlers import CloudLoggingHandler

          def _make_eventsink_handler(el):
              client = Client()
              log_name = "binderhub-event-logs"
              get_logger().info(f"Sending logs of launch events to a 2i2c managed GCP project {client.project} under log name {log_name}.")
              return [CloudLoggingHandler(client, name=log_name)]
          c.EventLog.handlers_maker = _make_eventsink_handler

ingressBasicAuth:
  enabled: false
  # Primarily here for validation to 'work',
  # as these are set in secret config otherwise. I don't like this,
  # as we won't catch these values missing if they aren't set.
  username: ""
  password: ""

dex:
  enabled: false

staticWebsite:
  enabled: false
  source:
    git:
      branch: main
  githubAuth:
    enabled: false
    githubApp:
      # Primarily here for validation to 'work',
      # as these are set in secret config otherwise. I don't like this,
      # as we won't catch these values missing if they aren't set.
      id: 0
      privateKey: ""

dask-gateway:
  enabled: false # Enabling dask-gateway will install Dask Gateway as a dependency.
  # Further Dask Gateway configuration goes here
  # See https://github.com/dask/dask-gateway/blob/main/resources/helm/dask-gateway/values.yaml
  gateway:
    backend:
      scheduler:
        extraPodConfig:
          serviceAccountName: user-sa
          tolerations:
            # Let's put schedulers on notebook nodes, since they aren't ephemeral
            # dask can recover from dead workers, but not dead schedulers
            - key: "hub.jupyter.org/dedicated"
              operator: "Equal"
              value: "user"
              effect: "NoSchedule"
            - key: "hub.jupyter.org_dedicated"
              operator: "Equal"
              value: "user"
              effect: "NoSchedule"
          nodeSelector:
            k8s.dask.org/node-purpose: scheduler
        cores:
          request: 0.01
          limit: 1
        memory:
          request: 128M
          limit: 1G
      worker:
        extraContainerConfig:
          securityContext:
            runAsGroup: 1000
            runAsUser: 1000
        extraPodConfig:
          serviceAccountName: user-sa
          securityContext:
            fsGroup: 1000
          tolerations:
            - key: "k8s.dask.org/dedicated"
              operator: "Equal"
              value: "worker"
              effect: "NoSchedule"
            - key: "k8s.dask.org_dedicated"
              operator: "Equal"
              value: "worker"
              effect: "NoSchedule"
          nodeSelector:
            # Dask workers get their own pre-emptible pool
            k8s.dask.org/node-purpose: worker
    env:
      - name: BASEHUB_K8S_DIST
        valueFrom:
          configMapKeyRef:
            name: basehub-cluster-info
            key: K8S_DIST

    extraConfig:
      # This configuration represents options that can be presented to users
      # that want to create a Dask cluster using dask-gateway client.
      #
      # This configuration is meant to enable the user to request dask worker
      # pods that fits well on 2i2c's clusters. Currently the only kind of
      # instance types used are n2-highmem-16 or r5.4xlarge.
      #
      # - Documentation about exposing cluster options to users:
      #   https://gateway.dask.org/cluster-options.html and the
      # - Reference for KubeClusterConfig, which is what can be configured:
      #   https://gateway.dask.org/api-server.html#kubeclusterconfig.
      #
      optionHandler: |
        import os
        import string

        from dask_gateway_server.options import Integer, Mapping, Options, Select, String

        # Escape a string to be dns-safe in the same way that KubeSpawner does it.
        # Reference https://github.com/jupyterhub/kubespawner/blob/616f72c4aee26c3d2127c6af6086ec50d6cda383/kubespawner/spawner.py#L1828-L1835
        # Adapted from https://github.com/minrk/escapism to avoid installing the package
        # in the dask-gateway api pod which would have been problematic.
        def escape_string_label_safe(to_escape):
            safe_chars = set(string.ascii_lowercase + string.digits)
            escape_char = "-"
            chars = []
            for c in to_escape:
                if c in safe_chars:
                    chars.append(c)
                else:
                    # escape one character
                    buf = []
                    # UTF-8 uses 1 to 4 bytes per character, depending on the Unicode symbol
                    # so we need to transform each byte to its hex value
                    for byte in c.encode("utf8"):
                        buf.append(escape_char)
                        # %X is the hex value of the byte
                        buf.append('%X' % byte)
                    escaped_hex_char = "".join(buf)
                    chars.append(escaped_hex_char)
            return u''.join(chars)

        # Decide on available instance types and their resource allocation
        # choices to expose based on cloud provider. For each daskhub hub
        # managed by 2i2c, there should be these instance types available.
        #
        cloud_provider = os.environ["BASEHUB_K8S_DIST"] # gke, eks, or aks
        instance_types = {
            "gke": ["n2-highmem-16"],
            "eks": ["r5.4xlarge"],
            "aks": ["Standard_E16_v4"],
        }

        # NOTE: Data mentioned below comes from manual inspection of data
        #       collected and currently only available at
        #       https://github.com/2i2c-org/infrastructure/pull/3337.
        #
        resource_allocations = {
            # n2-highmem-16 nodes in our clusters have 15.89 allocatable cores
            # and 116.549Gi allocatable memory, and daemonset are expected to
            # not add more than 400m cores and 800Mi (0.781Gi) memory with some
            # margin, so we get 15.49 cores and 115.768Gi available for worker
            # pods to request.
            #
            # This is an initial conservative strategy, allowing a slight
            # oversubscription of CPU but not any oversubscription of memory.
            #
            # To workaround https://github.com/dask/dask-gateway/issues/765, we
            # round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
            # to [0.9, 1.9, 3.8, 7.7, 15.4].
            #
            "n2-highmem-16": {
                "1CPU, 7.2Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.235G", "worker_memory_limit": "7.235G"},
                "2CPU, 14.5Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "14.471G", "worker_memory_limit": "14.471G"},
                "4CPU, 28.9Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "28.942G", "worker_memory_limit": "28.942G"},
                "8CPU, 57.9Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "57.884G", "worker_memory_limit": "57.884G"},
                "16CPU, 115.8Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "115.768G", "worker_memory_limit": "115.768G"},
            },
            # r5.4xlarge nodes in our clusters have 15.89 allocatable cores and
            # 121.504Gi allocatable memory, and daemonset are expected to not
            # add more than 400m cores and 800Mi (0.781Gi) memory with some
            # margin, so we get 15.49 cores and 120.723Gi available for worker
            # pods to request.
            #
            # This is an initial conservative strategy, allowing a slight
            # oversubscription of CPU but not any oversubscription of memory.
            #
            # To workaround https://github.com/dask/dask-gateway/issues/765, we
            # round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
            # to [0.9, 1.9, 3.8, 7.7, 15.4].
            #
            "r5.4xlarge": {
                "1CPU, 7.5Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.545G", "worker_memory_limit": "7.545G"},
                "2CPU, 15.1Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "15.090G", "worker_memory_limit": "15.090G"},
                "4CPU, 30.2Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "30.180G", "worker_memory_limit": "30.180G"},
                "8CPU, 60.4Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "60.361G", "worker_memory_limit": "60.361G"},
                "16CPU, 120.7Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "120.723G", "worker_memory_limit": "120.723G"},
            },
            "Standard_E16_v4": {
                # Set up to be proportioate, so using all the RAM uses all the CPU too
                ".25-1 CPU, 2GB RAM": {"worker_cores": 0.25, "worker_cores_limit": 1, "worker_memory": "2G", "worker_memory_limit": "2G"},
            },
        }

        # for now we support only on one instance type per cluster, listing it
        # as an option is a way to help convey how things work a bit better
        it = instance_types[cloud_provider][0]
        ra = resource_allocations[it]
        ra_keys = list(ra.keys())

        def cluster_options(user):
            def option_handler(options):
                if ":" not in options.image:
                    raise ValueError("When specifying an image you must also provide a tag")
                extra_labels = {
                    "hub.jupyter.org/username": escape_string_label_safe(user.name),
                }
                scheduler_extra_pod_annotations = {
                    "hub.jupyter.org/username": user.name,
                    "prometheus.io/scrape": "true",
                    "prometheus.io/port": "8787",
                }
                worker_extra_pod_annotations = {
                    "hub.jupyter.org/username": user.name,
                }
                picked_ra = ra[options.worker_resource_allocation]

                return {
                    # A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
                    "image": options.image,
                    "scheduler_extra_pod_labels": extra_labels,
                    "scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
                    "worker_extra_pod_labels": extra_labels,
                    "worker_extra_pod_annotations": worker_extra_pod_annotations,
                    "worker_cores": picked_ra["worker_cores"],
                    "worker_cores_limit": picked_ra["worker_cores_limit"],
                    "worker_memory": picked_ra["worker_memory"],
                    "worker_memory_limit": picked_ra["worker_memory_limit"],
                    "environment": options.environment,
                    "idle_timeout": options.idle_timeout_minutes * 60,
                }
            return Options(
                Select(
                    "instance_type",
                    [it],
                    default=it,
                    label="Instance type running worker containers",
                ),
                Select(
                    "worker_resource_allocation",
                    ra_keys,
                    default=ra_keys[0],
                    label="Resources per worker container",
                ),
                # The default image is pre-specified by the dask-gateway client
                # via the env var DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE set on
                # the jupyterhub user pods
                String("image", label="Image"),
                Mapping("environment", {}, label="Environment variables (YAML)"),
                Integer("idle_timeout_minutes", 30, min=0, label="Idle cluster terminated after (minutes)"),
                handler=option_handler,
            )
        c.Backend.cluster_options = cluster_options

        # timeout after 30 minutes of inactivity by default, keep this in sync
        # with the user exposed option idle_timeout_minutes's default value
        # configured above
        c.KubeClusterConfig.idle_timeout = 1800
    prefix: "/services/dask-gateway" # Users connect to the Gateway through the JupyterHub service.
    auth:
      type: jupyterhub # Use JupyterHub to authenticate with Dask Gateway
  traefik:
    nodeSelector:
      k8s.dask.org/node-purpose: core
    service:
      type: ClusterIP # Access Dask Gateway through JupyterHub. To access the Gateway from outside JupyterHub, this must be changed to a `LoadBalancer`.

nfs:
  enabled: false
  dirsizeReporter:
    enabled: true
  volumeReporter:
    enabled: true
  shareCreator:
    enabled: true
    tolerations: []
  pv:
    enabled: false
    mountOptions:
      - soft
      - noatime
      - vers=4.2

# Use NFS provided by an in cluster server with the nfs-external-provisioner chart
inClusterNFS:
  enabled: false
  size: 100Gi

# A placeholder as global values that can be referenced from the same location
# of any chart should be possible to provide, but aren't necessarily provided or
# used.
global: {}

jupyterhub:
  cull:
    # Don't allow any user pods to run for longer than 7 days by default
    maxAge: 604800 # 7 days in seconds
  custom:
    auth:
      anonymizeUsername: false
    singleuser:
      extraPVCs: []
    singleuserAdmin:
      extraEnv: {}
      extraVolumeMounts:
        # IMPORTANT: What is added to this list is copied to other locations
        #            that wants to add an element to this list. This is done
        #            because when Helm config files are merged, lists get
        #            replaced rather than appended. So, if this is to be
        #            updated, we should update all those copies as well. An easy
        #            to way find such copies is to search for "singleuserAdmin:"
        #            in this repo.
        #
        - name: home
          mountPath: /home/jovyan/shared-readwrite
          subPath: _shared
        - name: home
          mountPath: /home/rstudio/shared-readwrite
          subPath: _shared
    2i2c:
      # Should 2i2c engineering staff user IDs be injected to the admin_users
      # configuration of the JupyterHub's authenticator by our custom
      # jupyterhub_config.py snippet as declared in hub.extraConfig?
      add_staff_user_ids_to_admin_users: false
      add_staff_user_ids_of_type: ""
      staff_github_ids:
        - agoose77
        - AIDEA775
        - choldgraf
        - colliand
        - consideRatio
        - damianavila
        - GeorgianaElena
        - Gman0909
        - haroldcampbell
        - jmunroe
        - jnywong
        - sgibson91
        - yuvipanda
      staff_google_ids:
        - ahollands@2i2c.org
        - asilva@2i2c.org
        - choldgraf@2i2c.org
        - colliand@2i2c.org
        - damianavila@2i2c.org
        - erik@2i2c.org
        - georgianaelena@2i2c.org
        - gmaciocci@2i2c.org
        - hcampbell@2i2c.org
        - jmunroe@2i2c.org
        - jwong@2i2c.org
        - sgibson@2i2c.org
        - yuvipanda@2i2c.org
    homepage:
      gitRepoUrl: "https://github.com/2i2c-org/default-hub-homepage"
      # TODO: make main the default branch in the repo above
      gitRepoBranch: "master"
      templateVars:
        enabled: true
    jupyterhubConfigurator:
      enabled: true
  ingress:
    enabled: true
    ingressClassName: nginx
    annotations:
      nginx.ingress.kubernetes.io/proxy-body-size: 256m
      cert-manager.io/cluster-issuer: letsencrypt-prod
  scheduling:
    # We declare matchNodePurpose=require to get a nodeAffinity like a
    # nodeSelector on all core pods and user pods. core pods like hub and proxy
    # will schedule on nodes with hub.jupyter.org/node-purpose=core and user
    # pods on nodes with hub.jupyter.org/node-purpose=user.
    #
    # Since this setting adds a nodeAffinity, its okay that we configure
    # KubeSpawner's profile_list to override node_selector.
    #
    corePods:
      nodeAffinity:
        matchNodePurpose: require
    userPods:
      nodeAffinity:
        matchNodePurpose: require
    podPriority:
      enabled: true
    userPlaceholder:
      enabled: true
      replicas: 0
    userScheduler:
      enabled: false
      # replicas default value is 2, but having the user-scheduler run HA is an
      # almost never practically helpful but has been found to increase cloud
      # costs in clusters with many hubs. For additional discussion reducing
      # this to 1, see https://github.com/2i2c-org/infrastructure/issues/3865.
      replicas: 1
      # FIXME: We should think about these resource requests/limits, see
      #        https://github.com/2i2c-org/infrastructure/issues/2127.
      #
      resources:
        requests:
          cpu: 0.01
          memory: 64Mi
        limits:
          memory: 1G
  # prePuller is about pulling a one or more images identified via chart
  # configuration, including singleuser.image, singleuser.profileList entries
  # with a dedicated image, but not profileList entries with images' specified
  # via profile_options.
  prePuller:
    # continuous prePuller leads to the creation of a DaemonSet that starts a
    # pod on each node to pull images.
    #
    # It is disabled as its only relevant for nodes started before user pods
    # gets scheduled on them, in other cases it could delay startup and isn't
    # expected to reduce startup times.
    #
    continuous:
      enabled: false
    # hook prePuller leads to the creation of a temporary DaemonSet and a pod
    # awaiting pulling to complete before `helm upgrade` starts its main work.
    #
    # It is disabled as it adds notable complexity for a smaller benefit when
    # correctly adopted. The added complexity includes:
    #
    # - risk of misconfiguration making image pulls not actually needed
    # - risk of broken expectations and additional cognitive load
    # - risk of causing significantly longer `helm upgrade` commands slowing
    #   down our CI system
    # - ClusterRoleBinding resources are needed for the image-awaiter Pod
    #   involved, a resource that requires the highest k8s cluster permission
    #   otherwise possibly not needed to deploy basehub
    #
    hook:
      enabled: false
  proxy:
    service:
      type: ClusterIP
    chp:
      # FIXME: We should think about these resource requests/limits, see
      #        https://github.com/2i2c-org/infrastructure/issues/2127.
      #
      resources:
        requests:
          cpu: 0.01
          memory: 64Mi
        limits:
          memory: 1Gi
    traefik:
      # FIXME: We should think about these resource requests/limits, see
      #        https://github.com/2i2c-org/infrastructure/issues/2127.
      #
      #        Note if autohttps pod's aren't used anywhere by our basehub
      #        deployments, we should simply remove this traefik configuration.
      #
      resources:
        requests:
          memory: 64Mi
        limits:
          memory: 1Gi
  singleuser:
    # basehub creates a k8s ServiceAccount for the hubs users that isn't granted
    # permissions to the k8s api-server or other resources by default. Cloud
    # infra permissions can be granted to all users by declaring annotations on
    # this k8s ServiceAccount via basehub config userServiceAccount.annotations.
    serviceAccountName: user-sa
    # Need to explicitly fix ownership here, as otherwise these directories will be owned
    # by root on most NFS filesystems - neither EFS nor Google Filestore support anonuid
    #
    # This has to be done _once_ for each directory we mount _from_ the NFS
    # server. We do it all the time since we don't know for sure it has been done once
    # already.
    #
    # Note that we don't have to chown both the shared and shared-readwrite
    # folder since they are both mounting the same folder on the NFS server.
    #
    # For details about this, see notes at:
    # - https://github.com/2i2c-org/infrastructure/issues/2953#issuecomment-1672025545
    # - https://github.com/2i2c-org/infrastructure/issues/2946#issuecomment-1671691248
    #
    initContainers:
      - name: volume-mount-ownership-fix
        image: busybox:1.36.1
        command:
          - sh
          - -c
          - id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan
        securityContext:
          runAsUser: 0
        volumeMounts:
          - name: home
            mountPath: /home/jovyan
            subPath: "{username}"
          # Mounted without readonly attribute here,
          # so we can chown it appropriately
          - name: home
            mountPath: /home/jovyan/shared
            subPath: _shared
    cmd:
      # Mitigate a vulnerability in jupyter-server-proxy version <4.1.1, see
      # https://github.com/jupyterhub/jupyter-server-proxy/security/advisories/GHSA-w3vc-fx9p-wp4v
      # for more details.
      - /mnt/ghsa-w3vc-fx9p-wp4v/check-patch-run
      - jupyterhub-singleuser
    extraEnv:
      # notebook writes secure files that don't need to survive a
      # restart here. Writing 'secure' files on some file systems (like
      # Azure Files with SMB) seems buggy, so we just put runtime dir on
      # /tmp. This is ok in our case, since no two users are on the same
      # container.
      JUPYTER_RUNTIME_DIR: /tmp/.jupyter-runtime
      # By default, /bin/sh is used as shell for terminals, not /bin/bash
      # Most people do not expect this, so let's match expectation
      SHELL: /bin/bash
    extraFiles:
      ghsa-w3vc-fx9p-wp4v-check-patch-run:
        mountPath: /mnt/ghsa-w3vc-fx9p-wp4v/check-patch-run
        mode: 0755
        stringData: |
          #!/usr/bin/env python3
          """
          This script is designed to check for and conditionally patch GHSA-w3vc-fx9p-wp4v
          in user servers started by a JupyterHub. The script will execute any command
          passed via arguments if provided, allowing it to wrap a user server startup call
          to `jupyterhub-singleuser` for example.

          Script adjustments:
          - UPGRADE_IF_VULNERABLE
          - ERROR_IF_VULNERABLE

          Script patching assumptions:
          - script is run before the jupyter server starts
          - pip is available
          - pip has sufficient filesystem permissions to upgrade jupyter-server-proxy

          Read more at https://github.com/jupyterhub/jupyter-server-proxy/security/advisories/GHSA-w3vc-fx9p-wp4v.
          """

          import os
          import subprocess
          import sys

          # adjust these to meet vulnerability mitigation needs
          UPGRADE_IF_VULNERABLE = True
          ERROR_IF_VULNERABLE = False


          def check_vuln():
              """
              Checks for the vulnerability by looking to see if __version__ is available
              as it coincides with the patched versions (3.2.3 and 4.1.1).
              """
              try:
                  import jupyter_server_proxy

                  return False if hasattr(jupyter_server_proxy, "__version__") else True
              except:
                  return False


          def get_version_specifier():
              """
              Returns a pip version specifier for use with `--no-deps` meant to do as
              little as possible besides patching the vulnerability and remaining
              functional.
              """
              old = ["jupyter-server-proxy>=3.2.3,<4"]
              new = ["jupyter-server-proxy>=4.1.1,<5", "simpervisor>=1,<2"]

              try:
                  if sys.version_info < (3, 8):
                      return old

                  from importlib.metadata import version

                  jsp_version = version("jupyter-server-proxy")
                  if int(jsp_version.split(".")[0]) < 4:
                      return old
              except:
                  pass
              return new


          def patch_vuln():
              """
              Attempts to patch the vulnerability by upgrading jupyter-server-proxy using
              pip.
              """
              # attempt upgrade via pip, takes ~4 seconds
              proc = subprocess.run(
                  [sys.executable, "-m", "pip", "--version"],
                  stdout=subprocess.DEVNULL,
                  stderr=subprocess.DEVNULL,
              )
              pip_available = proc.returncode == 0
              if pip_available:
                  proc = subprocess.run(
                      [sys.executable, "-m", "pip", "install", "--no-deps"]
                      + get_version_specifier()
                  )
                  if proc.returncode == 0:
                      return True
              return False


          def main():
              if check_vuln():
                  warning_or_error = (
                      "ERROR" if ERROR_IF_VULNERABLE and not UPGRADE_IF_VULNERABLE else "WARNING"
                  )
                  print(
                      f"{warning_or_error}: jupyter-server-proxy __is vulnerable__ to GHSA-w3vc-fx9p-wp4v, see "
                      "https://github.com/jupyterhub/jupyter-server-proxy/security/advisories/GHSA-w3vc-fx9p-wp4v.",
                      flush=True,
                  )
                  if warning_or_error == "ERROR":
                      sys.exit(1)

                  if UPGRADE_IF_VULNERABLE:
                      print(
                          "INFO: Attempting to upgrade jupyter-server-proxy using pip...",
                          flush=True,
                      )
                      if patch_vuln():
                          print(
                              "INFO: Attempt to upgrade jupyter-server-proxy succeeded!",
                              flush=True,
                          )
                      else:
                          warning_or_error = "ERROR" if ERROR_IF_VULNERABLE else "WARNING"
                          print(
                              f"{warning_or_error}: Attempt to upgrade jupyter-server-proxy failed!",
                              flush=True,
                          )
                          if warning_or_error == "ERROR":
                              sys.exit(1)

              if len(sys.argv) >= 2:
                  print("INFO: Executing provided command", flush=True)
                  os.execvp(sys.argv[1], sys.argv[1:])
              else:
                  print("INFO: No command to execute provided", flush=True)


          main()
      ipython_kernel_config.json:
        mountPath: /usr/local/etc/ipython/ipython_kernel_config.json
        data:
          # This keeps a history of all executed code under $HOME, which is almost always on
          # NFS. This file is kept as a sqlite file, and sqlite and NFS do not go together very
          # well! Disable this to save ourselves from debugging random NFS oddities that are caused
          # by this unholy sqlite + NFS mixture.
          HistoryManager:
            enabled: false
      # jupyter_server and notebook are different jupyter servers providing
      # similar configuration options. Since we have user images that may
      # provide either, we provide the same configuration for both via
      # jupyter_server_config.json and jupyter_notebook_config.json.
      #
      # A hub can force a choice with singleuser.extraEnv via:
      #
      #     JUPYTERHUB_SINGLEUSER_APP: "notebook.notebookapp.NotebookApp"
      #     JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp"
      #
      jupyter_server_config.json:
        mountPath: /usr/local/etc/jupyter/jupyter_server_config.json
        # if a user leaves a notebook with a running kernel,
        # the effective idle timeout will typically be cull idle timeout
        # of the server + the cull idle timeout of the kernel,
        # as culling the kernel will register activity,
        # resetting the no_activity timer for the server as a whole
        data:
          # Allow JupyterLab to show the 'View -> Show Hidden Files' option
          # in the menu. Defaults are not changed.
          # https://github.com/jupyterlab/jupyterlab/issues/11304#issuecomment-945466766
          ContentsManager:
            allow_hidden: true
          # MappingKernelManager configuration reference:
          # https://jupyter-server.readthedocs.io/en/latest/api/jupyter_server.services.kernels.html#jupyter_server.services.kernels.kernelmanager.MappingKernelManager
          #
          MappingKernelManager: &server_config_mapping_kernel_manager
            cull_idle_timeout: 3600
            cull_interval: 300
            cull_connected: true
          # ServerApp configuration reference:
          # https://jupyter-server.readthedocs.io/en/latest/api/jupyter_server.html#jupyter_server.serverapp.ServerApp
          #
          ServerApp: &server_config_server_app
            extra_template_paths:
              - /usr/local/share/jupyter/custom_template
          # Move the sqlite file used by https://github.com/jupyter-server/jupyter_server_fileid
          # off the default path, which is under ~/.local/share/jupyter.
          # That is NFS, and sqlite + NFS don't go well together. In addition,
          # it uses WAL mode of sqlite, and that is completely unsupported on NFS
          # Upstream discussion in https://github.com/jupyter-server/jupyter_server_fileid/issues/60.
          BaseFileIdManager: &server_config_base_file_id_manager
            db_path: /tmp/file_id_manager.db
      jupyter_notebook_config.json:
        mountPath: /usr/local/etc/jupyter/jupyter_notebook_config.json
        data:
          MappingKernelManager: *server_config_mapping_kernel_manager
          NotebookApp: *server_config_server_app
          BaseFileIdManager: *server_config_base_file_id_manager
    startTimeout: 600 # 10 mins, node startup + image pulling sometimes takes more than the default 5min
    defaultUrl: /tree
    image:
      name: quay.io/jupyter/scipy-notebook
      tag: "2024-03-18"
    storage:
      type: static
      static:
        pvcName: home-nfs
        subPath: "{username}"
      extraVolumes:
        - name: dev-shm
          emptyDir:
            medium: Memory
      extraVolumeMounts:
        - name: home
          mountPath: /home/jovyan/shared
          subPath: _shared
          readOnly: true
        - name: dev-shm
          mountPath: /dev/shm

        # For all pods, mount home in both /home/jovyan (done via singleuser.storage.static)
        # as well as /home/rstudio. This allows rocker images (which use the
        # rstudio user and put home ine /home/rstudio) to be first class citizens
        # along with jupyter based images, regardless of how they are specified (
        # via the configurator, or with unlisted_choice, or as a profile). For non-rocker
        # images, this is just invisible in the UI and there is no performance overhead
        # for these extra bind mounts. An additional positive here is that in case *students*
        # end up accidentally hardcoding paths in their notebooks, it will continue to work
        # regardless of whether they or on RStudio or JupyterLab (described to us as a serious
        # problem by openscapes)
        - name: home
          mountPath: /home/rstudio
          subPath: "{username}"
        - name: home
          mountPath: /home/rstudio/shared
          subPath: _shared
          readOnly: true
    memory:
      guarantee: 256M
      limit: 1G
    cpu:
      # If no CPU limit is set, it is possible for a single user or group of users to
      # starve everyone else of CPU time on a node, even causing new user pods to completely
      # fail as the notebook server process gets no CPU to complete auth handshake with
      # the server, and even trivial cells like `print("hello world")` may not run.
      # Unlike memory guarantees, CPU guarantees are actually enforced by the Linux Kernel
      # (see https://medium.com/@betz.mark/understanding-resource-limits-in-kubernetes-cpu-time-9eff74d3161b)
      # By giving each user a 5% CPU guarantee (represented by 0.05), we ensure that:
      # 1. Simple cells will always execute
      # 2. Notebook server processes will always start - so users won't have server spawn failure
      # 3. We don't accidentally set just a high limit for a particular hub and not set a
      #    guarantee, at which point kubernetes treats the limit as the guarantee! This causes
      #    far more nodes to be scaled up than needed, making everything super slow (like in
      #    https://github.com/2i2c-org/infrastructure/issues/790)
      # 4. Most of our workloads are still memory bound, and we want scaling to happen only
      #    when a node is full on its memory guarantees. But a 0.05 guarantee means a n1-highmem-8
      #    node can fit 160 user pods, and since kubernetes already caps us at 100 pods a node,
      #    this guarantee doesn't actually change our scheduling.
      guarantee: 0.05
    networkPolicy:
      enabled: true
      # Egress to internet is allowed by default via z2jh's egressAllowRules,
      # but we need to add a few custom rules for the cluster internal
      # networking.
      egress:
        # Allow code in hubs to talk to ingress provider, so they can talk to
        # the hub via its public URL
        - to:
            - namespaceSelector:
                matchLabels:
                  name: support
              podSelector:
                matchLabels:
                  app.kubernetes.io/name: ingress-nginx
        # If a hub is using autohttps instead of ingress-nginx, allow traffic
        # to the autohttps pod as well
        - to:
            - podSelector:
                matchLabels:
                  app: jupyterhub
                  component: autohttps
        # Allow traffic to the proxy pod from user pods
        # This is particularly important for daskhubs that utilise the proxy
        # in order to create clusters (schedulers and workers)
        - to:
            - podSelector:
                matchLabels:
                  app: jupyterhub
                  component: proxy
        # Allow traffic to the traefik pod from user pods. Needed for daskhubs.
        - to:
            - podSelector:
                matchLabels:
                  app.kubernetes.io/component: traefik

        # Allow HTTPS and HTTP traffic explicitly to the whole world
        # This is a no-op if `singleuser.networkPolicy.egressAllowRules.nonPrivateIPs`
        # is true (the default). When it is set to false, this rule allows outbound access
        # to these specified ports to the broad internet (but not internal networks)
        - ports:
            - port: 80
              protocol: TCP
            - port: 443
              protocol: TCP
            - port: 443
              protocol: UDP # Hello, HTTPS/3
          to:
            - ipBlock:
                cidr: 0.0.0.0/0
                except:
                  - 10.0.0.0/8
                  - 172.16.0.0/12
                  - 192.168.0.0/16
  hub:
    config:
      JupyterHub:
        # Allow unauthenticated prometheus requests
        # Otherwise our prometheus server can't get hub metrics
        authenticate_prometheus: false
      KubeSpawner:
        # Make sure working directory is where we mount the home folder
        working_dir: /home/jovyan
        # Increase timeout for Jupyter server to become 'ready', until
        # https://github.com/2i2c-org/infrastructure/issues/2047 is fixed
        http_timeout: 120
      Authenticator:
        # Don't allow test username to login into the hub
        # The test service will still be able to create this hub username
        # and start their server.
        # Ref: https://github.com/2i2c-org/meta/issues/321
        blocked_users:
          - deployment-service-check
    extraFiles:
      configurator-schema-default:
        mountPath: /usr/local/etc/jupyterhub-configurator/00-default.schema.json
        data:
          type: object
          name: config
          properties:
            KubeSpawner.image:
              type: string
              title: User docker image
              description: Determines languages, libraries and interfaces available
              help: Leave this blank to use the default
            Spawner.default_url:
              type: string
              title: Default User Interface
              enum:
                - "/tree"
                - "/lab"
                - "/rstudio"
              default: "/tree"
              enumMetadata:
                interfaces:
                  - value: "/tree"
                    title: Classic Notebook
                    description: >-
                      The original single-document interface for creating
                      Jupyter Notebooks.
                  - value: "/lab"
                    title: JupyterLab
                    description: A Powerful next generation notebook interface
                  - value: "/rstudio"
                    title: RStudio
                    description: An IDE For R, created by the RStudio company
    extraEnv:
      BASEHUB_K8S_DIST:
        valueFrom:
          configMapKeyRef:
            name: basehub-cluster-info
            key: K8S_DIST
    initContainers:
      - name: templates-clone
        image: alpine/git:2.40.1
        command:
          - /bin/sh
        args:
          - -c
          # Remove the existing repo first if it exists, as otherwise we will
          # error out when the pod restarts. /srv/extra-templates-dir is an
          # emptyDir volume, so it is *not* cleaned up when the pod's containers restarts -
          # only when the pod is deleted and cleaned back up.
          # We also mount the emptyDir in `/srv/extra-templates-dir` but
          # clone into a *subdirectory*, as the mount itself is owned by
          # root, and git freaks out when that is the case. By putting
          # the repo in a sub directory, we avoid the permission problems.
          - |
            rm -rf /srv/extra-templates-dir/repo;
            git clone ${GIT_REPO_URL} /srv/extra-templates-dir/repo
        env:
          - name: GIT_REPO_URL
            valueFrom:
              configMapKeyRef:
                name: hub-custom-templates-config
                key: GIT_REPO_URL
        securityContext:
          runAsUser: 1000
          runAsGroup: 1000
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
        volumeMounts:
          - name: custom-templates
            mountPath: /srv/extra-templates-dir
    extraContainers:
      - name: templates-sync
        image: alpine/git:2.40.1
        workingDir: /srv/extra-templates-dir/repo
        command:
          - /bin/sh
        args:
          - -c
          - |
            handle_sigterm() {
                echo "SIGTERM received, terminating...";
                exit;
            }
            trap handle_sigterm SIGTERM;

            echo "Starting template sync...";

            echo "";
            echo "Info about local git repo to be synced:";
            (
                # set -x causes commands run to be printed, helping log readers
                # understand what the generated output is about. set -x is
                # configured within a subshell to just print info about the
                # specific chosen commands and avoid printing info about running
                # "echo", "sleep", "set +x", or similar commands.
                set -x;
                git remote -v;
                ls -lhd /srv/extra-templates-dir/repo;
            )

            echo "";
            echo "Syncing local git repo /srv/extra-templates-dir/repo against origin's branch $(GIT_REPO_BRANCH) every 5 minutes...";
            while true; do
                git fetch origin;
                git reset --hard origin/$(GIT_REPO_BRANCH);

                # signal handling can only be done between sleep calls, so this
                # shouldn't be reduced to the otherwise equivalent "sleep 5m"
                for i in $(seq 300); do
                    sleep 1s;
                done
            done
        env:
          - name: GIT_REPO_BRANCH
            valueFrom:
              configMapKeyRef:
                name: hub-custom-templates-config
                key: GIT_REPO_BRANCH
        securityContext:
          runAsUser: 1000
          runAsGroup: 1000
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
        volumeMounts:
          - name: custom-templates
            mountPath: /srv/extra-templates-dir
    extraVolumes:
      - name: custom-templates
        emptyDir: {}
    extraVolumeMounts:
      - mountPath: /usr/local/share/jupyterhub/custom_templates
        name: custom-templates
        subPath: repo/templates
      - mountPath: /usr/local/share/jupyterhub/static/extra-assets
        name: custom-templates
        subPath: repo/extra-assets
    services:
      # hub-health service helps us run health checks from the deployer script.
      # The JupyterHub Helm chart will automatically generate an API token for
      # services and expose it in a k8s Secret named `hub`. When we run health
      # tests against a hub, we read this token from the k8s Secret to acquire
      # the credentials needed to interacting with the JupyterHub API.
      #
      hub-health:
        # FIXME: With JupyterHub 2 we can define a role for this service with
        #        more tightly scoped permissions based on our needs.
        #
        admin: true
      dask-gateway:
        # We provide an entry here for dask-gateway unconditionally, so the
        # jupyterhub chart will autogenerate a secret with appropriate keys.
        # It's not used if dask-gateway is not enabled.
        display: false
      binder:
        # We provide an entry here for binderhub-service unconditionally, so the
        # jupyterhub chart will autogenerate a secret with appropriate keys.
        # It's not used if binderhub-service is not enabled.
        display: false
        url: http://binderhub:8090
        oauth_no_confirm: true
    image:
      name: quay.io/2i2c/pilot-hub
      tag: "0.0.1-0.dev.git.10892.h37c70b2e"
    networkPolicy:
      enabled: true
      # interNamespaceAccessLabels=accept makes the hub pod's associated
      # NetworkPolicy accept ingress from pods in other namespaces that has a
      # hub.jupyter.org/network-access-hub=true label.
      #
      # ref: https://z2jh.jupyter.org/en/stable/resources/reference.html#hub-networkpolicy-internamespaceaccesslabels
      #
      interNamespaceAccessLabels: accept
      ingress:
        - from:
            - podSelector:
                matchLabels:
                  app: jupyterhub
                  component: hub
          ports:
            - port: 8081
              protocol: TCP
        # The jupyterhub-configurator is a managed jupyterhub service, which
        # means it is started by jupyterhub as a separate process in the hub
        # pod. Users will access it via the proxy pod, and JupyterHub itself is
        # accessing it via localhost. This rule makes receiving such request on
        # port 10101 from these destinations accepted.
        #
        # Maybe the container internal rule, for jupyterhub ->
        # jupyterhub-configurator isn't needed, as the request is directly to
        # 127.0.0.1:10101.
        #
        # ref: The extraConfig.02-basehub-spawner section below
        # ref: https://github.com/yuvipanda/jupyterhub-configurator/blob/996405d2a7017153d5abe592b8028fed7a1801bb/jupyterhub_configurator/mixins.py#L7C5-L11
        #
        - from:
            - podSelector:
                matchLabels:
                  app: jupyterhub
                  component: proxy
            - podSelector:
                matchLabels:
                  app: jupyterhub
                  component: hub
          ports:
            - port: 10101
              protocol: TCP
    # FIXME: We should think about these resource requests/limits, see
    #        https://github.com/2i2c-org/infrastructure/issues/2127.
    #
    resources:
      requests:
        cpu: 0.01
        memory: 128Mi
      limits:
        memory: 2Gi
    extraConfig:
      # This is copy-pasted exactly from https://github.com/jupyterhub/binderhub/blob/c6c5dc8fe73f81ca538c47b420b33f317c3aa8ae/helm-chart/binderhub/values.yaml#L87
      # Should be updated every time the upstream code changes
      0-binderspawnermixin: |
        """
        Helpers for creating BinderSpawners

        FIXME:
        This file is defined in binderhub/binderspawner_mixin.py
        and is copied to helm-chart/binderhub/values.yaml
        by ci/check_embedded_chart_code.py

        The BinderHub repo is just used as the distribution mechanism for this spawner,
        BinderHub itself doesn't require this code.

        Longer term options include:
        - Move BinderSpawnerMixin to a separate Python package and include it in the Z2JH Hub
          image
        - Override the Z2JH hub with a custom image built in this repository
        - Duplicate the code here and in binderhub/binderspawner_mixin.py
        """

        from tornado import web
        from traitlets import Bool, Unicode
        from traitlets.config import Configurable


        class BinderSpawnerMixin(Configurable):
            """
            Mixin to convert a JupyterHub container spawner to a BinderHub spawner

            Container spawner must support the following properties that will be set
            via spawn options:
            - image: Container image to launch
            - token: JupyterHub API token
            """

            def __init__(self, *args, **kwargs):
                # Is this right? Is it possible to having multiple inheritance with both
                # classes using traitlets?
                # https://stackoverflow.com/questions/9575409/calling-parent-class-init-with-multiple-inheritance-whats-the-right-way
                # https://github.com/ipython/traitlets/pull/175
                super().__init__(*args, **kwargs)

            auth_enabled = Bool(
                False,
                help="""
                Enable authenticated binderhub setup.

                Requires `jupyterhub-singleuser` to be available inside the repositories
                being built.
                """,
                config=True,
            )

            cors_allow_origin = Unicode(
                "",
                help="""
                Origins that can access the spawned notebooks.

                Sets the Access-Control-Allow-Origin header in the spawned
                notebooks. Set to '*' to allow any origin to access spawned
                notebook servers.

                See also BinderHub.cors_allow_origin in binderhub config
                for controlling CORS policy for the BinderHub API endpoint.
                """,
                config=True,
            )

            def get_args(self):
                if self.auth_enabled:
                    args = super().get_args()
                else:
                    args = [
                        "--ip=0.0.0.0",
                        f"--port={self.port}",
                        f"--NotebookApp.base_url={self.server.base_url}",
                        f"--NotebookApp.token={self.user_options['token']}",
                        "--NotebookApp.trust_xheaders=True",
                    ]
                    if self.default_url:
                        args.append(f"--NotebookApp.default_url={self.default_url}")

                    if self.cors_allow_origin:
                        args.append("--NotebookApp.allow_origin=" + self.cors_allow_origin)
                    # allow_origin=* doesn't properly allow cross-origin requests to single files
                    # see https://github.com/jupyter/notebook/pull/5898
                    if self.cors_allow_origin == "*":
                        args.append("--NotebookApp.allow_origin_pat=.*")
                    args += self.args
                    # ServerApp compatibility: duplicate NotebookApp args
                    for arg in list(args):
                        if arg.startswith("--NotebookApp."):
                            args.append(arg.replace("--NotebookApp.", "--ServerApp."))
                return args

            def start(self):
                if not self.auth_enabled:
                    if "token" not in self.user_options:
                        raise web.HTTPError(400, "token required")
                    if "image" not in self.user_options:
                        raise web.HTTPError(400, "image required")
                if "image" in self.user_options:
                    self.image = self.user_options["image"]
                return super().start()

            def get_env(self):
                env = super().get_env()
                if "repo_url" in self.user_options:
                    env["BINDER_REPO_URL"] = self.user_options["repo_url"]
                for key in (
                    "binder_ref_url",
                    "binder_launch_host",
                    "binder_persistent_request",
                    "binder_request",
                ):
                    if key in self.user_options:
                        env[key.upper()] = self.user_options[key]
                return env
      01-custom-theme: |
        # adds a JupyterHub template path and updates template variables

        from z2jh import get_config
        c.JupyterHub.template_paths.insert(0,'/usr/local/share/jupyterhub/custom_templates')
        c.JupyterHub.template_vars.update({
            'custom': get_config('custom.homepage.templateVars')
        })
      02-basehub-spawner: |
        # Updates JupyterHub.spawner_class and KubeSpawner.modify_pod_hook to
        # handle features introduced by the basehub chart, specifically those
        # configured via:
        #
        # jupyterhub.custom.singleuserAdmin
        # jupyterhub.custom.jupyterhubConfigurator
        #
        from jupyterhub_configurator.mixins import ConfiguratorSpawnerMixin
        from kubernetes_asyncio.client.models import V1VolumeMount
        from kubespawner import KubeSpawner
        from kubespawner.utils import get_k8s_model
        from z2jh import get_config


        # Setup jupyterhub-configurator only if its enabled
        spawner_base_classes = [KubeSpawner]
        if get_config("custom.jupyterhubConfigurator.enabled"):
            spawner_base_classes = [ConfiguratorSpawnerMixin, KubeSpawner]

            jhc_service = {
                "name": "configurator",
                "url": "http://configurator:10101",
                "oauth_no_confirm": True,
                "command": [
                    "python3",
                    "-m",
                    "jupyterhub_configurator.app",
                    "--Configurator.config_file=/usr/local/etc/jupyterhub-configurator/jupyterhub_configurator_config.py",
                ],
            }
            c.JupyterHub.services.append(jhc_service)

        if get_config("custom.binderhubUI.enabled"):
            spawner_base_classes = [BinderSpawnerMixin, KubeSpawner]

        class BaseHubSpawner(*spawner_base_classes):
            def start(self, *args, **kwargs):
                """
                Modify admin users' spawners' non-list config based on
                `jupyterhub.custom.singleuserAdmin`.

                The list config is handled separately in by the
                `modify_pod_hook`.
                """
                custom_admin = get_config('custom.singleuserAdmin', {})
                if not (self.user.admin and custom_admin):
                    return super().start(*args, **kwargs)

                admin_environment = custom_admin.get('extraEnv', {})
                self.environment.update(admin_environment)

                admin_service_account = custom_admin.get('serviceAccountName')
                if admin_service_account:
                    self.service_account = admin_service_account

                return super().start(*args, **kwargs)

        c.JupyterHub.spawner_class = BaseHubSpawner


        def modify_pod_hook(spawner, pod):
            """
            Modify admin user's pod manifests based on *list* config under
            `jupyterhub.custom.singleuserAdmin`.

            This hook is required to ensures that list config under
            `jupyterhub.custom.singleuserAdmin` are appended and not just
            overridden when a profile_list entry has a kubespawner_override
            modifying the same config.
            """
            custom_admin = get_config('custom.singleuserAdmin', {})
            if not (spawner.user.admin and custom_admin):
                return pod

            for c in pod.spec.containers:
                if c.name == "notebook":
                    notebook_container = c
                    break
            else:
                raise Exception("No container named 'notebook' found in pod definition")

            admin_volume_mounts = custom_admin.get('extraVolumeMounts', [])
            notebook_container.volume_mounts += [get_k8s_model(V1VolumeMount, obj) for obj in (admin_volume_mounts)]

            return pod
        c.KubeSpawner.modify_pod_hook = modify_pod_hook
      03-2i2c-add-staff-user-ids-to-admin-users: |
        from z2jh import get_config
        add_staff_user_ids_to_admin_users = get_config("custom.2i2c.add_staff_user_ids_to_admin_users", False)

        if add_staff_user_ids_to_admin_users:
            user_id_type = get_config("custom.2i2c.add_staff_user_ids_of_type")
            staff_user_ids = get_config(f"custom.2i2c.staff_{user_id_type}_ids", [])
            # `c.Authenticator.admin_users` can contain additional admins, can be an empty list,
            # or it cannot be defined at all.
            # This should cover all these cases.
            staff_user_ids.extend(get_config("hub.config.Authenticator.admin_users", []))
            c.Authenticator.admin_users = staff_user_ids

      04-per-user-disk: |
        # Optionally, create a PVC per user - useful for per-user databases
        from jupyterhub.utils import exponential_backoff
        from z2jh import get_config
        from kubespawner.objects import make_pvc
        from functools import partial

        def make_extra_pvc(component, name_template, storage_class, storage_capacity, spawner):
          """
          Create a PVC object with given spec
          """
          labels = spawner._build_common_labels({})
          labels.update({
              'component': component
          })
          annotations = spawner._build_common_annotations({})
          storage_selector = spawner._expand_all(spawner.storage_selector)
          return make_pvc(
              name=spawner._expand_all(name_template),
              storage_class=storage_class,
              access_modes=['ReadWriteOnce'],
              selector={},
              storage=storage_capacity,
              labels=labels,
              annotations=annotations
          )

        extra_user_pvcs = get_config('custom.singleuser.extraPVCs', {})
        if extra_user_pvcs:
          make_db_pvc = partial(make_extra_pvc, 'db-storage', 'db-{username}', 'standard', '1G')

          pvc_makers = [partial(
            make_extra_pvc,
            "extra-pvc",
            p["name"],
            p["class"],
            p["capacity"]
          ) for p in extra_user_pvcs]

          async def ensure_db_pvc(spawner):
            """"
            Ensure a PVC is created for this user's database volume
            """
            for pvc_maker in pvc_makers:
              pvc = pvc_maker(spawner)
              # If there's a timeout, just let it propagate
              await exponential_backoff(
                  partial(spawner._make_create_pvc_request, pvc, spawner.k8s_api_request_timeout),
                  f'Could not create pvc {pvc.metadata.name}',
                  # Each req should be given k8s_api_request_timeout seconds.
                  timeout=spawner.k8s_api_request_retry_timeout
              )
          c.Spawner.pre_spawn_hook = ensure_db_pvc
      05-profile-groups: |
        # Re-assignes c.KubeSpawner.profile_list to a callable that filters the
        # initial configuration of profile_list based on the user's github
        # org/team membership as declared via "allowed_groups" read from
        # profile_list profiles.
        #
        # This only has effect if:
        #
        # - GitHubOAuthenticator is used.
        # - GitHubOAuthenticator.populate_teams_in_auth_state is True, that
        #   requires Authenticator.enable_auth_state to be True as well.
        # - The user is a normal user, and not "deployment-service-check".
        #
        from copy import deepcopy

        from functools import partial
        from textwrap import dedent
        from tornado import web
        from oauthenticator.github import GitHubOAuthenticator
        from z2jh import get_config

        async def profile_list_allowed_groups_filter(original_profile_list, spawner):
            """
            Returns the initially configured profile_list filtered based on the
            user's membership in each profile's `allowed_groups`. If
            `allowed_groups` isn't set for a profile, that profile is allowed for
            everyone. Similar functionality is provided for both `unlisted_choice` and
            `choice` inside `profile_options`.

            `allowed_groups` is a list of JupyterHub groups, set up by the authenticator.
            In addition, for use with GitHubOAuthenticator, it can be a list of
            teams the user is a part of, of form '<github-org>:<team-name>'.

            If the returned profile_list is filtered to not include any profiles,
            an error is raised and the user isn't allowed to start a server.
            """
            if spawner.user.name == "deployment-service-check":
                print("Ignoring allowed_groups check for deployment-service-check")
                return original_profile_list

            # casefold group names so we can do case insensitive comparisons.
            groups = {g.name.casefold() for g in spawner.user.groups}

            # If we're using GitHubOAuthenticator, add the user's teams to the groups as well.
            # Eventually this can be removed, as the user's teams can be set to be groups
            # once https://github.com/jupyterhub/oauthenticator/pull/735 is merged
            if isinstance(spawner.authenticator, GitHubOAuthenticator):
              # Ensure auth_state is populated with teams info
              auth_state = await spawner.user.get_auth_state()
              if not auth_state or "teams" not in auth_state:
                  print(f"User {spawner.user.name} does not have any auth_state set, profile_list filtering not available")

              else:
                # casefold teams to match what GitHub's API does when doing authorization calls
                groups |= set([f'{team["organization"]["login"]}:{team["slug"]}'.casefold() for team in auth_state["teams"]])

            print(f"User {spawner.user.name} is part of groups {' '.join(groups)}")

            # Filter out profiles with allowed_groups set if the user isn't part of the group
            allowed_profiles = []
            for original_profile in original_profile_list:
                # Make a copy, as we'll be modifying this profile
                profile = deepcopy(original_profile)

                # Handle `allowed_groups` specified in profile_options
                if 'profile_options' in profile:
                  for k, po in profile['profile_options'].items():

                    # If `unlisted_choice` has an `allowed_groups` and the current
                    # user is not present in any of those teams, we delete the
                    # `unlisted_choice` config entirely for this option. The user
                    # will then not be allowed to 'write in' a value.
                    if 'unlisted_choice' in po:
                      if 'allowed_groups' in po['unlisted_choice']:
                        if not (set(po['unlisted_choice']['allowed_groups']) and groups):
                          del po['unlisted_choice']

                    if 'choices' in po:
                      new_choices = {}
                      for k, c in po['choices'].items():
                        # If `allowed_groups` is not set for a profile option, it is automatically
                        # allowed for everyone
                        if 'allowed_groups' not in c:
                          new_choices[k] = c
                        # If `allowed_groups` *is* set for a profile option, it is allowed only for
                        # members of that team.
                        else:
                          allowed_groups = set([g.casefold() for g in c['allowed_groups']])
                          if allowed_groups & groups:
                            new_choices[k] = c
                      po['choices'] = new_choices

                if 'allowed_groups' not in profile:
                  allowed_profiles.append(profile)
                else:
                  allowed_groups = set([g.casefold() for g in profile.get("allowed_groups", [])])

                  if allowed_groups & groups:
                      print(f"Allowing profile {profile['display_name']} for user {spawner.user.name} based on team membership")
                      allowed_profiles.append(profile)
                      continue

            if len(allowed_profiles) == 0:
                # If no profiles are allowed, user should not be able to spawn anything!
                # If we don't explicitly stop this, user will be logged into the 'default' settings
                # set in singleuser, without any profile overrides. Not desired behavior
                # FIXME: User doesn't actually see this error message, just the generic 403.
                error_msg = dedent(f"""
                Your JupyterHub group membership is insufficient to launch any server profiles.

                JupyterHub groups you are a member of are {', '.join(groups)}.

                If you are part of additional groups, log out of this JupyterHub and log back in to refresh that information.
                """)
                raise web.HTTPError(403, error_msg)

            return allowed_profiles

        # Only set our custom filter if
        # profile_list is specified (otherwise users will get an empty screen when trying to launch servers)
        if c.KubeSpawner.profile_list:
            # Customize list of profiles dynamically, rather than override options form.
            # This is more secure, as users can't override the options available to them via the hub API
            # We pass in a copy of the original profile_list set in config via partial, to reduce possible variable
            # capture related issues.
            c.KubeSpawner.profile_list = partial(
              profile_list_allowed_groups_filter,
              deepcopy(c.KubeSpawner.profile_list)
            )

      06-salted-username: |
        # Allow anonymizing username to not store *any* PII
        import json
        import os
        import base64
        import hashlib
        from z2jh import get_config


        def salt_username(authenticator, handler, auth_model):
          # Combine parts of user info with different provenances to eliminate
          # possible deanonym attacks when things get leaked.

          # FIXME: Provide useful error message when using an auth provider that
          # doesn't give us 'oidc'
          # FIXME: Raise error if this is attempted to be used with anything other than CILogon
          USERNAME_DERIVATION_PEPPER = bytes.fromhex(os.environ['USERNAME_DERIVATION_PEPPER'])
          cilogon_user = auth_model['auth_state']['cilogon_user']
          user_key_parts = {
            # Opaque ID from CILogon
            "sub": cilogon_user['sub'],
            # Combined together, opaque ID from upstream IDP (GitHub, Google, etc)
            "idp": cilogon_user['idp'],
            "oidc": cilogon_user['oidc']
          }

          # Use JSON here, so we don't have to deal with picking a string
          # delimiter that will not appear in any of the parts.
          # keys are sorted to ensure stable output over time
          user_key = json.dumps(user_key_parts, sort_keys=True).encode('utf-8')

          # The cryptographic choices made here are:
          # - Use blake2, because it's fairly modern
          # - Set blake2 to output 32 bytes as output, which is good enough for our use case
          # - Use base32 encoding, as it will produce maximum of 56 characters
          #   for 32 bytes output by blake2. We have 63 character username
          #   limits in many parts of our code (particularly, in usernames
          #   being part of labels in kubernetes pods), so this helps
          # - Convert everything to lowercase, as base64.b32encode produces
          #   all uppercase characters by default. Our usernames are preferably
          #   lowercase, as uppercase characters must be encoded for kubernetes'
          #   sake
          # - strip the = padding provided by base64.b32encode. This is present
          #   primarily to be able to determine length of the original byte
          #   sequence accurately. We don't care about that here. Also = is
          #   encoded in kubernetes and puts us over the 63 char limit.
          # - Use blake2 here explicitly as a keyed hash, rather than use
          #   hmac. This is the canonical way to do this, and helps make it
          #   clearer that we want it to output 32byte hashes. We could have
          #   used a 16byte hash here for shorter usernames, but it is unclear
          #   what that does to the security properties. So better safe than
          #   sorry, and stick to 32bytes (rather than the default 64)
          digested_user_key = base64.b32encode(hashlib.blake2b(
            user_key,
            key=USERNAME_DERIVATION_PEPPER,
            digest_size=32
          ).digest()).decode('utf-8').lower().rstrip("=")

          # Replace the default name with our digested name, thus
          # discarding the default name
          auth_model["name"] = digested_user_key

          return auth_model

        if get_config('custom.auth.anonymizeUsername', False):
          # https://jupyterhub.readthedocs.io/en/stable/reference/api/auth.html#jupyterhub.auth.Authenticator.post_auth_hook
          c.Authenticator.post_auth_hook = salt_username

      07-enable-fancy-profiles: |
        from jupyterhub_fancy_profiles import setup_ui
        setup_ui(c)

      # Initially copied from https://github.com/dask/helm-chart/blob/main/daskhub/values.yaml
      daskhub-01-add-dask-gateway-values: |
        # 1. Sets `DASK_GATEWAY__PROXY_ADDRESS` in the singleuser environment.
        # 2. Adds the URL for the Dask Gateway JupyterHub service.
        import os
        from z2jh import get_config

        if get_config('custom.daskhubSetup.enabled'):
          # Default all users on hubs with dask-gateway to use JupyterLab
          c.Spawner.default_url = '/lab'

          # Add an extra label that allows user pods to talk to the proxy pod
          # in clusters with networkPolicy enabled so kernels can talk to the
          # dask-gateway service via the proxy
          c.KubeSpawner.extra_labels.update({
            "hub.jupyter.org/network-access-proxy-http": "true"
          })
          # These are set by jupyterhub.
          release_name = os.environ["HELM_RELEASE_NAME"]
          release_namespace = os.environ["POD_NAMESPACE"]
          if "PROXY_HTTP_SERVICE_HOST" in os.environ:
              # https is enabled, we want to use the internal http service.
              gateway_address = "http://{}:{}/services/dask-gateway/".format(
                  os.environ["PROXY_HTTP_SERVICE_HOST"],
                  os.environ["PROXY_HTTP_SERVICE_PORT"],
              )
              print("Setting DASK_GATEWAY__ADDRESS {} from HTTP service".format(gateway_address))
          else:
              gateway_address = "http://proxy-public/services/dask-gateway"
              print("Setting DASK_GATEWAY__ADDRESS {}".format(gateway_address))
          # Internal address to connect to the Dask Gateway.
          c.KubeSpawner.environment.setdefault("DASK_GATEWAY__ADDRESS", gateway_address)
          # Internal address for the Dask Gateway proxy.
          c.KubeSpawner.environment.setdefault("DASK_GATEWAY__PROXY_ADDRESS", "gateway://traefik-{}-dask-gateway.{}:80".format(release_name, release_namespace))
          # Relative address for the dashboard link.
          c.KubeSpawner.environment.setdefault("DASK_GATEWAY__PUBLIC_ADDRESS", "/services/dask-gateway/")
          # Use JupyterHub to authenticate with Dask Gateway.
          c.KubeSpawner.environment.setdefault("DASK_GATEWAY__AUTH__TYPE", "jupyterhub")

          # Add some settings for dask gateway via environment variables
          # https://docs.dask.org/en/latest/configuration.html has more information
          # Kubernetes env variable expansion via `{{}}` is used here. See
          # https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/
          # for more information
          c.KubeSpawner.environment.update({
            # Specify what image dask-gateway workers and schedulers should use
            'DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE': '{{JUPYTER_IMAGE_SPEC}}',
            'DASK_GATEWAY__CLUSTER__OPTIONS__ENVIRONMENT': '{{"SCRATCH_BUCKET": "$(SCRATCH_BUCKET)", "PANGEO_SCRATCH": "$(PANGEO_SCRATCH)"}}',
            'DASK_DISTRIBUTED__DASHBOARD__LINK': '{{JUPYTERHUB_SERVICE_PREFIX}}proxy/{{port}}/status'
          })

          # Adds Dask Gateway as a JupyterHub service to make the gateway available at
          # {HUB_URL}/services/dask-gateway
          service_url = "http://traefik-{}-dask-gateway.{}".format(release_name, release_namespace)
          for service in c.JupyterHub.services:
              if service["name"] == "dask-gateway":
                  if not service.get("url", None):
                      print("Adding dask-gateway service URL")
                      service.setdefault("url", service_url)
                  break
          else:
              print("dask-gateway service not found, this should not happen!")

jupyterhub-home-nfs:
  enabled: false