60 changes: 59 additions & 1 deletion CHANGES.md
@@ -15,7 +15,65 @@
[Unreleased](https://github.com/bird-house/birdhouse-deploy/tree/master) (latest)
------------------------------------------------------------------------------------------------------------------

[//]: # (list changes here, using '-' for each new entry, remove this when items are added)
## Changes

- Add Nvidia MPS component for managing Nvidia GPU resources

This creates a container running Nvidia's Multi Process Service ([MPS](https://docs.nvidia.com/deploy/mps/index.html))
which helps manage multi-user GPU access.
It runs an alternative CUDA interface which manages resource allocation when multiple processes are running simultaneously
on the same GPU.
It also allows the node admin to set additional per-user limits through the `JUPYTERHUB_RESOURCE_LIMITS` variable
which configures Jupyterlab containers:

- `"gpu_device_mem_limit"`: sets the `CUDA_MPS_PINNED_DEVICE_MEM_LIMIT` environment variable
- `"gpu_active_thread_percentage"`: sets the `CUDA_MPS_ACTIVE_THREAD_PERCENTAGE` environment variable

For example, the following will give all users in the group named `"users"` access to three GPUs in their Jupyterlab
container. On the first one (id = 0) only 1GB of memory is available, on the second (id = 1) only 5GB, and on the third
(id = 2) only 10GB. Additionally, the container will be able to use 10% of available threads on the GPUs.

```shell
export JUPYTERHUB_RESOURCE_LIMITS='
[{
    "type": "group",
    "name": "users",
    "limits": {
        "gpu_ids": ["0", "1", "2"],
        "gpu_count": 3,
        "gpu_device_mem_limit": "0=1G,1=5G,2=10G",
        "gpu_active_thread_percentage": "10"
    }
}]
'
```

Member: Does the `<gpu-id>=<thread-count>,...` variant work for this as well?

Collaborator Author: As far as I can tell, no. The Nvidia documentation is pretty horrendous, but all the documentation and examples I've seen only show that you can specify per-GPU limits for the memory limit, not for the active thread percentage.

Note that leaving any of these limits unset will default to allowing the user full access to the given resource.

- Update `CustomDockerSpawner` to make pre-spawn hooks and resource limits more configurable

Introduce `pre_spawn_hooks` and `resource_limit_callbacks` attributes on the `CustomDockerSpawner` class so that
optional components can further customize the spawner. This provides a way to add functionality without directly
modifying existing functions, which may be overridden by the user when they configure the spawner in
`JUPYTERHUB_CONFIG_OVERRIDE`.
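
For illustration, an optional component could register its own limit callback and pre-spawn hook. The sketch below is hypothetical (the `scratch_quota` limit and both helper names are invented) and only shows the shape of a snippet a component might append to `JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL`:

```python
# Hypothetical component snippet; it ends up in the rendered jupyterhub_config.py,
# where `c` is the JupyterHub configuration object.

def _set_scratch_quota(spawner, value):
    # Called by the spawner's resource limit hook whenever a "scratch_quota"
    # entry appears in JUPYTERHUB_RESOURCE_LIMITS for this user or group.
    spawner.environment["SCRATCH_QUOTA"] = str(value)

def _mount_shared_data(spawner):
    # Runs alongside the built-in pre-spawn hooks before the container starts.
    spawner.volumes["shared_data"] = "/data/shared"

c.CustomDockerSpawner.resource_limit_callbacks.update({"scratch_quota": _set_scratch_quota})
c.CustomDockerSpawner.pre_spawn_hooks.append(_mount_shared_data)
```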

This also introduces the `JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL` variable which is identical to the
`JUPYTERHUB_CONFIG_OVERRIDE` variable except that it is intended to only be set by other components (not by the
user in the local environment file). This allows components to customize Jupyterhub deployments without interfering
with custom settings created by the user.

Note that the contents of `JUPYTERHUB_CONFIG_OVERRIDE` take precedence over the contents of
`JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL`. For example, if you create a volume mount named `my_volume` in both, only
the one defined in `JUPYTERHUB_CONFIG_OVERRIDE` will be applied.
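
This precedence follows from the interpolation order: the rendered `jupyterhub_config.py` inserts `JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL` before `JUPYTERHUB_CONFIG_OVERRIDE`, so the user's assignment runs last. A minimal sketch of the rendered result (the paths are illustrative):

```python
# Rendered configuration, in interpolation order:

# ...from JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL (set by a component):
c.CustomDockerSpawner.volumes["my_volume"] = "/data/component-default"

# ...from JUPYTERHUB_CONFIG_OVERRIDE (set by the user); it executes last, so it wins:
c.CustomDockerSpawner.volumes["my_volume"] = "/data/user-choice"
```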

## Fixes

- Update GPU limit examples to show expected syntax

Fixes some examples that showed `gpu_ids` being given as integers, as if they were numeric indexes. However, due
to a limitation of Docker, the IDs must be strings. The examples now make it clear that strings must be used, and
the code has been updated to ensure that only string values are ever passed to Docker when spawning a new
Jupyterlab server.
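
For context, the Docker SDK for Python expects the device IDs passed to `DeviceRequest` to be strings; a minimal sketch of the coercion (not taken verbatim from the PR):

```python
import docker

gpu_ids = [1, 2]  # integer IDs, as a user might mistakenly write them
request = docker.types.DeviceRequest(
    device_ids=[str(i) for i in gpu_ids],  # Docker expects string IDs, e.g. ["1", "2"]
    capabilities=[["gpu"]],
)
```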

[2.22.0](https://github.com/bird-house/birdhouse-deploy/tree/2.22.0) (2026-02-09)
------------------------------------------------------------------------------------------------------------------
8 changes: 8 additions & 0 deletions birdhouse/components/jupyterhub/default.env
@@ -55,6 +55,13 @@ export JUPYTER_IDLE_KERNEL_CULL_INTERVAL=0
# config/jupyterhub/jupyterhub_config.py.template.
export JUPYTERHUB_CONFIG_OVERRIDE=""

# Allows adding new configuration or overriding existing configurations.
# This variable should only be set by other components, not directly by the user.
# Users should set JUPYTERHUB_CONFIG_OVERRIDE instead.
# Note that this references itself in case another component has previously set this
# variable (before this file is processed).
export JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL="${JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL}"

# URL used to verify that a logged in user has permission to access Jupyterhub
# To disable this feature, unset this variable. However, disabling this feature is NOT
# recommended as it may permit unauthorized users from accessing jupyterhub.
@@ -104,6 +111,7 @@ OPTIONAL_VARS="
\$JUPYTER_LOGIN_BANNER_BOTTOM_SECTION
\$JUPYTER_LOGIN_TERMS_URL
\$JUPYTERHUB_CONFIG_OVERRIDE
\$JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL
\$JUPYTERHUB_DOCKER
\$JUPYTERHUB_VERSION
\$JUPYTERHUB_IMAGE
@@ -104,4 +104,6 @@ if """${JUPYTERHUB_ADMIN_USERS}""":
# Configuration overrides
# ------------------------------------------------------------------------------

${JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL} # noqa

${JUPYTERHUB_CONFIG_OVERRIDE} # noqa
@@ -5,7 +5,7 @@

import docker
from dockerspawner import DockerSpawner
from traitlets import default
from traitlets import Callable, Dict, List, Unicode, default

from . import constants

@@ -227,6 +227,35 @@ def _default_start_timeout(self) -> int:
"""Timeout (in seconds) before giving up on starting of single-user server."""
return 120

resource_limit_callbacks = Dict(
value_trait=Callable(),
key_trait=Unicode(),
config=True,
help=(
"Dictionary mapping limit names to a callable that takes two arguments: "
"the spawner instance and the value for that limit. These can be used to "
"add additional resource limits that are enforced by optional components."
),
)

pre_spawn_hooks = List(
Callable(),
config=True,
help=(
"List of pre spawn hooks to run as well as the pre_spawn_hook function. "
"This is intended to be set by internal tools, users should set the "
"pre_spawn_hook directly."
),
)

@default("pre_spawn_hooks")
def _default_pre_spawn_hooks(self) -> list:
return [
CustomDockerSpawner.__create_dir_hook,
CustomDockerSpawner.__limit_resource_hook,
CustomDockerSpawner.__create_tutorial_notebook_hook,
]

@property
def escaped_name(self) -> str:
"""
@@ -321,22 +350,19 @@ def __limit_resource_hook(self) -> None:
gpu_ids = value
elif limit == "gpu_count":
gpu_count = value
elif limit in self.resource_limit_callbacks:
self.resource_limit_callbacks[limit](self, value)
if gpu_ids:
# randomly assign GPUs in an attempt to evenly distribute GPU resources
random.shuffle(gpu_ids)
gpu_ids = gpu_ids[:gpu_count]
self.extra_host_config["device_requests"] = [
docker.types.DeviceRequest(device_ids=gpu_ids, capabilities=[["gpu"]])
docker.types.DeviceRequest(device_ids=[str(i) for i in gpu_ids], capabilities=[["gpu"]])
]

    def run_pre_spawn_hook(self) -> None:
        """Run the builtin pre-spawn hooks as well as any set by pre_spawn_hook if defined."""
        self._custom_pre_spawn_hook()
        for hook in self.pre_spawn_hooks:
            hook(self)
        if self.pre_spawn_hook:
            self.pre_spawn_hook(self)

    def _custom_pre_spawn_hook(self) -> None:
        """Run before spawning a singleuser jupyterlab server."""
        self.__create_dir_hook()
        self.__limit_resource_hook()
        self.__create_tutorial_notebook_hook()
4 changes: 2 additions & 2 deletions birdhouse/env.local.example
@@ -462,7 +462,7 @@ export GEOSERVER_ADMIN_PASSWORD="${__DEFAULT__GEOSERVER_ADMIN_PASSWORD}"
# {"type": "user", "name": "user1", "limits": {"mem_limit": "30G"}},
# {"type": "group", "name": "group1", "limits": {"mem_limit": "10G", "cpu_limit": 1}},
# {"type": "group", "name": "group2", "limits": {"cpu_limit": 3, "gpu_ids": [0, 3, 4]}},
# {"type": "user", "name": "user2", "limits": {"gpu_ids": [1, 2, 3], "gpu_count": 2}}
# {"type": "user", "name": "user2", "limits": {"gpu_ids": ["1", "2", "3"], "gpu_count": 2}}
# ]
#'
# Supported limits are: "mem_limit", "cpu_limit", "gpu_count", "gpu_ids".
@@ -478,7 +478,7 @@ export GEOSERVER_ADMIN_PASSWORD="${__DEFAULT__GEOSERVER_ADMIN_PASSWORD}"
# is possible but discouraged since it makes it possible to select the same GPU multiple times.
# If gpu_count is also specified, this is an integer indicating how many GPUs to make available to that user or group.
# If gpu_count is not specified, then exactly one GPU will be randomly selected.
# For example, if {"gpu_ids": [1,2,6], "gpu_count": 2} then two GPUs will be randomly selected from the gpu_ids list.
# For example, if {"gpu_ids": ["1","2","6"], "gpu_count": 2} then two GPUs will be randomly selected from the gpu_ids list.
# Note that this will not create the groups in Magpie, that must be done manually.
# Note that if a user belongs to multiple groups, later values in `JUPYTERHUB_RESOURCE_LIMITS` will take
# precedence. For example, if a user named user1 belongs to group1 and group2 then the following limits will apply:
55 changes: 55 additions & 0 deletions birdhouse/optional-components/README.rst
@@ -730,3 +730,58 @@ that your custom component creates and read the `proxy` access logs at a file de

For example, if `PROXY_LOG_FILE` is set to ``access_file.log`` (the default) and you mount the `proxy-logs` volume to the ``/logs``
directory in your container, the `proxy` access logs can be read at ``/logs/access_file.log`` in your container.

Nvidia multi process service
----------------------------

This creates a container running Nvidia's Multi Process Service (MPS_) which helps manage multi-user GPU access.
It runs an alternative CUDA interface which manages resource allocation when multiple processes are running simultaneously
on the same GPU.
It also allows the node admin to set additional per-user limits through the ``JUPYTERHUB_RESOURCE_LIMITS`` variable
which configures Jupyterlab containers:

* ``"gpu_device_mem_limit"``: sets the ``CUDA_MPS_PINNED_DEVICE_MEM_LIMIT`` environment variable
* ``"gpu_active_thread_percentage"``: sets the ``CUDA_MPS_ACTIVE_THREAD_PERCENTAGE`` environment variable

For example, the following will give all users in the group named ``"users"`` access to three GPUs in their Jupyterlab
container. On the first one (id = 0) only 1GB of memory is available, on the second (id = 1) only 5GB, and on the third
(id = 2) only 10GB. Additionally, the container will be able to use 10% of available threads on the GPUs.

.. code-block:: shell

   export JUPYTERHUB_RESOURCE_LIMITS='
   [{
       "type": "group",
       "name": "users",
       "limits": {
           "gpu_ids": ["0", "1", "2"],
           "gpu_count": 3,
           "gpu_device_mem_limit": "0=1G,1=5G,2=10G",
           "gpu_active_thread_percentage": "10"
       }
   }]
   '

Note that leaving any of these limits unset will default to allowing the user full access to the given resource.

.. note::

   The ``mps`` docker container currently applies the MPS server to all GPUs. If you want to only apply the MPS server
   to a subset of the GPUs available on your machine, you will need to create an additional component with a
   ``docker-compose-extra.yml`` file that specifically overrides the container device settings for the ``mps`` container.

   For example, the docker compose configuration below would set the MPS server to only apply to GPUs with ids ``"0"`` and ``"1"``.

   .. code-block:: yaml

      services:
        mps:
          deploy:
            resources:
              reservations:
                devices: !override
                  - capabilities: [gpu]
                    driver: nvidia
                    device_ids: ["0", "1"]

.. _MPS: https://docs.nvidia.com/deploy/mps/index.html
@@ -0,0 +1,4 @@
# Make these two variables that set limits readonly so that users cannot overwrite
# these variables from inside their jupyterlab container.
readonly CUDA_MPS_PINNED_DEVICE_MEM_LIMIT
readonly CUDA_MPS_ACTIVE_THREAD_PERCENTAGE
@@ -0,0 +1,4 @@
services:
  jupyterhub:
    environment:
      - NVIDIA_MPS_PROFILE_SCRIPT=${COMPOSE_DIR}/optional-components/nvidia-multi-process-service/02-readonly-cuda-vars.sh
@@ -0,0 +1,50 @@
export NVIDIA_MULTIPROCESS_SERVICE_DOCKER=debian
export NVIDIA_MULTIPROCESS_SERVICE_VERSION=bookworm-slim
export NVIDIA_MULTIPROCESS_SERVICE_IMAGE='${NVIDIA_MULTIPROCESS_SERVICE_DOCKER}:${NVIDIA_MULTIPROCESS_SERVICE_VERSION}'

export DELAYED_EVAL="
$DELAYED_EVAL
NVIDIA_MULTIPROCESS_SERVICE_IMAGE
"

export JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL="
${JUPYTERHUB_CONFIG_OVERRIDE_INTERNAL}

def _gpu_device_mem_limit(spawner: CustomDockerSpawner, value: str) -> None:
    '''
    Set memory limits for GPUs allocated to this user.

    See: https://docs.nvidia.com/deploy/mps/appendix-tools-and-interface-reference.html#cuda-mps-pinned-device-mem-limit
    '''
    spawner.environment['CUDA_MPS_PINNED_DEVICE_MEM_LIMIT'] = value

def _gpu_active_thread_percentage(spawner: CustomDockerSpawner, value: str | int) -> None:
    '''
    Set active thread percentage for GPUs allocated to this user

    See: https://docs.nvidia.com/deploy/mps/appendix-tools-and-interface-reference.html#cuda-mps-active-thread-percentage
    '''
    spawner.environment['CUDA_MPS_ACTIVE_THREAD_PERCENTAGE'] = str(value)

c.CustomDockerSpawner.resource_limit_callbacks.update({
    'gpu_device_mem_limit': _gpu_device_mem_limit,
    'gpu_active_thread_percentage': _gpu_active_thread_percentage,
})

def _gpu_set_mps_configs(spawner: CustomDockerSpawner) -> None:
    '''
    Set configurations so this container uses the multi-process service running in the container named mps

    See: https://gitlab.com/nvidia/container-images/samples/-/blob/master/mps/docker-compose.yml
    '''
    spawner.extra_host_config['ipc_mode'] = 'container:mps'
    spawner.volumes['nvidia_mps'] = '/tmp/nvidia-mps'

c.CustomDockerSpawner.pre_spawn_hooks.append(_gpu_set_mps_configs)

# This sets the variables as readonly so that users can't unset/update the environment variables
# that set these limits in the jupyterlab docker container.
c.CustomDockerSpawner.volumes.update({
    os.environ['NVIDIA_MPS_PROFILE_SCRIPT']: '/etc/profile.d/02-readonly-cuda-vars.sh'
})
"
@@ -0,0 +1,24 @@
services:
  mps:
    image: ${NVIDIA_MULTIPROCESS_SERVICE_IMAGE}
    container_name: mps
    restart: always
    ipc: shareable
    volumes:
      - nvidia_mps:/tmp/nvidia-mps
    init: true
    command: ["nvidia-cuda-mps-control", "-f"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

volumes:
  nvidia_mps:
    driver: local
    driver_opts:
      type: tmpfs
      device: tmpfs

Comment on lines +15 to +17

Member: Should this be configurable as well? For example, some GPUs reserved for Jupyter and others reserved for other operations (e.g. Weaver workers)?

Is it better to have all GPU-enabled operations connected to this MPS regardless of the way they are used, and have limited gpu_ids defined for Jupyter vs. others? Or instead, have multiple sub-GPU partitions where what this MPS "sees" corresponds to 100% of a given sub-partition, and its jupyter-dockerspawner resources do not need gpu_ids (or they do, but relative to that sub-GPU partition)?

Collaborator Author: I'm going to say that it's better to make everything go through the MPS and then divide up the GPUs when they're assigned to containers (Jupyterlab or Weaver workers).

The only exception I can think of is if a user has a subset of GPUs that they want to use for birdhouse and another set that they want to use for something else entirely on the same machine. I guess I could make this configurable, but if a user is doing something other than the default they have to really know what they're doing so they don't break things.

Collaborator Author: Actually, the problem here is how docker compose configures this. The count and device_ids keys are mutually exclusive, and you can't use variable substitution to add lists.

The only way to do this would be to create another optional component with a docker-compose-extra.yml file that contains something like the following, if you want to only allow a subset of GPUs:

  mps:
    deploy:
      resources:
        reservations:
          devices: !override
            - capabilities: [gpu]
              driver: nvidia
              device_ids: ["0", "1"]

For now I'll document this with a comment, but actually configuring this would require a whole other component.

Member: OK to address in another PR with documentation for the time being.

Indeed, the count/device_ids conflict is an important consideration. In my case, I would prioritize weaver-workers over Jupyter for GPU use, probably giving a few lower-VRAM GPUs to Jupyter and leaving the VRAM-heavy computations to dedicated weaver-workers. Since weaver-workers launch dockers via docker-proxy, I am not sure that I could use count, since I would need to pre-partition the specific devices for each service.

I guess that also raises another question: how are other non-Jupyter/DockerSpawner services supposed to map to the mps service to employ the GPUs it manages? Is there a specific set of options to configure (passing the ipc ID or whatnot)? The connection between mps and jupyterhub is somewhat hard to interpret because of the intermediate DockerSpawner layer.

Collaborator Author:

> How are other non-Jupyter/DockerSpawner services supposed to map with mps service to employ the GPUs it manages?

Yeah, that's going to be the subject of a future PR. But to summarize here, every container that accesses GPUs would need to:

  • use the ipc from the mps container
  • have the nvidia_mps tmpfs mounted to /tmp/nvidia-mps
  • have the gpu devices enabled on the container

There's a reference to this in the PR, but I find that this example docker compose project outlines the setup nicely:

https://gitlab.com/nvidia/container-images/samples/-/blob/master/mps/docker-compose.yml

(note that the syntax in that file is slightly outdated but it gives the right idea)

@@ -0,0 +1,8 @@
if ! command -v nvidia-smi >/dev/null; then
    log ERROR "The optional-components/nvidia-multi-process-service component is enabled but no Nvidia GPUs or drivers can be detected on this system (no nvidia-smi command exists). Please ensure that GPUs are installed properly or disable this component."
    expect_exit 1
fi

if [ "$(nvidia-smi --query-gpu=compute_mode --format=csv,noheader | grep -vc 'Exclusive_Process')" -ne 0 ]; then
    log WARN "Nvidia GPUs with compute mode set to something other than EXCLUSIVE_PROCESS detected. We recommend you set the compute mode to EXCLUSIVE_PROCESS when enabling Nvidia's Multi Process Service (MPS)."
fi
Comment on lines +6 to +8

Member: Is that a hard requirement for MPS to work, or some other efficiency reason? It is not clear why it is recommended without context (I haven't played with that service).

If it is a hard requirement, maybe it should not WARN, but ERROR (and exit?). Depends on the reason.

Also, activating this component without a GPU/nvidia-smi will cause a command error. That is fine since it won't work anyway, but maybe the error should be handled more gracefully?

Collaborator Author: It is not a hard requirement, which is why it's only a warning. Again, the documentation is awful, so I'm only 90% sure this is the reason:

  • MPS runs a server process for each GPU that manages resource allocation on the GPU
  • By setting EXCLUSIVE_PROCESS we ensure that only that server process has direct access to the GPU, and every CUDA client process has to go through it
  • With the default compute mode, another client process could access the GPU directly, sidestepping the MPS entirely

There are valid use cases for running MPS on a GPU without EXCLUSIVE_PROCESS set, but those are pretty niche and I don't know enough to fully understand the implications right now.

> Activating this component without a GPU/Nvidia-SMI will cause a command error.

True, I'll add better error handling for this.

Member: OK, thanks for the details.

26 changes: 24 additions & 2 deletions tests/unit/test_jupyterhub_custom.py
@@ -582,7 +582,7 @@ def test_user_name_matches_gpu_ids_no_count(self, spawner, constants, generate_s
        spawner_inst.run_pre_spawn_hook()
        device_ids = spawner_inst.extra_host_config["device_requests"][0].device_ids
        assert len(device_ids) == 1
        assert device_ids[0] in [1, 2, 3]
        assert device_ids[0] in ["1", "2", "3"]

    def test_user_name_matches_gpu_ids_with_count(self, spawner, constants, generate_spawner_inst):
        spawner_inst = generate_spawner_inst(spawner)
@@ -596,8 +596,30 @@ def test_user_name_matches_gpu_ids_with_count(self, spawner, constants, generate
        spawner_inst.run_pre_spawn_hook()
        device_ids = spawner_inst.extra_host_config["device_requests"][0].device_ids
        assert len(device_ids) == 2
        assert set(device_ids) < {1, 2, 3}
        assert set(device_ids) < {"1", "2", "3"}

    def test_additional_resource_limits(self, spawner, constants, generate_spawner_inst):
        mock = Mock()
        spawner_inst = generate_spawner_inst(spawner)
        spawner_inst.resource_limit_callbacks["test_limit"] = mock
        constants.RESOURCE_LIMITS = [
            {
                "type": "user",
                "name": spawner_inst.user.name,
                "limits": {"test_limit": 22},
            }
        ]
        spawner_inst.run_pre_spawn_hook()
        assert mock.call_args == ((spawner_inst, 22),)

class TestAdditionalPreSpawnHooks:

    def test_custom_pre_spawn_hook(self, spawner, generate_spawner_inst):
        mock = Mock()
        spawner_inst = generate_spawner_inst(spawner)
        spawner_inst.pre_spawn_hooks.append(mock)
        spawner_inst.run_pre_spawn_hook()
        assert mock.call_args == ((spawner_inst,),)

# @pytest.mark.asyncio
class TestMagpieAuthenticator: