1616DAXFS filesystem image creation.
1717
1818Creates a daxfs image from a directory and writes it to DMA heap memory.
19- The guest kernel can then mount the filesystem directly using the physical address.
19+ The daxfs filesystem holds a reference to the dma-buf, so the memory persists
20+ even after the dmabuf fd is closed.
2021"""
2122
23+ import ctypes
2224import fcntl
2325import mmap
2426import os
2527import shutil
2628import struct
27- import subprocess
2829from dataclasses import dataclass
2930from pathlib import Path
3031from typing import Optional
@@ -50,7 +51,6 @@ class DaxfsImage:
5051 """Represents a created daxfs image."""
5152 phys_addr : int
5253 size : int
53- dmabuf_fd : int
5454
5555
5656@dataclass
@@ -273,6 +273,22 @@ def _is_symlink(mode: int) -> bool:
273273 return stat .S_ISLNK (mode )
274274
275275
def _get_libc():
    """Return a ctypes library handle with errno capture enabled.

    ``CDLL(None)`` opens the running process itself rather than a named
    shared object; ``use_errno=True`` makes ctypes keep a private copy of
    errno that can be read afterwards with ``ctypes.get_errno()``.
    """
    handle = ctypes.CDLL(None, use_errno=True)
    return handle
279+
280+
def _syscall(libc, nr, *args):
    """Call ``libc.syscall`` with syscall number ``nr`` and return the result.

    Python ``int`` arguments (and ``nr`` itself) are wrapped in
    ``ctypes.c_long``; every other argument is handed to ctypes unchanged.

    Args:
        libc: Object exposing a ``syscall`` callable (see ``_get_libc``).
        nr: Syscall number.
        *args: Arguments forwarded to the syscall.

    Returns:
        The non-negative value returned by the call.

    Raises:
        OSError: If the call returns a negative value; carries the errno
            reported by ``ctypes.get_errno()``.
    """
    raw_syscall = libc.syscall
    # Have ctypes interpret the return value as a C long.
    raw_syscall.restype = ctypes.c_long

    converted = []
    for arg in args:
        if isinstance(arg, int):
            converted.append(ctypes.c_long(arg))
        else:
            converted.append(arg)

    result = raw_syscall(ctypes.c_long(nr), *converted)
    if result >= 0:
        return result

    err = ctypes.get_errno()
    raise OSError(err, os.strerror(err))
290+
291+
276292def _get_daxfs_iomem () -> tuple [int , int ]:
277293 """
278294 Parse /proc/iomem to find daxfs allocation.
@@ -313,6 +329,23 @@ def _allocate_dma_heap(heap_path: str, size: int) -> tuple[int, mmap.mmap]:
313329
# Host-side directory under which each instance's daxfs mount point is created.
KERF_DAXFS_MNT_DIR = "/var/lib/kerf/daxfs"

# Syscall numbers for the Linux "new mount API" (introduced in kernel 5.2).
# NOTE(review): these come from the shared syscall table; a few legacy
# architectures (e.g. alpha) number them differently -- confirm if relevant.
SYS_MOVE_MOUNT = 429
SYS_FSOPEN = 430
SYS_FSCONFIG = 431
SYS_FSMOUNT = 432

# Command codes passed as the second argument of fsconfig(2).
FSCONFIG_SET_STRING = 1
FSCONFIG_SET_FD = 5
FSCONFIG_CMD_CREATE = 6

# move_mount(2) flag: allow the source path to be the empty string.
MOVE_MOUNT_F_EMPTY_PATH = 0x00000004

# Directory-fd value used as the destination dirfd in move_mount(2).
AT_FDCWD = -100


347+
348+
316349def inject_kerf_init (rootfs_path : str ) -> None :
317350 """Inject /init binary into rootfs.
318351
@@ -333,26 +366,55 @@ def inject_kerf_init(rootfs_path: str) -> None:
333366 os .chmod (init_path , 0o755 )
334367
335368
def _mount_daxfs(instance_name: str, dmabuf_fd: int) -> None:
    """
    Mount daxfs on the host kernel, passing the dmabuf fd directly.

    Uses the fsopen/fsconfig/fsmount/move_mount syscalls and attaches the
    resulting mount at ``KERF_DAXFS_MNT_DIR/<instance_name>``, creating that
    directory if it does not exist. The daxfs filesystem takes a reference
    on the dma-buf, so the fd can be closed by the caller after mounting;
    this function never closes ``dmabuf_fd`` itself.

    Args:
        instance_name: Name of the multikernel instance
        dmabuf_fd: File descriptor for the dma-buf holding the filesystem image

    Raises:
        DaxfsError: If the mount point cannot be created or any step of the
            mount sequence fails.
    """
    mnt_dir = Path(KERF_DAXFS_MNT_DIR) / instance_name

    # -1 marks "not opened yet" so the cleanup below only closes real fds.
    fs_fd = -1
    mnt_fd = -1

    try:
        # Directory creation and libc loading happen inside the try so that
        # their OSError is reported as DaxfsError too; callers rely on
        # DaxfsError to release the dma-buf fd on failure.
        mnt_dir.mkdir(parents=True, exist_ok=True)
        libc = _get_libc()

        # fsopen("daxfs", 0)
        fstype = b"daxfs"
        fs_fd = _syscall(libc, SYS_FSOPEN, ctypes.c_char_p(fstype), 0)

        # fsconfig(fs_fd, FSCONFIG_SET_FD, "dmabuf", NULL, dmabuf_fd)
        _syscall(libc, SYS_FSCONFIG, fs_fd, FSCONFIG_SET_FD,
                 ctypes.c_char_p(b"dmabuf"), 0, dmabuf_fd)

        # fsconfig(fs_fd, FSCONFIG_SET_STRING, "name", instance_name, 0)
        name_bytes = instance_name.encode('utf-8')
        _syscall(libc, SYS_FSCONFIG, fs_fd, FSCONFIG_SET_STRING,
                 ctypes.c_char_p(b"name"), ctypes.c_char_p(name_bytes), 0)

        # fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)
        _syscall(libc, SYS_FSCONFIG, fs_fd, FSCONFIG_CMD_CREATE, 0, 0, 0)

        # fsmount(fs_fd, 0, 0)
        mnt_fd = _syscall(libc, SYS_FSMOUNT, fs_fd, 0, 0)

        # move_mount(mnt_fd, "", AT_FDCWD, mountpoint, MOVE_MOUNT_F_EMPTY_PATH)
        mountpoint = str(mnt_dir).encode('utf-8')
        _syscall(libc, SYS_MOVE_MOUNT, mnt_fd, ctypes.c_char_p(b""),
                 AT_FDCWD, ctypes.c_char_p(mountpoint), MOVE_MOUNT_F_EMPTY_PATH)
    except OSError as e:
        raise DaxfsError(f"Failed to mount daxfs: {e}") from e
    finally:
        # The context and mount fds are only needed during setup.
        if mnt_fd >= 0:
            os.close(mnt_fd)
        if fs_fd >= 0:
            os.close(fs_fd)
356418
357419
358420def create_daxfs_image (
@@ -371,7 +433,7 @@ def create_daxfs_image(
371433 size: Size to allocate (if None, calculated automatically with 10% padding)
372434
373435 Returns:
374- DaxfsImage with physical address, size, and dmabuf fd
436+ DaxfsImage with physical address and size
375437
376438 Raises:
377439 DaxfsError: If image creation fails
@@ -408,19 +470,18 @@ def create_daxfs_image(
408470 raise DaxfsError (f"Failed to write daxfs image: { e } " ) from e
409471
410472 try :
411- phys_addr , actual_size = _get_daxfs_iomem ( )
473+ _mount_daxfs ( instance_name , dmabuf_fd )
412474 except DaxfsError :
413475 os .close (dmabuf_fd )
414476 raise
415477
416- try :
417- _mount_daxfs ( instance_name , phys_addr , actual_size )
418- except DaxfsError :
419- os . close ( dmabuf_fd )
420- raise
478+ # daxfs now holds a reference to the dma-buf, safe to close
479+ os . close ( dmabuf_fd )
480+
481+ # Get the physical address from /proc/iomem for the spawn kernel rootflags
482+ phys_addr , actual_size = _get_daxfs_iomem ()
421483
422484 return DaxfsImage (
423485 phys_addr = phys_addr ,
424486 size = actual_size ,
425- dmabuf_fd = dmabuf_fd ,
426487 )
0 commit comments