Skip to content

Commit 57018c5

Browse files
committed
Fix daxfs dma-buf lifetime by passing fd directly via fsopen API
Signed-off-by: Cong Wang <cwang@multikernel.io>
1 parent 017363b commit 57018c5

File tree

1 file changed

+84
-23
lines changed

1 file changed

+84
-23
lines changed

src/kerf/daxfs/mkdaxfs.py

Lines changed: 84 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@
1616
DAXFS filesystem image creation.
1717
1818
Creates a daxfs image from a directory and writes it to DMA heap memory.
19-
The guest kernel can then mount the filesystem directly using the physical address.
19+
The daxfs filesystem holds a reference to the dma-buf, so the memory persists
20+
even after the dmabuf fd is closed.
2021
"""
2122

23+
import ctypes
2224
import fcntl
2325
import mmap
2426
import os
2527
import shutil
2628
import struct
27-
import subprocess
2829
from dataclasses import dataclass
2930
from pathlib import Path
3031
from typing import Optional
@@ -50,7 +51,6 @@ class DaxfsImage:
5051
"""Represents a created daxfs image."""
5152
phys_addr: int
5253
size: int
53-
dmabuf_fd: int
5454

5555

5656
@dataclass
@@ -273,6 +273,22 @@ def _is_symlink(mode: int) -> bool:
273273
return stat.S_ISLNK(mode)
274274

275275

276+
def _get_libc():
277+
"""Get libc with errno support."""
278+
return ctypes.CDLL(None, use_errno=True)
279+
280+
281+
def _syscall(libc, nr, *args):
282+
"""Invoke a syscall and raise OSError on failure."""
283+
fn = libc.syscall
284+
fn.restype = ctypes.c_long
285+
ret = fn(ctypes.c_long(nr), *[ctypes.c_long(a) if isinstance(a, int) else a for a in args])
286+
if ret < 0:
287+
errno_val = ctypes.get_errno()
288+
raise OSError(errno_val, os.strerror(errno_val))
289+
return ret
290+
291+
276292
def _get_daxfs_iomem() -> tuple[int, int]:
277293
"""
278294
Parse /proc/iomem to find daxfs allocation.
@@ -313,6 +329,23 @@ def _allocate_dma_heap(heap_path: str, size: int) -> tuple[int, mmap.mmap]:
313329

314330
KERF_DAXFS_MNT_DIR = "/var/lib/kerf/daxfs"
315331

332+
# Syscall numbers (the common numbers used by x86_64, arm64 and most other
# architectures; alpha, mips, ia64 and x32 apply an offset). Added in kernel 5.2.
333+
SYS_MOVE_MOUNT = 429
334+
SYS_FSOPEN = 430
335+
SYS_FSCONFIG = 431
336+
SYS_FSMOUNT = 432
337+
338+
# fsconfig command constants
339+
FSCONFIG_SET_STRING = 1
340+
FSCONFIG_SET_FD = 5
341+
FSCONFIG_CMD_CREATE = 6
342+
343+
# move_mount flags
344+
MOVE_MOUNT_F_EMPTY_PATH = 0x00000004
345+
346+
AT_FDCWD = -100
347+
348+
316349
def inject_kerf_init(rootfs_path: str) -> None:
317350
"""Inject /init binary into rootfs.
318351
@@ -333,26 +366,55 @@ def inject_kerf_init(rootfs_path: str) -> None:
333366
os.chmod(init_path, 0o755)
334367

335368

336-
def _mount_daxfs(instance_name: str, phys_addr: int, size: int) -> None:
369+
def _mount_daxfs(instance_name: str, dmabuf_fd: int) -> None:
    """
    Mount daxfs on the host kernel, passing the dmabuf fd directly.

    Uses fsopen/fsconfig/fsmount/move_mount syscalls. The daxfs filesystem
    takes a reference on the dma-buf, so the fd can be closed after mounting.
    The mountpoint is KERF_DAXFS_MNT_DIR/<instance_name> and is created if
    it does not exist. This function never closes ``dmabuf_fd`` itself.

    Args:
        instance_name: Name of the multikernel instance
        dmabuf_fd: File descriptor for the dma-buf holding the filesystem image

    Raises:
        DaxfsError: If the mountpoint cannot be created, libc cannot be
            loaded, or any step of the mount sequence fails.
    """
    mnt_dir = Path(KERF_DAXFS_MNT_DIR) / instance_name
    fs_fd = -1
    mnt_fd = -1

    try:
        # Directory creation and libc loading live inside the try so their
        # OSError is reported as DaxfsError like every other failure; callers
        # rely on DaxfsError to release the dmabuf fd.
        mnt_dir.mkdir(parents=True, exist_ok=True)
        libc = _get_libc()

        # fsopen("daxfs", 0)
        fstype = b"daxfs"
        fs_fd = _syscall(libc, SYS_FSOPEN, ctypes.c_char_p(fstype), 0)

        # fsconfig(fs_fd, FSCONFIG_SET_FD, "dmabuf", NULL, dmabuf_fd)
        _syscall(libc, SYS_FSCONFIG, fs_fd, FSCONFIG_SET_FD,
                 ctypes.c_char_p(b"dmabuf"), 0, dmabuf_fd)

        # fsconfig(fs_fd, FSCONFIG_SET_STRING, "name", instance_name, 0)
        name_bytes = instance_name.encode('utf-8')
        _syscall(libc, SYS_FSCONFIG, fs_fd, FSCONFIG_SET_STRING,
                 ctypes.c_char_p(b"name"), ctypes.c_char_p(name_bytes), 0)

        # fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)
        _syscall(libc, SYS_FSCONFIG, fs_fd, FSCONFIG_CMD_CREATE, 0, 0, 0)

        # fsmount(fs_fd, 0, 0)
        mnt_fd = _syscall(libc, SYS_FSMOUNT, fs_fd, 0, 0)

        # move_mount(mnt_fd, "", AT_FDCWD, mountpoint, MOVE_MOUNT_F_EMPTY_PATH)
        # fsencode honours the filesystem encoding instead of assuming UTF-8.
        mountpoint = os.fsencode(mnt_dir)
        _syscall(libc, SYS_MOVE_MOUNT, mnt_fd, ctypes.c_char_p(b""),
                 AT_FDCWD, ctypes.c_char_p(mountpoint), MOVE_MOUNT_F_EMPTY_PATH)
    except OSError as e:
        raise DaxfsError(f"Failed to mount daxfs: {e}") from e
    finally:
        # Close the mount and fs-context fds on both the success and the
        # failure path; -1 means the corresponding step was never reached.
        if mnt_fd >= 0:
            os.close(mnt_fd)
        if fs_fd >= 0:
            os.close(fs_fd)
356418

357419

358420
def create_daxfs_image(
@@ -371,7 +433,7 @@ def create_daxfs_image(
371433
size: Size to allocate (if None, calculated automatically with 10% padding)
372434
373435
Returns:
374-
DaxfsImage with physical address, size, and dmabuf fd
436+
DaxfsImage with physical address and size
375437
376438
Raises:
377439
DaxfsError: If image creation fails
@@ -408,19 +470,18 @@ def create_daxfs_image(
408470
raise DaxfsError(f"Failed to write daxfs image: {e}") from e
409471

410472
try:
411-
phys_addr, actual_size = _get_daxfs_iomem()
473+
_mount_daxfs(instance_name, dmabuf_fd)
412474
except DaxfsError:
413475
os.close(dmabuf_fd)
414476
raise
415477

416-
try:
417-
_mount_daxfs(instance_name, phys_addr, actual_size)
418-
except DaxfsError:
419-
os.close(dmabuf_fd)
420-
raise
478+
# daxfs now holds a reference to the dma-buf, safe to close
479+
os.close(dmabuf_fd)
480+
481+
# Get the physical address from /proc/iomem for the spawn kernel rootflags
482+
phys_addr, actual_size = _get_daxfs_iomem()
421483

422484
return DaxfsImage(
423485
phys_addr=phys_addr,
424486
size=actual_size,
425-
dmabuf_fd=dmabuf_fd,
426487
)

0 commit comments

Comments
 (0)