fix: add missing syscalls to GPU device seccomp allowlist
The allowlist was derived from crosvm's gpu_common + gpu_device seccomp policies, but those are applied after process startup. systemd applies the filter before exec, so process lifecycle (execve, wait4, arch_prctl, set_tid_address), capability management (capget, capset), identity query (getpgrp, getppid, getresuid, getresgid), and socket server (bind, listen, accept4, socketpair) syscalls are also needed. Also create a shader cache directory at /run/vmsilo/<name>/gpu/shader-cache and set __GL_SHADER_DISK_CACHE_PATH to it, so the GPU device backend doesn't fail trying to create a shader cache under /home, which does not exist in the sandboxed mount namespace. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e9495f043d
commit
21e0a68023
2 changed files with 34 additions and 16 deletions
|
|
@ -164,24 +164,26 @@ RestrictNamespaces = yes
|
|||
**Seccomp allowlist** (from crosvm `gpu_common.policy` + `gpu_device.policy`):
|
||||
|
||||
```
|
||||
access brk clock_gettime clock_nanosleep clone clone3 close connect
|
||||
dup dup2 epoll_create1 epoll_ctl epoll_pwait epoll_wait eventfd2
|
||||
exit exit_group fallocate fcntl flock fstat fstatfs ftruncate futex
|
||||
getcwd getdents getdents64 getegid geteuid getgid getpid getrandom
|
||||
getsockopt gettid gettimeofday getuid inotify_add_watch inotify_init1
|
||||
inotify_rm_watch io_uring_enter io_uring_register io_uring_setup ioctl
|
||||
kcmp kill lseek lstat madvise membarrier memfd_create mkdir mknodat
|
||||
mmap mprotect mremap munmap nanosleep newfstatat open openat pipe2
|
||||
poll ppoll prctl pread64 prlimit64 read readlink readlinkat readv
|
||||
recvfrom recvmsg rename restart_syscall rseq rt_sigaction rt_sigprocmask
|
||||
rt_sigreturn sched_get_priority_max sched_get_priority_min
|
||||
sched_getaffinity sched_setaffinity sched_setscheduler sched_yield
|
||||
sendmmsg sendmsg sendto set_robust_list setpriority setsockopt shutdown
|
||||
sigaltstack socket stat statfs statx sysinfo tgkill uname unlink
|
||||
unlinkat userfaultfd write writev
|
||||
accept4 access arch_prctl bind brk capget capset clock_gettime
|
||||
clock_nanosleep clone clone3 close connect dup dup2 epoll_create1
|
||||
epoll_ctl epoll_pwait epoll_wait eventfd2 execve exit exit_group
|
||||
fallocate fcntl flock fstat fstatfs ftruncate futex getcwd getdents
|
||||
getdents64 getegid geteuid getgid getpgrp getpid getppid getrandom
|
||||
getresgid getresuid getsockopt gettid gettimeofday getuid
|
||||
inotify_add_watch inotify_init1 inotify_rm_watch io_uring_enter
|
||||
io_uring_register io_uring_setup ioctl kcmp kill listen lseek lstat
|
||||
madvise membarrier memfd_create mkdir mknodat mmap mprotect mremap
|
||||
munmap nanosleep newfstatat open openat pipe2 poll ppoll prctl pread64
|
||||
prlimit64 read readlink readlinkat readv recvfrom recvmsg rename
|
||||
restart_syscall rseq rt_sigaction rt_sigprocmask rt_sigreturn
|
||||
sched_get_priority_max sched_get_priority_min sched_getaffinity
|
||||
sched_setaffinity sched_setscheduler sched_yield sendmmsg sendmsg
|
||||
sendto set_robust_list set_tid_address setpriority setsockopt shutdown
|
||||
sigaltstack socket socketpair stat statfs statx sysinfo tgkill uname
|
||||
unlink unlinkat userfaultfd wait4 write writev
|
||||
```
|
||||
|
||||
Note: systemd cannot do argument-level filtering like crosvm's seccomp policies (e.g., restricting `socket()` to AF_UNIX, `clone()` to CLONE_THREAD, `mmap` to specific PROT flags). The syscall set is identical but without argument restrictions.
|
||||
Note: This allowlist is a superset of crosvm's `gpu_common.policy` + `gpu_device.policy` — it additionally includes process lifecycle syscalls (`execve`, `wait4`, `arch_prctl`, `set_tid_address`), capability management (`capget`, `capset`), identity queries (`getpgrp`, `getppid`, `getresuid`, `getresgid`), and socket server syscalls (`bind`, `listen`, `accept4`, `socketpair`) that crosvm's own seccomp policies don't need: crosvm applies its policy after startup, whereas systemd installs the filter before `execve`, so everything the process does during startup must also be allowed. systemd also cannot do argument-level filtering like crosvm's seccomp policies (e.g., restricting `socket()` to AF_UNIX, `clone()` to CLONE_THREAD, `mmap` to specific PROT flags).
|
||||
|
||||
**Service dependencies:**
|
||||
- `After=vmsilo-<vmname>-wayland-seccontext.service`
|
||||
|
|
|
|||
|
|
@ -58,8 +58,13 @@ let
|
|||
vm.gpu;
|
||||
|
||||
gpuSyscallAllowlist = [
|
||||
"accept4"
|
||||
"access"
|
||||
"arch_prctl"
|
||||
"bind"
|
||||
"brk"
|
||||
"capget"
|
||||
"capset"
|
||||
"clock_gettime"
|
||||
"clock_nanosleep"
|
||||
"clone"
|
||||
|
|
@ -73,6 +78,7 @@ let
|
|||
"epoll_pwait"
|
||||
"epoll_wait"
|
||||
"eventfd2"
|
||||
"execve"
|
||||
"exit"
|
||||
"exit_group"
|
||||
"fallocate"
|
||||
|
|
@ -88,8 +94,12 @@ let
|
|||
"getegid"
|
||||
"geteuid"
|
||||
"getgid"
|
||||
"getpgrp"
|
||||
"getpid"
|
||||
"getppid"
|
||||
"getrandom"
|
||||
"getresgid"
|
||||
"getresuid"
|
||||
"getsockopt"
|
||||
"gettid"
|
||||
"gettimeofday"
|
||||
|
|
@ -103,6 +113,7 @@ let
|
|||
"ioctl"
|
||||
"kcmp"
|
||||
"kill"
|
||||
"listen"
|
||||
"lseek"
|
||||
"lstat"
|
||||
"madvise"
|
||||
|
|
@ -146,11 +157,13 @@ let
|
|||
"sendmsg"
|
||||
"sendto"
|
||||
"set_robust_list"
|
||||
"set_tid_address"
|
||||
"setpriority"
|
||||
"setsockopt"
|
||||
"shutdown"
|
||||
"sigaltstack"
|
||||
"socket"
|
||||
"socketpair"
|
||||
"stat"
|
||||
"statfs"
|
||||
"statx"
|
||||
|
|
@ -160,6 +173,7 @@ let
|
|||
"unlink"
|
||||
"unlinkat"
|
||||
"userfaultfd"
|
||||
"wait4"
|
||||
"write"
|
||||
"writev"
|
||||
];
|
||||
|
|
@ -252,6 +266,7 @@ in
|
|||
${pkgs.coreutils}/bin/install -d -m 0755 -o ${toString userUid} -g ${toString userGid} \
|
||||
/run/vmsilo/${vm.name} \
|
||||
/run/vmsilo/${vm.name}/gpu \
|
||||
/run/vmsilo/${vm.name}/gpu/shader-cache \
|
||||
/run/vmsilo/${vm.name}/sound
|
||||
${pkgs.coreutils}/bin/install -d -m 0755 \
|
||||
/run/vmsilo/${vm.name}/virtiofs
|
||||
|
|
@ -571,6 +586,7 @@ in
|
|||
|
||||
environment = {
|
||||
LD_LIBRARY_PATH = "${pkgs.vulkan-loader}/lib";
|
||||
__GL_SHADER_DISK_CACHE_PATH = "/run/vmsilo/${vm.name}/gpu/shader-cache";
|
||||
};
|
||||
|
||||
serviceConfig = {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue