diff --git a/CLAUDE.md b/CLAUDE.md index 43d2fe5..a3f5cc0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,11 +37,11 @@ There are no tests in this project. ### VM Launch Flow (NixOS module) -VMs run as system services (root) for PCI passthrough and sandboxing support. crosvm drops privileges before starting the guest. +Each VM runs under its own dynamic service user (`vmsilo-<name>`) via `DynamicUser=yes`. A privileged `ExecStartPre=+` script grants the dynamic user access to devices and sockets (ACLs, chown). Console relay and proxy services run as the configured desktop user. VMs start automatically when first accessed via socket activation: -1. `vm-run banking firefox` connects to `/run/vmsilo/banking-command.socket` +1. `vm-run banking firefox` connects to `/run/vmsilo/banking/command.socket` 2. Socket activation triggers `vmsilo-banking@.service` (proxy template) 3. Proxy requires `vmsilo-banking-vm.service`, which starts crosvm 4. Proxy waits for guest vsock:5000, then forwards command @@ -82,17 +82,30 @@ guestConfig = { ### Sockets and Devices -Files in `/run/vmsilo/`: +Files in `/run/vmsilo/<name>/` (per-VM subdirectory owned by `cfg.user`): | Path | Type | Purpose | |------|------|---------| -| `<name>-command.socket` | Socket | Socket activation for `vm-run` commands | -| `<name>-crosvm-control.socket` | Socket | crosvm control socket (VM management) | -| `<name>-console-backend.socket` | Socket | Serial console backend (crosvm connects here) | -| `<name>-console` | PTY | User-facing serial console (for `vm-shell`) | +| `<name>/command.socket` | Socket | Socket activation for `vm-run` commands | +| `<name>/crosvm-control.socket` | Socket | crosvm control socket (VM management) | +| `<name>/console-backend.socket` | Socket | Serial console backend (crosvm connects here) | +| `<name>/console` | PTY | User-facing serial console (for `vm-shell`) | +| `<name>/wayland-0` | Bind mount | Wayland socket (via `BindPaths`, if GPU enabled) | +| `<name>/pulse-native` | Bind mount | PulseAudio socket (via `BindPaths`, if sound enabled) | The
console relay service (`vmsilo-<name>-console-relay.service`) bridges crosvm to a PTY, allowing users to connect/disconnect without disrupting crosvm. +**Service User Isolation**: Each service runs under its own user: + +| Service | User | Method | +|---------|------|--------| +| `vmsilo-<name>-vm` | `vmsilo-<name>` | `DynamicUser=yes` | +| `vmsilo-<name>-console-relay` | `cfg.user` | Static (desktop user) | +| `vmsilo-<name>@` (proxy) | `cfg.user` | Static (desktop user) | +| `vm-switch-<netName>` | `vm-switch-<netName>` | `DynamicUser=yes` | + +Groups for device/socket access: `kvm` (KVM), `vfio` (VFIO container), `vmsilo-video` (Wayland ACL), `vmsilo-audio` (PulseAudio ACL), `vmsilo-net-<netName>` (vhost-user sockets). The VM service's `ExecStartPre=+` runs as root to set ACLs, chown VFIO devices, and set TAP interface ownership. + **Desktop Integration**: The module generates .desktop files for all applications in `guestPrograms`, allowing VM apps to appear in the host's desktop menu. Apps are organized into submenus named "VM: \<name\>" (e.g., "VM: banking" containing Firefox, Konsole). Each app launches via `vm-run`. Icons are copied from guest packages to ensure proper display. **Window Decoration Colors**: Each VM's `color` option is passed to crosvm via `--wayland-security-context app_id=vmsilo:<name>:<color>`. A KWin patch (`patches/`) reads this security context and applies the color to window decorations (title bar, frame). Serverside decorations are forced so colors are always visible. Text color auto-contrasts based on luminance.
@@ -139,7 +152,7 @@ Provides L2 switching for VM-to-VM networks: - **Location:** `vm-switch/` Rust crate - **Build:** `nix build .#vm-switch` - **Purpose:** Handles vhost-user protocol for VM network interfaces -- **Systemd:** One service per vmNetwork (`vm-switch-.service`) +- **Systemd:** One service per vmNetwork (`vm-switch-.service`), runs as dynamic user `vm-switch-` with group `vmsilo-net-` **CLI flags:** ``` diff --git a/modules/config.nix b/modules/config.nix index 6d33111..a1723b9 100644 --- a/modules/config.nix +++ b/modules/config.nix @@ -13,11 +13,17 @@ let userUid = config.users.users.${cfg.user}.uid; userRuntimeDir = "/run/user/${toString userUid}"; - # Default PulseAudio sound configuration - defaultSoundConfig = { + # ACL tool for ExecStartPre=+ scripts + acl = pkgs.acl; + + # iproute2 for TAP owner changes in ExecStartPre=+ scripts + iproute2 = pkgs.iproute2; + + # Default PulseAudio sound configuration (parameterized by VM for bind-mounted paths) + mkDefaultSoundConfig = vm: { backend = "pulse"; capture = false; - pulse_socket_path = "${userRuntimeDir}/pulse/native"; + pulse_socket_path = "/run/vmsilo/${vm.name}/pulse-native"; pulse_cookie_path = "/home/${cfg.user}/.config/pulse/cookie"; }; @@ -161,22 +167,39 @@ let getEffectiveVhostUser = vm: vm.vhostUser ++ (vmNetworkToVhostUser vm); # Generate vm-switch service for a network - mkVmSwitchService = netName: { - description = "vm-switch daemon for ${netName} network"; - after = [ - "network.target" - "systemd-tmpfiles-setup.service" - ]; - wants = [ "systemd-tmpfiles-setup.service" ]; - wantedBy = [ "multi-user.target" ]; + mkVmSwitchService = + netName: + let + execStartPreScript = pkgs.writeShellScript "vm-switch-${netName}-pre" '' + set -e + ${acl}/bin/setfacl -R -m u:vm-switch-${netName}:rwx /run/vm-switch/${netName}/ + ''; + in + { + description = "vm-switch daemon for ${netName} network"; + after = [ + "network.target" + "systemd-tmpfiles-setup.service" + ]; + wants = [ 
"systemd-tmpfiles-setup.service" ]; + wantedBy = [ "multi-user.target" ]; - serviceConfig = { - Type = "simple"; - ExecStart = "${cfg._internal.vm-switch}/bin/vm-switch -d /run/vm-switch/${netName} --log-level ${cfg.vm-switch.logLevel} ${lib.escapeShellArgs cfg.vm-switch.extraArgs}"; - Restart = "on-failure"; - RestartSec = "5s"; + serviceConfig = { + Type = "simple"; + ExecStart = "${cfg._internal.vm-switch}/bin/vm-switch -d /run/vm-switch/${netName} --log-level ${cfg.vm-switch.logLevel} ${lib.escapeShellArgs cfg.vm-switch.extraArgs}"; + Restart = "on-failure"; + RestartSec = "5s"; + + # Service user isolation + DynamicUser = true; + User = "vm-switch-${netName}"; + Group = "vmsilo-net-${netName}"; + UMask = "0007"; + + # Privileged setup (runs as root via =+ prefix) + ExecStartPre = [ "+${execStartPreScript}" ]; + }; }; - }; # Generate shell case statement for VM dispatch mkVmCase = makeCase: '' @@ -264,7 +287,7 @@ let if vm.sound == false then null else if vm.sound == true then - defaultSoundConfig + mkDefaultSoundConfig vm else vm.sound; @@ -355,7 +378,7 @@ let ''} # Clean up stale socket - rm -f /run/vmsilo/${vm.name}-crosvm-control.socket + rm -f /run/vmsilo/${vm.name}/crosvm-control.socket exec ${cfg._internal.crosvm}/bin/crosvm \ --log-level=${effectiveLogLevel} \ @@ -366,7 +389,7 @@ let --name ${vm.name} \ -m ${toString vm.memory} \ --initrd=${initramfsPath} \ - --serial=hardware=virtio-console,type=unix-stream,path=/run/vmsilo/${vm.name}-console-backend.socket,console,input-unix-stream \ + --serial=hardware=virtio-console,type=unix-stream,path=/run/vmsilo/${vm.name}/console-backend.socket,console,input-unix-stream \ ${formatBlockArg rootDiskConfig} \ ${additionalDisksArgs} \ ${lib.optionalString (rootfs != null) ''-p "init=${rootfs.config.system.build.toplevel}/init"''} \ @@ -389,8 +412,8 @@ let --cpus ${toString vm.cpus} \ ${lib.optionalString (effectiveGpu != null) "--gpu=${formatKVArgs "," effectiveGpu}"} \ ${lib.optionalString (effectiveSound != null) 
"--virtio-snd=${formatKVArgs "," effectiveSound}"} \ - -s /run/vmsilo/${vm.name}-crosvm-control.socket \ - --wayland-security-context wayland_socket=${userRuntimeDir}/wayland-0,app_id=vmsilo:${vm.name}:${vm.color} \ + -s /run/vmsilo/${vm.name}/crosvm-control.socket \ + --wayland-security-context wayland_socket=/run/vmsilo/${vm.name}/wayland-0,app_id=vmsilo:${vm.name}:${vm.color} \ ${vfioArgs} \ ${vhostUserArgs} \ ${lib.escapeShellArgs allExtraRunArgs} \ @@ -407,7 +430,7 @@ let VM_NAME="$1" shift - SOCKET="/run/vmsilo/$VM_NAME-command.socket" + SOCKET="/run/vmsilo/$VM_NAME/command.socket" if [ ! -S "$SOCKET" ]; then echo "Unknown VM or socket not active: $VM_NAME" >&2 @@ -435,7 +458,20 @@ let VM_NAME="$1" - ${mkVmCase (vm: "${vm.name}) exec ${mkVmScript vm} ;;")} + ${mkVmCase ( + vm: + let + hasGpu = if vm.gpu == false then false else true; + hasSound = if vm.sound == false then false else true; + in + '' + ${vm.name}) + mkdir -p /run/vmsilo/${vm.name} + ${lib.optionalString hasGpu "ln -sf ${userRuntimeDir}/wayland-0 /run/vmsilo/${vm.name}/wayland-0"} + ${lib.optionalString hasSound "ln -sf ${userRuntimeDir}/pulse/native /run/vmsilo/${vm.name}/pulse-native"} + exec ${mkVmScript vm} + ;;'' + )} ''; # vm-start: Start VM via systemd (uses polkit for authorization) @@ -517,7 +553,7 @@ let fi ${mkVmCase (vm: "${vm.name}) exec ${pkgs.openssh}/bin/ssh $USER_NAME@vsock/${toString vm.id} ;;")} else - CONSOLE="/run/vmsilo/$VM_NAME-console" + CONSOLE="/run/vmsilo/$VM_NAME/console" if [ ! -e "$CONSOLE" ]; then echo "Console not found: $CONSOLE" >&2 echo "Is the VM running? 
Use: vm-start $VM_NAME" >&2 @@ -856,6 +892,14 @@ let in { config = lib.mkIf cfg.enable { + # Groups for service user isolation + users.groups = { + vfio = { }; + vmsilo-video = { }; + vmsilo-audio = { }; + } + // lib.listToAttrs (map (netName: lib.nameValuePair "vmsilo-net-${netName}" { }) allVmNetworkNames); + # Override kwin to add VM decoration color support via security context nixpkgs.overlays = [ (final: prev: { @@ -925,6 +969,12 @@ in message = "VM network '${netName}' must have exactly one router. Found ${toString (routerCount netName)}."; }) allVmNetworkNames; + # udev rules for device access by service users + services.udev.extraRules = '' + KERNEL=="kvm", GROUP="kvm", MODE="0660" + SUBSYSTEM=="vfio", KERNEL=="vfio", GROUP="vfio", MODE="0660" + ''; + # Enable IP forwarding boot.kernel.sysctl."net.ipv4.ip_forward" = 1; @@ -1042,7 +1092,6 @@ in vm: lib.nameValuePair "tap${vm.name}" { virtual = true; - virtualOwner = cfg.user; ipv4.addresses = [ { address = "${networkBase}.${toString (vm.id - 1)}"; @@ -1065,6 +1114,8 @@ in systemd.tmpfiles.rules = [ "d /run/vmsilo 0755 root root -" ] + # Per-VM subdirectories owned by the desktop user + ++ map (vm: "d /run/vmsilo/${vm.name} 0755 ${cfg.user} root -") cfg.nixosVms ++ lib.optionals (allVmNetworkNames != [ ]) [ "d /run/vm-switch 0755 root root -" ] ++ lib.concatMap ( netName: @@ -1080,7 +1131,7 @@ in description = "vmsilo socket for ${vm.name}"; wantedBy = [ "sockets.target" ]; socketConfig = { - ListenStream = "/run/vmsilo/${vm.name}-command.socket"; + ListenStream = "/run/vmsilo/${vm.name}/command.socket"; Accept = true; SocketUser = cfg.user; SocketGroup = "root"; @@ -1090,21 +1141,94 @@ in ) cfg.nixosVms ); - # Systemd system services for VMs (run as root for PCI passthrough and sandboxing) + # Systemd system services for VMs (run under dynamic service users) systemd.services = lib.listToAttrs ( - # VM services (run crosvm as root) + # VM services (run crosvm under per-VM dynamic user) map ( vm: + let + 
hasGpu = vm.gpu != false; + hasSound = vm.sound != false; + hasPci = vm.pciDevices != [ ]; + vmNetworks = lib.attrNames vm.vmNetwork; + + # PCI device BDFs for VFIO chown in ExecStartPre + pciBdfs = map ( + dev: + if !(lib.hasPrefix "/" dev.path) then + normalizeBdf dev.path + else + let + parts = lib.splitString "/" dev.path; + bdfPart = lib.last (lib.filter (p: p != "") parts); + in + normalizeBdf bdfPart + ) vm.pciDevices; + + # Privileged setup script (runs as root via =+ prefix) + execStartPreScript = pkgs.writeShellScript "vmsilo-${vm.name}-pre" '' + set -e + + # Grant dynamic user write access to per-VM socket directory + ${acl}/bin/setfacl -m u:vmsilo-${vm.name}:rwx /run/vmsilo/${vm.name}/ + + ${lib.optionalString hasGpu '' + # Wayland socket ACL (skip if socket does not exist yet) + if [ -e ${userRuntimeDir}/wayland-0 ]; then + ${acl}/bin/setfacl -m g:vmsilo-video:rw ${userRuntimeDir}/wayland-0 + fi + ''} + + ${lib.optionalString hasSound '' + # PulseAudio socket ACL (skip if socket does not exist yet) + if [ -e ${userRuntimeDir}/pulse/native ]; then + ${acl}/bin/setfacl -m g:vmsilo-audio:rw ${userRuntimeDir}/pulse/native + fi + ''} + + ${lib.optionalString hasPci '' + # VFIO device ownership (abort if a device has no IOMMU group instead of chowning /dev/vfio/ itself) + ${lib.concatMapStringsSep "\n" (bdf: '' + IOMMU_GROUP=$(basename "$(readlink /sys/bus/pci/devices/${bdf}/iommu_group)") + chown vmsilo-${vm.name} "/dev/vfio/''${IOMMU_GROUP:?no IOMMU group for PCI device ${bdf}}" + '') pciBdfs} + ''} + + ${lib.optionalString vm.hostNetworking '' + # TAP interface ownership + ${iproute2}/bin/ip link set tap${vm.name} owner $(id -u vmsilo-${vm.name}) + ''} + ''; + in lib.nameValuePair "vmsilo-${vm.name}-vm" { description = "vmsilo VM: ${vm.name}"; after = [ "network.target" ] - ++ map (netName: "vm-switch-${netName}.service") (lib.attrNames vm.vmNetwork); - requires = map (netName: "vm-switch-${netName}.service") (lib.attrNames vm.vmNetwork); + ++ map (netName: "vm-switch-${netName}.service") vmNetworks; + requires = map (netName: "vm-switch-${netName}.service") vmNetworks;
serviceConfig = { Type = "simple"; ExecStart = "${mkVmScript vm}"; + + # Service user isolation + DynamicUser = true; + User = "vmsilo-${vm.name}"; + SupplementaryGroups = [ + "kvm" + ] + ++ lib.optional hasPci "vfio" + ++ lib.optional hasGpu "vmsilo-video" + ++ lib.optional hasSound "vmsilo-audio" + ++ map (netName: "vmsilo-net-${netName}") vmNetworks; + + # Privileged setup (runs as root via =+ prefix) + ExecStartPre = [ "+${execStartPreScript}" ]; + + # Bind-mount wayland/pulse sockets into the per-VM directory ("-" prefix: ignore a source socket that does not exist yet, matching the ExecStartPre checks) + BindPaths = + lib.optional hasGpu "-${userRuntimeDir}/wayland-0:/run/vmsilo/${vm.name}/wayland-0" + ++ lib.optional hasSound "-${userRuntimeDir}/pulse/native:/run/vmsilo/${vm.name}/pulse-native"; }; } ) cfg.nixosVms @@ -1121,6 +1245,7 @@ in StandardInput = "socket"; StandardOutput = "socket"; ExecStart = "${mkProxyScript vm}"; + User = cfg.user; }; } ) cfg.nixosVms @@ -1142,12 +1267,13 @@ in serviceConfig = { Type = "simple"; + User = cfg.user; ExecStartPre = [ - "-${pkgs.coreutils}/bin/rm -f /run/vmsilo/${vm.name}-console-backend.socket" - "-${pkgs.coreutils}/bin/rm -f /run/vmsilo/${vm.name}-console" + "-${pkgs.coreutils}/bin/rm -f /run/vmsilo/${vm.name}/console-backend.socket" + "-${pkgs.coreutils}/bin/rm -f /run/vmsilo/${vm.name}/console" ]; # PTY slave is created as a symlink that users can open - ExecStart = "${pkgs.socat}/bin/socat UNIX-LISTEN:/run/vmsilo/${vm.name}-console-backend.socket,fork,reuseaddr PTY,link=/run/vmsilo/${vm.name}-console,raw,echo=0,user=${toString userUid},mode=0600"; + ExecStart = "${pkgs.socat}/bin/socat UNIX-LISTEN:/run/vmsilo/${vm.name}/console-backend.socket,fork,reuseaddr PTY,link=/run/vmsilo/${vm.name}/console,raw,echo=0,mode=0600"; Restart = "on-failure"; RestartSec = "1s"; };