diff --git a/docs/superpowers/plans/2026-03-23-ch-sandboxing.md b/docs/superpowers/plans/2026-03-23-ch-sandboxing.md new file mode 100644 index 0000000..b6af292 --- /dev/null +++ b/docs/superpowers/plans/2026-03-23-ch-sandboxing.md @@ -0,0 +1,797 @@ +# Cloud-Hypervisor Sandboxing Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Harden cloud-hypervisor VM services with Landlock, systemd namespace isolation, confinement, and capability dropping. + +**Architecture:** Split the monolithic CH launch script into a sandboxed ExecStart (raw CH binary) + privileged ExecStartPost=+ (orchestration). Use NixOS confinement for filesystem isolation. Pass TAP FDs via a new Rust helper + `ch-remote add-net`. Enable CH's built-in Landlock. + +**Tech Stack:** Nix (NixOS module), Rust (TAP helper), systemd (namespacing/confinement) + +**Spec:** `docs/superpowers/specs/2026-03-23-ch-sandboxing-design.md` + +--- + +## File Structure + +**New files:** +- `vmsilo-tools/Cargo.toml` — Rust workspace manifest +- `vmsilo-tools/tap-open/Cargo.toml` — TAP FD helper crate +- `vmsilo-tools/tap-open/src/main.rs` — opens TAP, execs ch-remote add-net + +**Modified files:** +- `modules/options.nix` — add `cloud-hypervisor.disableSandbox` option, add `_internal.vmsilo-tools` package option +- `modules/networking.nix:103` — change `virtualOwner` from `cfg.user` to `"root"` +- `modules/services.nix:62-90` — change prep service to make top-level runtime dir root-owned +- `modules/services.nix:92-141` — rewrite `mkVmServices` for CH: confinement, split ExecStart/ExecStartPost/ExecStopPost +- `modules/services.nix:704-713` — update tmpfiles.rules ownership +- `modules/scripts.nix:117-207` — replace `mkCloudHypervisorVmScript` with `mkChExecStartPostScript` (privileged orchestration) +- 
`modules/lib/vm-config.nix:389-392` — remove `chNetworkEntries` from JSON config, expose as separate attr +- `modules/lib/vm-config.nix:514-527` — update `ch` attrset exports +- `flake.nix` — add `buildVmsiloTools` builder, wire into module + +--- + +### Task 1: Create vmsilo-tools Rust workspace with tap-open binary + +**Files:** +- Create: `vmsilo-tools/Cargo.toml` +- Create: `vmsilo-tools/tap-open/Cargo.toml` +- Create: `vmsilo-tools/tap-open/src/main.rs` + +The `vmsilo-tap-open` binary opens `/dev/net/tun`, attaches to a named TAP via `TUNSETIFF` ioctl, then execs a given command with the TAP FD inherited. Usage: `vmsilo-tap-open <tap-name> -- <command> [args...]`. Any `{TAP_FD}` placeholder in the command arguments is replaced with the actual FD number before exec. + +- [ ] **Step 1: Create workspace Cargo.toml** + +Create `vmsilo-tools/Cargo.toml`: + +```toml +[workspace] +members = ["tap-open"] +resolver = "2" +``` + +- [ ] **Step 2: Create tap-open crate** + +Create `vmsilo-tools/tap-open/Cargo.toml`: + +```toml +[package] +name = "vmsilo-tap-open" +version = "0.1.0" +edition = "2021" + +[dependencies] +nix = { version = "0.29", features = ["ioctl", "fs", "process"] } +libc = "0.2" +anyhow = "1" +``` + +- [ ] **Step 3: Implement tap-open** + +Create `vmsilo-tools/tap-open/src/main.rs`: + +```rust +//! Opens a TAP device by name and execs a command with the FD inherited. +//! +//! Usage: vmsilo-tap-open <tap-name> -- <command> [args...] +//! +//! Any occurrence of `{TAP_FD}` in command arguments is replaced with the +//! actual FD number (since execvp does not perform shell variable expansion). + +use anyhow::{bail, Context, Result}; +use nix::fcntl::OFlag; +use nix::sys::stat::Mode; +use std::env; +use std::ffi::CString; +use std::os::fd::AsRawFd; + +// TUNSETIFF is a legacy ioctl encoded as _IOW('T', 202, int) even though it +// takes a struct ifreq pointer, so the request number must come from libc's +// constant rather than be derived from the argument type (ioctl_write_ptr! +// would encode sizeof(ifreq) and produce the wrong number). +nix::ioctl_write_ptr_bad!(tunsetiff, libc::TUNSETIFF, libc::ifreq); + +fn main() -> Result<()> { + let args: Vec<String> = env::args().collect(); + + // Parse: vmsilo-tap-open <tap-name> -- <command> [args...]
+ let sep = args + .iter() + .position(|a| a == "--") + .context("usage: vmsilo-tap-open <tap-name> -- <command> [args...]")?; + if sep < 2 || sep + 1 >= args.len() { + bail!("usage: vmsilo-tap-open <tap-name> -- <command> [args...]"); + } + let tap_name = &args[1]; + let cmd_args = &args[sep + 1..]; + + if tap_name.len() > libc::IFNAMSIZ - 1 { + bail!("TAP name too long: {tap_name}"); + } + + // Open /dev/net/tun + let tun_fd = nix::fcntl::open( + "/dev/net/tun", + OFlag::O_RDWR | OFlag::O_CLOEXEC, + Mode::empty(), + ) + .context("failed to open /dev/net/tun")?; + + let raw_fd = tun_fd.as_raw_fd(); + + // TUNSETIFF to attach to existing TAP + let mut ifr: libc::ifreq = unsafe { std::mem::zeroed() }; + let name_bytes = tap_name.as_bytes(); + unsafe { + std::ptr::copy_nonoverlapping( + name_bytes.as_ptr(), + ifr.ifr_name.as_mut_ptr() as *mut u8, + name_bytes.len(), + ); + ifr.ifr_ifru.ifru_flags = (libc::IFF_TAP | libc::IFF_NO_PI) as i16; + tunsetiff(raw_fd, &ifr).context("TUNSETIFF failed — is the TAP device created?")?; + } + + // Clear CLOEXEC so the fd survives exec + nix::fcntl::fcntl(raw_fd, nix::fcntl::FcntlArg::F_SETFD(nix::fcntl::FdFlag::empty())) + .context("failed to clear CLOEXEC")?; + + // Prevent OwnedFd from closing the fd on drop (we need it to survive exec) + std::mem::forget(tun_fd); + + // Replace {TAP_FD} in command arguments with the actual fd number + let fd_str = raw_fd.to_string(); + let c_cmd = CString::new(cmd_args[0].clone())?; + let c_args: Vec<CString> = cmd_args + .iter() + .map(|a| CString::new(a.replace("{TAP_FD}", &fd_str)).unwrap()) + .collect(); + + nix::unistd::execvp(&c_cmd, &c_args).context("exec failed")?; + unreachable!() +} +``` + +- [ ] **Step 4: Verify it builds** + +```bash +cd vmsilo-tools && cargo build 2>&1 | tail -5 +``` + +Expected: successful build, binary at `target/debug/vmsilo-tap-open`.
+ +- [ ] **Step 5: Commit** + +```bash +git add vmsilo-tools/ +git commit -m "Add vmsilo-tools workspace with tap-open helper + +vmsilo-tap-open opens a TAP device by name via TUNSETIFF and execs +a command with the FD inherited. Used for passing TAP FDs to +cloud-hypervisor via ch-remote add-net." +``` + +--- + +### Task 2: Add vmsilo-tools to flake.nix and module options + +**Files:** +- Modify: `flake.nix:65-110` (add builder, alongside existing Rust crate builders) +- Modify: `modules/options.nix:1049-1053` (add `_internal.vmsilo-tools` package option) + +- [ ] **Step 1: Add build function in flake.nix** + +In `flake.nix`, after the `buildVmsiloWaylandSeccontext` function (around line 110), add: + +```nix + # Build vmsilo-tools workspace + buildVmsiloTools = + system: + let + pkgs = nixpkgs.legacyPackages.${system}; + in + pkgs.rustPlatform.buildRustPackage { + pname = "vmsilo-tools"; + version = "0.1.0"; + src = ./vmsilo-tools; + cargoLock = { + lockFile = ./vmsilo-tools/Cargo.lock; + }; + }; +``` + +- [ ] **Step 2: Wire into module args** + +Find where existing packages are passed to the module (search for `vmsilo-wayland-seccontext` in `flake.nix` module config). 
Add `vmsilo-tools` alongside it: + +```nix +vmsilo-tools = buildVmsiloTools system; +``` + +- [ ] **Step 3: Add _internal option** + +In `modules/options.nix`, after the `vmsilo-wayland-seccontext` option (line 1053), add: + +```nix + "vmsilo-tools" = lib.mkOption { + type = lib.types.package; + description = "vmsilo-tools package (injected by flake)."; + internal = true; + }; +``` + +- [ ] **Step 4: Verify nix evaluation** + +```bash +nix eval .#nixosModules.default --apply 'x: "ok"' 2>&1 | head -5 +``` + +- [ ] **Step 5: Commit** + +```bash +git add flake.nix modules/options.nix +git commit -m "Wire vmsilo-tools package into flake and module options" +``` + +--- + +### Task 3: Add cloud-hypervisor.disableSandbox option + +**Files:** +- Modify: `modules/options.nix:806-842` (add `disableSandbox` to CH submodule options) +- Modify: `modules/lib/vm-config.nix:514-527` (export the new value) + +- [ ] **Step 1: Add option declaration** + +In `modules/options.nix`, inside the `cloud-hypervisor` submodule options block (after `seccompPolicy` at line 826), add: + +```nix + disableSandbox = lib.mkOption { + type = lib.types.bool; + default = false; + description = "Disable Landlock and systemd hardening for this VM's cloud-hypervisor instance. 
Seccomp is controlled separately by seccompPolicy."; + }; +``` + +- [ ] **Step 2: Export from vm-config.nix** + +In `modules/lib/vm-config.nix`, add to the `ch` attrset (around line 525): + +```nix + disableSandbox = vm.cloud-hypervisor.disableSandbox; +``` + +- [ ] **Step 3: Format** + +```bash +nix fmt +``` + +- [ ] **Step 4: Commit** + +```bash +git add modules/options.nix modules/lib/vm-config.nix +git commit -m "Add cloud-hypervisor.disableSandbox option" +``` + +--- + +### Task 4: Change TAP and runtime directory ownership + +**Files:** +- Modify: `modules/networking.nix:103` — change `virtualOwner` +- Modify: `modules/services.nix:79-84` — change prep service ownership +- Modify: `modules/services.nix:709` — change tmpfiles.rules ownership + +- [ ] **Step 1: Change TAP virtualOwner** + +In `modules/networking.nix:103`, change: + +```nix + virtualOwner = cfg.user; +``` + +to: + +```nix + virtualOwner = "root"; +``` + +- [ ] **Step 2: Change prep service directory ownership** + +In `modules/services.nix:79-84`, change the first `install` command to make only subdirectories user-owned, not the top-level dir: + +```nix + ExecStart = pkgs.writeShellScript "vmsilo-prep-${vm.name}" '' + ${pkgs.coreutils}/bin/install -d -m 0755 \ + /run/vmsilo/${vm.name} + ${pkgs.coreutils}/bin/install -d -m 0755 -o ${toString userUid} -g ${toString userGid} \ + /run/vmsilo/${vm.name}/gpu \ + /run/vmsilo/${vm.name}/gpu/shader-cache \ + /run/vmsilo/${vm.name}/sound + ${pkgs.coreutils}/bin/install -d -m 0755 \ + /run/vmsilo/${vm.name}/virtiofs + ''; +``` + +- [ ] **Step 3: Change tmpfiles.rules ownership** + +In `modules/services.nix:709`, change the top-level runtime dir from user-owned to root-owned: + +```nix + "d /run/vmsilo/${vm.name} 0755 root root -" +``` + +Lines 710-711 (gpu, sound) keep user ownership. 
+ +- [ ] **Step 4: Format** + +```bash +nix fmt +``` + +- [ ] **Step 5: Commit** + +```bash +git add modules/networking.nix modules/services.nix +git commit -m "Change TAP and runtime directory ownership to root + +TAP interfaces: virtualOwner changed from cfg.user to root. Only the +hypervisor (running as root) opens TAPs, nothing needs user ownership. + +Runtime dir /run/vmsilo/{name}/: changed to root-owned so the +sandboxed VM service can create sockets without CAP_DAC_OVERRIDE. +Subdirectories (gpu/, sound/) remain user-owned for device backends." +``` + +--- + +### Task 5: Remove network entries from CH JSON config + +**Files:** +- Modify: `modules/lib/vm-config.nix:389-392` — keep `chNetworkEntries` computed but don't include in JSON +- Modify: `modules/lib/vm-config.nix:440-448` — remove `net` from `chBaseConfig` + +Network interfaces will be added via `ch-remote add-net` with FD passing instead of being in the JSON config. + +- [ ] **Step 1: Remove net from chBaseConfig** + +In `modules/lib/vm-config.nix`, find the line (around line 441): + +```nix + // lib.optionalAttrs (chNetworkEntries != [ ]) { net = chNetworkEntries; } +``` + +Delete this line entirely. + +- [ ] **Step 2: Export networkEntries for use by the ExecStartPost script** + +The `chNetworkEntries` and `networkEntries` are already exported in the return attrset. Verify `networkEntries` is still in the `inherit` block (around line 486). No changes needed — the orchestration script will use these entries to call `ch-remote add-net`. + +- [ ] **Step 3: Format and verify** + +```bash +nix fmt +``` + +- [ ] **Step 4: Commit** + +```bash +git add modules/lib/vm-config.nix +git commit -m "Remove network entries from CH JSON config + +Network interfaces will be added via ch-remote add-net with TAP FD +passing from ExecStartPost, enabling PrivateNetwork=true." 
+``` + +--- + +### Task 6: Rewrite CH launch flow — ExecStartPost script + +**Files:** +- Modify: `modules/scripts.nix:117-207` — replace `mkCloudHypervisorVmScript` with new functions + +Replace the monolithic shell wrapper with: +1. `mkChExecStartArgs` — returns the CH binary + args for direct ExecStart +2. `mkChExecStartPostScript` — privileged orchestration (create, add-net, boot, chown) + +- [ ] **Step 1: Replace mkCloudHypervisorVmScript** + +In `modules/scripts.nix`, replace the `mkCloudHypervisorVmScript` function (lines 117-207) with: + +```nix + # Generate cloud-hypervisor ExecStart script (thin wrapper for correct arg handling) + mkChExecStartScript = + vm: + let + c = mkVmConfig vm; + landlock = lib.optionalString (!c.ch.disableSandbox) "--landlock"; + in + pkgs.writeShellScript "vmsilo-ch-${vm.name}" '' + exec ${c.ch.bin} \ + --api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \ + --seccomp ${c.ch.seccompArg} \ + ${landlock} \ + ${lib.escapeShellArgs c.ch.effectiveExtraArgs} \ + ${lib.concatStringsSep " " c.ch.verbosityArgs} + ''; + + # Generate cloud-hypervisor ExecStartPost script (privileged orchestration) + mkChExecStartPostScript = + vm: + let + c = mkVmConfig vm; + apiSocket = "/run/vmsilo/${vm.name}/cloud-hypervisor-control.socket"; + chRemote = "${c.ch.remote} --api-socket ${apiSocket}"; + tapOpen = "${cfg._internal."vmsilo-tools"}/bin/vmsilo-tap-open"; + + # Build add-net commands for each TAP interface + # {TAP_FD} is replaced by vmsilo-tap-open with the actual fd number + addNetCommands = lib.concatMapStringsSep "\n" (ne: '' + ${tapOpen} ${ne.tapName} -- \ + ${c.ch.remote} --api-socket ${apiSocket} \ + add-net "fd=[{TAP_FD}],mac=${ne.mac}" + '') c.networkEntries; + in + pkgs.writeShellScript "vmsilo-post-${vm.name}" '' + set -e + + # Wait for API socket to appear (up to 30s) + ELAPSED=0 + while [ $ELAPSED -lt 60 ] && [ ! -S ${apiSocket} ]; do + sleep 0.5 + ELAPSED=$((ELAPSED + 1)) + done + if [ ! 
-S ${apiSocket} ]; then + echo "Timeout waiting for cloud-hypervisor API socket" >&2 + exit 1 + fi + + # Wait for API server to be ready + while [ $ELAPSED -lt 60 ]; do + if ${chRemote} ping 2>/dev/null; then + break + fi + sleep 0.5 + ELAPSED=$((ELAPSED + 1)) + done + if ! ${chRemote} ping 2>/dev/null; then + echo "Timeout waiting for cloud-hypervisor API" >&2 + exit 1 + fi + + # Wait for vhost-user backend sockets + ${c.socketWaitScript ""} + + # Create VM + ${chRemote} create -- ${c.ch.configFile} + + # Add TAP network interfaces via FD passing + ${addNetCommands} + + # Boot VM + ${chRemote} boot + + # Discover serial PTY and symlink + CONSOLE_PTY=$(${chRemote} info | ${pkgs.jq}/bin/jq -r '.config.serial.file') + if [ -z "$CONSOLE_PTY" ] || [ "$CONSOLE_PTY" = "null" ]; then + echo "Failed to discover serial PTY" >&2 + exit 1 + fi + ln -sf "$CONSOLE_PTY" /run/vmsilo/${vm.name}/console + chown ${toString cfg._internal.userUid} /run/vmsilo/${vm.name}/console + + # Wait for vsock socket and chown + ELAPSED=0 + while [ ! -S /run/vmsilo/${vm.name}/vsock.socket ] && [ "$ELAPSED" -lt 120 ]; do + sleep 0.5 + ELAPSED=$((ELAPSED + 1)) + done + if [ ! -S /run/vmsilo/${vm.name}/vsock.socket ]; then + echo "Timeout waiting for vsock socket" >&2 + exit 1 + fi + chown ${toString cfg._internal.userUid} /run/vmsilo/${vm.name}/vsock.socket + ''; +``` + +- [ ] **Step 2: Update mkVmScript dispatcher** + +In `modules/scripts.nix`, the `mkVmScript` function (around line 210) still selects the crosvm or CH script. For CH, it should now return the ExecStart args (not a script). But since `cfg._internal.vmScripts` is used by services.nix, we need to handle this differently. 
+ +Keep `mkVmScript` returning a valid (dummy) script for CH VMs so the code doesn't break between Task 6 and Task 7 commits: + +```nix + mkVmScript = + vm: + if vm.hypervisor == "crosvm" then + mkCrosvmVmScript vm + else + # Placeholder — CH VMs use ExecStart= directly (set in services.nix Task 7) + pkgs.writeShellScript "vmsilo-start-${vm.name}" '' + echo "ERROR: CH VM ${vm.name} should use direct ExecStart, not this script" >&2 + exit 1 + ''; +``` + +Also add `mkChExecStartArgs` to the exports consumed by services.nix. Check how scripts.nix exports are consumed — look for `cfg._internal.vmScripts`. + +- [ ] **Step 3: Update script exports** + +At the bottom of `modules/scripts.nix` where `_internal` values are set, add exports for the new CH-specific values. Find the block that sets `vmScripts` and add: + +```nix +chExecStartScripts = lib.listToAttrs ( + map (vm: lib.nameValuePair vm.name (mkChExecStartScript vm)) + (lib.filter (vm: vm.hypervisor == "cloud-hypervisor") vms) +); +chPostScripts = lib.listToAttrs ( + map (vm: lib.nameValuePair vm.name (mkChExecStartPostScript vm)) + (lib.filter (vm: vm.hypervisor == "cloud-hypervisor") vms) +); +``` + +- [ ] **Step 4: Format** + +```bash +nix fmt +``` + +- [ ] **Step 5: Commit** + +```bash +git add modules/scripts.nix +git commit -m "Rewrite CH launch flow: split into ExecStart + ExecStartPost + +ExecStart is the raw CH binary (no shell wrapper). +ExecStartPost is a privileged script that: +- Waits for API socket and vhost-user backends +- Creates VM via ch-remote create +- Adds TAP interfaces via vmsilo-tap-open + ch-remote add-net +- Boots VM, discovers PTY, chowns sockets" +``` + +--- + +### Task 7: Rewrite mkVmServices for CH sandboxing + +**Files:** +- Modify: `modules/services.nix:92-141` — rewrite `mkVmServices` to apply confinement and sandbox for CH VMs + +This is the core task. The VM service for CH VMs gets confinement, namespacing, and all hardening directives. crosvm VMs are unchanged. 
+ +- [ ] **Step 1: Rewrite mkVmServices** + +In `modules/services.nix`, rewrite `mkVmServices` (lines 92-141). The key change is splitting behavior by hypervisor type. For CH VMs, ExecStart is the raw binary, ExecStartPre/ExecStartPost/ExecStopPost use `+` prefix, and sandbox directives are applied. + +Replace lines 92-141 with: + +```nix + mkVmServices = map ( + vm: + let + c = mkVmConfig vm; + isCh = vm.hypervisor == "cloud-hypervisor"; + ephemeralPath = "/var/lib/vmsilo/${vm.name}-ephemeral.raw"; + + createEphemeral = pkgs.writeShellScript "create-ephemeral-${vm.name}" '' + truncate -s ${vm.rootOverlay.size} ${ephemeralPath} + ''; + deleteEphemeral = pkgs.writeShellScript "delete-ephemeral-${vm.name}" '' + rm -f ${ephemeralPath} + ''; + cleanupSocket = pkgs.writeShellScript "cleanup-socket-${vm.name}" '' + rm -f /run/vmsilo/${vm.name}/crosvm-control.socket + rm -f /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket + rm -f /run/vmsilo/${vm.name}/vsock.socket + ${lib.optionalString isCh '' + rm -f /run/vmsilo/${vm.name}/console + ''} + ''; + + # CH-specific: privileged pre/post scripts with + prefix + chStartPreScripts = + lib.optionals (vm.rootOverlay.type == "raw") [ "+${createEphemeral}" ] + ++ [ "+${pkgs.writeShellScript "ch-pre-${vm.name}" '' + ${c.iommuValidationScript} + rm -f /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket + rm -f /run/vmsilo/${vm.name}/vsock.socket + ''}" ]; + + chStopPostScripts = [ + "+${cleanupSocket}" + ] ++ lib.optionals (vm.rootOverlay.type == "raw") [ "+${deleteEphemeral}" ]; + + # crosvm: existing behavior (unchanged) + crosvmStartPreScripts = lib.optionals (vm.rootOverlay.type == "raw") [ "${createEphemeral}" ]; + crosvmStopPostScripts = [ + "${cleanupSocket}" + ] ++ lib.optionals (vm.rootOverlay.type == "raw") [ "${deleteEphemeral}" ]; + + # CH sandbox directives + chSandboxConfig = lib.optionalAttrs (isCh && !c.ch.disableSandbox) { + PrivateDevices = false; + PrivateUsers = "identity"; + CapabilityBoundingSet = ""; + 
NoNewPrivileges = true; + PrivateNetwork = true; + PrivatePIDs = true; + PrivateIPC = true; + LimitMEMLOCK = "infinity"; + ProtectKernelLogs = true; + RestrictNamespaces = true; + LockPersonality = true; + SystemCallArchitectures = "native"; + DevicePolicy = "closed"; + DeviceAllow = [ + "/dev/kvm rw" + "char-pts rw" + ] ++ lib.optionals (c.vmPciDevicePaths != [ ]) [ + "/dev/vfio/vfio rw" + "/dev/vfio/* rw" + ]; + BindReadOnlyPaths = [ + c.kernelPath + c.initramfsPath + ] ++ lib.optional c.rootDiskReadonly c.rootDiskPath; + BindPaths = [ + "/run/vmsilo/${vm.name}" + ] + ++ lib.optional (!c.rootDiskReadonly) c.rootDiskPath + ++ lib.optional (vm.rootOverlay.type == "raw") c.ephemeralDiskPath + ++ lib.optionals (c.vmPciDevicePaths != [ ]) ( + [ "/dev/vfio" ] ++ map (p: "/sys/bus/pci/devices/${p}") c.vmPciDevicePaths + ) + ++ lib.optional vm.cloud-hypervisor.hugepages "/dev/hugepages"; + }; + in + lib.nameValuePair "vmsilo-${vm.name}-vm" { + description = "vmsilo VM: ${vm.name}"; + wantedBy = lib.optionals (vm.autoStart && !(vm.gpu.wayland || vm.gpu.opengl || vm.gpu.vulkan)) [ + "multi-user.target" + ]; + wants = + map (depName: "vmsilo-${depName}-vm.service") vm.dependsOn + ++ lib.optional ( + vm.network.netvm != null && vm.network.netvm != "host" + ) "vmsilo-${vm.network.netvm}-vm.service" + ++ map (brName: "${brName}-netdev.service") (vmBridges vm); + after = [ "network.target" ] ++ map (brName: "${brName}-netdev.service") (vmBridges vm); + + # NixOS confinement for CH VMs + confinement = lib.mkIf (isCh && !c.ch.disableSandbox) { + enable = true; + binSh = null; + }; + + serviceConfig = { + Type = "simple"; + ExecStart = + if isCh then + "${cfg._internal.chExecStartScripts.${vm.name}}" + else + "${cfg._internal.vmScripts.${vm.name}}"; + ExecStopPost = if isCh then chStopPostScripts else crosvmStopPostScripts; + Environment = [ "RUST_BACKTRACE=full" ]; + } + // lib.optionalAttrs isCh { + ExecStartPre = chStartPreScripts; + ExecStartPost = [ 
"+${cfg._internal.chPostScripts.${vm.name}}" ]; + } + // lib.optionalAttrs (!isCh && crosvmStartPreScripts != [ ]) { + ExecStartPre = crosvmStartPreScripts; + } + // chSandboxConfig; + } + ) allVms; +``` + +- [ ] **Step 2: Add _internal option declarations for new script exports** + +In `modules/options.nix`, after the `vmScripts` option (around line 1059), add: + +```nix + chExecStartScripts = lib.mkOption { + type = lib.types.attrsOf lib.types.path; + default = {}; + description = "CH ExecStart scripts (name -> script path)."; + internal = true; + }; + chPostScripts = lib.mkOption { + type = lib.types.attrsOf lib.types.path; + default = {}; + description = "CH ExecStartPost scripts (name -> script path)."; + internal = true; + }; +``` + +- [ ] **Step 3: Format** + +```bash +nix fmt +``` + +- [ ] **Step 4: Verify nix build evaluates** + +```bash +nix build .# --dry-run 2>&1 | tail -10 +``` + +- [ ] **Step 5: Commit** + +```bash +git add modules/services.nix modules/options.nix +git commit -m "Apply confinement and namespace sandboxing to CH VM services + +CH VMs get: confinement (chroot with only CH closure), PrivateUsers=identity, +PrivateNetwork, PrivatePIDs, PrivateIPC, empty CapabilityBoundingSet, +DevicePolicy=closed, landlock, and all Protect* directives. + +crosvm VMs are unchanged. Gated by cloud-hypervisor.disableSandbox option." +``` + +--- + +### Task 8: Update CLAUDE.md and README.md + +**Files:** +- Modify: `CLAUDE.md` — document `vmsilo-tools/` crate, CH sandboxing pattern +- Modify: `README.md` — document `cloud-hypervisor.disableSandbox` option + +- [ ] **Step 1: Update CLAUDE.md** + +Add `vmsilo-tools/` to the Rust crates section: + +```markdown +- `vmsilo-tools/` — Rust workspace for small utilities. 
Contains `tap-open` (opens TAP device by name, execs command with inherited FD) +``` + +Add to Key Patterns: + +```markdown +- **CH sandboxing**: CH VMs use NixOS confinement (chroot), PrivateUsers=identity, PrivateNetwork, PrivatePIDs, PrivateIPC, empty CapabilityBoundingSet. TAP FDs passed via `vmsilo-tap-open` + `ch-remote add-net`. All privileged operations in ExecStartPre=+/ExecStartPost=+/ExecStopPost=+. +``` + +- [ ] **Step 2: Update README.md** + +Add `cloud-hypervisor.disableSandbox` to the per-VM cloud-hypervisor options table. + +- [ ] **Step 3: Commit** + +```bash +git add CLAUDE.md README.md +git commit -m "Document CH sandboxing and vmsilo-tools in CLAUDE.md and README.md" +``` + +--- + +### Task 9: Build and smoke test + +- [ ] **Step 1: Full nix build** + +```bash +git add -A # nix build needs files in git index +nix build .# +``` + +- [ ] **Step 2: Inspect generated service** + +On a test system with the module applied, check the generated systemd unit: + +```bash +systemctl cat vmsilo--vm.service +``` + +Verify: confinement (RootDirectory), PrivateUsers=identity, PrivateNetwork=true, PrivatePIDs=true, CapabilityBoundingSet=, DevicePolicy=closed, BindPaths, ExecStartPost with + prefix. + +- [ ] **Step 3: Test VM start/stop** + +```bash +vm-start +vm-run echo hello +vm-stop +``` + +- [ ] **Step 4: Test with disableSandbox=true** + +Temporarily set `cloud-hypervisor.disableSandbox = true` for a VM and verify it falls back to unsandboxed behavior. 
+ +- [ ] **Step 5: Commit any fixes** diff --git a/docs/superpowers/plans/2026-03-23-remove-python-sommelier.md b/docs/superpowers/plans/2026-03-23-remove-python-sommelier.md new file mode 100644 index 0000000..a47cf7b --- /dev/null +++ b/docs/superpowers/plans/2026-03-23-remove-python-sommelier.md @@ -0,0 +1,715 @@ +# Remove Python & Sommelier, Add Rust Tools + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Remove Python runtime dependency and sommelier wayland proxy; replace with two new Rust CLI tools in the vmsilo-tools workspace. + +**Architecture:** Two new Rust crates (`desktop-file` and `vsock-proxy`) are added to the existing `vmsilo-tools/` Cargo workspace. Nix modules are updated to call the new binaries instead of Python scripts. Sommelier is removed entirely, simplifying wayland proxy code to a single codepath. + +**Tech Stack:** Rust (edition 2021), Nix/NixOS modules, shell scripts + +**Important context:** +- `nix build` uses the git index — all new files must be `git add`'d before building. +- The vmsilo-tools workspace builds as a single package: `cfg._internal."vmsilo-tools"`. Binaries are at `${cfg._internal."vmsilo-tools"}/bin/`. +- There are no automated tests for the Nix modules. Rust crates should have unit tests. +- Run `nix fmt` before committing. +- Don't vary guest rootfs contents per-VM; gate features on the host side only. + +--- + +### Task 1: Add `desktop-file` crate to vmsilo-tools workspace + +**Files:** +- Create: `vmsilo-tools/desktop-file/Cargo.toml` +- Create: `vmsilo-tools/desktop-file/src/main.rs` +- Create: `vmsilo-tools/desktop-file/src/parser.rs` +- Modify: `vmsilo-tools/Cargo.toml` (add workspace member) + +This is a generic .desktop file manipulation tool. 
The freedesktop .desktop format is INI-like: `[Group]` headers, `Key=Value` lines, comments (`#`), blank lines. Locale keys like `Name[fr]` are distinct keys. + +**Subcommands:** +- `get-key <file> <group> <key>` — print value, exit 0 if found, exit 1 if missing +- `set-key <file> <group> <key> <value>` — output full file with key set (create key/group if missing) +- `list-groups <file>` — one group name per line +- `list-entries <file> <group>` — one key per line for group +- `filter-groups <file> <group> [<group>...]` — output file keeping only listed groups +- `filter-keys <file> <group> <key> [<key>...]` — output file keeping only listed keys in group; other groups pass through unchanged + +All commands accept `-` as file argument to read from stdin. Output to stdout. + +- [ ] **Step 1: Create `desktop-file/Cargo.toml`** + +```toml +[package] +name = "vmsilo-desktop-file" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1" +``` + +- [ ] **Step 2: Add workspace member** + +In `vmsilo-tools/Cargo.toml`, change `members = ["tap-open"]` to `members = ["tap-open", "desktop-file"]`. + +- [ ] **Step 3: Write the parser module (`src/parser.rs`)** + +Line-oriented parser that classifies each line as: blank/comment, group header `[Name]`, or key-value `Key=Value`. Preserves original text for passthrough. Data model: + +```rust +/// A single line from a .desktop file, classified but preserving original text. +pub enum Line { + /// Blank line or comment + Other(String), + /// Group header: [GroupName] + Group { name: String, raw: String }, + /// Key=Value entry + Entry { key: String, value: String, raw: String }, +} + +/// Parsed .desktop file preserving order and formatting.
+pub struct DesktopFile { + pub lines: Vec<Line>, +} +``` + +Implement: +- `DesktopFile::parse(input: &str) -> DesktopFile` — parse lines +- `DesktopFile::get_key(group: &str, key: &str) -> Option<&str>` — find key in group +- `DesktopFile::set_key(group: &str, key: &str, value: &str)` — set key (create if needed, create group if needed) +- `DesktopFile::list_groups() -> Vec<&str>` — unique group names in order +- `DesktopFile::list_entries(group: &str) -> Vec<&str>` — key names in group +- `DesktopFile::filter_groups(keep: &[&str])` — remove groups not in keep list +- `DesktopFile::filter_keys(group: &str, keep: &[&str])` — remove keys not in keep list for given group, other groups unchanged +- `DesktopFile::write(&self) -> String` — reconstruct file text + +Key parsing rules: +- Group header: line starts with `[`, ends with `]` (after trim) +- Key-value: split on first `=` +- Everything else is Other (comments, blank lines) +- Preserve original line text in `raw` field for exact passthrough when not modified + +- [ ] **Step 4: Write parser unit tests** + +Test at minimum: +- Parse a file with multiple groups, keys, comments, blank lines +- `get_key` returns correct value, returns `None` for missing key/group +- `set_key` on existing key changes value +- `set_key` on new key in existing group appends it +- `set_key` on new key in new group creates group at end +- `list_groups` returns groups in order +- `list_entries` returns keys in order +- `filter_groups` keeps only listed groups plus their keys +- `filter_keys` keeps only listed keys in target group, other groups unchanged +- Round-trip: parse then write preserves original text +- Locale keys (`Name[fr]`) treated as distinct from `Name` + +- [ ] **Step 5: Run tests** + +Run: `cd /home/david/git/vmsilo/vmsilo-tools && cargo test -p vmsilo-desktop-file` +Expected: All tests pass. + +- [ ] **Step 6: Write `src/main.rs`** + +Parse CLI args manually (no clap dependency — keep it minimal like tap-open).
Read file (or stdin if `-`), dispatch to parser methods, write output to stdout. + +``` +Usage: + desktop-file get-key <file> <group> <key> + desktop-file set-key <file> <group> <key> <value> + desktop-file list-groups <file> + desktop-file list-entries <file> <group> + desktop-file filter-groups <file> <group> [<group>...] + desktop-file filter-keys <file> <group> <key> [<key>...] +``` + +Exit codes: 0 = success, 1 = key/group not found (get-key), 2 = usage error. + +- [ ] **Step 7: Write main integration tests** + +Test stdin piping: write a test that calls the binary as a subprocess, pipes input via stdin with `-` arg, verifies stdout output. Test the pipeline pattern: `set-key | set-key | filter-keys`. + +- [ ] **Step 8: Run all tests** + +Run: `cd /home/david/git/vmsilo/vmsilo-tools && cargo test -p vmsilo-desktop-file` +Expected: All tests pass. + +- [ ] **Step 9: Commit** + +```bash +cd /home/david/git/vmsilo +git add vmsilo-tools/desktop-file/ vmsilo-tools/Cargo.toml +# Cargo.lock will be updated — add it too +git add vmsilo-tools/Cargo.lock +git commit -m "Add desktop-file tool to vmsilo-tools workspace" +``` + +--- + +### Task 2: Add `vsock-proxy` crate to vmsilo-tools workspace + +**Files:** +- Create: `vmsilo-tools/vsock-proxy/Cargo.toml` +- Create: `vmsilo-tools/vsock-proxy/src/main.rs` +- Modify: `vmsilo-tools/Cargo.toml` (add workspace member) + +Synchronous vsock CONNECT handshake + stdin/stdout proxy for cloud-hypervisor hybrid vsock. + +**Usage:** `vmsilo-vsock-proxy <socket-path> <port>` + +**Protocol:** +1. Connect to Unix socket at `<socket-path>` +2. Send `CONNECT <port>\n` +3. Read response byte-by-byte until `\n` +4. Validate response starts with `OK ` +5. Write any data received after the OK line to stdout +6. Bidirectional proxy: stdin->socket (spawned thread), socket->stdout (main thread) +7. On stdin EOF: `shutdown(Write)` on socket (half-close) +8. Continue reading socket->stdout until EOF +9.
Exit 0 on success, non-zero on handshake failure or connection error + +**Reference implementation:** The existing Rust CONNECT handshake in `vmsilo-dbus-proxy/src/host/vsock.rs:20-60` does the same protocol (async/tokio). This tool is the synchronous, standalone equivalent. + +- [ ] **Step 1: Create `vsock-proxy/Cargo.toml`** + +```toml +[package] +name = "vmsilo-vsock-proxy" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1" +``` + +- [ ] **Step 2: Add workspace member** + +In `vmsilo-tools/Cargo.toml`, add `"vsock-proxy"` to members list (should now be `["tap-open", "desktop-file", "vsock-proxy"]`). + +- [ ] **Step 3: Write `src/main.rs`** + +Implementation outline: +```rust +fn main() -> Result<()> { + let args: Vec<String> = env::args().collect(); + // Validate: exactly 3 args (binary, socket_path, port) + + let socket_path = &args[1]; + let port: u32 = args[2].parse()?; + + // Connect to Unix socket + let stream = UnixStream::connect(socket_path)?; + + // Send CONNECT handshake + // write_all(format!("CONNECT {port}\n").as_bytes()) + + // Read response byte-by-byte until \n + // Validate starts with "OK " + // If any trailing data after \n, write to stdout + + // Clone stream for the stdin->socket thread + let writer = stream.try_clone()?; + + // Spawn stdin->socket thread + // On stdin EOF: writer.shutdown(Shutdown::Write) + + // Main thread: socket->stdout loop + // Read until EOF, write to stdout + + // Wait for stdin thread (optional, just exit) + Ok(()) +} +``` + +Use `std::os::unix::net::UnixStream`, `std::net::Shutdown`, `std::thread`, `std::io::{Read, Write, stdin, stdout}`. No async. + +- [ ] **Step 4: Write unit tests for handshake** + +Use `std::os::unix::net::UnixListener` to create a mock server in a tempdir.
Test: +- Successful handshake (send CONNECT, get OK, verify stream works) +- Failed handshake (server responds with error) +- Trailing data after OK line is forwarded + +- [ ] **Step 5: Run tests** + +Run: `cd /home/david/git/vmsilo/vmsilo-tools && cargo test -p vmsilo-vsock-proxy` +Expected: All tests pass. + +- [ ] **Step 6: Commit** + +```bash +cd /home/david/git/vmsilo +git add vmsilo-tools/vsock-proxy/ vmsilo-tools/Cargo.toml vmsilo-tools/Cargo.lock +git commit -m "Add vsock-proxy tool to vmsilo-tools workspace" +``` + +--- + +### Task 3: Rewrite `desktop.nix` to use `desktop-file` tool, remove Python + +**Files:** +- Modify: `modules/desktop.nix:97-175` (remove processDesktopScript, pythonWithPyxdg; rewrite shell function) + +The shell function `process_desktop` in `mkDesktopEntries` (line 224) currently calls `python3 processDesktopScript`. Replace with calls to `vmsilo-desktop-file`. + +- [ ] **Step 1: Add desktopFile variable** + +At the top of the `let` block in `desktop.nix` (after `cfg = config.programs.vmsilo;`), add: + +```nix +desktopFile = "${cfg._internal."vmsilo-tools"}/bin/vmsilo-desktop-file"; +``` + +- [ ] **Step 2: Delete the Python script and interpreter** + +Remove lines 96-175 from `desktop.nix`: +- The `processDesktopScript` definition (lines 96-172) +- The `pythonWithPyxdg` definition (lines 174-175) + +- [ ] **Step 3: Rewrite the `process_desktop` shell function** + +Replace the body of `process_desktop` (currently at lines 224-238) with: + +```bash +process_desktop() { + local desktop="$1" + local pkg="$2" + + # Get output filename + local basename + basename=$(basename "$desktop") + local outfile="$out/share/applications/vmsilo.${vm.name}.$basename" + + # Check filters — skip NoDisplay=true and non-Application types + local nodisplay dtype + nodisplay=$(${desktopFile} get-key "$desktop" "Desktop Entry" NoDisplay) || nodisplay="" + if [ "$nodisplay" = "true" ]; then return; fi + dtype=$(${desktopFile} get-key "$desktop" "Desktop Entry" 
Type) || dtype="" + if [ -n "$dtype" ] && [ "$dtype" != "Application" ]; then return; fi + + # Get original values for transformation + local icon name exec_val + icon=$(${desktopFile} get-key "$desktop" "Desktop Entry" Icon) || icon="" + name=$(${desktopFile} get-key "$desktop" "Desktop Entry" Name) || name="" + exec_val=$(${desktopFile} get-key "$desktop" "Desktop Entry" Exec) || exec_val="" + + # Build transformation pipeline + { + if [ -n "$name" ]; then + ${desktopFile} set-key "$desktop" "Desktop Entry" Name "${vm.name}: $name" + else + cat "$desktop" + fi + } \ + | ${desktopFile} set-key - "Desktop Entry" Exec "vm-run ${vm.name} $exec_val" \ + | ${desktopFile} set-key - "Desktop Entry" Icon "vmsilo.${vm.name}.''${icon:-unknown}" \ + | ${desktopFile} set-key - "Desktop Entry" Categories "X-Vmsilo-${vm.name};" \ + | ${desktopFile} set-key - "Desktop Entry" X-VmSilo-Color "${vm.color}" \ + | ${desktopFile} filter-keys - "Desktop Entry" Type Version Name GenericName Comment Icon Exec Terminal Categories Keywords NoDisplay OnlyShowIn NotShowIn X-VmSilo-Color \ + | ${desktopFile} filter-groups - "Desktop Entry" \ + > "$outfile" + + # ... icon copying logic continues unchanged below ... +``` + +The rest of the function (icon copying, lines 240-277) stays as-is — it uses `$icon` which is still set. + +- [ ] **Step 4: Verify no Python references remain in desktop.nix** + +Search for `python`, `pyxdg`, `processDesktop` in the file. Should be zero matches. 
+ +- [ ] **Step 5: Run `nix fmt`** + +Run: `cd /home/david/git/vmsilo && nix fmt` + +- [ ] **Step 6: Commit** + +```bash +git add modules/desktop.nix +git commit -m "Replace Python processDesktopScript with desktop-file tool" +``` + +--- + +### Task 4: Rewrite `scripts.nix` to use `vsock-proxy`, remove Python + +**Files:** +- Modify: `modules/scripts.nix:229-312` (rewrite mkChVsockConnectScript, remove pyProxy) +- Modify: `modules/scripts.nix:375-378` (update stale comment about Python) + +- [ ] **Step 1: Rewrite `mkChVsockConnectScript`** + +Replace the entire function (lines 229-312) with: + +```nix +mkChVsockConnectScript = + vmName: port: + let + vsockProxy = "${cfg._internal."vmsilo-tools"}/bin/vmsilo-vsock-proxy"; + in + pkgs.writeShellScript "vsock-connect-${vmName}-${toString port}" '' + VSOCK_SOCKET="/run/vmsilo/${vmName}/vsock.socket" + PORT=${toString port} + TIMEOUT=30 + ELAPSED=0 + + # Wait for vsock socket to appear + while [ $ELAPSED -lt $TIMEOUT ] && [ ! -S "$VSOCK_SOCKET" ]; do + sleep 0.5 + ELAPSED=$((ELAPSED + 1)) + done + [ -S "$VSOCK_SOCKET" ] || { echo "Timeout: vsock socket not found" >&2; exit 1; } + + # Retry until vsock port is ready (guest command listener may not be up yet). + ELAPSED=0 + while [ $ELAPSED -lt $TIMEOUT ]; do + ${vsockProxy} "$VSOCK_SOCKET" "$PORT" && exit 0 + sleep 0.5 + ELAPSED=$((ELAPSED + 1)) + done + + echo "Timeout waiting for VM ${vmName} vsock:${toString port}" >&2 + exit 1 + ''; +``` + +This removes `pyProxy` and the `pkgs.python3` dependency entirely. 
+ +- [ ] **Step 2: Update the stale comment in vmRunScript** + +At line 376-377, change: +``` +# -t5: wait up to 5s for response after stdin EOF (default 0.5s is too short +# for cloud-hypervisor proxy startup: Python interpreter + CONNECT handshake) +``` +to: +``` +# -t5: wait up to 5s for response after stdin EOF (default 0.5s is too short +# for cloud-hypervisor proxy startup and CONNECT handshake) +``` + +- [ ] **Step 3: Verify no Python references remain in scripts.nix** + +Search for `python`, `pyProxy` in the file. Should be zero matches. + +- [ ] **Step 4: Run `nix fmt`** + +Run: `cd /home/david/git/vmsilo && nix fmt` + +- [ ] **Step 5: Commit** + +```bash +git add modules/scripts.nix +git commit -m "Replace Python vsock-proxy with vmsilo-vsock-proxy tool" +``` + +--- + +### Task 5: Replace socat CONNECT probe with vsock-proxy in USB service + +**Files:** +- Modify: `modules/services.nix:668-674` (replace socat probe with vsock-proxy) + +- [ ] **Step 1: Replace the cloud-hypervisor CONNECT probe** + +In `modules/services.nix`, the USB service start script (inside `mkUsbServices`) has a block at lines 668-674 that checks if the guest vsock port 5002 is reachable. The cloud-hypervisor branch currently uses socat: + +```bash +if echo "CONNECT 5002" | socat - UNIX-CONNECT:/run/vmsilo/${vm.name}/vsock.socket 2>/dev/null | head -1 | grep -q '^OK '; then break; fi +``` + +Replace the entire `${if vm.hypervisor == "crosvm" then ... else ...}` block (lines 669-674) with: + +```nix +${ + if vm.hypervisor == "crosvm" then + "if ${pkgs.socat}/bin/socat -u OPEN:/dev/null VSOCK-CONNECT:${toString vm.id}:5002 2>/dev/null; then break; fi" + else + "if ${cfg._internal."vmsilo-tools"}/bin/vmsilo-vsock-proxy /run/vmsilo/${vm.name}/vsock.socket 5002 /dev/null; then break; fi" +} +``` + +The crosvm path stays as socat (kernel vsock, no CONNECT protocol). 
The cloud-hypervisor path uses `vmsilo-vsock-proxy` with stdin from `/dev/null` — handshake succeeds, stdin EOF triggers `shutdown(Write)`, socket closes, exit 0. + +- [ ] **Step 2: Update the stale comment** + +At approximately line 666 in `modules/services.nix`, update the comment: +``` +# Use socat probe for crosvm (kernel vsock), socat via unix socket for cloud-hypervisor +``` +to: +``` +# Use socat probe for crosvm (kernel vsock), vsock-proxy for cloud-hypervisor +``` + +- [ ] **Step 3: Run `nix fmt`** + +Run: `cd /home/david/git/vmsilo && nix fmt` + +- [ ] **Step 4: Commit** + +```bash +git add modules/services.nix +git commit -m "Use vmsilo-vsock-proxy for CH vsock probe in USB service" +``` + +--- + +### Task 6: Remove sommelier entirely + +**Files:** +- Delete: `packages/sommelier.nix` +- Modify: `flake.nix:170` (remove sommelier package build) +- Modify: `flake.nix:235` (remove sommelier injection) +- Modify: `modules/options.nix:230-239` (remove `waylandProxy.type` option) +- Modify: `modules/options.nix:250` (update logLevel description) +- Modify: `modules/options.nix:1018-1022` (remove internal sommelier option) +- Modify: `modules/lib/vm-config.nix:94,98` (remove sommelier and waylandProxy args) +- Modify: `rootfs-nixos/default.nix:12,15,39,41-42` (remove sommelier/waylandProxy params, update comment) +- Modify: `rootfs-nixos/guest/wayland.nix` (remove sommelier codepath, simplify) + +**Important:** All sommelier removal must happen in a single commit to avoid broken intermediate states (options.nix declares the option, vm-config.nix references it — removing one without the other breaks evaluation). 
+ +- [ ] **Step 1: Delete `packages/sommelier.nix`** + +```bash +rm packages/sommelier.nix +``` + +- [ ] **Step 2: Remove sommelier from `flake.nix`** + +Remove line 170: +```nix + sommelier = nixpkgs.legacyPackages.${system}.callPackage ./packages/sommelier.nix { }; +``` + +Remove line 235 (the `sommelier =` line in the `_internal` block): +```nix + sommelier = pkgs.callPackage ./packages/sommelier.nix { }; +``` + +- [ ] **Step 3: Remove `waylandProxy.type` option from `options.nix`** + +Remove the entire `waylandProxy.type` option block at lines 231-239 (the `type = lib.mkOption { type = lib.types.enum [...]; ... }` block, but keep the `waylandProxy = {` wrapper and `logLevel` option). + +The structure should remain: +```nix +waylandProxy = { + logLevel = lib.mkOption { ... }; +}; +``` + +- [ ] **Step 4: Update `waylandProxy.logLevel` description** + +At line 250, change: +```nix +description = "Log level for wayland-proxy-virtwl. Ignored for sommelier."; +``` +to: +```nix +description = "Log level for wayland-proxy-virtwl."; +``` + +- [ ] **Step 5: Remove internal sommelier option** + +Remove lines 1018-1022: +```nix + sommelier = lib.mkOption { + type = lib.types.package; + description = "sommelier package (injected by flake)."; + internal = true; + }; +``` + +- [ ] **Step 6: Update `modules/lib/vm-config.nix`** + +At line 94, change: +```nix +inherit (cfg._internal) wayland-proxy-virtwl sommelier; +``` +to: +```nix +inherit (cfg._internal) wayland-proxy-virtwl; +``` + +Remove line 98 (`waylandProxy = vm.waylandProxy.type;`). Keep line 99 (`waylandProxyLogLevel = vm.waylandProxy.logLevel;`). + +- [ ] **Step 7: Update `rootfs-nixos/default.nix`** + +Remove `sommelier,` from the function args (line 12). + +Remove `waylandProxy ? "wayland-proxy-virtwl",` from the function args (line 15). Keep `waylandProxyLogLevel ? "info",` (line 16). 
+ +Update the comment at line 39 from: +```nix +# Pass wayland proxy packages and selection to the configuration +``` +to: +```nix +# Pass wayland proxy package and config to the configuration +``` + +Remove these `_module.args` injections (lines 41-42): +```nix +_module.args.sommelier = sommelier; +_module.args.waylandProxy = waylandProxy; +``` + +Keep `_module.args.waylandProxyLogLevel = waylandProxyLogLevel;` (line 43). + +- [ ] **Step 8: Simplify `rootfs-nixos/guest/wayland.nix`** + +Update the file header comment (line 3) from: +``` +# - wayland-proxy-virtwl or sommelier (mutually exclusive) +``` +to: +``` +# - wayland-proxy-virtwl proxy service +``` + +Remove `sommelier,` and `waylandProxy,` from the function args (lines 9-10). Keep `waylandProxyLogLevel,`. + +Remove the sommelier-related `let` variables (lines 17-19 — NOT line 16 which is `kernelParamHelper`): +```nix + isSommelier = waylandProxy == "sommelier"; + proxyServiceName = if isSommelier then "sommelier" else "wayland-proxy-virtwl"; + waylandDisplayName = if isSommelier then "wayland-0" else "wayland-1"; +``` + +On the wayland-proxy-virtwl service (line 23), remove the `lib.mkIf (!isSommelier)` guard — the service is now unconditional. + +Delete the entire sommelier service block (lines 63-94). 
+ +In `vmsilo-session-setup` (line 110), change: +```nix +requires = [ "${proxyServiceName}.service" ]; +after = [ "${proxyServiceName}.service" ]; +``` +to: +```nix +requires = [ "wayland-proxy-virtwl.service" ]; +after = [ "wayland-proxy-virtwl.service" ]; +``` + +In the session setup script (line 128), change: +```nix +export WAYLAND_DISPLAY="${waylandDisplayName}" +``` +to: +```nix +export WAYLAND_DISPLAY="wayland-1" +``` + +- [ ] **Step 9: Run `nix fmt`** + +Run: `cd /home/david/git/vmsilo && nix fmt` + +- [ ] **Step 10: Commit** + +```bash +git add -u packages/sommelier.nix flake.nix modules/options.nix modules/lib/vm-config.nix rootfs-nixos/default.nix rootfs-nixos/guest/wayland.nix +git commit -m "Remove sommelier wayland proxy entirely" +``` + +--- + +### Task 7: Update README.md and CLAUDE.md + +**Files:** +- Modify: `README.md:383-389` (simplify wayland proxy section) +- Modify: `README.md:590` (remove sommelier reference) +- Modify: `CLAUDE.md` (update if needed) + +- [ ] **Step 1: Simplify the Wayland Proxy section in README.md** + +Replace lines 383-389: +```markdown +### Wayland Proxy + +```nix +waylandProxy.type = "wayland-proxy-virtwl"; # Default: wayland-proxy-virtwl by Thomas Leonard +waylandProxy.type = "sommelier"; # ChromeOS sommelier (experiment, does not work currently) +waylandProxy.logLevel = "debug"; # Log level for wayland-proxy-virtwl (default: info) +``` +``` + +with: +```markdown +### Wayland Proxy + +```nix +waylandProxy.logLevel = "debug"; # Log level for wayland-proxy-virtwl (default: info) +``` +``` + +- [ ] **Step 2: Update architecture section in README.md** + +At line 590, change: +``` +- Wayland proxy for GPU passthrough (wayland-proxy-virtwl or sommelier) +``` +to: +``` +- Wayland proxy for GPU passthrough (wayland-proxy-virtwl) +``` + +- [ ] **Step 3: Update CLAUDE.md if needed** + +Check for any references to Python, sommelier, or `waylandProxy.type` in CLAUDE.md. 
Update: +- If `waylandProxy` option documentation mentions the type enum, remove it +- If sommelier is mentioned anywhere, remove it +- No Python-specific references are expected but verify + +- [ ] **Step 4: Run `nix fmt`** + +Run: `cd /home/david/git/vmsilo && nix fmt` + +- [ ] **Step 5: Commit** + +```bash +git add README.md CLAUDE.md +git commit -m "Update docs: remove sommelier references, simplify wayland proxy section" +``` + +--- + +### Task 8: Build verification + +**Files:** None (verification only) + +- [ ] **Step 1: Verify Rust tools build** + +```bash +cd /home/david/git/vmsilo/vmsilo-tools && cargo build && cargo test +``` + +Expected: All crates build, all tests pass. + +- [ ] **Step 2: Verify no Python references remain** + +Search the entire `modules/` and `rootfs-nixos/` directories for `python`, `pyxdg`, `sommelier`. Only valid hits should be in git history, not in current files. + +```bash +cd /home/david/git/vmsilo +grep -r "python\|pyxdg\|sommelier" modules/ rootfs-nixos/ packages/ --include="*.nix" -l +``` + +Expected: No matches (or only false positives like comments mentioning removal). + +- [ ] **Step 3: Verify nix fmt is clean** + +```bash +cd /home/david/git/vmsilo && nix fmt -- --check . +``` + +Expected: No formatting changes needed. + +- [ ] **Step 4: Verify nix build succeeds** + +```bash +cd /home/david/git/vmsilo && git add -A && nix build .# +``` + +Expected: Build succeeds. Note: `git add -A` is needed because nix build uses git index for source filtering. diff --git a/docs/superpowers/specs/2026-03-23-ch-sandboxing-design.md b/docs/superpowers/specs/2026-03-23-ch-sandboxing-design.md new file mode 100644 index 0000000..7ec2738 --- /dev/null +++ b/docs/superpowers/specs/2026-03-23-ch-sandboxing-design.md @@ -0,0 +1,180 @@ +# Cloud-Hypervisor Sandboxing Design + +Harden the cloud-hypervisor VM service with Landlock filesystem sandboxing, systemd namespace isolation, and capability dropping. 
Whitelist approach — deny everything, allow only what's needed. + +## Current State + +The CH VM service (`vmsilo-{name}-vm.service`) runs as root with: +- CH's built-in seccomp (`--seccomp true`) — the only existing restriction +- No systemd sandboxing (no namespaces, no capability restrictions, no filesystem protection) +- A shell wrapper script that starts CH, orchestrates VM creation/boot, and waits for exit + +The GPU/sound/USB device backend services already have thorough systemd sandboxing (`TemporaryFileSystem=/`, `CapabilityBoundingSet=""`, etc.). + +## Design + +### 1. Enable CH Landlock + +Pass `--landlock` to the CH binary. CH automatically allowlists all paths from its JSON config (disks, kernel, initramfs, vhost-user sockets, VFIO devices, vsock socket parent directory, etc.) via the `ApplyLandlock` trait on each config type. Landlock rules are applied when `ch-remote create` loads the VM config. + +No `--landlock-rules` needed — all static paths are covered by the config, and TAP devices use FD passing (no filesystem access). + +### 2. New Option: `cloud-hypervisor.disableSandbox` + +```nix +cloud-hypervisor.disableSandbox = lib.mkOption { + type = lib.types.bool; + default = false; + description = "Disable Landlock and systemd hardening for this VM's cloud-hypervisor instance. Seccomp is controlled separately by seccompPolicy."; +}; +``` + +When `true`, disables landlock and all systemd hardening directives. The existing `seccompPolicy` option remains independent. + +### 3. TAP and Runtime Directory Ownership Changes + +**TAP interfaces** (`networking.nix`): Change `virtualOwner = cfg.user` to `virtualOwner = "root"`. Nothing relies on user ownership — only the hypervisor (running as root) opens TAP devices. This removes the need for `CAP_NET_ADMIN` to attach to a TAP owned by a different user. 
+ +**Runtime directory** (`services.nix` prep service AND `systemd.tmpfiles.rules`): Change `/run/vmsilo/{name}/` from user-owned to root-owned in both places. Subdirectories (`gpu/`, `gpu/shader-cache/`, `sound/`) remain user-owned for device backend services. `virtiofs/` is already root-owned. + +This lets the sandboxed VM service (running as root uid 0 with no capabilities) create sockets and symlinks in its own runtime directory without `CAP_DAC_OVERRIDE`. + +### 4. Launch Flow Restructuring + +Replace the monolithic shell wrapper with a split architecture: + +``` +ExecStartPre=+ (privileged — bypasses all sandbox restrictions): + - Create ephemeral disk (if raw overlay) + - IOMMU validation (if PCI passthrough) + - Remove stale sockets + +ExecStart (sandboxed): + - cloud-hypervisor --api-socket ... --seccomp ... --landlock ... + - CH is the main process directly — no shell wrapper + +ExecStartPost=+ (privileged — runs immediately for Type=simple): + - Wait for API socket + ch-remote ping + - Wait for vhost-user backend sockets (GPU, sound, virtiofs) + - ch-remote create -- (triggers landlock application) + - For each TAP: open TAP fd via helper, ch-remote add-net fd=[N],mac=... + - ch-remote boot + - Discover console PTY via ch-remote info, create symlink, chown to user + - Wait for vsock.socket to appear, chown to user + +ExecStopPost=+ (privileged): + - Delete sockets, console symlink + - Delete ephemeral disk (if raw overlay) +``` + +**Key changes from current flow:** + +- **ExecStart is the raw CH binary**, not a shell script. No background process, no `wait`. +- **Network config removed from the JSON.** TAP devices are added via `ch-remote add-net` with FD passing (SCM_RIGHTS over the API socket) from the privileged ExecStartPost. CH receives pre-opened TAP FDs and never needs network namespace access or `CAP_NET_ADMIN`. +- **TAP FD helper required.** Opening a TAP device requires the `TUNSETIFF` ioctl, which cannot be done from a shell script. 
A new `vmsilo-tap-open` binary (in the `vmsilo-tools/` Rust workspace) opens `/dev/net/tun`, attaches to the named TAP via `TUNSETIFF`, and execs `ch-remote add-net` with the inherited FD. The `vmsilo-tools/` workspace is a new Rust crate for small vmsilo utilities. +- **add-net before boot**: `vm_add_net()` cold-adds to VmConfig when the VM is created but not yet booted (the `else` branch at `lib.rs:2193-2197`). The device is present from first boot, ensuring the guest sees it during early-boot interface rename. +- **All privileged operations (chown, TAP opening, disk creation, socket cleanup) use `ExecStartPre=+` / `ExecStartPost=+` / `ExecStopPost=+`**, which bypass all service sandbox restrictions. +- **No synchronization needed** between ExecStart and ExecStartPost — ExecStartPost simply waits for the API socket to appear. + +### 5. Namespace Isolation + +| Namespace | Setting | Effect | +|-----------|---------|--------| +| User | `PrivateUsers=identity` | 1:1 UID mapping for first 65536 UIDs. Zero capabilities in the host user namespace. Device files remain accessible (same UIDs). | +| Mount | NixOS `confinement` (chroot) + bind mounts | Chroot with only CH binary closure + explicitly allowed paths. | +| Network | `PrivateNetwork=true` | Empty network namespace. TAP FDs passed via API socket. | +| PID | `PrivatePIDs=true` | PID-scoped `/proc`. CH is PID 1 in its namespace. | +| IPC | `PrivateIPC=true` | Isolated SysV/POSIX IPC. | +| Cgroup | `ProtectControlGroups=true` | Read-only cgroup filesystem. | +| UTS | Not applied | CH doesn't touch hostname. Marginal benefit. | + +Note: `ProtectKernelTunables=true`, `ProtectControlGroups=true`, and `PrivatePIDs=true` all force `MountAPIVFS` internally (`namespace_parameters_mount_apivfs()` in systemd is a hard OR). This means `/proc`, `/dev`, `/sys`, and `/run` are always mounted in the namespace. `/proc` is PID-scoped (from `PrivatePIDs`). `/dev` is a minimal devtmpfs with basic devices. 
`/sys` is the full sysfs (read-only information). `DevicePolicy=closed` + `DeviceAllow` restricts actual device access at the cgroup level. + +### 6. Filesystem Whitelist + +Since `MountAPIVFS` is forced (see above), `/proc`, `/dev`, `/sys`, and `/run` are already available in the namespace. The bind mounts below provide paths NOT covered by MountAPIVFS and override the MountAPIVFS `/run` tmpfs with the real runtime directory. + +Use NixOS `confinement` (as the sound service does) instead of mounting all of `/nix/store`. Confinement creates a chroot at `/run/confinement/%n/` with only the nix store closure of the ExecStart binary. This replaces `TemporaryFileSystem=/`. + +```nix +confinement = { + enable = true; + binSh = null; + # No extra packages needed — confinement auto-includes + # the ExecStart binary's closure (CH and its dependencies) +}; + +# Override confinement defaults that conflict with our needs: +serviceConfig = { + PrivateDevices = false; # confinement sets true, but that hides /dev/kvm + PrivateUsers = "identity"; # confinement sets true (root→nobody), we need 1:1 mapping + + BindReadOnlyPaths = [ + kernelPath # guest kernel + initramfsPath # guest initrd + rootDiskPath # root disk (when read-only) + ]; + + BindPaths = [ + "/run/vmsilo/${vm.name}" # runtime sockets — overrides MountAPIVFS /run tmpfs + + # Conditional: + # rootDiskPath # if writable root disk + # ephemeralDiskPath # if raw overlay + # "/dev/vfio/vfio" # if PCI passthrough + # "/dev/vfio/" # if PCI passthrough + # "/sys/bus/pci/devices/" # if PCI passthrough (IOMMU validation) + # "/dev/hugepages" # if hugepages enabled + ]; +}; +``` + +Note: confinement's `mode = "full-apivfs"` (default) also sets `MountAPIVFS=true`, `ProtectControlGroups=true`, `ProtectKernelModules=true`, `ProtectKernelTunables=true` — all matching our plan. The `+`-prefixed ExecStartPost/ExecStopPost commands bypass the chroot entirely. + +### 7. 
Capability and Hardening Directives + +Confinement (full-apivfs mode) already provides: `MountAPIVFS`, `PrivateTmp`, `ProtectControlGroups`, `ProtectKernelModules`, `ProtectKernelTunables`. The following are additional directives: + +```nix +# Confinement overrides (confinement defaults conflict) +PrivateDevices = false; # confinement sets true — hides /dev/kvm +PrivateUsers = "identity"; # confinement sets true — maps root→nobody + +# Capabilities — none needed +CapabilityBoundingSet = ""; +NoNewPrivileges = true; + +# Network — fully isolated, TAP FDs passed via API socket +PrivateNetwork = true; + +# PID namespace +PrivatePIDs = true; + +# IPC isolation +PrivateIPC = true; + +# Memory locking for guest memory / VFIO DMA +LimitMEMLOCK = "infinity"; + +# Additional kernel hardening (not set by confinement) +ProtectKernelLogs = true; +RestrictNamespaces = true; +LockPersonality = true; +SystemCallArchitectures = "native"; + +# Device whitelist (replaces PrivateDevices for /dev access control) +DevicePolicy = "closed"; +DeviceAllow = [ + "/dev/kvm rw" + "char-pts rw" # PTY allocation + # "/dev/net/tun rw" # not needed — FD passing + # "/dev/vfio/* rw" # if PCI passthrough +]; +``` + +### 8. Scope + +- **Cloud-hypervisor VMs only.** crosvm sandboxing is out of scope. +- **Gated by `cloud-hypervisor.disableSandbox`** (per-VM, default `false`). +- **Seccomp stays independent** — `cloud-hypervisor.seccompPolicy` is unaffected. +- **crosvm and cloud-hypervisor share networking.nix** — the `virtualOwner` change affects both, but crosvm also runs as root so this is a no-op for crosvm behavior. 
diff --git a/flake.lock b/flake.lock index 66c8235..8763f67 100644 --- a/flake.lock +++ b/flake.lock @@ -107,11 +107,11 @@ ] }, "locked": { - "lastModified": 1774207989, - "narHash": "sha256-hKxO8Sjs41DxPpkvn0OvF5+XYqYu4piGLmPywVjq320=", + "lastModified": 1773773254, + "narHash": "sha256-oEbWGCaNhlq6AvL7zDUy9BJDb8bP+BpfjHWOnJP8eAA=", "ref": "refs/heads/master", - "rev": "eeea7aee9e38978788604dd6bb099bafa76c24de", - "revCount": 128, + "rev": "b83283fd7dab54d903f3a28098d54b83ecdd83bf", + "revCount": 127, "type": "git", "url": "https://git.dsg.is/dsg/ocaml-wayland.git" }, @@ -231,11 +231,11 @@ "ocaml-wayland": "ocaml-wayland" }, "locked": { - "lastModified": 1774226593, - "narHash": "sha256-ZrVOK2sfdB/JPexflbs7lGO65NQdFPhKdRxlnU7VWsg=", + "lastModified": 1774353354, + "narHash": "sha256-I/Y4odcxnoK1jO7VAyhbGqqUn5PKrFMk2yvevOE2qQg=", "ref": "refs/heads/master", - "rev": "9cda8b7760fbb8ac49f2b6ab1f6dac5a10e0d79f", - "revCount": 187, + "rev": "2b3ce3e4d0f143a52050509f71b78530ba7538da", + "revCount": 181, "submodules": true, "type": "git", "url": "https://git.dsg.is/dsg/wayland-proxy-virtwl.git"