vmsilo/modules/scripts.nix
Davíð Steinn Geirsson 196c486c24 fix: add required max_vcpus to cloud-hypervisor cpus config
cloud-hypervisor's CpusConfig requires max_vcpus when the cpus object
is present in JSON. Set it equal to boot_vcpus (no CPU hotplug).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 19:34:05 +00:00

1313 lines
45 KiB
Nix

# Script generation for vmsilo NixOS module
# VM launcher scripts and user-facing scripts (vm-run, vm-start, vm-stop, vm-shell)
{
config,
pkgs,
lib,
...
}:
let
cfg = config.programs.vmsilo;
helpers = import ./lib/helpers.nix { inherit lib; };
inherit (helpers)
formatKVArgs
formatPositionalKVArgs
parseCIDR
prefixToNetmask
idxToIfIndex
ifIndexToPciAddr
normalizeBdf
isBdf
generateMac
sortedInterfaceList
makeTapName
assignVmIds
mkEffectiveSharedDirs
;
vms = assignVmIds cfg.nixosVms;
# USB helper library — sourced by vmsilo-usb CLI, systemd oneshot services, and persistent attach service
usbHelperLib = pkgs.writeShellScript "vmsilo-usb-lib" ''
# --- constants ---
USB_STATE_FILE=/run/vmsilo/usb-state.json
USB_STATE_LOCK=/run/vmsilo/usb-state.lock
CROSVM=${cfg._internal.crosvm}/bin/crosvm
# --- locking (fd 9) ---
# Takes an exclusive flock on fd 9; the lock is held until usb_unlock
# closes the fd (or the calling process exits).
usb_lock() {
exec 9>"''${USB_STATE_LOCK}"
${pkgs.util-linux}/bin/flock 9
}
usb_unlock() {
exec 9>&-
}
# --- state file ---
# Prints the JSON array of attachment records; '[]' when no state file exists yet.
usb_read_state() {
if [ -f "''${USB_STATE_FILE}" ]; then
${pkgs.coreutils}/bin/cat "''${USB_STATE_FILE}"
else
echo '[]'
fi
}
# Replaces the state file atomically: write to a temp file in the same
# directory, then rename over the original.
usb_write_state() {
local json="$1"
local tmp="''${USB_STATE_FILE}.tmp.$$"
printf '%s\n' "''${json}" > "''${tmp}"
${pkgs.coreutils}/bin/mv "''${tmp}" "''${USB_STATE_FILE}"
}
# --- enumerate USB devices from sysfs ---
# Prints a JSON array with one object per physical USB device:
# {devpath, vid, pid, serial, busnum, devnum, manufacturer, product, dev_file}
usb_enumerate() {
local result='[]'
for dev in /sys/bus/usb/devices/[0-9]*-[0-9]*; do
[ -d "''${dev}" ] || continue
# Skip interface entries (e.g. 1-3:1.0)
local base
base=$(${pkgs.coreutils}/bin/basename "''${dev}")
case "''${base}" in
*:*) continue ;;
esac
# Read sysfs attributes; attributes missing for a device default to ""
local vid pid serial busnum devnum manufacturer product devpath
vid=$(${pkgs.coreutils}/bin/cat "''${dev}/idVendor" 2>/dev/null || echo "")
pid=$(${pkgs.coreutils}/bin/cat "''${dev}/idProduct" 2>/dev/null || echo "")
serial=$(${pkgs.coreutils}/bin/cat "''${dev}/serial" 2>/dev/null || echo "")
busnum=$(${pkgs.coreutils}/bin/cat "''${dev}/busnum" 2>/dev/null || echo "")
devnum=$(${pkgs.coreutils}/bin/cat "''${dev}/devnum" 2>/dev/null || echo "")
manufacturer=$(${pkgs.coreutils}/bin/cat "''${dev}/manufacturer" 2>/dev/null || echo "")
product=$(${pkgs.coreutils}/bin/cat "''${dev}/product" 2>/dev/null || echo "")
devpath="''${base}"
# Character device node, e.g. /dev/bus/usb/001/004
local dev_file="/dev/bus/usb/$(printf '%03d' "''${busnum}")/$(printf '%03d' "''${devnum}")"
# A readable idVendor is the marker for a usable device entry
[ -z "''${vid}" ] && continue
result=$(printf '%s\n' "''${result}" | ${pkgs.jq}/bin/jq -c \
--arg devpath "''${devpath}" \
--arg vid "''${vid}" \
--arg pid "''${pid}" \
--arg serial "''${serial}" \
--arg busnum "''${busnum}" \
--arg devnum "''${devnum}" \
--arg manufacturer "''${manufacturer}" \
--arg product "''${product}" \
--arg dev_file "''${dev_file}" \
'. + [{devpath: $devpath, vid: $vid, pid: $pid, serial: $serial, busnum: $busnum, devnum: $devnum, manufacturer: $manufacturer, product: $product, dev_file: $dev_file}]')
done
printf '%s\n' "''${result}"
}
# --- find by VID:PID (optional serial) ---
# Prints a JSON array of enumerated devices matching vid/pid (and serial, if given).
usb_find_by_vidpid() {
local vid="$1" pid="$2" serial="''${3:-}"
local devices
devices=$(usb_enumerate)
if [ -n "''${serial}" ]; then
printf '%s\n' "''${devices}" | ${pkgs.jq}/bin/jq -c \
--arg vid "''${vid}" --arg pid "''${pid}" --arg serial "''${serial}" \
'[.[] | select(.vid == $vid and .pid == $pid and .serial == $serial)]'
else
printf '%s\n' "''${devices}" | ${pkgs.jq}/bin/jq -c \
--arg vid "''${vid}" --arg pid "''${pid}" \
'[.[] | select(.vid == $vid and .pid == $pid)]'
fi
}
# --- find by devpath ---
# Prints a JSON array (0 or 1 entries) of enumerated devices at the given sysfs devpath.
usb_find_by_devpath() {
local devpath="$1"
local devices
devices=$(usb_enumerate)
printf '%s\n' "''${devices}" | ${pkgs.jq}/bin/jq -c \
--arg devpath "''${devpath}" \
'[.[] | select(.devpath == $devpath)]'
}
# --- attach (expects lock already held) ---
# Attaches one device to a VM via crosvm's control socket and records the
# assigned xHCI port in the state file.
usb_do_attach() {
local vm_name="$1" devpath="$2" dev_file="$3" vid="$4" pid="$5" serial="$6" busnum="$7" devnum="$8"
local state socket response port
socket="/run/vmsilo/''${vm_name}/crosvm-control.socket"
state=$(usb_read_state)
# If the device is already attached to some VM, detach it there first
local existing
existing=$(printf '%s\n' "''${state}" | ${pkgs.jq}/bin/jq -c \
--arg devpath "''${devpath}" \
'.[] | select(.devpath == $devpath)')
if [ -n "''${existing}" ]; then
local old_vm old_port old_socket
old_vm=$(printf '%s\n' "''${existing}" | ${pkgs.jq}/bin/jq -r '.vm')
old_port=$(printf '%s\n' "''${existing}" | ${pkgs.jq}/bin/jq -r '.port')
old_socket="/run/vmsilo/''${old_vm}/crosvm-control.socket"
if [ -S "''${old_socket}" ]; then
''${CROSVM} usb detach "''${old_port}" "''${old_socket}" >/dev/null 2>&1 || true
fi
state=$(printf '%s\n' "''${state}" | ${pkgs.jq}/bin/jq -c \
--arg devpath "''${devpath}" \
'[.[] | select(.devpath != $devpath)]')
fi
# Attach. NOTE(review): "1:1:0000:0000" looks like a placeholder bus:addr:vid:pid
# spec — presumably crosvm identifies the device via dev_file and ignores it;
# confirm against the crosvm usb attach documentation.
response=$(''${CROSVM} usb attach "1:1:0000:0000" "''${dev_file}" "''${socket}" 2>&1) || {
echo "Error: crosvm usb attach failed: ''${response}" >&2
usb_write_state "''${state}"
return 1
}
# Parse "ok <port>"
port=$(printf '%s\n' "''${response}" | ${pkgs.gnused}/bin/sed -n 's/^ok \([0-9]*\)/\1/p')
if [ -z "''${port}" ]; then
echo "Error: unexpected crosvm response: ''${response}" >&2
usb_write_state "''${state}"
return 1
fi
# Update state with the new attachment record
state=$(printf '%s\n' "''${state}" | ${pkgs.jq}/bin/jq -c \
--arg vm "''${vm_name}" \
--arg devpath "''${devpath}" \
--arg vid "''${vid}" \
--arg pid "''${pid}" \
--arg serial "''${serial}" \
--arg busnum "''${busnum}" \
--arg devnum "''${devnum}" \
--arg port "''${port}" \
'. + [{vm: $vm, devpath: $devpath, vid: $vid, pid: $pid, serial: $serial, busnum: $busnum, devnum: $devnum, port: ($port | tonumber)}]')
usb_write_state "''${state}"
echo "Attached ''${vid}:''${pid} (''${devpath}) to ''${vm_name} on port ''${port}"
}
# --- detach (expects lock already held) ---
# Detaches a device from a VM (best-effort if the socket is gone) and removes
# the corresponding state entry.
usb_do_detach() {
local vm_name="$1" devpath="$2"
local state socket
socket="/run/vmsilo/''${vm_name}/crosvm-control.socket"
state=$(usb_read_state)
local entry
entry=$(printf '%s\n' "''${state}" | ${pkgs.jq}/bin/jq -c \
--arg vm "''${vm_name}" --arg devpath "''${devpath}" \
'.[] | select(.vm == $vm and .devpath == $devpath)')
if [ -z "''${entry}" ]; then
echo "Error: device ''${devpath} not attached to ''${vm_name}" >&2
return 1
fi
local port
port=$(printf '%s\n' "''${entry}" | ${pkgs.jq}/bin/jq -r '.port')
# Only talk to crosvm when the control socket still exists (VM may have exited)
if [ -S "''${socket}" ]; then
local response
response=$(''${CROSVM} usb detach "''${port}" "''${socket}" 2>&1) || {
echo "Warning: crosvm usb detach failed: ''${response}" >&2
}
fi
state=$(printf '%s\n' "''${state}" | ${pkgs.jq}/bin/jq -c \
--arg vm "''${vm_name}" --arg devpath "''${devpath}" \
'[.[] | select(.vm != $vm or .devpath != $devpath)]')
usb_write_state "''${state}"
echo "Detached ''${devpath} from ''${vm_name} (port ''${port})"
}
# --- cleanup all entries for a VM (acquires lock) ---
# Drops every state entry for the VM without issuing crosvm detach calls —
# intended for after the VM has exited (confirm with callers).
usb_cleanup_vm() {
local vm_name="$1"
usb_lock
local state
state=$(usb_read_state)
state=$(printf '%s\n' "''${state}" | ${pkgs.jq}/bin/jq -c \
--arg vm "''${vm_name}" \
'[.[] | select(.vm != $vm)]')
usb_write_state "''${state}"
usb_unlock
}
'';
# vmsilo-usb CLI — list/attach/detach USB devices to/from VMs
vmsiloUsbScript = pkgs.writeShellScript "vmsilo-usb" ''
set -euo pipefail
# shellcheck source=/dev/null
source ${usbHelperLib}
# True when the argument looks like a sysfs devpath (e.g. "1-3.2"),
# as opposed to a VID:PID pair.
is_devpath() {
case "$1" in
[0-9]*-[0-9]*) return 0 ;;
*) return 1 ;;
esac
}
# Splits "vid:pid" into the USB_VID / USB_PID globals; fails on input
# with no ':' separator or an empty half.
parse_vidpid() {
local input="$1"
USB_VID="''${input%%:*}"
USB_PID="''${input#*:}"
if [ -z "''${USB_VID}" ] || [ -z "''${USB_PID}" ] || [ "''${USB_VID}" = "''${input}" ]; then
echo "Error: invalid VID:PID format: ''${input}" >&2
return 1
fi
}
# Prints a table of all enumerated USB devices, joined against the state
# file to show which VM (if any) each device is attached to.
cmd_list() {
local devices state
devices=$(usb_enumerate)
if [ -f "''${USB_STATE_FILE}" ]; then
state=$(usb_read_state)
else
state='[]'
fi
# Merge state into device list and format table
printf '%-10s%-6s%-14s%-14s%-24s%s\n' "VID:PID" "Port" "Serial" "Manufacturer" "Product" "VM"
printf '%s\n' "''${devices}" | ${pkgs.jq}/bin/jq -r --argjson state "''${state}" '
.[] |
. as $dev |
($state | map(select(.devpath == $dev.devpath)) | if length > 0 then .[0].vm else "-" end) as $vm |
[$dev.vid + ":" + $dev.pid,
$dev.devpath,
(if $dev.serial == "" then "-" else $dev.serial end),
(if $dev.manufacturer == "" then "-" else $dev.manufacturer end),
(if $dev.product == "" then "-" else $dev.product end),
$vm] |
@tsv
' | while IFS=$'\t' read -r vidpid port serial manufacturer product vm; do
printf '%-10s%-6s%-14s%-14s%-24s%s\n' "''${vidpid}" "''${port}" "''${serial}" "''${manufacturer}" "''${product}" "''${vm}"
done
}
# Attaches device(s) matching <identifier> (devpath or vid:pid) to <vm>.
# The actual attach runs through the vmsilo-usb-attach@ systemd template,
# with all device fields encoded into the escaped instance name.
cmd_attach() {
local vm="$1" identifier="$2"
# Reject cloud-hypervisor VMs up front (generated case arms below).
# shellcheck disable=SC2049
${lib.optionalString
(lib.any (vm: vm.hypervisor == "cloud-hypervisor") (lib.attrValues cfg.nixosVms))
''
case "$vm" in
${
lib.concatMapStringsSep "|" (vm: vm.name) (
lib.filter (vm: vm.hypervisor == "cloud-hypervisor") (lib.attrValues cfg.nixosVms)
)
})
echo "Error: USB passthrough is not supported for cloud-hypervisor VMs" >&2
exit 1
;;
esac
''
}
if is_devpath "''${identifier}"; then
local matches
matches=$(usb_find_by_devpath "''${identifier}")
local count
count=$(printf '%s\n' "''${matches}" | ${pkgs.jq}/bin/jq 'length')
if [ "''${count}" -eq 0 ]; then
echo "Error: no USB device found at ''${identifier}" >&2
exit 1
fi
printf '%s\n' "''${matches}" | ${pkgs.jq}/bin/jq -r '.[] | [.devpath, .dev_file, .vid, .pid, .serial, .busnum, .devnum] | @tsv' | \
while IFS=$'\t' read -r devpath dev_file vid pid serial busnum devnum; do
local instance
instance=$(${pkgs.systemd}/bin/systemd-escape "''${vm}:''${devpath}:''${dev_file}:''${vid}:''${pid}:''${serial}:''${busnum}:''${devnum}")
systemctl start "vmsilo-usb-attach@''${instance}.service"
done
else
parse_vidpid "''${identifier}"
local matches
matches=$(usb_find_by_vidpid "''${USB_VID}" "''${USB_PID}")
local count
count=$(printf '%s\n' "''${matches}" | ${pkgs.jq}/bin/jq 'length')
if [ "''${count}" -eq 0 ]; then
echo "Error: no USB device found matching ''${identifier}" >&2
exit 1
fi
printf '%s\n' "''${matches}" | ${pkgs.jq}/bin/jq -r '.[] | [.devpath, .dev_file, .vid, .pid, .serial, .busnum, .devnum] | @tsv' | \
while IFS=$'\t' read -r devpath dev_file vid pid serial busnum devnum; do
local instance
instance=$(${pkgs.systemd}/bin/systemd-escape "''${vm}:''${devpath}:''${dev_file}:''${vid}:''${pid}:''${serial}:''${busnum}:''${devnum}")
systemctl start "vmsilo-usb-attach@''${instance}.service"
done
fi
}
# Detaches device(s) matching <identifier> from <vm> via the
# vmsilo-usb-detach@ systemd template. VID:PID lookups resolve devpaths
# from the state file (only currently-attached devices are considered).
cmd_detach() {
local vm="$1" identifier="$2"
# Reject cloud-hypervisor VMs up front (generated case arms below).
# shellcheck disable=SC2049
${lib.optionalString
(lib.any (vm: vm.hypervisor == "cloud-hypervisor") (lib.attrValues cfg.nixosVms))
''
case "$vm" in
${
lib.concatMapStringsSep "|" (vm: vm.name) (
lib.filter (vm: vm.hypervisor == "cloud-hypervisor") (lib.attrValues cfg.nixosVms)
)
})
echo "Error: USB passthrough is not supported for cloud-hypervisor VMs" >&2
exit 1
;;
esac
''
}
if is_devpath "''${identifier}"; then
local instance
instance=$(${pkgs.systemd}/bin/systemd-escape "''${vm}:''${identifier}")
systemctl start "vmsilo-usb-detach@''${instance}.service"
else
parse_vidpid "''${identifier}"
local state
state=$(usb_read_state)
local devpaths
devpaths=$(printf '%s\n' "''${state}" | ${pkgs.jq}/bin/jq -r \
--arg vm "''${vm}" --arg vid "''${USB_VID}" --arg pid "''${USB_PID}" \
'[.[] | select(.vm == $vm and .vid == $vid and .pid == $pid) | .devpath] | .[]')
if [ -z "''${devpaths}" ]; then
echo "Error: no ''${identifier} devices attached to ''${vm}" >&2
exit 1
fi
while IFS= read -r devpath; do
local instance
instance=$(${pkgs.systemd}/bin/systemd-escape "''${vm}:''${devpath}")
systemctl start "vmsilo-usb-detach@''${instance}.service"
done <<< "''${devpaths}"
fi
}
# Command dispatch: no arguments lists devices; attach/detach need vm + identifier.
case "''${1:-}" in
"")
cmd_list
;;
attach)
if [ $# -ne 3 ]; then
echo "Usage: vmsilo-usb attach <vm> <vid:pid|devpath>" >&2
exit 1
fi
cmd_attach "$2" "$3"
;;
detach)
if [ $# -ne 3 ]; then
echo "Usage: vmsilo-usb detach <vm> <vid:pid|devpath>" >&2
exit 1
fi
cmd_detach "$2" "$3"
;;
*)
echo "Usage: vmsilo-usb [attach <vm> <vid:pid|devpath> | detach <vm> <vid:pid|devpath>]" >&2
echo "" >&2
echo "With no arguments, lists all USB devices and their VM assignments." >&2
exit 1
;;
esac
'';
# NOTE: getEffectiveInterfaces is intentionally duplicated in networking.nix and services.nix.
# It cannot live in helpers.nix (which has no config access) and the three modules
# don't share a common let-binding scope. Keep the copies in sync.
# Effective interface set for a VM: user-declared interfaces merged with any
# netvm-injected interfaces (injected entries win on name collision, per //).
getEffectiveInterfaces =
vm: vm.network.interfaces // (cfg._internal.netvmInjections.${vm.name}.interfaces or { });
# Effective generated guest config for a VM: its own _generatedGuestConfig
# modules followed by any netvm-injected guestConfig modules (empty list
# when no injection exists for this VM).
getEffectiveGuestConfig =
vm:
let
injected = cfg._internal.netvmInjections.${vm.name}.guestConfig or [ ];
in
vm._generatedGuestConfig ++ injected;
# User UID/GID and runtime directory for explicit paths (system services need these)
userUid = config.users.users.${cfg.user}.uid;
# GID is resolved through the user's primary group name.
userGid = config.users.groups.${config.users.users.${cfg.user}.group}.gid;
# Per-user runtime directory, e.g. /run/user/1000.
userRuntimeDir = "/run/user/${toString userUid}";
# Effective MAC address for an interface: an explicitly configured
# macAddress wins; otherwise a MAC is derived deterministically from the
# VM name and the user-specified interface name.
getEffectiveIfaceMac =
vm: ifName: iface:
let
explicit = iface.macAddress;
in
if explicit == null then generateMac vm.name ifName else explicit;
# Build rootfs for a VM: calls the rootfs-nixos package with the VM's
# guest programs/config plus a sound-enabling NixOS module when any audio
# direction is on. Returns a derivation providing nixos.erofs, bzImage and initrd.
buildRootfs =
vm:
let
# Enable pipewire in guest when sound is enabled
soundConfig = lib.optionalAttrs (vm.sound.playback || vm.sound.capture) {
services.pipewire = {
enable = lib.mkDefault true;
pulse.enable = lib.mkDefault true;
# Pipewire volume defaults to 40%, likely because the ALSA device has no mixer controls.
wireplumber.extraConfig."50-default-volume"."wireplumber.settings" = {
"device.routes.default-sink-volume" = 1.0;
};
extraConfig.pipewire."50-allow-sample-rates"."context.properties" = {
# Allow everything to avoid resampling. Let the host pipewire resample if needed.
"default.clock.allowed-rates" = [
44100
48000
88200
96000
176400
192000
352800
384000
];
};
# Increase ALSA headroom for virtio-snd. The virtio transport adds
# latency jitter that the default 256-sample headroom can't absorb,
# causing frequent XRUN underruns.
wireplumber.extraConfig."51-virtio-snd-headroom"."monitor.alsa.rules" = [
{
matches = [
{
"node.name" = "~alsa_output.*";
}
];
actions.update-props = {
"api.alsa.headroom" = 1024;
};
}
];
};
# Enable realtime scheduling for pipewire.
# rtkit alone is not enough: xdg-desktop-portal's Realtime portal intercepts
# PipeWire's RT requests but fails (fstatat on pidfd returns ENOTDIR), silently
# preventing RT scheduling. PAM limits let PipeWire set RT directly.
security.rtkit.enable = true;
security.pam.loginLimits = [
{
domain = "@audio";
type = "-";
item = "rtprio";
value = "95";
}
{
domain = "@audio";
type = "-";
item = "nice";
value = "-19";
}
{
domain = "@audio";
type = "-";
item = "memlock";
value = "unlimited";
}
];
};
in
pkgs.callPackage ../rootfs-nixos {
inherit (cfg._internal) wayland-proxy-virtwl sommelier;
inherit soundConfig;
vmsilo-tray = cfg._internal.vmsilo-tray;
trayLogLevel = cfg.vmsilo-tray.logLevel;
waylandProxy = vm.waylandProxy;
guestPrograms = vm.guestPrograms;
# Accept both a single module and a list of modules; netvm-injected
# guest config is appended after the user's own modules.
guestConfig =
(if lib.isList vm.guestConfig then vm.guestConfig else [ vm.guestConfig ])
++ getEffectiveGuestConfig vm;
inherit (vm) copyChannel;
};
# Default block device options applied to all disks
defaultBlockOpts = {
packed-queue = true; # More efficient, better cache locality than split virtqueues
direct = true; # Bypass host page cache, guest handles its own page cache
};
# Format a disk configuration as --block argument (applies defaults).
# Per-disk attributes override the defaults via the // merge; "path" is
# emitted positionally, the rest as key=value pairs.
formatBlockArg =
disk: "--block ${formatPositionalKVArgs [ "path" ] "," "=" (defaultBlockOpts // disk)}";
# Normalize all isolated devices to canonical BDF form (see helpers.nix normalizeBdf)
normalizedIsolatedDevices = map normalizeBdf cfg.isolatedPciDevices;
# Generate VM launcher script (crosvm implementation).
# Produces a shell script that validates IOMMU groups for PCI passthrough,
# removes the stale control socket, then execs crosvm with the full device,
# network and kernel-cmdline configuration derived from the VM definition.
mkCrosvmVmScript =
vm:
let
# Only build rootfs if we need it (no custom root/kernel/initramfs)
needsBuiltRootfs = vm.rootDisk == null || vm.kernel == null || vm.initramfs == null;
rootfs = if needsBuiltRootfs then buildRootfs vm else null;
# Determine root disk config: use user's as-is, or built rootfs with rootDiskReadonly
rootDiskConfig =
if vm.rootDisk != null then
vm.rootDisk
else
{
path = "${rootfs}/nixos.erofs";
ro = vm.rootDiskReadonly;
};
kernelPath = if vm.kernel != null then vm.kernel else "${rootfs}/bzImage";
initramfsPath = if vm.initramfs != null then vm.initramfs else "${rootfs}/initrd";
additionalDisksArgs = lib.concatMapStringsSep " " formatBlockArg vm.additionalDisks;
# Ephemeral overlay disk (raw mode only).
# NOTE(review): this script does not create the backing file itself —
# presumably a tmpfiles rule or service pre-creates it; confirm in services.nix.
ephemeralDiskPath = "/var/lib/vmsilo/${vm.name}-ephemeral.raw";
ephemeralDiskId = "ephemeral";
ephemeralDiskConfig = {
path = ephemeralDiskPath;
ro = false;
id = ephemeralDiskId;
};
ephemeralDiskArg = lib.optionalString (vm.rootOverlay.type == "raw") (
formatBlockArg ephemeralDiskConfig
);
# Kernel param for overlay type: raw mode names the overlay disk by its
# block id; otherwise the guest uses a tmpfs overlay.
rootOverlayKernelParam =
if vm.rootOverlay.type == "raw" then
''-p "vmsilo.rootOverlay=raw,${ephemeralDiskId}"''
else
''-p "vmsilo.rootOverlay=tmpfs"'';
# Shared home directory (false disables; anything else enables)
sharedHomeEnabled = vm.sharedHome != false;
# Effective shared directories (user config + implicit sharedHome entry)
effectiveSharedDirs = mkEffectiveSharedDirs {
inherit (vm) sharedDirectories sharedHome;
vmName = vm.name;
inherit userUid userGid;
};
# virtiofsd vhost-user socket args for crosvm
virtiofsDirArgs = lib.concatMapStringsSep " " (
tag: "--vhost-user type=fs,socket=/run/vmsilo/${vm.name}/virtiofs/${tag}.socket"
) (builtins.attrNames effectiveSharedDirs);
extraKernelParams = lib.concatMapStringsSep " " (p: "-p \"${p}\"") vm.kernelParams;
# GPU config: false = disabled, true = default (wayland+opengl), attrset = custom features
gpuConfig =
if vm.gpu == false then
null
else if vm.gpu == true then
{
wayland = true;
opengl = true;
vulkan = false;
}
else
vm.gpu;
# vhost-user sound arg (when any sound is enabled)
soundEnabled = vm.sound.playback || vm.sound.capture;
soundVhostUserArg = lib.optionalString soundEnabled "--vhost-user type=sound,socket=/run/vmsilo/${vm.name}/sound/sound.socket";
# Convert BDF to sysfs path
bdfToSysfs = bdf: "/sys/bus/pci/devices/${normalizeBdf bdf}";
# PCI devices for this VM (extract path from attrset, normalize BDF)
vmPciDevicePaths = map (
dev: if isBdf dev.path then normalizeBdf dev.path else dev.path
) vm.pciDevices;
# Format --vfio arguments with optional kv pairs (all non-null attrs except path)
vfioArgs = lib.concatMapStringsSep " " (
dev:
let
sysfsPath = if isBdf dev.path then bdfToSysfs dev.path else dev.path;
remaining = lib.filterAttrs (k: v: k != "path" && v != null) dev;
kvPart = formatKVArgs "," remaining;
in
if kvPart == "" then "--vfio ${sysfsPath}" else "--vfio ${sysfsPath},${kvPart}"
) vm.pciDevices;
# vhost-user arguments
vhostUserArgs = lib.concatMapStringsSep " " (
vu: "--vhost-user ${formatKVArgs "," vu}"
) vm.vhostUser;
# Network interface crosvm arguments
# Sorted alphabetically by interface name for deterministic PCI slot assignment
networkArgs = lib.concatStringsSep " \\\n " (
lib.imap0 (
idx: entry:
let
ifName = entry.name;
iface = entry.value;
ifIndex = idxToIfIndex idx;
pciAddr = ifIndexToPciAddr ifIndex;
mac = getEffectiveIfaceMac vm ifName iface;
tapName = if iface.tap.name != null then iface.tap.name else makeTapName vm.name vm.id ifIndex;
in
"--net tap-name=${tapName},mac=${mac},pci-address=${pciAddr}"
) (sortedInterfaceList (getEffectiveInterfaces vm))
);
# Kernel params for network configuration (uses user-specified interface names)
networkKernelParams = lib.concatLists (
map (
entry:
let
ifName = entry.name;
iface = entry.value;
in
if iface.dhcp then
[ ''-p "ip=:::::${ifName}:dhcp"'' ]
else
# Static IPv4 addresses
(map (
addr:
let
parsed = parseCIDR addr;
in
''-p "ip=${parsed.ip}:::${prefixToNetmask parsed.prefix}::${ifName}:none"''
) iface.addresses)
# Static IPv6 addresses
++ (map (addr: ''-p "ip=[${addr}]:::::${ifName}:none"'') iface.v6Addresses)
# IPv4 routes
++ (lib.mapAttrsToList (dest: r: ''-p "rd.route=${dest}:${r.via}:${ifName}"'') iface.routes)
# IPv6 routes
++ (lib.mapAttrsToList (dest: r: ''-p "rd.route=[${dest}]:[${r.via}]:${ifName}"'') iface.v6Routes)
) (sortedInterfaceList (getEffectiveInterfaces vm))
);
# Kernel params for interface naming (vmsilo.ifname=<name>,<mac>)
interfaceNameKernelParams = map (
entry:
let
ifName = entry.name;
mac = getEffectiveIfaceMac vm ifName entry.value;
in
''-p "vmsilo.ifname=${ifName},${mac}"''
) (sortedInterfaceList (getEffectiveInterfaces vm));
# Nameserver params
nameserverParams = map (ns: ''-p "nameserver=${ns}"'') vm.network.nameservers;
# All network kernel params
allNetworkKernelParams = interfaceNameKernelParams ++ networkKernelParams ++ nameserverParams;
# Crosvm configuration (per-VM overrides global)
effectiveLogLevel = if vm.crosvm.logLevel != null then vm.crosvm.logLevel else cfg.crosvm.logLevel;
allExtraArgs = cfg.crosvm.extraArgs ++ vm.crosvm.extraArgs;
allExtraRunArgs = cfg.crosvm.extraRunArgs ++ vm.crosvm.extraRunArgs;
in
pkgs.writeShellScript "vmsilo-start-${vm.name}" ''
#!/bin/sh
set -e
${lib.optionalString (vm.pciDevices != [ ]) ''
# IOMMU group validation: every device sharing an IOMMU group with a
# passthrough device must itself be passed to this VM, be unbound,
# be bound to vfio-pci, or be a PCI bridge.
check_iommu_group() {
local dev="$1"
local group_path="/sys/bus/pci/devices/$dev/iommu_group/devices"
if [ ! -d "$group_path" ]; then
echo "Error: IOMMU not enabled or device $dev not found" >&2
echo "Ensure IOMMU is enabled (intel_iommu=on or amd_iommu=on)" >&2
exit 1
fi
for peer in "$group_path"/*; do
peer_bdf=$(basename "$peer")
[ "$peer_bdf" = "$dev" ] && continue
# Check if peer is in our passthrough list
case "$peer_bdf" in
${lib.concatStringsSep "|" vmPciDevicePaths})
# Peer is being passed to this VM, OK
;;
*)
# Check if peer is unbound (no driver)
if [ -L "/sys/bus/pci/devices/$peer_bdf/driver" ]; then
peer_driver=$(basename "$(readlink "/sys/bus/pci/devices/$peer_bdf/driver")")
if [ "$peer_driver" != "vfio-pci" ]; then
# Check if peer is a PCI bridge (class 0x0604xx) - safe to leave bound
peer_class=$(cat "/sys/bus/pci/devices/$peer_bdf/class" 2>/dev/null || echo "")
case "$peer_class" in
0x0604*)
# PCI-to-PCI bridge, safe to leave bound to pcieport driver
;;
*)
echo "Error: Device $dev shares IOMMU group with $peer_bdf (bound to $peer_driver)" >&2
echo "All devices in an IOMMU group must be passed to the same VM or unbound" >&2
exit 1
;;
esac
fi
fi
;;
esac
done
}
# Check all PCI devices
for dev in ${lib.concatStringsSep " " vmPciDevicePaths}; do
check_iommu_group "$dev"
done
''}
# Clean up stale socket
rm -f /run/vmsilo/${vm.name}/crosvm-control.socket
exec ${cfg._internal.crosvm}/bin/crosvm \
--log-level=${effectiveLogLevel} \
--no-syslog \
--no-timestamps \
${lib.escapeShellArgs allExtraArgs} \
run \
--name ${vm.name} \
-m ${toString vm.memory} \
--hugepages \
--balloon-page-reporting \
--boost-uclamp \
--disable-virtio-intx \
--no-i8042 \
--no-rtc \
--s2idle \
${
if cfg.schedulerIsolation == "full" then
"--core-scheduling=true"
else if cfg.schedulerIsolation == "vm" then
"--per-vm-core-scheduling"
else
"--core-scheduling=false"
} \
--initrd=${initramfsPath} \
--serial=hardware=virtio-console,type=unix-stream,path=/run/vmsilo/${vm.name}/console-backend.socket,console,input-unix-stream,stream-non-blocking \
${formatBlockArg rootDiskConfig} \
${additionalDisksArgs} \
${ephemeralDiskArg} \
${lib.optionalString (rootfs != null) ''-p "init=${rootfs.config.system.build.toplevel}/init"''} \
-p "vmsilo.hostname=${vm.name}" \
${lib.concatStringsSep " \\\n " allNetworkKernelParams} \
${lib.optionalString vm.autoShutdown.enable ''
-p "autoShutdown.enable=1" \
-p "autoShutdown.after=${toString vm.autoShutdown.after}" \
''} \
${rootOverlayKernelParam} \
${lib.optionalString sharedHomeEnabled ''-p "systemd.mount-extra=home:/home/user:virtiofs:"''} \
${extraKernelParams} \
${virtiofsDirArgs} \
--cid ${toString vm.id} \
--cpus ${toString vm.cpus} \
${
lib.optionalString (
gpuConfig != null
) "--vhost-user type=gpu,socket=/run/vmsilo/${vm.name}/gpu/gpu.socket"
} \
${soundVhostUserArg} \
-s /run/vmsilo/${vm.name}/crosvm-control.socket \
${vfioArgs} \
${networkArgs} \
${vhostUserArgs} \
${lib.escapeShellArgs allExtraRunArgs} \
${kernelPath}
'';
# Generate cloud-hypervisor VM launch script (two-step: start VMM → ch-remote create + boot).
# The VM definition is serialized to a JSON config and fed to ch-remote on stdin;
# the script then blocks on the VMM process until the VM shuts down.
mkCloudHypervisorVmScript =
vm:
let
# ── Shared with crosvm ──────────────────────────────────────────────────
needsBuiltRootfs = vm.rootDisk == null || vm.kernel == null || vm.initramfs == null;
rootfs = if needsBuiltRootfs then buildRootfs vm else null;
rootDiskPath = if vm.rootDisk != null then vm.rootDisk.path else "${rootfs}/nixos.erofs";
rootDiskReadonly = if vm.rootDisk != null then vm.rootDisk.ro else vm.rootDiskReadonly;
kernelPath = if vm.kernel != null then vm.kernel else "${rootfs}/bzImage";
initramfsPath = if vm.initramfs != null then vm.initramfs else "${rootfs}/initrd";
ephemeralDiskPath = "/var/lib/vmsilo/${vm.name}-ephemeral.raw";
sharedHomeEnabled = vm.sharedHome != false;
effectiveSharedDirs = mkEffectiveSharedDirs {
inherit (vm) sharedDirectories sharedHome;
vmName = vm.name;
inherit userUid userGid;
};
# GPU config: false = disabled, true = default (wayland+opengl), attrset = custom
gpuConfig =
if vm.gpu == false then
null
else if vm.gpu == true then
{
wayland = true;
opengl = true;
}
else
vm.gpu;
soundEnabled = vm.sound.playback || vm.sound.capture;
# ── Kernel cmdline ───────────────────────────────────────────────────────
initParam = lib.optional (rootfs != null) "init=${rootfs.config.system.build.toplevel}/init";
networkCmdlineParams = lib.concatLists (
map (
entry:
let
ifName = entry.name;
iface = entry.value;
in
if iface.dhcp then
[ "ip=:::::${ifName}:dhcp" ]
else
(map (
addr:
let
p = parseCIDR addr;
in
"ip=${p.ip}:::${prefixToNetmask p.prefix}::${ifName}:none"
) iface.addresses)
++ (map (addr: "ip=[${addr}]:::::${ifName}:none") iface.v6Addresses)
++ (lib.mapAttrsToList (dest: r: "rd.route=${dest}:${r.via}:${ifName}") iface.routes)
++ (lib.mapAttrsToList (dest: r: "rd.route=[${dest}]:[${r.via}]:${ifName}") iface.v6Routes)
) (sortedInterfaceList (getEffectiveInterfaces vm))
);
ifaceNameParams = map (
entry: "vmsilo.ifname=${entry.name},${getEffectiveIfaceMac vm entry.name entry.value}"
) (sortedInterfaceList (getEffectiveInterfaces vm));
nameserverParams = map (ns: "nameserver=${ns}") vm.network.nameservers;
# In raw mode the ephemeral disk is the second disk entry, hence vdb
rootOverlayParam =
if vm.rootOverlay.type == "raw" then "vmsilo.rootOverlay=raw,vdb" else "vmsilo.rootOverlay=tmpfs";
sharedHomeParam = lib.optional sharedHomeEnabled "systemd.mount-extra=home:/home/user:virtiofs:";
extraKernelParams = vm.kernelParams;
allCmdlineParams = lib.flatten [
initParam
[ "vmsilo.hostname=${vm.name}" ]
ifaceNameParams
networkCmdlineParams
nameserverParams
(lib.optionals vm.autoShutdown.enable [
"autoShutdown.enable=1"
"autoShutdown.after=${toString vm.autoShutdown.after}"
])
[ rootOverlayParam ]
sharedHomeParam
extraKernelParams
];
cmdlineStr = lib.concatStringsSep " " allCmdlineParams;
# ── Cloud-hypervisor JSON config ─────────────────────────────────────────
# Sorted alphabetically by interface name, matching the crosvm ordering
networkEntries = lib.imap0 (
idx: entry:
let
ifName = entry.name;
iface = entry.value;
ifIndex = idxToIfIndex idx;
mac = getEffectiveIfaceMac vm ifName iface;
tapName = if iface.tap.name != null then iface.tap.name else makeTapName vm.name vm.id ifIndex;
in
{
tap = tapName;
mac = mac;
}
) (sortedInterfaceList (getEffectiveInterfaces vm));
# Disk order matters: root first (vda), optional ephemeral overlay second
# (vdb, see rootOverlayParam), then user-declared additional disks.
diskEntries = [
{
path = rootDiskPath;
readonly = rootDiskReadonly;
direct = true;
}
]
++ lib.optional (vm.rootOverlay.type == "raw") {
path = ephemeralDiskPath;
readonly = false;
direct = false;
}
++ map (d: {
path = d.path;
readonly = d.ro or false;
direct = false;
}) vm.additionalDisks;
fsEntries = map (tag: {
tag = tag;
socket = "/run/vmsilo/${vm.name}/virtiofs/${tag}.socket";
}) (builtins.attrNames effectiveSharedDirs);
deviceEntries = map (dev: {
path = "/sys/bus/pci/devices/${if isBdf dev.path then normalizeBdf dev.path else dev.path}";
}) vm.pciDevices;
# Both "full" and "vm" isolation map to per-VM core scheduling here;
# only "off" disables it.
coreScheduling = if cfg.schedulerIsolation == "off" then "Off" else "Vm";
chBaseConfig = {
payload = {
kernel = kernelPath;
initramfs = initramfsPath;
cmdline = cmdlineStr;
};
memory = {
size = vm.memory * 1024 * 1024;
shared = true;
hugepages = true;
};
cpus = {
boot_vcpus = vm.cpus;
# max_vcpus is required by CpusConfig; equal to boot_vcpus (no CPU hotplug)
max_vcpus = vm.cpus;
core_scheduling = coreScheduling;
};
balloon = {
size = 0;
free_page_reporting = true;
};
serial = {
mode = "Socket";
socket = "/run/vmsilo/${vm.name}/console-backend.socket";
};
console = {
mode = "Off";
};
vsock = {
cid = vm.id;
socket = "/run/vmsilo/${vm.name}/vsock.socket";
};
disks = diskEntries;
}
// lib.optionalAttrs (networkEntries != [ ]) { net = networkEntries; }
// lib.optionalAttrs (fsEntries != [ ]) { fs = fsEntries; }
// lib.optionalAttrs (gpuConfig != null) {
gpu = [ { socket = "/run/vmsilo/${vm.name}/gpu/gpu.socket"; } ];
}
// lib.optionalAttrs soundEnabled {
vhost_user_sound = [ { socket = "/run/vmsilo/${vm.name}/sound/sound.socket"; } ];
}
// lib.optionalAttrs (deviceEntries != [ ]) { devices = deviceEntries; };
# User extraConfig may override any generated key (deep merge)
chConfig = lib.recursiveUpdate chBaseConfig vm.cloudHypervisor.extraConfig;
chConfigJson = builtins.toJSON chConfig;
effectiveChLogLevel =
if vm.cloudHypervisor.logLevel != null then
vm.cloudHypervisor.logLevel
else
cfg."cloud-hypervisor".logLevel;
effectiveChExtraArgs = cfg."cloud-hypervisor".extraArgs ++ vm.cloudHypervisor.extraArgs;
chBin = "${cfg._internal."cloud-hypervisor"}/bin/cloud-hypervisor";
chRemote = "${cfg._internal."cloud-hypervisor"}/bin/ch-remote";
in
pkgs.writeShellScript "vmsilo-start-${vm.name}" ''
set -e
# Remove stale control socket
rm -f /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket
${lib.optionalString (vm.rootOverlay.type == "raw") ''
# Create ephemeral overlay disk
truncate -s ${vm.rootOverlay.size} ${ephemeralDiskPath}
''}
# Step 1: Start cloud-hypervisor VMM in background
export RUST_LOG=${effectiveChLogLevel}
${chBin} \
--api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \
${lib.escapeShellArgs effectiveChExtraArgs} &
CH_PID=$!
# Wait for API socket to appear: 60 iterations x 0.5s sleep = up to 30s
ELAPSED=0
while [ $ELAPSED -lt 60 ] && [ ! -S /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket ]; do
sleep 0.5
ELAPSED=$((ELAPSED + 1))
done
if [ ! -S /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket ]; then
echo "Timeout waiting for cloud-hypervisor API socket" >&2
kill $CH_PID 2>/dev/null || true
exit 1
fi
# Step 2: Create VM configuration (JSON config piped on stdin)
printf '%s\n' ${lib.escapeShellArg chConfigJson} | \
${chRemote} \
--api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \
create -- -
# Step 3: Boot VM
${chRemote} \
--api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \
boot
# Block until VMM exits (VM shutdown)
wait $CH_PID
'';
# Dispatcher: pick the launcher generator matching the VM's configured
# hypervisor ("crosvm" uses crosvm; anything else falls through to
# cloud-hypervisor) and apply it to the VM definition.
mkVmScript =
vm:
let
builder = if vm.hypervisor == "crosvm" then mkCrosvmVmScript else mkCloudHypervisorVmScript;
in
builder vm;
# vsock proxy for cloud-hypervisor: connect via unix socket + CONNECT handshake.
# cloud-hypervisor exposes guest vsock as a host unix socket that speaks the
# "CONNECT <port>" / "OK <assigned>" line protocol before raw data flows.
mkChVsockConnectScript =
vmName: port:
let
pyProxy = pkgs.writeText "vsock-proxy.py" ''
import socket, sys, threading
# argv: <unix socket path> <guest vsock port>
sock_path, port = sys.argv[1], int(sys.argv[2])
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
s.connect(sock_path)
except OSError as e:
sys.stderr.write(f"vsock connect failed: {e}\n")
sys.exit(1)
# Send CONNECT handshake
s.sendall(f"CONNECT {port}\n".encode())
# Read OK/NACK response line
buf = b""
while b"\n" not in buf:
chunk = s.recv(4096)
if not chunk:
sys.stderr.write("vsock: closed before OK response\n")
sys.exit(1)
buf += chunk
line, _, rest = buf.partition(b"\n")
if not line.startswith(b"OK"):
sys.stderr.write(f"vsock: unexpected response: {line}\n")
sys.exit(1)
# Write any data that arrived alongside the OK line
if rest:
sys.stdout.buffer.write(rest)
sys.stdout.buffer.flush()
# Bidirectional proxy: stdin -> socket, socket -> stdout
def forward_in():
try:
while True:
data = sys.stdin.buffer.read(4096)
if not data:
break
s.sendall(data)
except Exception:
pass
finally:
# Half-close so the peer sees EOF on its read side
try:
s.shutdown(socket.SHUT_WR)
except Exception:
pass
threading.Thread(target=forward_in, daemon=True).start()
while True:
data = s.recv(4096)
if not data:
break
sys.stdout.buffer.write(data)
sys.stdout.buffer.flush()
'';
in
pkgs.writeShellScript "vsock-connect-${vmName}-${toString port}" ''
VSOCK_SOCKET="/run/vmsilo/${vmName}/vsock.socket"
PORT=${toString port}
TIMEOUT=30
ELAPSED=0
# Wait for vsock socket to appear (ELAPSED counts 0.5s iterations)
while [ $ELAPSED -lt $TIMEOUT ] && [ ! -S "$VSOCK_SOCKET" ]; do
sleep 0.5
ELAPSED=$((ELAPSED + 1))
done
[ -S "$VSOCK_SOCKET" ] || { echo "Timeout: vsock socket not found" >&2; exit 1; }
# Retry until vsock port is ready (guest command listener may not be up yet).
ELAPSED=0
while [ $ELAPSED -lt $TIMEOUT ]; do
${pkgs.python3}/bin/python3 ${pyProxy} "$VSOCK_SOCKET" "$PORT" && exit 0
sleep 0.5
ELAPSED=$((ELAPSED + 1))
done
echo "Timeout waiting for VM ${vmName} vsock:${toString port}" >&2
exit 1
'';
# Generate proxy script for a VM
mkProxyScript =
vm:
if vm.hypervisor == "crosvm" then
pkgs.writeShellScript "vmsilo-proxy-${vm.name}" ''
CID=${toString vm.id}
VSOCK_PORT=5000
TIMEOUT=30
# Wait for vsock to become available
ELAPSED=0
while [ $ELAPSED -lt $TIMEOUT ]; do
if ${pkgs.socat}/bin/socat -u OPEN:/dev/null VSOCK-CONNECT:$CID:$VSOCK_PORT 2>/dev/null; then
break
fi
sleep 0.5
ELAPSED=$((ELAPSED + 1))
done
if [ $ELAPSED -ge $TIMEOUT ]; then
echo "Timeout waiting for VM ${vm.name} to start" >&2
exit 1
fi
# Forward stdin/stdout to vsock
exec ${pkgs.socat}/bin/socat - VSOCK-CONNECT:$CID:$VSOCK_PORT
''
else
# Cloud-hypervisor: connect via unix socket + CONNECT protocol
mkChVsockConnectScript vm.name 5000;
# Generate shell case statement for VM dispatch
mkVmCase = makeCase: ''
case "$VM_NAME" in
${lib.concatMapStringsSep "\n " makeCase vms}
*)
echo "Unknown VM: $VM_NAME" >&2
echo "Available VMs: ${lib.concatMapStringsSep ", " (vm: vm.name) vms}" >&2
exit 1
;;
esac
'';
# vm-run: Run command in VM (socket-activated)
vmRunScript = pkgs.writeShellScript "vm-run" ''
if [ $# -lt 2 ]; then
echo "Usage: vm-run <vm-name> <command> [args...]" >&2
exit 1
fi
VM_NAME="$1"
shift
SOCKET="/run/vmsilo/$VM_NAME/command.socket"
if [ ! -S "$SOCKET" ]; then
echo "Unknown VM or socket not active: $VM_NAME" >&2
echo "Available VMs: ${lib.concatMapStringsSep ", " (vm: vm.name) vms}" >&2
exit 1
fi
# Send command via socket (triggers activation if needed)
echo "$@" | ${pkgs.socat}/bin/socat - UNIX-CONNECT:"$SOCKET"
'';
# vm-start: Start VM via systemd (uses polkit for authorization)
vmStartScript = pkgs.writeShellScript "vm-start" ''
if [ $# -ne 1 ]; then
echo "Usage: vm-start <vm-name>" >&2
exit 1
fi
VM_NAME="$1"
${mkVmCase (vm: "${vm.name}) systemctl start vmsilo-${vm.name}-vm.service ;;")}
'';
# vm-stop: Stop VM via systemd (uses polkit for authorization)
vmStopScript = pkgs.writeShellScript "vm-stop" ''
if [ $# -ne 1 ]; then
echo "Usage: vm-stop <vm-name>" >&2
exit 1
fi
VM_NAME="$1"
${mkVmCase (vm: "${vm.name}) systemctl stop vmsilo-${vm.name}-vm.service ;;")}
'';
# vm-shell: Connect to VM (serial console by default, SSH with --ssh)
vmShellScript = pkgs.writeShellScript "vm-shell" ''
usage() {
echo "Usage: vm-shell [--ssh [--root]] <vm-name>" >&2
echo "" >&2
echo "Options:" >&2
echo " --ssh Use SSH over vsock (requires SSH keys configured)" >&2
echo " --root Connect as root (only with --ssh)" >&2
echo "" >&2
echo "Without --ssh, connects to serial console." >&2
echo "Escape character is CTRL+]" >&2
exit 1
}
USE_SSH=0
USE_ROOT=0
while [ $# -gt 0 ]; do
case "$1" in
--ssh)
USE_SSH=1
shift
;;
--root)
USE_ROOT=1
shift
;;
-*)
usage
;;
*)
break
;;
esac
done
if [ $# -ne 1 ]; then
usage
fi
VM_NAME="$1"
if [ $USE_ROOT -eq 1 ] && [ $USE_SSH -eq 0 ]; then
echo "Error: --root requires --ssh" >&2
exit 1
fi
if [ $USE_SSH -eq 1 ]; then
if [ $USE_ROOT -eq 1 ]; then
USER_NAME="root"
else
USER_NAME="user"
fi
${mkVmCase (
vm:
if vm.hypervisor == "crosvm" then
"${vm.name}) exec ${pkgs.openssh}/bin/ssh \$USER_NAME@vsock/${toString vm.id} ;;"
else
let
proxyCmd = mkChVsockConnectScript vm.name 22;
in
"${vm.name}) exec ${pkgs.openssh}/bin/ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ProxyCommand=${lib.escapeShellArg "${proxyCmd}"} \$USER_NAME@localhost ;;"
)}
else
CONSOLE="/run/vmsilo/$VM_NAME/console"
if [ ! -e "$CONSOLE" ]; then
echo "Console not found: $CONSOLE" >&2
echo "Is the VM running? Use: vm-start $VM_NAME" >&2
exit 1
fi
echo "Escape character is CTRL+]"
exec ${pkgs.socat}/bin/socat -,raw,echo=0,escape=0x1d "$CONSOLE"
fi
'';
in
{
  config = lib.mkIf cfg.enable {
    # Set internal options for other modules to consume
    programs.vmsilo._internal =
      let
        # Build { <vm-name> = f vm; ... } over every configured VM.
        perVm = f: lib.listToAttrs (map (vm: lib.nameValuePair vm.name (f vm)) vms);
      in
      {
        vmScripts = perVm mkVmScript;
        proxyScripts = perVm mkProxyScript;
        userScripts = {
          vm-run = vmRunScript;
          vm-start = vmStartScript;
          vm-stop = vmStopScript;
          vm-shell = vmShellScript;
          vmsilo-usb = vmsiloUsbScript;
        };
        # `inherit` replaces the redundant `usbHelperLib = usbHelperLib;`
        inherit usbHelperLib;
      };
  };
}