# vmsilo/modules/scripts.nix
# (file-viewer metadata converted to comments: 588 lines, 19 KiB, Nix)

# Script generation for vmsilo NixOS module
# VM launcher scripts and user-facing scripts (vm-run, vm-start, vm-stop, vm-shell)
# Standard NixOS module signature.
{
config,
pkgs,
lib,
...
}:
let
# Shorthand for this module's option tree.
cfg = config.programs.vmsilo;
# Pure helper functions shared across vmsilo modules.
helpers = import ./lib/helpers.nix { inherit lib; };
inherit (helpers)
formatKVArgs
formatPositionalKVArgs
parseCIDR
prefixToNetmask
idxToIfIndex
ifIndexToPciAddr
normalizeBdf
isBdf
generateMac
sortedInterfaceList
makeTapName
assignVmIds
mkEffectiveSharedDirs
;
# VM definitions with numeric ids assigned; each id is later used as the
# VM's vsock CID (see the `--cid` flag in mkVmScript and CID in mkProxyScript).
vms = assignVmIds cfg.nixosVms;
# User UID/GID and runtime directory for explicit paths (system services need these)
userUid = config.users.users.${cfg.user}.uid;
userGid = config.users.groups.${config.users.users.${cfg.user}.group}.gid;
userRuntimeDir = "/run/user/${toString userUid}";
# Build effective sound config from sound options.
# Returns null when neither playback nor capture is requested; otherwise an
# attrset describing the host PulseAudio endpoints for crosvm's --virtio-snd.
mkSoundConfig =
  sound:
  let
    # Sound is active when either direction is requested.
    enabled = sound.playback || sound.capture;
    # Host PulseAudio socket/cookie, addressed via the user's runtime dir.
    pulseConfig = {
      backend = "pulse";
      capture = sound.capture;
      pulse_socket_path = "${userRuntimeDir}/pulse/native";
      pulse_cookie_path = "/home/${cfg.user}/.config/pulse/cookie";
    };
  in
  if enabled then pulseConfig else null;
# Get effective MAC for an interface (uses user-specified interface name).
# A user-provided macAddress wins; otherwise derive a deterministic MAC from
# the VM name and interface name.
getEffectiveIfaceMac =
  vm: ifName: iface:
  if iface.macAddress == null then generateMac vm.name ifName else iface.macAddress;
# Build rootfs for a VM.
# Produces the guest NixOS image derivation whose outputs are consumed below
# as ${rootfs}/nixos.erofs, ${rootfs}/bzImage and ${rootfs}/initrd. Only
# evaluated when the VM does not supply its own rootDisk/kernel/initramfs.
buildRootfs =
vm:
let
# Enable pipewire in guest when sound is enabled
soundConfig = lib.optionalAttrs (vm.sound.playback || vm.sound.capture) {
services.pipewire = {
enable = lib.mkDefault true;
pulse.enable = lib.mkDefault true;
# Pipewire volume defaults to 40%, likely because the ALSA device has no mixer controls.
wireplumber.extraConfig."50-default-volume"."wireplumber.settings" = {
"device.routes.default-sink-volume" = 1.0;
};
extraConfig.pipewire."50-allow-sample-rates"."context.properties" = {
# Allow everything to avoid resampling. Let the host pipewire resample if needed.
"default.clock.allowed-rates" = [
44100
48000
88200
96000
176400
192000
352800
384000
];
};
};
# Enable realtime scheduling for pipewire
security.rtkit.enable = true;
};
in
# Pass per-VM options plus internal packages through to the rootfs builder.
pkgs.callPackage ../rootfs-nixos {
inherit (cfg._internal) wayland-proxy-virtwl sommelier;
inherit soundConfig;
vmsilo-tray = cfg._internal.vmsilo-tray;
trayLogLevel = cfg.vmsilo-tray.logLevel;
waylandProxy = vm.waylandProxy;
guestPrograms = vm.guestPrograms;
guestConfig = vm.guestConfig;
inherit (vm) copyChannel;
};
# Default block device options applied to all disks
defaultBlockOpts = {
packed-queue = true; # More efficient, better cache locality than split virtqueues
direct = true; # Bypass host page cache, guest handles its own page cache
};
# Format a disk configuration as a crosvm --block argument (applies defaults).
# Keys in `disk` override defaultBlockOpts; "path" is emitted positionally,
# remaining keys as comma-separated key=value pairs.
formatBlockArg =
disk: "--block ${formatPositionalKVArgs [ "path" ] "," "=" (defaultBlockOpts // disk)}";
# (Removed dead binding: `normalizedIsolatedDevices` was computed here from
# cfg.isolatedPciDevices but never referenced anywhere in this file.)
# Generate VM launcher script.
# Emits a shell script that (optionally) validates PCI-passthrough IOMMU
# preconditions, removes a stale control socket, and then execs crosvm with
# the full device and kernel-command-line configuration for this VM.
mkVmScript =
vm:
let
# Only build rootfs if we need it (no custom root/kernel/initramfs)
needsBuiltRootfs = vm.rootDisk == null || vm.kernel == null || vm.initramfs == null;
rootfs = if needsBuiltRootfs then buildRootfs vm else null;
# Determine root disk config: use user's as-is, or built rootfs with rootDiskReadonly
rootDiskConfig =
if vm.rootDisk != null then
vm.rootDisk
else
{
path = "${rootfs}/nixos.erofs";
ro = vm.rootDiskReadonly;
};
# Kernel and initramfs fall back to the built rootfs artifacts when not user-supplied.
kernelPath = if vm.kernel != null then vm.kernel else "${rootfs}/bzImage";
initramfsPath = if vm.initramfs != null then vm.initramfs else "${rootfs}/initrd";
additionalDisksArgs = lib.concatMapStringsSep " " formatBlockArg vm.additionalDisks;
# Ephemeral overlay disk (qcow2 mode only)
# NOTE(review): the qcow2 file under /var/lib/vmsilo is not created in this
# file — confirm it is provisioned elsewhere before the VM starts.
ephemeralDiskPath = "/var/lib/vmsilo/${vm.name}-ephemeral.qcow2";
ephemeralDiskId = "ephemeral";
ephemeralDiskConfig = {
path = ephemeralDiskPath;
ro = false;
id = ephemeralDiskId;
};
ephemeralDiskArg = lib.optionalString (vm.rootOverlay.type == "qcow2") (
formatBlockArg ephemeralDiskConfig
);
# Kernel param for overlay type
# Tells the guest which root-overlay strategy to use; in qcow2 mode it also
# names the disk id the guest should look for.
rootOverlayKernelParam =
if vm.rootOverlay.type == "qcow2" then
''-p "vmsilo.rootOverlay=qcow2,${ephemeralDiskId}"''
else
''-p "vmsilo.rootOverlay=tmpfs"'';
# Shared home directory
# sharedHome may be false (disabled), true (default path) or a string path.
sharedHomePath = if builtins.isString vm.sharedHome then vm.sharedHome else "/shared/${vm.name}";
sharedHomeEnabled = vm.sharedHome != false;
# Effective shared directories (user config + implicit sharedHome entry)
effectiveSharedDirs = mkEffectiveSharedDirs {
inherit (vm) sharedDirectories sharedHome;
vmName = vm.name;
inherit userUid userGid;
};
# virtiofsd vhost-user socket args for crosvm
# One --vhost-user fs device per shared-directory tag; virtiofsd itself is
# expected to listen on these sockets (started outside this script).
virtiofsDirArgs = lib.concatMapStringsSep " " (
tag: "--vhost-user type=fs,socket=/run/vmsilo/${vm.name}-virtiofsd-${tag}.socket"
) (builtins.attrNames effectiveSharedDirs);
extraKernelParams = lib.concatMapStringsSep " " (p: "-p \"${p}\"") vm.kernelParams;
# GPU config: false = disabled, true = default (wayland+opengl), attrset = custom features
gpuConfig =
if vm.gpu == false then
null
else if vm.gpu == true then
{
wayland = true;
opengl = true;
vulkan = false;
}
else
vm.gpu;
# Build context-types string from enabled GPU features
gpuContextTypes = lib.optionalString (gpuConfig != null) (
lib.concatStringsSep ":" (
lib.filter (x: x != null) [
(if gpuConfig.wayland then "cross-domain" else null)
(if gpuConfig.opengl then "virgl2" else null)
(if gpuConfig.vulkan then "venus" else null)
]
)
);
# Sound config from playback/capture booleans
effectiveSound = mkSoundConfig vm.sound;
# Convert BDF to sysfs path
bdfToSysfs = bdf: "/sys/bus/pci/devices/${normalizeBdf bdf}";
# PCI devices for this VM (extract path from attrset, normalize BDF)
# NOTE(review): entries that are not BDFs are kept as full sysfs paths, but
# the shell check below prepends /sys/bus/pci/devices/ to each entry — that
# combination looks broken for non-BDF paths; confirm only BDFs are expected.
vmPciDevicePaths = map (
dev: if isBdf dev.path then normalizeBdf dev.path else dev.path
) vm.pciDevices;
# Format --vfio arguments with optional kv pairs
vfioArgs = lib.concatMapStringsSep " " (
dev:
let
sysfsPath = if isBdf dev.path then bdfToSysfs dev.path else dev.path;
# All non-null attrs other than "path" become key=value suffixes.
remaining = lib.filterAttrs (k: v: k != "path" && v != null) dev;
kvPart = formatKVArgs "," remaining;
in
if kvPart == "" then "--vfio ${sysfsPath}" else "--vfio ${sysfsPath},${kvPart}"
) vm.pciDevices;
# vhost-user arguments
vhostUserArgs = lib.concatMapStringsSep " " (
vu: "--vhost-user ${formatKVArgs "," vu}"
) vm.vhostUser;
# Network interface crosvm arguments
# Sorted alphabetically by interface name for deterministic PCI slot assignment
networkArgs = lib.concatStringsSep " \\\n " (
lib.imap0 (
idx: entry:
let
ifName = entry.name;
iface = entry.value;
ifIndex = idxToIfIndex idx;
pciAddr = ifIndexToPciAddr ifIndex;
mac = getEffectiveIfaceMac vm ifName iface;
# Explicit tap name wins; otherwise derive one from VM name/id and index.
tapName = if iface.tap.name != null then iface.tap.name else makeTapName vm.name vm.id ifIndex;
in
"--net tap-name=${tapName},mac=${mac},pci-address=${pciAddr}"
) (sortedInterfaceList vm.network.interfaces)
);
# Kernel params for network configuration (uses user-specified interface names)
# Uses the kernel "ip=" / dracut "rd.route=" parameter syntax.
networkKernelParams = lib.concatLists (
map (
entry:
let
ifName = entry.name;
iface = entry.value;
in
if iface.dhcp then
[ ''-p "ip=:::::${ifName}:dhcp"'' ]
else
# Static IPv4 addresses
(map (
addr:
let
parsed = parseCIDR addr;
in
''-p "ip=${parsed.ip}:::${prefixToNetmask parsed.prefix}::${ifName}:none"''
) iface.addresses)
# Static IPv6 addresses
++ (map (addr: ''-p "ip=[${addr}]:::::${ifName}:none"'') iface.v6Addresses)
# IPv4 routes
++ (lib.mapAttrsToList (dest: r: ''-p "rd.route=${dest}:${r.via}:${ifName}"'') iface.routes)
# IPv6 routes
++ (lib.mapAttrsToList (dest: r: ''-p "rd.route=[${dest}]:[${r.via}]:${ifName}"'') iface.v6Routes)
) (sortedInterfaceList vm.network.interfaces)
);
# Kernel params for interface naming (vmsilo.ifname=<name>,<mac>)
# Lets the guest rename interfaces to the user-chosen names by MAC match.
interfaceNameKernelParams = map (
entry:
let
ifName = entry.name;
mac = getEffectiveIfaceMac vm ifName entry.value;
in
''-p "vmsilo.ifname=${ifName},${mac}"''
) (sortedInterfaceList vm.network.interfaces);
# Nameserver params
nameserverParams = map (ns: ''-p "nameserver=${ns}"'') vm.network.nameservers;
# All network kernel params
allNetworkKernelParams = interfaceNameKernelParams ++ networkKernelParams ++ nameserverParams;
# Crosvm configuration (per-VM overrides global)
effectiveLogLevel = if vm.crosvm.logLevel != null then vm.crosvm.logLevel else cfg.crosvm.logLevel;
allExtraArgs = cfg.crosvm.extraArgs ++ vm.crosvm.extraArgs;
allExtraRunArgs = cfg.crosvm.extraRunArgs ++ vm.crosvm.extraRunArgs;
in
# NOTE(review): writeShellScript already emits its own shebang; the
# "#!/bin/sh" line below ends up as an inert comment inside the script body.
pkgs.writeShellScript "vmsilo-start-${vm.name}" ''
#!/bin/sh
set -e
${lib.optionalString (vm.pciDevices != [ ]) ''
# IOMMU group validation
# Every device sharing an IOMMU group with a passed-through device must be
# either passed to this same VM, bound to vfio-pci, unbound, or a PCI bridge.
check_iommu_group() {
local dev="$1"
local group_path="/sys/bus/pci/devices/$dev/iommu_group/devices"
if [ ! -d "$group_path" ]; then
echo "Error: IOMMU not enabled or device $dev not found" >&2
echo "Ensure IOMMU is enabled (intel_iommu=on or amd_iommu=on)" >&2
exit 1
fi
for peer in "$group_path"/*; do
peer_bdf=$(basename "$peer")
[ "$peer_bdf" = "$dev" ] && continue
# Check if peer is in our passthrough list
case "$peer_bdf" in
${lib.concatStringsSep "|" vmPciDevicePaths})
# Peer is being passed to this VM, OK
;;
*)
# Check if peer is unbound (no driver)
if [ -L "/sys/bus/pci/devices/$peer_bdf/driver" ]; then
peer_driver=$(basename "$(readlink "/sys/bus/pci/devices/$peer_bdf/driver")")
if [ "$peer_driver" != "vfio-pci" ]; then
# Check if peer is a PCI bridge (class 0x0604xx) - safe to leave bound
peer_class=$(cat "/sys/bus/pci/devices/$peer_bdf/class" 2>/dev/null || echo "")
case "$peer_class" in
0x0604*)
# PCI-to-PCI bridge, safe to leave bound to pcieport driver
;;
*)
echo "Error: Device $dev shares IOMMU group with $peer_bdf (bound to $peer_driver)" >&2
echo "All devices in an IOMMU group must be passed to the same VM or unbound" >&2
exit 1
;;
esac
fi
fi
;;
esac
done
}
# Check all PCI devices
for dev in ${lib.concatStringsSep " " vmPciDevicePaths}; do
check_iommu_group "$dev"
done
''}
# Clean up stale socket
rm -f /run/vmsilo/${vm.name}-crosvm-control.socket
# Launch crosvm: global flags precede the `run` subcommand, per-VM flags follow.
exec ${cfg._internal.crosvm}/bin/crosvm \
--log-level=${effectiveLogLevel} \
--no-syslog \
--no-timestamps \
${lib.escapeShellArgs allExtraArgs} \
run \
--name ${vm.name} \
-m ${toString vm.memory} \
--hugepages \
--balloon-page-reporting \
--boost-uclamp \
--disable-virtio-intx \
--no-i8042 \
--no-rtc \
--s2idle \
${
if cfg.schedulerIsolation == "full" then
"--core-scheduling=true"
else if cfg.schedulerIsolation == "vm" then
"--per-vm-core-scheduling"
else
"--core-scheduling=false"
} \
--initrd=${initramfsPath} \
--serial=hardware=virtio-console,type=unix-stream,path=/run/vmsilo/${vm.name}-console-backend.socket,console,input-unix-stream,stream-non-blocking \
${formatBlockArg rootDiskConfig} \
${additionalDisksArgs} \
${ephemeralDiskArg} \
${lib.optionalString (rootfs != null) ''-p "init=${rootfs.config.system.build.toplevel}/init"''} \
-p "vmsilo.hostname=${vm.name}" \
${lib.concatStringsSep " \\\n " allNetworkKernelParams} \
${lib.optionalString vm.autoShutdown.enable ''
-p "autoShutdown.enable=1" \
-p "autoShutdown.after=${toString vm.autoShutdown.after}" \
''} \
${rootOverlayKernelParam} \
${lib.optionalString sharedHomeEnabled ''-p "systemd.mount-extra=home:/home/user:virtiofs:"''} \
${extraKernelParams} \
${virtiofsDirArgs} \
--cid ${toString vm.id} \
--cpus ${toString vm.cpus} \
${lib.optionalString (gpuConfig != null) "--gpu=context-types=${gpuContextTypes}"} \
${
lib.optionalString (
gpuConfig != null && gpuConfig.vulkan
) "--gpu-render-server=path=${pkgs.virglrenderer}/libexec/virgl_render_server"
} \
${lib.optionalString (effectiveSound != null) "--virtio-snd=${formatKVArgs "," effectiveSound}"} \
-s /run/vmsilo/${vm.name}-crosvm-control.socket \
${
lib.optionalString (gpuConfig != null)
"--wayland-security-context wayland_socket=${userRuntimeDir}/wayland-0,app_id=vmsilo:${vm.name}:${vm.color}"
} \
${vfioArgs} \
${networkArgs} \
${vhostUserArgs} \
${lib.escapeShellArgs allExtraRunArgs} \
${kernelPath}
'';
# Generate proxy script for a VM.
# Bridges the caller's stdin/stdout to the guest's vsock command port,
# first waiting (up to TIMEOUT seconds) for the guest to accept connections.
# Fix: the previous version compared a half-second tick counter directly
# against TIMEOUT, so an intended 30 s timeout expired after only 15 s.
mkProxyScript =
vm:
pkgs.writeShellScript "vmsilo-proxy-${vm.name}" ''
CID=${toString vm.id}
VSOCK_PORT=5000
# Timeout in seconds; we poll twice per second, so count half-second ticks.
TIMEOUT=30
MAX_TICKS=$((TIMEOUT * 2))
# Wait for vsock to become available
TICKS=0
while [ $TICKS -lt $MAX_TICKS ]; do
if ${pkgs.socat}/bin/socat -u OPEN:/dev/null VSOCK-CONNECT:$CID:$VSOCK_PORT 2>/dev/null; then
break
fi
sleep 0.5
TICKS=$((TICKS + 1))
done
if [ $TICKS -ge $MAX_TICKS ]; then
echo "Timeout waiting for VM ${vm.name} to start" >&2
exit 1
fi
# Forward stdin/stdout to vsock
exec ${pkgs.socat}/bin/socat - VSOCK-CONNECT:$CID:$VSOCK_PORT
'';
# Generate shell case statement for VM dispatch.
# `makeCase` maps each VM to one "name) ... ;;" arm; a catch-all arm reports
# unknown names and exits 1. The generated snippet expects $VM_NAME to be set.
mkVmCase = makeCase: ''
case "$VM_NAME" in
${lib.concatMapStringsSep "\n " makeCase vms}
*)
echo "Unknown VM: $VM_NAME" >&2
echo "Available VMs: ${lib.concatMapStringsSep ", " (vm: vm.name) vms}" >&2
exit 1
;;
esac
'';
# vm-run: Run command in VM (socket-activated).
# Writes the command line to the VM's per-name command socket; connecting
# triggers systemd socket activation of the VM if it is not already running.
# NOTE(review): `echo "$@"` joins arguments with single spaces, so original
# quoting/word boundaries are lost — the guest-side protocol appears to be a
# single text line; confirm this is intended.
vmRunScript = pkgs.writeShellScript "vm-run" ''
if [ $# -lt 2 ]; then
echo "Usage: vm-run <vm-name> <command> [args...]" >&2
exit 1
fi
VM_NAME="$1"
shift
SOCKET="/run/vmsilo/$VM_NAME-command.socket"
if [ ! -S "$SOCKET" ]; then
echo "Unknown VM or socket not active: $VM_NAME" >&2
echo "Available VMs: ${lib.concatMapStringsSep ", " (vm: vm.name) vms}" >&2
exit 1
fi
# Send command via socket (triggers activation if needed)
echo "$@" | ${pkgs.socat}/bin/socat - UNIX-CONNECT:"$SOCKET"
'';
# vm-start-debug: Start VM directly (bypasses socket activation, requires root).
# Execs the VM's launcher script in the foreground so crosvm output goes to the
# caller's terminal; root is required because the launcher touches /run/vmsilo
# and host devices.
vmStartDebugScript = pkgs.writeShellScript "vm-start-debug" ''
if [ $# -ne 1 ]; then
echo "Usage: vm-start-debug <vm-name>" >&2
echo "Note: Requires root privileges (use sudo)" >&2
exit 1
fi
if [ "$(id -u)" -ne 0 ]; then
echo "Error: vm-start-debug requires root privileges" >&2
echo "Run: sudo vm-start-debug $1" >&2
exit 1
fi
VM_NAME="$1"
${mkVmCase (vm: "${vm.name}) exec ${mkVmScript vm} ;;")}
'';
# vm-start: Start VM via systemd (uses polkit for authorization).
# Dispatches on the VM name and starts the corresponding systemd unit.
vmStartScript = pkgs.writeShellScript "vm-start" ''
if [ $# -ne 1 ]; then
echo "Usage: vm-start <vm-name>" >&2
exit 1
fi
VM_NAME="$1"
${mkVmCase (vm: "${vm.name}) systemctl start vmsilo-${vm.name}-vm.service ;;")}
'';
# vm-stop: Stop VM via systemd (uses polkit for authorization).
# Mirror image of vm-start: stops the per-VM systemd unit by name.
vmStopScript = pkgs.writeShellScript "vm-stop" ''
if [ $# -ne 1 ]; then
echo "Usage: vm-stop <vm-name>" >&2
exit 1
fi
VM_NAME="$1"
${mkVmCase (vm: "${vm.name}) systemctl stop vmsilo-${vm.name}-vm.service ;;")}
'';
# vm-shell: Connect to VM (serial console by default, SSH with --ssh).
# --ssh uses SSH over vsock; --root (only with --ssh) logs in as root instead
# of the default "user" account. Without --ssh, attaches to the VM's serial
# console socket via socat in raw mode.
vmShellScript = pkgs.writeShellScript "vm-shell" ''
usage() {
echo "Usage: vm-shell [--ssh [--root]] <vm-name>" >&2
echo "" >&2
echo "Options:" >&2
echo " --ssh Use SSH over vsock (requires SSH keys configured)" >&2
echo " --root Connect as root (only with --ssh)" >&2
echo "" >&2
echo "Without --ssh, connects to serial console." >&2
echo "Escape character is CTRL+]" >&2
exit 1
}
USE_SSH=0
USE_ROOT=0
# Consume option flags until the first non-option argument (the VM name).
while [ $# -gt 0 ]; do
case "$1" in
--ssh)
USE_SSH=1
shift
;;
--root)
USE_ROOT=1
shift
;;
-*)
usage
;;
*)
break
;;
esac
done
if [ $# -ne 1 ]; then
usage
fi
VM_NAME="$1"
if [ $USE_ROOT -eq 1 ] && [ $USE_SSH -eq 0 ]; then
echo "Error: --root requires --ssh" >&2
exit 1
fi
if [ $USE_SSH -eq 1 ]; then
if [ $USE_ROOT -eq 1 ]; then
USER_NAME="root"
else
USER_NAME="user"
fi
# NOTE(review): "user@vsock/<cid>" relies on ssh client configuration for
# vsock hosts (e.g. a ProxyCommand) provided elsewhere - confirm.
${mkVmCase (vm: "${vm.name}) exec ${pkgs.openssh}/bin/ssh $USER_NAME@vsock/${toString vm.id} ;;")}
else
CONSOLE="/run/vmsilo/$VM_NAME-console"
if [ ! -e "$CONSOLE" ]; then
echo "Console not found: $CONSOLE" >&2
echo "Is the VM running? Use: vm-start $VM_NAME" >&2
exit 1
fi
echo "Escape character is CTRL+]"
# raw,echo=0 gives a transparent terminal; escape=0x1d maps CTRL+] to exit.
exec ${pkgs.socat}/bin/socat -,raw,echo=0,escape=0x1d "$CONSOLE"
fi
'';
in
{
config = lib.mkIf cfg.enable {
# Set internal options for other modules to consume
programs.vmsilo._internal = {
# Per-VM launcher script derivations, keyed by VM name.
vmScripts = lib.listToAttrs (map (vm: lib.nameValuePair vm.name (mkVmScript vm)) vms);
# Per-VM vsock proxy script derivations, keyed by VM name.
proxyScripts = lib.listToAttrs (map (vm: lib.nameValuePair vm.name (mkProxyScript vm)) vms);
# User-facing CLI entry points (installed/exposed by other vmsilo modules).
userScripts = {
vm-run = vmRunScript;
vm-start = vmStartScript;
vm-start-debug = vmStartDebugScript;
vm-stop = vmStopScript;
vm-shell = vmShellScript;
};
};
};
}