# Script generation for vmsilo NixOS module
# VM launcher scripts and user-facing scripts (vm-run, vm-start, vm-stop, vm-shell)
{
  config,
  pkgs,
  lib,
  ...
}:

let
  cfg = config.programs.vmsilo;
  helpers = import ./lib/helpers.nix { inherit lib; };
  inherit (helpers)
    formatPositionalKVArgs
    assignVmIds
    ;

  mkVmConfig = import ./lib/vm-config.nix {
    inherit
      lib
      helpers
      cfg
      config
      pkgs
      ;
  };

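  # assignVmIds (helper from ./lib/helpers.nix) gives each VM a numeric `id`; later in
  # this file that id doubles as the guest's vsock CID (crosvm --cid, VSOCK-CONNECT).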
  vms = assignVmIds cfg.nixosVms;

  # ── VM launcher scripts ──────────────────────────────────────────────────

  # Default block device options applied to all disks (crosvm only)
  defaultBlockOpts = {
    packed-queue = true; # More efficient, with better cache locality than split virtqueues
    direct = true; # Bypass the host page cache; the guest manages its own page cache
  };

  # Format a disk configuration as a --block argument (applies defaults)
  formatBlockArg =
    disk: "--block ${formatPositionalKVArgs [ "path" ] "," "=" (defaultBlockOpts // disk)}";
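  # Illustrative only (formatPositionalKVArgs is defined in ./lib/helpers.nix): assuming
  # it emits the "path" value positionally followed by key=value pairs, a hypothetical
  # disk { path = "/dev/zvol/vm-root"; ro = true; } would become roughly:
  #   --block /dev/zvol/vm-root,ro=true,packed-queue=true,direct=true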

  # Generate VM launcher script (crosvm)
  mkCrosvmVmScript =
    vm:
    let
      c = mkVmConfig vm;

      additionalDisksArgs = lib.concatMapStringsSep " " formatBlockArg vm.additionalDisks;
      ephemeralDiskArg = lib.optionalString (vm.rootOverlay.type == "raw") (
        formatBlockArg c.crosvm.ephemeralDiskConfig
      );

      # virtiofsd vhost-user socket args
      virtiofsDirArgs = lib.concatMapStringsSep " " (
        tag: "--vhost-user type=fs,socket=/run/vmsilo/${vm.name}/virtiofs/${tag}.socket"
      ) (builtins.attrNames c.effectiveSharedDirs);

      # Kernel params wrapped with -p for crosvm CLI
      kernelParamsArgs = lib.concatMapStringsSep " \\\n " (p: ''-p "${p}"'') c.allKernelParams;

      soundVhostUserArg = lib.optionalString c.soundEnabled "--vhost-user type=sound,socket=/run/vmsilo/${vm.name}/sound/sound.socket";

      # Crosvm configuration (per-VM overrides global)
      effectiveLogLevel = if vm.crosvm.logLevel != null then vm.crosvm.logLevel else cfg.crosvm.logLevel;
      allExtraArgs = cfg.crosvm.extraArgs ++ vm.crosvm.extraArgs;
      allExtraRunArgs = cfg.crosvm.extraRunArgs ++ vm.crosvm.extraRunArgs;
    in
    pkgs.writeShellScript "vmsilo-start-${vm.name}" ''
      set -e

      ${c.iommuValidationScript}

      # Clean up stale socket
      rm -f /run/vmsilo/${vm.name}/crosvm-control.socket

      ${c.socketWaitScript ""}

      exec ${cfg._internal.crosvm}/bin/crosvm \
        --log-level=${effectiveLogLevel} \
        --no-syslog \
        --no-timestamps \
        ${lib.escapeShellArgs allExtraArgs} \
        run \
        --name ${vm.name} \
        -m ${toString vm.memory} \
        --hugepages \
        --balloon-page-reporting \
        --boost-uclamp \
        --disable-virtio-intx \
        --no-i8042 \
        --no-rtc \
        --s2idle \
        ${
          if cfg.schedulerIsolation == "full" then
            "--core-scheduling=true"
          else if cfg.schedulerIsolation == "vm" then
            "--per-vm-core-scheduling"
          else
            "--core-scheduling=false"
        } \
        --initrd=${c.initramfsPath} \
        --serial=hardware=virtio-console,type=unix-stream,path=/run/vmsilo/${vm.name}/console-backend.socket,console,input-unix-stream,stream-non-blocking \
        ${formatBlockArg c.crosvm.rootDiskConfig} \
        ${additionalDisksArgs} \
        ${ephemeralDiskArg} \
        ${kernelParamsArgs} \
        ${virtiofsDirArgs} \
        --cid ${toString vm.id} \
        --cpus ${toString vm.cpus} \
        ${
          lib.optionalString (
            c.gpu.config != null
          ) "--vhost-user type=gpu,socket=/run/vmsilo/${vm.name}/gpu/gpu.socket"
        } \
        ${soundVhostUserArg} \
        -s /run/vmsilo/${vm.name}/crosvm-control.socket \
        ${c.crosvm.vfioArgs} \
        ${c.crosvm.networkArgs} \
        ${c.crosvm.vhostUserArgs} \
        ${lib.escapeShellArgs allExtraRunArgs} \
        ${c.kernelPath}
    '';

  # Generate cloud-hypervisor VM launch script
  mkCloudHypervisorVmScript =
    vm:
    let
      c = mkVmConfig vm;
    in
    pkgs.writeShellScript "vmsilo-start-${vm.name}" ''
      set -e

      ${c.iommuValidationScript}

      # Remove stale sockets
      rm -f /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket
      rm -f /run/vmsilo/${vm.name}/vsock.socket

      # Step 1: Start cloud-hypervisor VMM in background
      ${c.ch.bin} \
        --api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \
        --seccomp ${c.ch.seccompArg} \
        ${lib.escapeShellArgs c.ch.effectiveExtraArgs} \
        ${lib.concatStringsSep " " c.ch.verbosityArgs} &
      CH_PID=$!

      # Wait for API socket to appear (up to 30s)
      ELAPSED=0
      while [ $ELAPSED -lt 60 ] && [ ! -S /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket ]; do
        sleep 0.5
        ELAPSED=$((ELAPSED + 1))
      done

      if [ ! -S /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket ]; then
        echo "Timeout waiting for cloud-hypervisor API socket" >&2
        kill $CH_PID 2>/dev/null || true
        exit 1
      fi

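      # ELAPSED carries over from the socket wait above, so socket appearance and
      # API readiness share a single 30s budget (60 polls x 0.5s).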
      # Wait for API server to be ready (socket existing != API ready)
      while [ $ELAPSED -lt 60 ]; do
        if ${c.ch.remote} --api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket ping 2>/dev/null; then
          break
        fi
        sleep 0.5
        ELAPSED=$((ELAPSED + 1))
      done

      if ! ${c.ch.remote} --api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket ping 2>/dev/null; then
        echo "Timeout waiting for cloud-hypervisor API to become ready" >&2
        kill $CH_PID 2>/dev/null || true
        exit 1
      fi

      ${c.socketWaitScript "kill $CH_PID 2>/dev/null || true"}

      # Step 2: Create VM configuration
      ${c.ch.remote} \
        --api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \
        create -- ${c.ch.configFile}

      # Discover serial PTY allocated by cloud-hypervisor and symlink to standard path
      CONSOLE_PTY=$(${c.ch.remote} \
        --api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \
        info | ${pkgs.jq}/bin/jq -r '.config.serial.file')
      if [ -z "$CONSOLE_PTY" ] || [ "$CONSOLE_PTY" = "null" ]; then
        echo "Failed to discover serial PTY from cloud-hypervisor API" >&2
        kill $CH_PID 2>/dev/null || true
        exit 1
      fi
      ln -sf "$CONSOLE_PTY" /run/vmsilo/${vm.name}/console
      chown ${toString cfg._internal.userUid} /run/vmsilo/${vm.name}/console

      # Step 3: Boot VM
      ${c.ch.remote} \
        --api-socket /run/vmsilo/${vm.name}/cloud-hypervisor-control.socket \
        boot

      # vsock socket is created at boot time; wait for it then chown so user-level services can connect
      ELAPSED=0
      while [ ! -S /run/vmsilo/${vm.name}/vsock.socket ] && [ "$ELAPSED" -lt 60 ]; do
        sleep 0.5
        ELAPSED=$((ELAPSED + 1))
      done
      if [ ! -S /run/vmsilo/${vm.name}/vsock.socket ]; then
        echo "Timeout waiting for vsock socket" >&2
        kill $CH_PID 2>/dev/null || true
        exit 1
      fi
      chown ${toString cfg._internal.userUid} /run/vmsilo/${vm.name}/vsock.socket

      # Block until VMM exits (VM shutdown)
      wait $CH_PID
    '';

  # Dispatcher: generate the appropriate VM script based on hypervisor choice
  mkVmScript =
    vm: if vm.hypervisor == "crosvm" then mkCrosvmVmScript vm else mkCloudHypervisorVmScript vm;

  # ── Proxy and user-facing scripts ────────────────────────────────────────

  # vsock proxy for cloud-hypervisor: connect via unix socket + CONNECT handshake
  mkChVsockConnectScript =
    vmName: port:
    let
      pyProxy = pkgs.writeText "vsock-proxy.py" ''
        import socket, sys, threading
        sock_path, port = sys.argv[1], int(sys.argv[2])

        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            s.connect(sock_path)
        except OSError as e:
            sys.stderr.write(f"vsock connect failed: {e}\n")
            sys.exit(1)

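        # Hybrid vsock handshake (Firecracker-style protocol, also used by
        # cloud-hypervisor): the client writes "CONNECT <port>\n" on the Unix socket
        # and the VMM replies with an "OK <port>\n" line before the stream carries
        # raw guest traffic.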
        # Send CONNECT handshake
        s.sendall(f"CONNECT {port}\n".encode())

        # Read OK/NACK response line
        buf = b""
        while b"\n" not in buf:
            chunk = s.recv(4096)
            if not chunk:
                sys.stderr.write("vsock: closed before OK response\n")
                sys.exit(1)
            buf += chunk

        line, _, rest = buf.partition(b"\n")
        if not line.startswith(b"OK"):
            sys.stderr.write(f"vsock: unexpected response: {line}\n")
            sys.exit(1)

        # Write any data that arrived alongside the OK line
        if rest:
            sys.stdout.buffer.write(rest)
            sys.stdout.buffer.flush()

        # Bidirectional proxy: stdin → socket, socket → stdout
        # Note: no s.shutdown(SHUT_WR) — cloud-hypervisor hybrid vsock doesn't
        # support half-close; it tears down the entire connection, killing the
        # response path. The guest command handler exits on its own after one command.
        def forward_in():
            try:
                while True:
                    data = sys.stdin.buffer.read(4096)
                    if not data:
                        break
                    s.sendall(data)
            except Exception:
                pass

        threading.Thread(target=forward_in, daemon=True).start()

        while True:
            data = s.recv(4096)
            if not data:
                break
            sys.stdout.buffer.write(data)
            sys.stdout.buffer.flush()
      '';
    in
    pkgs.writeShellScript "vsock-connect-${vmName}-${toString port}" ''
      VSOCK_SOCKET="/run/vmsilo/${vmName}/vsock.socket"
      PORT=${toString port}
      TIMEOUT=30
      ELAPSED=0

      # Wait for vsock socket to appear
      while [ $ELAPSED -lt $TIMEOUT ] && [ ! -S "$VSOCK_SOCKET" ]; do
        sleep 0.5
        ELAPSED=$((ELAPSED + 1))
      done
      [ -S "$VSOCK_SOCKET" ] || { echo "Timeout: vsock socket not found" >&2; exit 1; }

      # Retry until vsock port is ready (guest command listener may not be up yet).
      ELAPSED=0
      while [ $ELAPSED -lt $TIMEOUT ]; do
        ${pkgs.python3}/bin/python3 ${pyProxy} "$VSOCK_SOCKET" "$PORT" && exit 0
        sleep 0.5
        ELAPSED=$((ELAPSED + 1))
      done

      echo "Timeout waiting for VM ${vmName} vsock:${toString port}" >&2
      exit 1
    '';

  # Generate proxy script for a VM
  mkProxyScript =
    vm:
    if vm.hypervisor == "crosvm" then
      pkgs.writeShellScript "vmsilo-proxy-${vm.name}" ''
        CID=${toString vm.id}
        VSOCK_PORT=5000
        TIMEOUT=30

        # Wait for vsock to become available
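        # The probe connects and immediately hits EOF (-u with OPEN:/dev/null), so a
        # zero exit just means the guest listener accepted the connection.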
        ELAPSED=0
        while [ $ELAPSED -lt $TIMEOUT ]; do
          if ${pkgs.socat}/bin/socat -u OPEN:/dev/null VSOCK-CONNECT:$CID:$VSOCK_PORT 2>/dev/null; then
            break
          fi
          sleep 0.5
          ELAPSED=$((ELAPSED + 1))
        done

        if [ $ELAPSED -ge $TIMEOUT ]; then
          echo "Timeout waiting for VM ${vm.name} to start" >&2
          exit 1
        fi

        # Forward stdin/stdout to vsock
        exec ${pkgs.socat}/bin/socat - VSOCK-CONNECT:$CID:$VSOCK_PORT
      ''
    else
      # Cloud-hypervisor: connect via unix socket + CONNECT protocol
      mkChVsockConnectScript vm.name 5000;

  # Generate shell case statement for VM dispatch
  mkVmCase = makeCase: ''
    case "$VM_NAME" in
      ${lib.concatMapStringsSep "\n " makeCase vms}
      *)
        echo "Unknown VM: $VM_NAME" >&2
        echo "Available VMs: ${lib.concatMapStringsSep ", " (vm: vm.name) vms}" >&2
        exit 1
        ;;
    esac
  '';

  # vm-run: Run command in VM (socket-activated)
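  # Example (hypothetical VM name): `vm-run devvm systemctl is-system-running`;
  # everything after the VM name is written as a single line to the command socket.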
  vmRunScript = pkgs.writeShellScript "vm-run" ''
    if [ $# -lt 2 ]; then
      echo "Usage: vm-run <vm-name> <command> [args...]" >&2
      exit 1
    fi

    VM_NAME="$1"
    shift

    SOCKET="/run/vmsilo/$VM_NAME/command.socket"

    if [ ! -S "$SOCKET" ]; then
      echo "Unknown VM or socket not active: $VM_NAME" >&2
      echo "Available VMs: ${lib.concatMapStringsSep ", " (vm: vm.name) vms}" >&2
      exit 1
    fi

    # Send command via socket (triggers activation if needed)
    # -t5: wait up to 5s for response after stdin EOF (default 0.5s is too short
    # for cloud-hypervisor proxy startup: Python interpreter + CONNECT handshake)
    echo "$@" | ${pkgs.socat}/bin/socat -t5 - UNIX-CONNECT:"$SOCKET"
  '';

  # vm-start: Start VM via systemd (uses polkit for authorization)
  vmStartScript = pkgs.writeShellScript "vm-start" ''
    if [ $# -ne 1 ]; then
      echo "Usage: vm-start <vm-name>" >&2
      exit 1
    fi

    VM_NAME="$1"

    ${mkVmCase (vm: "${vm.name}) systemctl start vmsilo-${vm.name}-vm.service ;;")}
  '';

  # vm-stop: Stop VM via systemd (uses polkit for authorization)
  vmStopScript = pkgs.writeShellScript "vm-stop" ''
    if [ $# -ne 1 ]; then
      echo "Usage: vm-stop <vm-name>" >&2
      exit 1
    fi

    VM_NAME="$1"

    ${mkVmCase (vm: "${vm.name}) systemctl stop vmsilo-${vm.name}-vm.service ;;")}
  '';

  # vm-shell: Connect to VM (serial console by default, SSH with --ssh)
  vmShellScript = pkgs.writeShellScript "vm-shell" ''
    usage() {
      echo "Usage: vm-shell [--ssh [--root]] <vm-name>" >&2
      echo "" >&2
      echo "Options:" >&2
      echo " --ssh Use SSH over vsock (requires SSH keys configured)" >&2
      echo " --root Connect as root (only with --ssh)" >&2
      echo "" >&2
      echo "Without --ssh, connects to serial console via screen." >&2
      echo "Detach with Ctrl+A, D" >&2
      exit 1
    }

    USE_SSH=0
    USE_ROOT=0

    while [ $# -gt 0 ]; do
      case "$1" in
        --ssh)
          USE_SSH=1
          shift
          ;;
        --root)
          USE_ROOT=1
          shift
          ;;
        -*)
          usage
          ;;
        *)
          break
          ;;
      esac
    done

    if [ $# -ne 1 ]; then
      usage
    fi

    VM_NAME="$1"

    if [ $USE_ROOT -eq 1 ] && [ $USE_SSH -eq 0 ]; then
      echo "Error: --root requires --ssh" >&2
      exit 1
    fi

    if [ $USE_SSH -eq 1 ]; then
      if [ $USE_ROOT -eq 1 ]; then
        USER_NAME="root"
      else
        USER_NAME="user"
      fi
      ${mkVmCase (
        vm:
        if vm.hypervisor == "crosvm" then
          "${vm.name}) exec ${pkgs.openssh}/bin/ssh \$USER_NAME@vsock/${toString vm.id} ;;"
        else
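          # The ch guest is reached as "localhost" through the ProxyCommand below,
          # so pinned host keys would collide across VMs; host-key checking and the
          # known-hosts file are therefore disabled.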
          let
            proxyCmd = mkChVsockConnectScript vm.name 22;
          in
          "${vm.name}) exec ${pkgs.openssh}/bin/ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ProxyCommand=${lib.escapeShellArg "${proxyCmd}"} \$USER_NAME@localhost ;;"
      )}
    else
      if ! ${pkgs.screen}/bin/screen -ls vmsilo-$VM_NAME >/dev/null 2>&1; then
        echo "No console session found for $VM_NAME" >&2
        echo "Is the VM running? Use: vm-start $VM_NAME" >&2
        exit 1
      fi
      echo "Detach with Ctrl+A, D"
      exec ${pkgs.screen}/bin/screen -x vmsilo-$VM_NAME
    fi
  '';
in
{
  config = lib.mkIf cfg.enable {
    # Set internal options for other modules to consume
    programs.vmsilo._internal = {
      vmScripts = lib.listToAttrs (map (vm: lib.nameValuePair vm.name (mkVmScript vm)) vms);

      proxyScripts = lib.listToAttrs (map (vm: lib.nameValuePair vm.name (mkProxyScript vm)) vms);

      userScripts = {
        vm-run = vmRunScript;
        vm-start = vmStartScript;
        vm-stop = vmStopScript;
        vm-shell = vmShellScript;
      };
    };
  };
}