diff --git a/Cargo.lock b/Cargo.lock index 2f75fb049..25dd33adf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -470,6 +470,7 @@ name = "hypervisor" version = "0.1.0" dependencies = [ "anyhow", + "arc-swap", "kvm-bindings", "kvm-ioctls", "libc", diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index a824bb0a1..bcd266c1d 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -9,6 +9,7 @@ kvm = [] [dependencies] anyhow = "1.0" +arc-swap = ">=0.4.4" thiserror = "1.0" libc = "0.2.78" log = "0.4.11" diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 2dd04eb0c..804fa8ad4 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -16,9 +16,10 @@ pub use crate::aarch64::{ use crate::cpu; use crate::device; use crate::hypervisor; -use crate::vm; +use crate::vm::{self, VmmOps}; #[cfg(target_arch = "aarch64")] use crate::{arm64_core_reg_id, offset__of}; +use arc_swap::ArcSwapOption; use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; use serde_derive::{Deserialize, Serialize}; use std::os::unix::io::{AsRawFd, RawFd}; @@ -91,6 +92,7 @@ pub struct KvmVm { #[cfg(target_arch = "x86_64")] msrs: MsrEntries, state: KvmVmState, + vmmops: ArcSwapOption>, } // Returns a `Vec` with a size in bytes at least as large as `size_in_bytes`. @@ -179,6 +181,7 @@ impl vm::Vm for KvmVm { fd: vc, #[cfg(target_arch = "x86_64")] msrs: self.msrs.clone(), + vmmops: self.vmmops.clone(), }; Ok(Arc::new(vcpu)) } @@ -345,6 +348,14 @@ impl vm::Vm for KvmVm { fn set_state(&self, _state: VmState) -> vm::Result<()> { Ok(()) } + + /// + /// Set the VmmOps interface + /// + fn set_vmmops(&self, vmmops: Box) -> vm::Result<()> { + self.vmmops.store(Some(Arc::new(vmmops))); + Ok(()) + } } /// Wrapper over KVM system ioctls. 
pub struct KvmHypervisor { @@ -422,6 +433,7 @@ impl hypervisor::Hypervisor for KvmHypervisor { fd: vm_fd, msrs, state: VmState {}, + vmmops: ArcSwapOption::from(None), })) } @@ -430,6 +442,7 @@ impl hypervisor::Hypervisor for KvmHypervisor { Ok(Arc::new(KvmVm { fd: vm_fd, state: VmState {}, + vmmops: ArcSwapOption::from(None), })) } } @@ -490,6 +503,7 @@ pub struct KvmVcpu { fd: VcpuFd, #[cfg(target_arch = "x86_64")] msrs: MsrEntries, + vmmops: ArcSwapOption>, } /// Implementation of Vcpu trait for KVM /// Example: @@ -681,9 +695,27 @@ impl cpu::Vcpu for KvmVcpu { match self.fd.run() { Ok(run) => match run { #[cfg(target_arch = "x86_64")] - VcpuExit::IoIn(addr, data) => Ok(cpu::VmExit::IoIn(addr, data)), + VcpuExit::IoIn(addr, data) => { + if let Some(vmmops) = self.vmmops.load_full() { + return vmmops + .pio_read(addr.into(), data) + .map(|_| cpu::VmExit::Ignore) + .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); + } + + Ok(cpu::VmExit::IoIn(addr, data)) + } #[cfg(target_arch = "x86_64")] - VcpuExit::IoOut(addr, data) => Ok(cpu::VmExit::IoOut(addr, data)), + VcpuExit::IoOut(addr, data) => { + if let Some(vmmops) = self.vmmops.load_full() { + return vmmops + .pio_write(addr.into(), data) + .map(|_| cpu::VmExit::Ignore) + .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); + } + + Ok(cpu::VmExit::IoOut(addr, data)) + } #[cfg(target_arch = "x86_64")] VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)), #[cfg(target_arch = "x86_64")] @@ -705,8 +737,26 @@ impl cpu::Vcpu for KvmVcpu { } } - VcpuExit::MmioRead(addr, data) => Ok(cpu::VmExit::MmioRead(addr, data)), - VcpuExit::MmioWrite(addr, data) => Ok(cpu::VmExit::MmioWrite(addr, data)), + VcpuExit::MmioRead(addr, data) => { + if let Some(vmmops) = self.vmmops.load_full() { + return vmmops + .mmio_read(addr, data) + .map(|_| cpu::VmExit::Ignore) + .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); + } + + Ok(cpu::VmExit::MmioRead(addr, data)) + } + VcpuExit::MmioWrite(addr, data) => { 
+ if let Some(vmmops) = self.vmmops.load_full() { + return vmmops + .mmio_write(addr, data) + .map(|_| cpu::VmExit::Ignore) + .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); + } + + Ok(cpu::VmExit::MmioWrite(addr, data)) + } VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv), r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index 6d5d5de16..467a23a05 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -117,6 +117,36 @@ pub enum HypervisorVmError { /// #[error("Failed to create passthrough device: {0}")] CreatePassthroughDevice(#[source] anyhow::Error), + /// + /// Write to Guest memory + /// + #[error("Failed to write to guest memory: {0}")] + GuestMemWrite(#[source] anyhow::Error), + /// + /// Read Guest memory + /// + #[error("Failed to read guest memory: {0}")] + GuestMemRead(#[source] anyhow::Error), + /// + /// Read from MMIO Bus + /// + #[error("Failed to read from MMIO Bus: {0}")] + MmioBusRead(#[source] anyhow::Error), + /// + /// Write to MMIO Bus + /// + #[error("Failed to write to MMIO Bus: {0}")] + MmioBusWrite(#[source] anyhow::Error), + /// + /// Read from IO Bus + /// + #[error("Failed to read from IO Bus: {0}")] + IoBusRead(#[source] anyhow::Error), + /// + /// Write to IO Bus + /// + #[error("Failed to write to IO Bus: {0}")] + IoBusWrite(#[source] anyhow::Error), } /// /// Result type for returning from a function @@ -184,4 +214,17 @@ pub trait Vm: Send + Sync { fn state(&self) -> Result; /// Set the VM state fn set_state(&self, state: VmState) -> Result<()>; + /// Set VmmOps interface + fn set_vmmops(&self, vmmops: Box) -> Result<()>; +} + +pub trait VmmOps: Send + Sync { + fn guest_mem_write(&self, buf: &[u8], gpa: u64) -> Result; + fn guest_mem_read(&self, buf: &mut [u8], gpa: u64) -> Result; + fn mmio_read(&self, addr: u64, data: &mut [u8]) -> Result<()>; + fn mmio_write(&self, addr: u64, data: &[u8]) -> Result<()>; + #[cfg(target_arch = "x86_64")] + fn pio_read(&self, 
addr: u64, data: &mut [u8]) -> Result<()>; + #[cfg(target_arch = "x86_64")] + fn pio_write(&self, addr: u64, data: &[u8]) -> Result<()>; } diff --git a/vmm/src/cpu.rs b/vmm/src/cpu.rs index 8c890d3d2..c2688660a 100644 --- a/vmm/src/cpu.rs +++ b/vmm/src/cpu.rs @@ -37,13 +37,11 @@ use hypervisor::{CpuState, VmExit}; use seccomp::{SeccompAction, SeccompFilter}; use libc::{c_void, siginfo_t}; -#[cfg(target_arch = "x86_64")] -use std::fmt; use std::os::unix::thread::JoinHandleExt; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Barrier, Mutex}; use std::{cmp, io, result, thread}; -use vm_device::{Bus, BusDevice}; +use vm_device::BusDevice; #[cfg(target_arch = "x86_64")] use vm_memory::GuestAddress; use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap}; @@ -62,51 +60,6 @@ const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit. #[cfg(target_arch = "x86_64")] const MTRR_EDX_BIT: u8 = 12; // Hypervisor ecx bit. -// Debug I/O port -#[cfg(target_arch = "x86_64")] -const DEBUG_IOPORT: u16 = 0x80; -#[cfg(target_arch = "x86_64")] -const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port"; - -#[cfg(target_arch = "x86_64")] -/// Debug I/O port, see: -/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html -/// -/// Since we're not a physical platform, we can freely assign code ranges for -/// debugging specific parts of our virtual platform. 
-pub enum DebugIoPortRange { - Firmware, - Bootloader, - Kernel, - Userspace, - Custom, -} -#[cfg(target_arch = "x86_64")] -impl DebugIoPortRange { - fn from_u8(value: u8) -> DebugIoPortRange { - match value { - 0x00..=0x1f => DebugIoPortRange::Firmware, - 0x20..=0x3f => DebugIoPortRange::Bootloader, - 0x40..=0x5f => DebugIoPortRange::Kernel, - 0x60..=0x7f => DebugIoPortRange::Userspace, - _ => DebugIoPortRange::Custom, - } - } -} - -#[cfg(target_arch = "x86_64")] -impl fmt::Display for DebugIoPortRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX), - DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX), - } - } -} - #[derive(Debug)] pub enum Error { /// Cannot create the vCPU. @@ -216,6 +169,9 @@ pub enum Error { /// Error starting vCPU after restore StartRestoreVcpu(anyhow::Error), + + /// Error because an unexpected VmExit type was received. + UnexpectedVmExit, } pub type Result = result::Result; @@ -256,13 +212,8 @@ pub struct Vcpu { // The hypervisor abstracted CPU. 
vcpu: Arc, id: u8, - #[cfg(target_arch = "x86_64")] - io_bus: Arc, - mmio_bus: Arc, #[cfg_attr(target_arch = "aarch64", allow(dead_code))] interrupt_controller: Option>>, - #[cfg_attr(target_arch = "aarch64", allow(dead_code))] - vm_ts: std::time::Instant, #[cfg(target_arch = "aarch64")] mpidr: u64, saved_state: Option, @@ -278,10 +229,7 @@ impl Vcpu { pub fn new( id: u8, vm: &Arc, - #[cfg(target_arch = "x86_64")] io_bus: Arc, - mmio_bus: Arc, interrupt_controller: Option>>, - creation_ts: std::time::Instant, ) -> Result>> { let vcpu = vm .create_vcpu(id) @@ -290,11 +238,7 @@ impl Vcpu { Ok(Arc::new(Mutex::new(Vcpu { vcpu, id, - #[cfg(target_arch = "x86_64")] - io_bus, - mmio_bus, interrupt_controller, - vm_ts: creation_ts, #[cfg(target_arch = "aarch64")] mpidr: 0, saved_state: None, @@ -373,43 +317,6 @@ impl Vcpu { pub fn run(&self) -> Result { match self.vcpu.run() { Ok(run) => match run { - #[cfg(target_arch = "x86_64")] - VmExit::IoIn(addr, data) => { - if let Err(e) = self.io_bus.read(u64::from(addr), data) { - if let vm_device::BusError::MissingAddressRange = e { - warn!("Guest PIO read to unregistered address 0x{:x}", addr); - } - } - Ok(true) - } - #[cfg(target_arch = "x86_64")] - VmExit::IoOut(addr, data) => { - if addr == DEBUG_IOPORT && data.len() == 1 { - self.log_debug_ioport(data[0]); - } - if let Err(e) = self.io_bus.write(u64::from(addr), data) { - if let vm_device::BusError::MissingAddressRange = e { - warn!("Guest PIO write to unregistered address 0x{:x}", addr); - } - } - Ok(true) - } - VmExit::MmioRead(addr, data) => { - if let Err(e) = self.mmio_bus.read(addr as u64, data) { - if let vm_device::BusError::MissingAddressRange = e { - warn!("Guest MMIO read to unregistered address 0x{:x}", addr); - } - } - Ok(true) - } - VmExit::MmioWrite(addr, data) => { - if let Err(e) = self.mmio_bus.write(addr as u64, data) { - if let vm_device::BusError::MissingAddressRange = e { - warn!("Guest MMIO write to unregistered address 0x{:x}", addr); - } - } - 
Ok(true) - } #[cfg(target_arch = "x86_64")] VmExit::IoapicEoi(vector) => { if let Some(interrupt_controller) = &self.interrupt_controller { @@ -420,30 +327,16 @@ impl Vcpu { } Ok(true) } - VmExit::Ignore => Ok(true), VmExit::Reset => Ok(false), // No need to handle anything from a KVM HyperV exit VmExit::Hyperv => Ok(true), + _ => Err(Error::UnexpectedVmExit), }, Err(e) => Err(Error::VcpuRun(e.into())), } } - - #[cfg(target_arch = "x86_64")] - // Log debug io port codes. - fn log_debug_ioport(&self, code: u8) { - let ts = self.vm_ts.elapsed(); - - debug!( - "[{} code 0x{:x}] {}.{:>06} seconds", - DebugIoPortRange::from_u8(code), - code, - ts.as_secs(), - ts.as_micros() - ); - } } const VCPU_SNAPSHOT_ID: &str = "vcpu"; @@ -513,10 +406,6 @@ impl Snapshottable for Vcpu { pub struct CpuManager { config: CpusConfig, - #[cfg(target_arch = "x86_64")] - io_bus: Arc, - #[cfg_attr(target_arch = "aarch64", allow(dead_code))] - mmio_bus: Arc, #[cfg_attr(target_arch = "aarch64", allow(dead_code))] interrupt_controller: Option>>, #[cfg_attr(target_arch = "aarch64", allow(dead_code))] @@ -678,9 +567,6 @@ impl CpuManager { let device_manager = device_manager.lock().unwrap(); let cpu_manager = Arc::new(Mutex::new(CpuManager { config: config.clone(), - #[cfg(target_arch = "x86_64")] - io_bus: device_manager.io_bus().clone(), - mmio_bus: device_manager.mmio_bus().clone(), interrupt_controller: device_manager.interrupt_controller().clone(), vm_memory: guest_memory, #[cfg(target_arch = "x86_64")] @@ -704,10 +590,8 @@ impl CpuManager { .ok_or(Error::AllocateIOPort)?; #[cfg(target_arch = "x86_64")] - cpu_manager - .lock() - .unwrap() - .io_bus + device_manager + .io_bus() .insert(cpu_manager.clone(), 0x0cd8, 0xc) .map_err(Error::BusError)?; @@ -792,17 +676,7 @@ impl CpuManager { None }; - let creation_ts = std::time::Instant::now(); - - let vcpu = Vcpu::new( - cpu_id, - &self.vm, - #[cfg(target_arch = "x86_64")] - self.io_bus.clone(), - self.mmio_bus.clone(), - interrupt_controller, - 
creation_ts, - )?; + let vcpu = Vcpu::new(cpu_id, &self.vm, interrupt_controller)?; if let Some(snapshot) = snapshot { #[cfg(target_arch = "x86_64")] diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 1a6464d88..93a904d7b 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -42,6 +42,7 @@ use anyhow::anyhow; use arch::BootProtocol; use arch::EntryPoint; use devices::HotPlugNotificationFlags; +use hypervisor::vm::{HypervisorVmError, VmmOps}; use linux_loader::cmdline::Cmdline; #[cfg(target_arch = "x86_64")] use linux_loader::loader::elf::Error::InvalidElfMagicNumber; @@ -53,6 +54,8 @@ use signal_hook::{iterator::Signals, SIGINT, SIGTERM, SIGWINCH}; use std::collections::{BTreeMap, HashMap}; use std::convert::TryInto; use std::ffi::CString; +#[cfg(target_arch = "x86_64")] +use std::fmt; use std::fs::{File, OpenOptions}; use std::io::{self, Write}; use std::io::{Seek, SeekFrom}; @@ -62,8 +65,10 @@ use std::path::PathBuf; use std::sync::{Arc, Mutex, RwLock}; use std::{result, str, thread}; use url::Url; +use vm_device::Bus; use vm_memory::{ - Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap, + Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap, + GuestRegionMmap, }; use vm_migration::{ Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable, @@ -217,6 +222,9 @@ pub enum Error { /// Failed resizing a memory zone. ResizeZone, + + /// Failed setting the VmmOps interface. 
+ SetVmmOpsInterface(hypervisor::HypervisorVmError), } pub type Result = result::Result; @@ -295,6 +303,134 @@ impl VmState { } } +// Debug I/O port +#[cfg(target_arch = "x86_64")] +const DEBUG_IOPORT: u16 = 0x80; +#[cfg(target_arch = "x86_64")] +const DEBUG_IOPORT_PREFIX: &str = "Debug I/O port"; + +#[cfg(target_arch = "x86_64")] +/// Debug I/O port, see: +/// https://www.intel.com/content/www/us/en/support/articles/000005500/boards-and-kits.html +/// +/// Since we're not a physical platform, we can freely assign code ranges for +/// debugging specific parts of our virtual platform. +pub enum DebugIoPortRange { + Firmware, + Bootloader, + Kernel, + Userspace, + Custom, +} +#[cfg(target_arch = "x86_64")] +impl DebugIoPortRange { + fn from_u8(value: u8) -> DebugIoPortRange { + match value { + 0x00..=0x1f => DebugIoPortRange::Firmware, + 0x20..=0x3f => DebugIoPortRange::Bootloader, + 0x40..=0x5f => DebugIoPortRange::Kernel, + 0x60..=0x7f => DebugIoPortRange::Userspace, + _ => DebugIoPortRange::Custom, + } + } +} + +#[cfg(target_arch = "x86_64")] +impl fmt::Display for DebugIoPortRange { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DebugIoPortRange::Firmware => write!(f, "{}: Firmware", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Bootloader => write!(f, "{}: Bootloader", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Kernel => write!(f, "{}: Kernel", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Userspace => write!(f, "{}: Userspace", DEBUG_IOPORT_PREFIX), + DebugIoPortRange::Custom => write!(f, "{}: Custom", DEBUG_IOPORT_PREFIX), + } + } +} + +struct VmOps { + memory: GuestMemoryAtomic, + #[cfg(target_arch = "x86_64")] + io_bus: Arc, + mmio_bus: Arc, + #[cfg(target_arch = "x86_64")] + timestamp: std::time::Instant, +} + +impl VmOps { + #[cfg(target_arch = "x86_64")] + // Log debug io port codes. 
+    fn log_debug_ioport(&self, code: u8) {
+        let elapsed = self.timestamp.elapsed(); // time since `timestamp` was captured
+
+        debug!(
+            "[{} code 0x{:x}] {}.{:>06} seconds",
+            DebugIoPortRange::from_u8(code),
+            code,
+            elapsed.as_secs(),
+            elapsed.subsec_micros() // fractional part only; as_micros() is the total and would repeat the whole seconds
+        );
+    }
+}
+
+impl VmmOps for VmOps {
+    fn guest_mem_write(&self, buf: &[u8], gpa: u64) -> hypervisor::vm::Result {
+        self.memory
+            .memory()
+            .write(buf, GuestAddress(gpa))
+            .map_err(|e| HypervisorVmError::GuestMemWrite(e.into()))
+    }
+
+    fn guest_mem_read(&self, buf: &mut [u8], gpa: u64) -> hypervisor::vm::Result {
+        self.memory
+            .memory()
+            .read(buf, GuestAddress(gpa))
+            .map_err(|e| HypervisorVmError::GuestMemRead(e.into()))
+    }
+
+    fn mmio_read(&self, addr: u64, data: &mut [u8]) -> hypervisor::vm::Result<()> {
+        if let Err(e) = self.mmio_bus.read(addr, data) {
+            if let vm_device::BusError::MissingAddressRange = e {
+                warn!("Guest MMIO read to unregistered address 0x{:x}", addr);
+            }
+        }
+        Ok(()) // bus errors are at most logged above, never returned to the caller
+    }
+
+    fn mmio_write(&self, addr: u64, data: &[u8]) -> hypervisor::vm::Result<()> {
+        if let Err(e) = self.mmio_bus.write(addr, data) {
+            if let vm_device::BusError::MissingAddressRange = e {
+                warn!("Guest MMIO write to unregistered address 0x{:x}", addr);
+            }
+        }
+        Ok(()) // bus errors are at most logged above, never returned to the caller
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    fn pio_read(&self, addr: u64, data: &mut [u8]) -> hypervisor::vm::Result<()> {
+        if let Err(e) = self.io_bus.read(addr, data) {
+            if let vm_device::BusError::MissingAddressRange = e {
+                warn!("Guest PIO read to unregistered address 0x{:x}", addr);
+            }
+        }
+        Ok(()) // bus errors are at most logged above, never returned to the caller
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    fn pio_write(&self, addr: u64, data: &[u8]) -> hypervisor::vm::Result<()> {
+        if addr == DEBUG_IOPORT as u64 && data.len() == 1 { // single-byte write to the debug port: log the code first
+            self.log_debug_ioport(data[0]);
+        }
+
+        if let Err(e) = self.io_bus.write(addr, data) {
+            if let vm_device::BusError::MissingAddressRange = e {
+                warn!("Guest PIO write to unregistered address 0x{:x}", addr);
+            }
+        }
+        Ok(()) // bus errors are at most logged above, never returned to the caller
+    }
+}
+
 pub struct Vm {
     kernel: File,
     initramfs: Option,
@@ -354,6 +490,22 @@
impl Vm { ) .map_err(Error::DeviceManager)?; + let memory = memory_manager.lock().unwrap().guest_memory(); + #[cfg(target_arch = "x86_64")] + let io_bus = Arc::clone(device_manager.lock().unwrap().io_bus()); + let mmio_bus = Arc::clone(device_manager.lock().unwrap().mmio_bus()); + // Create the VmOps structure, which implements the VmmOps trait. + // And send it to the hypervisor. + let vm_ops = Box::new(VmOps { + memory, + #[cfg(target_arch = "x86_64")] + io_bus, + mmio_bus, + #[cfg(target_arch = "x86_64")] + timestamp: std::time::Instant::now(), + }); + vm.set_vmmops(vm_ops).map_err(Error::SetVmmOpsInterface)?; + let cpu_manager = cpu::CpuManager::new( &config.lock().unwrap().cpus.clone(), &device_manager, @@ -516,7 +668,6 @@ impl Vm { .unwrap() .create_devices() .map_err(Error::DeviceManager)?; - Ok(new_vm) }