From fdc19ad85e9aac085610b66c2645b8a7050bd2c9 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Fri, 13 Jun 2025 14:27:00 -0400 Subject: [PATCH] misc: Mark memory region APIs as unsafe To ensure that struct sizes are the same on 32-bit and 64-bit, various kernel APIs use __u64 (Rust u64) to represent userspace pointers. Userspace is expected to cast pointers to __u64 before passing them to the kernel, and cast kernel-provided __u64 to a pointer before using them. However, various safe APIs in Cloud Hypervisor took caller-provided u64 values and passed them to syscalls that treat them as userspace addresses. Therefore, passing bad u64 values would cause memory disclosure or corruption. The memory region APIs are one example of this, so mark them as unsafe. Signed-off-by: Demi Marie Obenour --- hypervisor/src/kvm/mod.rs | 132 ++++++++++++++----------------- hypervisor/src/lib.rs | 21 ----- hypervisor/src/mshv/mod.rs | 131 ++++++++++++++----------------- hypervisor/src/vm.rs | 29 +++++-- pci/src/vfio.rs | 98 +++++++++++------------ pci/src/vfio_user.rs | 101 ++++++++++++------------ vmm/src/api/dbus/mod.rs | 4 +- vmm/src/config.rs | 2 +- vmm/src/device_manager.rs | 156 +++++++++++++++++++++---------------- vmm/src/lib.rs | 4 +- vmm/src/memory_manager.rs | 129 +++++++++++++++++------------- vmm/src/vm.rs | 89 ++++++++++++++++++--- 12 files changed, 489 insertions(+), 407 deletions(-) diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 6bbd4839e..53de9bad3 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -69,11 +69,7 @@ use crate::ClockData; use crate::arch::x86::{ CpuIdEntry, FpuState, LapicState, MsrEntry, NUM_IOAPIC_PINS, SpecialRegisters, XsaveState, }; -use crate::{ - CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, - USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, - UserMemoryRegion, -}; +use crate::{CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters}; // aarch64 dependencies #[cfg(target_arch = "aarch64")] pub mod aarch64; @@ -234,51 +230,6 @@ pub struct KvmTdxExitVmcall { pub out_rdx: u64, } -impl From for UserMemoryRegion { - fn from(region: kvm_userspace_memory_region) -> Self { - let mut flags = USER_MEMORY_REGION_READ; - if region.flags & KVM_MEM_READONLY == 0 { - flags |= USER_MEMORY_REGION_WRITE; - } - if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 { - flags |= USER_MEMORY_REGION_LOG_DIRTY; - } - - UserMemoryRegion { - slot: region.slot, - guest_phys_addr: region.guest_phys_addr, - memory_size: region.memory_size, - userspace_addr: region.userspace_addr, - flags, - } - } -} - -impl From for kvm_userspace_memory_region { - fn from(region: UserMemoryRegion) -> Self { - assert!( - region.flags & USER_MEMORY_REGION_READ != 0, - "KVM mapped memory is always readable" - ); - - let mut flags = 0; - if region.flags & USER_MEMORY_REGION_WRITE == 0 { - flags |= KVM_MEM_READONLY; - } - if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 { - flags |= KVM_MEM_LOG_DIRTY_PAGES; - } - - kvm_userspace_memory_region { - slot: region.slot, - guest_phys_addr: region.guest_phys_addr, - memory_size: region.memory_size, - userspace_addr: region.userspace_addr, - flags, - } - } -} - impl From for MpState { fn from(s: kvm_mp_state) -> Self { MpState::Kvm(s) @@ -758,10 +709,17 @@ impl vm::Vm for KvmVm { .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) } + /// Creates a guest physical memory region. /// - /// Creates a memory region structure that can be used with {create/remove}_user_memory_region + /// # Safety /// - fn make_user_memory_region( + /// `userspace_addr` must point to `memory_size` bytes of memory + /// that will stay mapped until a successful call to + /// `remove_user_memory_region().` Freeing them with `munmap()` + /// before then will cause undefined guest behavior but at least + /// should not cause undefined behavior in the host. In theory, + /// at least. + unsafe fn create_user_memory_region( &self, slot: u32, guest_phys_addr: u64, @@ -769,27 +727,24 @@ impl vm::Vm for KvmVm { userspace_addr: u64, readonly: bool, log_dirty_pages: bool, - ) -> UserMemoryRegion { - kvm_userspace_memory_region { + ) -> vm::Result<()> { + let mut flags = 0; + if readonly { + flags |= KVM_MEM_READONLY; + } + if log_dirty_pages { + flags |= KVM_MEM_LOG_DIRTY_PAGES; + } + + const _: () = assert!(core::mem::size_of::() <= core::mem::size_of::()); + + let mut region = kvm_userspace_memory_region { slot, guest_phys_addr, memory_size, userspace_addr, - flags: if readonly { KVM_MEM_READONLY } else { 0 } - | if log_dirty_pages { - KVM_MEM_LOG_DIRTY_PAGES - } else { - 0 - }, - } - .into() - } - - /// - /// Creates a guest physical memory region. - /// - fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { - let mut region: kvm_userspace_memory_region = user_memory_region.into(); + flags, + }; if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 { if (region.flags & KVM_MEM_READONLY) != 0 { @@ -814,7 +769,7 @@ impl vm::Vm for KvmVm { region.flags = 0; } - // SAFETY: Safe because guest regions are guaranteed not to overlap. + // SAFETY: Safe because caller promised this is safe. unsafe { self.fd .set_user_memory_region(region) @@ -822,18 +777,45 @@ impl vm::Vm for KvmVm { } } - /// /// Removes a guest physical memory region. /// - fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { - let mut region: kvm_userspace_memory_region = user_memory_region.into(); + /// # Safety + /// + /// `userspace_addr` must point to `memory_size` bytes of memory, + /// and `add_user_memory_region()` must have been successfully called. + unsafe fn remove_user_memory_region( + &self, + slot: u32, + guest_phys_addr: u64, + memory_size: u64, + userspace_addr: u64, + readonly: bool, + log_dirty_pages: bool, + ) -> vm::Result<()> { + let mut flags = 0; + if readonly { + flags |= KVM_MEM_READONLY; + } + if log_dirty_pages { + flags |= KVM_MEM_LOG_DIRTY_PAGES; + } + + const _: () = assert!(core::mem::size_of::() <= core::mem::size_of::()); + + let mut region = kvm_userspace_memory_region { + slot, + guest_phys_addr, + memory_size, + userspace_addr, + flags, + }; // Remove the corresponding entry from "self.dirty_log_slots" if needed self.dirty_log_slots.write().unwrap().remove(®ion.slot); // Setting the size to 0 means "remove" region.memory_size = 0; - // SAFETY: Safe because guest regions are guaranteed not to overlap. + // SAFETY: Safe because caller promised this is safe. unsafe { self.fd .set_user_memory_region(region) @@ -1211,7 +1193,7 @@ impl hypervisor::Hypervisor for KvmHypervisor { vm_type = KVM_X86_SW_PROTECTED_VM.into(); } else { vm_type = KVM_X86_DEFAULT_VM.into(); - }; + } loop { match self.kvm.create_vm_with_type(vm_type) { diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs index d9448c8c6..607166968 100644 --- a/hypervisor/src/lib.rs +++ b/hypervisor/src/lib.rs @@ -123,27 +123,6 @@ pub fn vec_with_array_field(count: usize) -> Vec { vec_with_size_in_bytes(vec_size_bytes) } -/// -/// User memory region structure -/// -#[derive(Debug, Default, Eq, PartialEq)] -pub struct UserMemoryRegion { - pub slot: u32, - pub guest_phys_addr: u64, - pub memory_size: u64, - pub userspace_addr: u64, - pub flags: u32, -} - -/// -/// Flags for user memory region -/// -pub const USER_MEMORY_REGION_READ: u32 = 1; -pub const USER_MEMORY_REGION_WRITE: u32 = 1 << 1; -pub const USER_MEMORY_REGION_EXECUTE: u32 = 1 << 2; -pub const USER_MEMORY_REGION_LOG_DIRTY: u32 = 1 << 3; -pub const USER_MEMORY_REGION_ADJUSTABLE: u32 = 1 << 4; - #[derive(Debug)] pub enum MpState { #[cfg(feature = "kvm")] diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 92ef648b6..1fe60d368 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -74,35 +74,10 @@ use crate::arch::aarch64::gic::{Vgic, VgicConfig}; use crate::arch::aarch64::regs; #[cfg(target_arch = "x86_64")] use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry}; -use crate::{ - CpuState, IoEventAddress, IrqRoutingEntry, MpState, USER_MEMORY_REGION_ADJUSTABLE, - USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, - UserMemoryRegion, -}; +use crate::{CpuState, IoEventAddress, IrqRoutingEntry, MpState}; pub const PAGE_SHIFT: usize = 12; -impl From for UserMemoryRegion { - fn from(region: mshv_user_mem_region) -> Self { - let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE; - if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 { - flags |= USER_MEMORY_REGION_WRITE; - } - if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 { - flags |= USER_MEMORY_REGION_EXECUTE; - } - - UserMemoryRegion { - guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64) - + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)), - memory_size: region.size, - userspace_addr: region.userspace_addr, - flags, - ..Default::default() - } - } -} - #[cfg(target_arch = "x86_64")] impl From for ClockData { fn from(d: MshvClockData) -> Self { @@ -122,26 +97,6 @@ impl From for MshvClockData { } } -impl From for mshv_user_mem_region { - fn from(region: UserMemoryRegion) -> Self { - let mut flags: u8 = 0; - if region.flags & USER_MEMORY_REGION_WRITE != 0 { - flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE; - } - if region.flags & USER_MEMORY_REGION_EXECUTE != 0 { - flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE; - } - - mshv_user_mem_region { - guest_pfn: region.guest_phys_addr >> PAGE_SHIFT, - size: region.memory_size, - userspace_addr: region.userspace_addr, - flags, - ..Default::default() - } - } -} - impl From for IoEventAddress { fn from(a: mshv_ioctls::IoEventAddress) -> Self { match a { @@ -1918,8 +1873,36 @@ impl vm::Vm for MshvVm { } /// Creates a guest physical memory region. - fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { - let user_memory_region: mshv_user_mem_region = user_memory_region.into(); + /// + /// # Safety + /// + /// `userspace_addr` must point to `memory_size` bytes of memory + /// that will stay mapped until a successful call to + /// `remove_user_memory_region().` Freeing them with `munmap()` + /// before then will cause undefined guest behavior but at least + /// should not cause undefined behavior in the host. In theory, + /// at least. + unsafe fn create_user_memory_region( + &self, + _slot: u32, + guest_phys_addr: u64, + memory_size: u64, + userspace_addr: u64, + readonly: bool, + _log_dirty_pages: bool, + ) -> vm::Result<()> { + let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE; + if !readonly { + flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE; + } + + let user_memory_region = mshv_user_mem_region { + flags, + guest_pfn: guest_phys_addr >> PAGE_SHIFT, + size: memory_size, + userspace_addr, + ..Default::default() + }; // No matter read only or not we keep track the slots. // For readonly hypervisor can enable the dirty bits, // but a VM exit happens before setting the dirty bits @@ -1938,8 +1921,32 @@ impl vm::Vm for MshvVm { } /// Removes a guest physical memory region. - fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { - let user_memory_region: mshv_user_mem_region = user_memory_region.into(); + /// + /// # Safety + /// + /// `userspace_addr` must point to `memory_size` bytes of memory, + /// and `add_user_memory_region()` must have been successfully called. + unsafe fn remove_user_memory_region( + &self, + _slot: u32, + guest_phys_addr: u64, + memory_size: u64, + userspace_addr: u64, + readonly: bool, + _log_dirty_pages: bool, + ) -> vm::Result<()> { + let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE; + if !readonly { + flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE; + } + + let user_memory_region = mshv_user_mem_region { + flags, + guest_pfn: guest_phys_addr >> PAGE_SHIFT, + size: memory_size, + userspace_addr, + ..Default::default() + }; // Remove the corresponding entry from "self.dirty_log_slots" if needed self.dirty_log_slots .write() @@ -1952,30 +1959,6 @@ impl vm::Vm for MshvVm { Ok(()) } - fn make_user_memory_region( - &self, - _slot: u32, - guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, - readonly: bool, - _log_dirty_pages: bool, - ) -> UserMemoryRegion { - let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE; - if !readonly { - flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE; - } - - mshv_user_mem_region { - flags, - guest_pfn: guest_phys_addr >> PAGE_SHIFT, - size: memory_size, - userspace_addr, - ..Default::default() - } - .into() - } - fn create_passthrough_device(&self) -> vm::Result { let mut vfio_dev = mshv_create_device { type_: MSHV_DEV_TYPE_VFIO, diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index 7a0d1b6af..a01e499a7 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -29,7 +29,7 @@ use crate::arch::riscv64::aia::{Vaia, VaiaConfig}; #[cfg(feature = "tdx")] use crate::arch::x86::CpuIdEntry; use crate::cpu::Vcpu; -use crate::{IoEventAddress, IrqRoutingEntry, UserMemoryRegion}; +use crate::{IoEventAddress, IrqRoutingEntry}; /// /// I/O events data matches (32 or 64 bits). @@ -335,8 +335,13 @@ pub trait Vm: Send + Sync + Any { fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry; /// Sets the GSI routing table entries, overwriting any previously set fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> Result<()>; - /// Creates a memory region structure that can be used with {create/remove}_user_memory_region - fn make_user_memory_region( + /// Creates a guest physical memory slot. + /// + /// # Safety + /// + /// `[userspace_addr, userspace_addr + memory_size)` must be valid memory, + /// and that address range must remain valid until [`Vm::remove_user_memory_region`] is called. + unsafe fn create_user_memory_region( &self, slot: u32, guest_phys_addr: u64, @@ -344,11 +349,21 @@ pub trait Vm: Send + Sync + Any { userspace_addr: u64, readonly: bool, log_dirty_pages: bool, - ) -> UserMemoryRegion; - /// Creates a guest physical memory slot. - fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> Result<()>; + ) -> Result<()>; /// Removes a guest physical memory slot. - fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> Result<()>; + /// + /// # Safety + /// + /// `[userspace_addr, userspace_addr + memory_size)` must be valid memory, + unsafe fn remove_user_memory_region( + &self, + slot: u32, + guest_phys_addr: u64, + memory_size: u64, + userspace_addr: u64, + readonly: bool, + log_dirty_pages: bool, + ) -> Result<()>; /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. #[cfg(target_arch = "aarch64")] fn get_preferred_target(&self, kvi: &mut crate::VcpuInit) -> Result<()>; diff --git a/pci/src/vfio.rs b/pci/src/vfio.rs index 2c07f309c..3d59293ae 100644 --- a/pci/src/vfio.rs +++ b/pci/src/vfio.rs @@ -1663,21 +1663,22 @@ impl VfioPciDevice { host_addr: host_addr as u64, }; + // SAFETY: host_addr was allocated by mmap() and points to size + // bytes of memory. + unsafe { + self.vm.create_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } + .map_err(VfioPciError::CreateUserMemoryRegion)?; + region.user_memory_regions.push(user_memory_region); - let mem_region = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - self.vm - .create_user_memory_region(mem_region) - .map_err(VfioPciError::CreateUserMemoryRegion)?; - if !self.iommu_attached { self.container .vfio_dma_map( @@ -1714,16 +1715,18 @@ impl VfioPciDevice { } // Remove region - let r = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - if let Err(e) = self.vm.remove_user_memory_region(r) { + // SAFETY: only valid entries are added to the user_memory_regions field + // of the entries of self.common.mmio_regions. + if let Err(e) = unsafe { + self.vm.remove_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } { error!("Could not remove the userspace memory region: {e}"); } @@ -1898,20 +1901,19 @@ impl PciDevice for VfioPciDevice { user_memory_region.start, user_memory_region.size, e ); } - // Remove old region - let old_mem_region = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - self.vm - .remove_user_memory_region(old_mem_region) - .map_err(io::Error::other)?; + // SAFETY: user_memory_regions has valid entries + unsafe { + self.vm.remove_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } + .map_err(io::Error::other)?; // Update the user memory region with the correct start address. if new_base > old_base { @@ -1921,18 +1923,18 @@ impl PciDevice for VfioPciDevice { } // Insert new region - let new_mem_region = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - self.vm - .create_user_memory_region(new_mem_region) - .map_err(io::Error::other)?; + // SAFETY: mmio_regions only has valid values + unsafe { + self.vm.create_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } + .map_err(io::Error::other)?; // Map the moved mmio region to vfio container if !self.iommu_attached { diff --git a/pci/src/vfio_user.rs b/pci/src/vfio_user.rs index 73de9ee96..79b563872 100644 --- a/pci/src/vfio_user.rs +++ b/pci/src/vfio_user.rs @@ -110,7 +110,10 @@ impl VfioUserPciDevice { }) } - pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> { + /// # Safety + /// + /// Not known yet (TODO) + pub unsafe fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> { for mmio_region in &mut self.common.mmio_regions { let region_flags = self .client @@ -184,20 +187,21 @@ impl VfioUserPciDevice { host_addr: host_addr as u64, }; + // SAFETY: host_addr was just allocated with mmap() + // and points to size bytes of valid address. + unsafe { + self.vm.create_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } + .map_err(VfioUserPciDeviceError::MapRegionGuest)?; + mmio_region.user_memory_regions.push(user_memory_region); - - let mem_region = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - self.vm - .create_user_memory_region(mem_region) - .map_err(VfioUserPciDeviceError::MapRegionGuest)?; } } } @@ -209,16 +213,17 @@ impl VfioUserPciDevice { for mmio_region in self.common.mmio_regions.iter() { for user_memory_region in mmio_region.user_memory_regions.iter() { // Remove region - let r = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - if let Err(e) = self.vm.remove_user_memory_region(r) { + // SAFETY: only valid regions are in user_memory_regions + if let Err(e) = unsafe { + self.vm.remove_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } { error!("Could not remove the userspace memory region: {e}"); } @@ -455,18 +460,18 @@ impl PciDevice for VfioUserPciDevice { for user_memory_region in mmio_region.user_memory_regions.iter_mut() { // Remove old region - let old_region = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - self.vm - .remove_user_memory_region(old_region) - .map_err(std::io::Error::other)?; + // SAFETY: only valid regions are in user_memory_regions + unsafe { + self.vm.remove_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } + .map_err(std::io::Error::other)?; // Update the user memory region with the correct start address. if new_base > old_base { @@ -476,18 +481,18 @@ impl PciDevice for VfioUserPciDevice { } // Insert new region - let new_region = self.vm.make_user_memory_region( - user_memory_region.slot, - user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, - false, - false, - ); - - self.vm - .create_user_memory_region(new_region) - .map_err(std::io::Error::other)?; + // SAFETY: only valid regions are in user_memory_regions + unsafe { + self.vm.create_user_memory_region( + user_memory_region.slot, + user_memory_region.start, + user_memory_region.size, + user_memory_region.host_addr, + false, + false, + ) + } + .map_err(std::io::Error::other)?; } info!("Moved bar 0x{old_base:x} -> 0x{new_base:x}"); } diff --git a/vmm/src/api/dbus/mod.rs b/vmm/src/api/dbus/mod.rs index f329b86bc..59c48f8a9 100644 --- a/vmm/src/api/dbus/mod.rs +++ b/vmm/src/api/dbus/mod.rs @@ -356,7 +356,7 @@ pub fn start_dbus_thread( apply_filter(&api_seccomp_filter) .map_err(VmmError::ApplySeccompFilter) .map_err(|e| { - error!("Error applying seccomp filter: {:?}", e); + error!("Error applying seccomp filter: {e:?}"); exit_evt.write(1).ok(); e })?; @@ -383,7 +383,7 @@ pub fn start_dbus_thread( } } } - }) + }); })) .map_err(|_| { error!("dbus-api thread panicked"); diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 8378a7a31..c7ec5a07d 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -2521,7 +2521,7 @@ impl VmConfig { #[cfg(target_arch = "x86_64")] if self.debug_console.mode == ConsoleOutputMode::Tty { tty_consoles.push("debug-console"); - }; + } if tty_consoles.len() > 1 { warn!("Using TTY output for multiple consoles: {tty_consoles:?}"); } diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index c56d5f27c..1293a0daa 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -817,32 +817,43 @@ impl DeviceRelocation for AddressManager { if let Some(mut shm_regions) = virtio_dev.get_shm_regions() && shm_regions.addr.raw_value() == old_base { - let mem_region = self.vm.make_user_memory_region( - shm_regions.mem_slot, - old_base, - shm_regions.len, - shm_regions.host_addr, - false, - false, - ); + // SAFETY: TODO what are the invariants here? + unsafe { + // Remove old mapping + self.vm + .remove_user_memory_region( + shm_regions.mem_slot, + old_base, + shm_regions.len, + shm_regions.host_addr, + false, + false, + ) + .map_err(|e| { + io::Error::other(format!( + "failed to remove user memory region: {e:?}" + )) + })?; + } - self.vm.remove_user_memory_region(mem_region).map_err(|e| { - io::Error::other(format!("failed to remove user memory region: {e:?}")) - })?; - - // Create new mapping by inserting new region to KVM. - let mem_region = self.vm.make_user_memory_region( - shm_regions.mem_slot, - new_base, - shm_regions.len, - shm_regions.host_addr, - false, - false, - ); - - self.vm.create_user_memory_region(mem_region).map_err(|e| { - io::Error::other(format!("failed to create user memory regions: {e:?}")) - })?; + // SAFETY: TODO what are the invariants here? + unsafe { + // Create new mapping by inserting new region to KVM. + self.vm + .create_user_memory_region( + shm_regions.mem_slot, + new_base, + shm_regions.len, + shm_regions.host_addr, + false, + false, + ) + .map_err(|e| { + io::Error::other(format!( + "failed to create user memory regions: {e:?}" + )) + })?; + } // Update shared memory regions to reflect the new mapping. shm_regions.addr = GuestAddress(new_base); @@ -3240,12 +3251,14 @@ impl DeviceManager { .map_err(DeviceManagerError::NewMmapRegion)?; let host_addr: u64 = mmap_region.as_ptr() as u64; - let mem_slot = self - .memory_manager - .lock() - .unwrap() - .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) - .map_err(DeviceManagerError::MemoryManager)?; + // SAFETY: host_addr points to region_size bytes of mmap-allocated memory. + let mem_slot = unsafe { + self.memory_manager + .lock() + .unwrap() + .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) + .map_err(DeviceManagerError::MemoryManager) + }?; let mapping = UserspaceMapping { host_addr, @@ -3997,13 +4010,16 @@ impl DeviceManager { resources, )?; + // SAFETY: TODO // Note it is required to call 'add_pci_device()' in advance to have the list of // mmio regions provisioned correctly - vfio_user_pci_device - .lock() - .unwrap() - .map_mmio_regions() - .map_err(DeviceManagerError::VfioUserMapRegion)?; + unsafe { + vfio_user_pci_device + .lock() + .unwrap() + .map_mmio_regions() + .map_err(DeviceManagerError::VfioUserMapRegion) + }?; let mut node = device_node!(vfio_user_name, vfio_user_pci_device); @@ -4722,17 +4738,22 @@ impl DeviceManager { // Shutdown and remove the underlying virtio-device if present if let Some(virtio_device) = virtio_device { for mapping in virtio_device.lock().unwrap().userspace_mappings() { - self.memory_manager - .lock() - .unwrap() - .remove_userspace_mapping( - mapping.addr.raw_value(), - mapping.len, - mapping.host_addr, - mapping.mergeable, - mapping.mem_slot, - ) - .map_err(DeviceManagerError::MemoryManager)?; + // SAFETY: userspace_mappings only has valid mappings. + // TODO: do not rely on the correctness of all the code in this file + // for this to hold. + unsafe { + self.memory_manager + .lock() + .unwrap() + .remove_userspace_mapping( + mapping.addr.raw_value(), + mapping.len, + mapping.host_addr, + mapping.mergeable, + mapping.mem_slot, + ) + .map_err(DeviceManagerError::MemoryManager) + }?; } virtio_device.lock().unwrap().shutdown(); @@ -4984,7 +5005,7 @@ impl IvshmemOps for IvshmemHandler { ) -> Result<(Arc, UserspaceMapping), IvshmemError> { info!("Creating ivshmem mem region at 0x{start_addr:x}"); - let region = MemoryManager::create_ram_region( + let region: Arc = MemoryManager::create_ram_region( &backing_file, 0, GuestAddress(start_addr), @@ -4998,19 +5019,21 @@ impl IvshmemOps for IvshmemHandler { false, ) .map_err(|_| IvshmemError::CreateUserMemoryRegion)?; - let mem_slot = self - .memory_manager - .lock() - .unwrap() - .create_userspace_mapping( - region.start_addr().0, - region.len(), - region.as_ptr() as u64, - false, - false, - false, - ) - .map_err(|_| IvshmemError::CreateUserspaceMapping)?; + let mem_slot = { + let mut manager = self.memory_manager.lock().unwrap(); + // SAFETY: guaranteed by GuestRegionMmap invariants + unsafe { + manager.create_userspace_mapping( + region.start_addr().0, + region.len(), + region.as_ptr() as u64, + false, + false, + false, + ) + } + } + .map_err(|_| IvshmemError::CreateUserspaceMapping)?; let mapping = UserspaceMapping { host_addr: region.as_ptr() as u64, mem_slot, @@ -5022,17 +5045,18 @@ impl IvshmemOps for IvshmemHandler { } fn unmap_ram_region(&mut self, mapping: UserspaceMapping) -> Result<(), IvshmemError> { - self.memory_manager - .lock() - .unwrap() - .remove_userspace_mapping( + let mut manager = self.memory_manager.lock().unwrap(); + // SAFETY: UserspaceMapping is valid due to other code being correct + unsafe { + manager.remove_userspace_mapping( mapping.addr.raw_value(), mapping.len, mapping.host_addr, mapping.mergeable, mapping.mem_slot, ) - .map_err(|_| IvshmemError::RemoveUserspaceMapping)?; + } + .map_err(|_| IvshmemError::RemoveUserspaceMapping)?; Ok(()) } } diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 734320683..84cf123ea 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -1128,7 +1128,7 @@ impl Vmm { return Err(MigratableError::MigrateSend(anyhow!( "Live Migration is not supported when TDX is enabled" ))); - }; + } let amx = vm_config.lock().unwrap().cpus.features.amx; let phys_bits = vm::physical_bits( @@ -1265,7 +1265,7 @@ impl Vmm { return Err(MigratableError::MigrateReceive(anyhow!( "Live Migration is not supported when TDX is enabled" ))); - }; + } // We check the `CPUID` compatibility of between the source vm and destination, which is // mostly about feature compatibility. diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 947fd41f3..146ea610e 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -900,14 +900,18 @@ impl MemoryManager { for (zone_id, regions) in list { for (region, virtio_mem) in regions { - let slot = self.create_userspace_mapping( - region.start_addr().raw_value(), - region.len(), - region.as_ptr() as u64, - self.mergeable, - false, - self.log_dirty, - )?; + // SAFETY: regions only holds valid addresses. + // TODO: encapsulate this unsafety in a small part of the file. + let slot = unsafe { + self.create_userspace_mapping( + region.start_addr().raw_value(), + region.len(), + region.as_ptr() as u64, + self.mergeable, + false, + self.log_dirty, + ) + }?; let file_offset = if let Some(file_offset) = region.file_offset() { file_offset.start() @@ -958,18 +962,18 @@ impl MemoryManager { arch::layout::UEFI_START, ) .unwrap(); - let uefi_mem_region = self.vm.make_user_memory_region( - uefi_mem_slot, - uefi_region.start_addr().raw_value(), - uefi_region.len(), - uefi_region.as_ptr() as u64, - false, - false, - ); - self.vm - .create_user_memory_region(uefi_mem_region) - .map_err(Error::CreateUefiFlash)?; - + unsafe { + self.vm + .create_user_memory_region( + uefi_mem_slot, + uefi_region.start_addr().raw_value(), + uefi_region.len(), + uefi_region.as_ptr() as u64, + false, + false, + ) + .map_err(Error::CreateUefiFlash)?; + } let uefi_flash = GuestMemoryAtomic::new(GuestMemoryMmap::from_regions(vec![uefi_region]).unwrap()); @@ -1608,14 +1612,17 @@ impl MemoryManager { )?; // Map it into the guest - let slot = self.create_userspace_mapping( - region.start_addr().0, - region.len(), - region.as_ptr() as u64, - self.mergeable, - false, - self.log_dirty, - )?; + // SAFETY: create_ram_region only produces valid mappings. + let slot = unsafe { + self.create_userspace_mapping( + region.start_addr().0, + region.len(), + region.as_ptr() as u64, + self.mergeable, + false, + self.log_dirty, + ) + }?; self.guest_ram_mappings.push(GuestRamMapping { gpa: region.start_addr().raw_value(), size: region.len(), @@ -1708,7 +1715,11 @@ impl MemoryManager { self.memory_slot_allocator().next_memory_slot() } - pub fn create_userspace_mapping( + /// # Safety + /// + /// `userspace_addr` and `memory_size` must be and remain valid + /// until `remove_userspace_mapping` is called. + pub unsafe fn create_userspace_mapping( &mut self, guest_phys_addr: u64, memory_size: u64, @@ -1718,22 +1729,24 @@ impl MemoryManager { log_dirty: bool, ) -> Result { let slot = self.allocate_memory_slot(); - let mem_region = self.vm.make_user_memory_region( - slot, - guest_phys_addr, - memory_size, - userspace_addr, - readonly, - log_dirty, - ); info!( "Creating userspace mapping: {guest_phys_addr:x} -> {userspace_addr:x} {memory_size:x}, slot {slot}" ); - self.vm - .create_user_memory_region(mem_region) - .map_err(Error::CreateUserMemoryRegion)?; + // SAFETY: promised by caller + unsafe { + self.vm + .create_user_memory_region( + slot, + guest_phys_addr, + memory_size, + userspace_addr, + readonly, + log_dirty, + ) + .map_err(Error::CreateUserMemoryRegion)?; + } // SAFETY: the address and size are valid since the // mmap succeeded. @@ -1781,7 +1794,16 @@ impl MemoryManager { Ok(slot) } - pub fn remove_userspace_mapping( + /// # Safety + /// + /// `userspace_addr` and `memory_size` must have previously been passed + /// to `create_userspace_mapping`. + /// + /// # Errors + /// + /// If this function fails there is no way to clean up resources and you + /// should probably crash the process. + pub unsafe fn remove_userspace_mapping( &mut self, guest_phys_addr: u64, memory_size: u64, @@ -1789,18 +1811,19 @@ impl MemoryManager { mergeable: bool, slot: u32, ) -> Result<(), Error> { - let mem_region = self.vm.make_user_memory_region( - slot, - guest_phys_addr, - memory_size, - userspace_addr, - false, /* readonly -- don't care */ - false, /* log dirty */ - ); - - self.vm - .remove_user_memory_region(mem_region) - .map_err(Error::RemoveUserMemoryRegion)?; + // SAFETY: The caller promises that the parameters are correct. + unsafe { + self.vm + .remove_user_memory_region( + slot, + guest_phys_addr, + memory_size, + userspace_addr, + false, /* readonly -- don't care */ + false, /* log dirty */ + ) + .map_err(Error::RemoveUserMemoryRegion)?; + } // Mark the pages as unmergeable if there were previously marked as // mergeable. diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 9837e71ff..89255e28b 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -3464,17 +3464,18 @@ mod unit_tests { .expect("new VM creation failed"); for (index, region) in mem.iter().enumerate() { - let mem_region = vm.make_user_memory_region( - index as u32, - region.start_addr().raw_value(), - region.len(), - region.as_ptr() as u64, - false, - false, - ); - - vm.create_user_memory_region(mem_region) + // SAFETY: inputs are valid + unsafe { + vm.create_user_memory_region( + index as u32, + region.start_addr().raw_value(), + region.len(), + region.as_ptr() as u64, + false, + false, + ) .expect("Cannot configure guest memory"); + } } mem.write_slice(&code, load_addr) .expect("Writing code to memory failed"); @@ -3574,3 +3575,71 @@ mod unit_tests { .unwrap(); } } + +#[cfg(all(feature = "kvm", target_arch = "x86_64"))] +#[test] +pub fn test_vm() { + use hypervisor::VmExit; + use vm_memory::{Address, GuestMemory, GuestMemoryRegion}; + // This example based on https://lwn.net/Articles/658511/ + let code = [ + 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ + 0x00, 0xd8, /* add %bl, %al */ + 0x04, b'0', /* add $'0', %al */ + 0xee, /* out %al, (%dx) */ + 0xb0, b'\n', /* mov $'\n', %al */ + 0xee, /* out %al, (%dx) */ + 0xf4, /* hlt */ + ]; + + let mem_size = 0x1000; + let load_addr = GuestAddress(0x1000); + let mem = GuestMemoryMmap::from_ranges(&[(load_addr, mem_size)]).unwrap(); + + let hv = hypervisor::new().unwrap(); + let vm = hv + .create_vm(HypervisorVmConfig::default()) + .expect("new VM creation failed"); + + for (index, region) in mem.iter().enumerate() { + // SAFETY: parameters are correct + unsafe { + vm.create_user_memory_region( + index as u32, + region.start_addr().raw_value(), + region.len(), + region.as_ptr() as u64, + false, + false, + ) + .expect("Cannot configure guest memory"); + } + } + mem.write_slice(&code, load_addr) + .expect("Writing code to memory failed"); + + let mut vcpu = vm.create_vcpu(0, None).expect("new Vcpu failed"); + + let mut vcpu_sregs = vcpu.get_sregs().expect("get sregs failed"); + vcpu_sregs.cs.base = 0; + vcpu_sregs.cs.selector = 0; + vcpu.set_sregs(&vcpu_sregs).expect("set sregs failed"); + + let mut vcpu_regs = vcpu.get_regs().expect("get regs failed"); + vcpu_regs.set_rip(0x1000); + vcpu_regs.set_rax(2); + vcpu_regs.set_rbx(3); + vcpu_regs.set_rflags(2); + vcpu.set_regs(&vcpu_regs).expect("set regs failed"); + + loop { + match vcpu.run().expect("run failed") { + VmExit::Reset => { + println!("HLT"); + break; + } + VmExit::Ignore => {} + r => panic!("unexpected exit reason: {r:?}"), + } + } +}