diff --git a/devices/src/ivshmem.rs b/devices/src/ivshmem.rs index 5997e10a2..d6f5b5d46 100644 --- a/devices/src/ivshmem.rs +++ b/devices/src/ivshmem.rs @@ -33,7 +33,7 @@ const IVSHMEM_DEVICE_ID: u16 = 0x1110; const IVSHMEM_REG_BAR_SIZE: u64 = 0x100; -type GuestRegionMmap = vm_memory::GuestRegionMmap; +type MmapRegion = vm_memory::MmapRegion; #[derive(Debug, Error)] pub enum IvshmemError { @@ -68,7 +68,7 @@ pub trait IvshmemOps: Send + Sync { start_addr: u64, size: usize, backing_file: Option, - ) -> Result<(Arc, UserspaceMapping), IvshmemError>; + ) -> Result<(Arc, UserspaceMapping), IvshmemError>; fn unmap_ram_region(&mut self, mapping: UserspaceMapping) -> Result<(), IvshmemError>; } @@ -95,7 +95,7 @@ pub struct IvshmemDevice { region_size: u64, ivshmem_ops: Arc>, backend_file: Option, - region: Option>, + region: Option>, userspace_mapping: Option, } @@ -180,11 +180,7 @@ impl IvshmemDevice { Ok(device) } - pub fn set_region( - &mut self, - region: Arc, - userspace_mapping: UserspaceMapping, - ) { + pub fn set_region(&mut self, region: Arc, userspace_mapping: UserspaceMapping) { self.region = Some(region); self.userspace_mapping = Some(userspace_mapping); } diff --git a/fuzz/fuzz_targets/pmem.rs b/fuzz/fuzz_targets/pmem.rs index e9247fb63..a8fcb7a77 100644 --- a/fuzz/fuzz_targets/pmem.rs +++ b/fuzz/fuzz_targets/pmem.rs @@ -115,10 +115,9 @@ fn create_dummy_pmem() -> Pmem { .unwrap(); let guest_addr = GuestAddress(0); let dummy_user_mapping = UserspaceMapping { - host_addr: dummy_mmap_region.as_ptr() as u64, mem_slot: 0, addr: guest_addr, - len: dummy_mapping_size as u64, + mapping: Arc::new(dummy_mmap_region), mergeable: false, }; @@ -127,7 +126,6 @@ fn create_dummy_pmem() -> Pmem { file, guest_addr, dummy_user_mapping, - dummy_mmap_region, false, SeccompAction::Allow, EventFd::new(EFD_NONBLOCK).unwrap(), diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 53de9bad3..d9dc9b754 100644 --- a/hypervisor/src/kvm/mod.rs +++ 
b/hypervisor/src/kvm/mod.rs @@ -723,8 +723,8 @@ impl vm::Vm for KvmVm { &self, slot: u32, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, readonly: bool, log_dirty_pages: bool, ) -> vm::Result<()> { @@ -741,8 +741,8 @@ impl vm::Vm for KvmVm { let mut region = kvm_userspace_memory_region { slot, guest_phys_addr, - memory_size, - userspace_addr, + memory_size: memory_size as u64, + userspace_addr: userspace_addr as usize as u64, flags, }; @@ -787,8 +787,8 @@ impl vm::Vm for KvmVm { &self, slot: u32, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, readonly: bool, log_dirty_pages: bool, ) -> vm::Result<()> { @@ -805,8 +805,8 @@ impl vm::Vm for KvmVm { let mut region = kvm_userspace_memory_region { slot, guest_phys_addr, - memory_size, - userspace_addr, + memory_size: memory_size as u64, + userspace_addr: userspace_addr as usize as u64, flags, }; diff --git a/hypervisor/src/mshv/mod.rs b/hypervisor/src/mshv/mod.rs index 1fe60d368..3ca3d846f 100644 --- a/hypervisor/src/mshv/mod.rs +++ b/hypervisor/src/mshv/mod.rs @@ -1886,8 +1886,8 @@ impl vm::Vm for MshvVm { &self, _slot: u32, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, readonly: bool, _log_dirty_pages: bool, ) -> vm::Result<()> { @@ -1899,8 +1899,8 @@ impl vm::Vm for MshvVm { let user_memory_region = mshv_user_mem_region { flags, guest_pfn: guest_phys_addr >> PAGE_SHIFT, - size: memory_size, - userspace_addr, + size: memory_size.try_into().unwrap(), + userspace_addr: (userspace_addr as usize).try_into().unwrap(), ..Default::default() }; // No matter read only or not we keep track the slots. 
@@ -1930,8 +1930,8 @@ impl vm::Vm for MshvVm { &self, _slot: u32, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, readonly: bool, _log_dirty_pages: bool, ) -> vm::Result<()> { @@ -1943,8 +1943,8 @@ impl vm::Vm for MshvVm { let user_memory_region = mshv_user_mem_region { flags, guest_pfn: guest_phys_addr >> PAGE_SHIFT, - size: memory_size, - userspace_addr, + size: memory_size.try_into().unwrap(), + userspace_addr: (userspace_addr as usize).try_into().unwrap(), ..Default::default() }; // Remove the corresponding entry from "self.dirty_log_slots" if needed diff --git a/hypervisor/src/vm.rs b/hypervisor/src/vm.rs index a01e499a7..9d4bf5c3d 100644 --- a/hypervisor/src/vm.rs +++ b/hypervisor/src/vm.rs @@ -345,8 +345,8 @@ pub trait Vm: Send + Sync + Any { &self, slot: u32, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, readonly: bool, log_dirty_pages: bool, ) -> Result<()>; @@ -359,8 +359,8 @@ pub trait Vm: Send + Sync + Any { &self, slot: u32, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, readonly: bool, log_dirty_pages: bool, ) -> Result<()>; diff --git a/pci/src/lib.rs b/pci/src/lib.rs index c95a38b33..8c1532491 100644 --- a/pci/src/lib.rs +++ b/pci/src/lib.rs @@ -11,6 +11,7 @@ extern crate log; mod bus; mod configuration; mod device; +mod mmap; mod msi; mod msix; mod vfio; diff --git a/pci/src/mmap.rs b/pci/src/mmap.rs new file mode 100644 index 000000000..79d3ab20b --- /dev/null +++ b/pci/src/mmap.rs @@ -0,0 +1,89 @@ +// Copyright © 2025 Demi Marie Obenour +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! 
Helpers for `mmap()` + +use core::ffi::c_int; +use core::ptr::null_mut; +use std::io::{Error, ErrorKind}; +use std::os::fd::{AsRawFd as _, BorrowedFd}; + +use libc::size_t; + +/// A region of `mmap()`-allocated memory that calls `munmap()` when dropped. +/// This guarantees that the buffer is valid and that its address space +/// will be reserved. The address space is not guaranteed to be accessible. +/// Atomic access to the data will not cause undefined behavior but might +/// cause SIGSEGV or SIGBUS. Non-atomic access will generally cause data +/// races and thus Undefined Behavior. +#[derive(Debug)] +pub struct MmapRegion { + addr: *mut u8, + len: size_t, +} + +impl Drop for MmapRegion { + fn drop(&mut self) { + // SAFETY: guaranteed by type validity invariant + unsafe { assert_eq!(libc::munmap(self.addr as *mut _, self.len), 0) } + } +} +// SAFETY: the caller is responsible for avoiding data races +unsafe impl Send for MmapRegion {} +// SAFETY: the caller is responsible for avoiding data races +unsafe impl Sync for MmapRegion {} + +impl MmapRegion { + #[inline] + pub fn addr(&self) -> *mut u8 { + self.addr + } + + /// Return the length of the region. + /// This function promises that the return value fits in [`libc::size_t`] + /// and in [`isize`] and `unsafe` code can rely on this. + #[inline] + pub fn len(&self) -> usize { + self.len + } + + /// Create an [`MmapRegion`] using `mmap` of a file descriptor. 
+ pub fn mmap( + len: u64, + prot: c_int, + fd: BorrowedFd, + offset1: u64, + offset2: u64, + ) -> std::io::Result { + const BAD_LENGTH: &str = "Mapping length must fit \ +in both isize and libc::size_t"; + const BAD_OFFSET: &str = "Offsets must fit in libc::off_t"; + let Some(offset) = offset1.checked_add(offset2) else { + return Err(Error::new(ErrorKind::InvalidInput, BAD_OFFSET)); + }; + let Ok(offset) = libc::off_t::try_from(offset) else { + return Err(Error::new(ErrorKind::InvalidInput, BAD_OFFSET)); + }; + if isize::try_from(len).is_err() { + return Err(Error::new(ErrorKind::InvalidInput, BAD_LENGTH)); + } + let Ok(len) = libc::size_t::try_from(len) else { + return Err(Error::new(ErrorKind::InvalidInput, BAD_LENGTH)); + }; + + assert!( + (prot & !(libc::PROT_READ | libc::PROT_WRITE | libc::PROT_EXEC)) == 0, + "bad protection" + ); + let flags = libc::MAP_SHARED; + // SAFETY: FFI call with correct parameters. + let addr = unsafe { libc::mmap(null_mut(), len, prot, flags, fd.as_raw_fd(), offset) }; + if addr == libc::MAP_FAILED { + Err(Error::last_os_error()) + } else { + let addr = addr as _; + Ok(Self { addr, len }) + } + } +} diff --git a/pci/src/vfio.rs b/pci/src/vfio.rs index 3d59293ae..7c85f557c 100644 --- a/pci/src/vfio.rs +++ b/pci/src/vfio.rs @@ -6,9 +6,9 @@ use std::any::Any; use std::collections::{BTreeMap, HashMap}; use std::io; +use std::os::fd::BorrowedFd; use std::os::unix::io::AsRawFd; use std::path::PathBuf; -use std::ptr::null_mut; use std::sync::{Arc, Barrier, Mutex}; use anyhow::anyhow; @@ -34,6 +34,7 @@ use vm_memory::{Address, GuestAddress, GuestAddressSpace, GuestMemory, GuestUsiz use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; +use crate::mmap::MmapRegion; use crate::msi::{MSI_CONFIG_ID, MsiConfigState}; use crate::msix::MsixConfigState; use crate::{ @@ -258,12 +259,11 @@ impl Interrupt { } } -#[derive(Copy, Clone)] +#[derive(Clone)] pub struct 
UserMemoryRegion { pub slot: u32, pub start: u64, - pub size: u64, - pub host_addr: u64, + pub mapping: Arc, } #[derive(Clone)] @@ -275,12 +275,17 @@ pub struct MmioRegion { pub(crate) user_memory_regions: Vec, } -trait MmioRegionRange { +/// # Safety +/// +/// [`Self::find_user_address`] must always either return `Err` +/// or a pointer to `size` bytes of valid memory. +unsafe trait MmioRegionRange { fn check_range(&self, guest_addr: u64, size: u64) -> bool; - fn find_user_address(&self, guest_addr: u64) -> Result; + fn find_user_address(&self, guest_addr: u64, size: u64) -> Result<*mut u8, io::Error>; } -impl MmioRegionRange for Vec { +// SAFETY: See the comment in `find_user_address`. +unsafe impl MmioRegionRange for Vec { // Check if a guest address is within the range of mmio regions fn check_range(&self, guest_addr: u64, size: u64) -> bool { for region in self.iter() { @@ -298,14 +303,33 @@ impl MmioRegionRange for Vec { } // Locate the user region address for a guest address within all mmio regions - fn find_user_address(&self, guest_addr: u64) -> Result { + fn find_user_address(&self, guest_addr: u64, size: u64) -> Result<*mut u8, io::Error> { for region in self.iter() { for user_region in region.user_memory_regions.iter() { - if guest_addr >= user_region.start - && guest_addr < user_region.start + user_region.size - { - return Ok(user_region.host_addr + (guest_addr - user_region.start)); + let mapping: &MmapRegion = &user_region.mapping; + let start: u64 = user_region.start; + let len: u64 = mapping.len().try_into().unwrap(); + // See if the guest address is inside the region. + let Some(offset_from_start) = guest_addr.checked_sub(start) else { + continue; + }; + if offset_from_start >= len { + continue; } + // Check that the size is in bounds. + // This enforces the invariant promised by implementing MmioRegionRange. 
+ assert!( + size <= len - offset_from_start, + "Attempt to read {size} bytes at offset {offset_from_start} into \ +a region of size {len}" + ); + // SAFETY: MmapRegion guarantees that mapping.addr points to at least + // mapping.len() bytes of valid memory. offset_from_start is equal + // to guest_addr - start, which was checked to be less than mapping.len(). + // Therefore, the returned pointer is still in the range of valid memory. + // Also, since mapping.len() fit in usize, offset_from_start must as well, + // so the cast is safe. + return Ok(unsafe { mapping.addr().add(offset_from_start as usize) }); } } @@ -1585,7 +1609,8 @@ impl VfioPciDevice { /// * `mem_slot` - The closure to return a memory slot. pub fn map_mmio_regions(&mut self) -> Result<(), VfioPciError> { let fd = self.device.as_raw_fd(); - + // SAFETY: fd is guaranteed valid + let fd = unsafe { BorrowedFd::borrow_raw(fd) }; for region in self.common.mmio_regions.iter_mut() { let region_flags = self.device.get_region_flags(region.index); if region_flags & VFIO_REGION_INFO_FLAG_MMAP != 0 { @@ -1625,71 +1650,72 @@ impl VfioPciDevice { self.common.interrupt.msix.as_ref(), )?; - for area in sparse_areas.iter() { - // SAFETY: FFI call with correct arguments - let host_addr = unsafe { - libc::mmap( - null_mut(), - area.size as usize, - prot, - libc::MAP_SHARED, - fd, - mmap_offset as libc::off_t + area.offset as libc::off_t, - ) - }; - - if std::ptr::eq(host_addr, libc::MAP_FAILED) { + for area in &sparse_areas { + if !is_page_size_aligned(area.size) || !is_page_size_aligned(area.offset) { error!( - "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}", - area.offset, - area.size, - std::io::Error::last_os_error() + "Could not mmap sparse area that is not page size aligned \ +(offset = 0x{:x}, size = 0x{:x})", + area.offset, area.size ); return Err(VfioPciError::MmapArea); } + } - if !is_page_size_aligned(area.size) || !is_page_size_aligned(area.offset) { - warn!( - "Could not mmap sparse area 
that is not page size aligned (offset = 0x{:x}, size = 0x{:x})", - area.offset, area.size, - ); - return Ok(()); - } + for area in sparse_areas.iter() { + let mapping = match MmapRegion::mmap( + area.size, + prot, + fd, + mmap_offset, + area.offset, + ) { + Ok(mapping) => mapping, + Err(e) => { + error!( + "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}", + area.offset, + area.size, + e + ); + return Err(VfioPciError::MmapArea); + } + }; let user_memory_region = UserMemoryRegion { slot: self.memory_slot_allocator.next_memory_slot(), start: region.start.0 + area.offset, - size: area.size, - host_addr: host_addr as u64, + mapping: Arc::new(mapping), }; - - // SAFETY: host_addr was allocated by mmap() and points to size - // bytes of memory. + // SAFETY: MmapRegion invariants guarantee that + // user_memory_region.mapping.addr() points to + // user_memory_region.mapping.len() bytes of + // valid memory that will only be unmapped with munmap(). 
unsafe { self.vm.create_user_memory_region( user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + user_memory_region.mapping.len(), + user_memory_region.mapping.addr(), false, false, ) } .map_err(VfioPciError::CreateUserMemoryRegion)?; - region.user_memory_regions.push(user_memory_region); - if !self.iommu_attached { self.container .vfio_dma_map( user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + user_memory_region.mapping.len().try_into().unwrap(), + (user_memory_region.mapping.addr() as usize) + .try_into() + .unwrap(), ) .map_err(|e| { VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf) })?; } + region.user_memory_regions.push(user_memory_region); } } } @@ -1698,19 +1724,21 @@ impl VfioPciDevice { } pub fn unmap_mmio_regions(&mut self) { - for region in self.common.mmio_regions.iter() { - for user_memory_region in region.user_memory_regions.iter() { + for region in self.common.mmio_regions.iter_mut() { + for user_memory_region in region.user_memory_regions.drain(..) 
{ + let len = user_memory_region.mapping.len(); + let host_addr = user_memory_region.mapping.addr(); // Unmap from vfio container if !self.iommu_attached && let Err(e) = self .container - .vfio_dma_unmap(user_memory_region.start, user_memory_region.size) + .vfio_dma_unmap(user_memory_region.start, len.try_into().unwrap()) .map_err(|e| VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf)) { error!( "Could not unmap mmio region from vfio container: \ iova 0x{:x}, size 0x{:x}: {}, ", - user_memory_region.start, user_memory_region.size, e + user_memory_region.start, len, e ); } @@ -1721,8 +1749,8 @@ impl VfioPciDevice { self.vm.remove_user_memory_region( user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + len, + host_addr, false, false, ) @@ -1732,21 +1760,6 @@ impl VfioPciDevice { self.memory_slot_allocator .free_memory_slot(user_memory_region.slot); - - // SAFETY: FFI call with correct arguments - let ret = unsafe { - libc::munmap( - user_memory_region.host_addr as *mut libc::c_void, - user_memory_region.size as usize, - ) - }; - if ret != 0 { - error!( - "Could not unmap region {}, error:{}", - region.index, - io::Error::last_os_error() - ); - } } } } @@ -1886,29 +1899,31 @@ impl PciDevice for VfioPciDevice { region.start = GuestAddress(new_base); for user_memory_region in region.user_memory_regions.iter_mut() { + let len = user_memory_region.mapping.len(); + let host_addr = user_memory_region.mapping.addr(); // Unmap the old MMIO region from vfio container if !self.iommu_attached && let Err(e) = self .container - .vfio_dma_unmap(user_memory_region.start, user_memory_region.size) + .vfio_dma_unmap(user_memory_region.start, len.try_into().unwrap()) .map_err(|e| { VfioPciError::DmaUnmap(e, self.device_path.clone(), self.bdf) }) { error!( "Could not unmap mmio region from vfio container: \ - iova 0x{:x}, size 0x{:x}: {}, ", - user_memory_region.start, user_memory_region.size, e +iova 0x{:x}, size 
0x{:x}: {}, ", + user_memory_region.start, len, e ); } // Remove old region - // SAFETY: user_memory_regions has valid entries + // SAFETY: validity of len and host_addr guaranteed by hypervisor::mmap::MmapRegion unsafe { self.vm.remove_user_memory_region( user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + len, + host_addr, false, false, ) @@ -1923,13 +1938,13 @@ impl PciDevice for VfioPciDevice { } // Insert new region - // SAFETY: mmio_regions only has valid values + // SAFETY: validity of len and host_addr guaranteed by hypervisor::mmap::MmapRegion unsafe { self.vm.create_user_memory_region( user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + len, + host_addr, false, false, ) @@ -1941,8 +1956,8 @@ impl PciDevice for VfioPciDevice { self.container .vfio_dma_map( user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + len.try_into().unwrap(), + (host_addr as usize).try_into().unwrap(), ) .map_err(|e| { VfioPciError::DmaMap(e, self.device_path.clone(), self.bdf) @@ -1950,8 +1965,8 @@ impl PciDevice for VfioPciDevice { .map_err(|e| { io::Error::other(format!( "Could not map mmio region to vfio container: \ - iova 0x{:x}, size 0x{:x}: {}, ", - user_memory_region.start, user_memory_region.size, e +iova 0x{:x}, size 0x{:x}: {}, ", + user_memory_region.start, len, e )) })?; } @@ -1987,6 +2002,7 @@ impl Snapshottable for VfioPciDevice { Ok(vfio_pci_dev_snapshot) } } + impl Transportable for VfioPciDevice {} impl Migratable for VfioPciDevice {} @@ -2020,11 +2036,17 @@ impl VfioDmaMapping { impl ExternalDmaMapping for VfioDmaMapping { fn map(&self, iova: u64, gpa: u64, size: u64) -> std::result::Result<(), io::Error> { + let Ok(usize_size): Result = size.try_into() else { + return Err(io::Error::other(format!("size {size} overflows usize"))); + }; let mem = self.memory.memory(); let guest_addr = GuestAddress(gpa); - let 
user_addr = if mem.check_range(guest_addr, size as usize) { - match mem.get_host_address(guest_addr) { - Ok(t) => t as u64, + let user_addr = if mem.check_range(guest_addr, usize_size) { + match mem.get_slice(guest_addr, usize_size) { + Ok(t) => { + assert!(t.len() >= usize_size); + Ok(t.ptr_guard_mut()) + } Err(e) => { return Err(io::Error::other(format!( "unable to retrieve user address for gpa 0x{gpa:x} from guest memory region: {e}" @@ -2032,15 +2054,26 @@ impl ExternalDmaMapping for VfioDmaMapping p.as_ptr(), + Err(p) => p, + }; + // SAFETY: find_user_address and GuestMemory::get_slice() guarantee that + // the returned pointer is valid for up to `usize_size` bytes. + // `usize_size` is always equal to `size` due to the above `try_into()` call. self.container - .vfio_dma_map(iova, size, user_addr) + .vfio_dma_map(iova, size, (user_addr as usize).try_into().unwrap()) .map_err(|e| { io::Error::other(format!( "failed to map memory for VFIO container, \ diff --git a/pci/src/vfio_user.rs b/pci/src/vfio_user.rs index 79b563872..3ce9ebdb0 100644 --- a/pci/src/vfio_user.rs +++ b/pci/src/vfio_user.rs @@ -4,8 +4,8 @@ // use std::any::Any; +use std::os::fd::AsFd; use std::os::unix::prelude::AsRawFd; -use std::ptr::null_mut; use std::sync::{Arc, Barrier, Mutex}; use hypervisor::HypervisorVmError; @@ -24,6 +24,7 @@ use vm_memory::{ use vm_migration::{Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable}; use vmm_sys_util::eventfd::EventFd; +use crate::mmap::MmapRegion; use crate::vfio::{UserMemoryRegion, VFIO_COMMON_ID, Vfio, VfioCommon, VfioError}; use crate::{ BarReprogrammingParams, PciBarConfiguration, PciBdf, PciDevice, PciDeviceError, PciSubclass, @@ -52,6 +53,8 @@ pub enum VfioUserPciDeviceError { InitializeLegacyInterrupts(#[source] VfioPciError), #[error("Failed to create VfioCommon")] CreateVfioCommon(#[source] VfioPciError), + #[error("Other OS error")] + Other(#[source] std::io::Error), } #[derive(Copy, Clone)] @@ -110,10 +113,8 @@ impl 
VfioUserPciDevice { }) } - /// # Safety - /// - /// Not known yet (TODO) - pub unsafe fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> { + /// Map all of the MMIO regions. + pub fn map_mmio_regions(&mut self) -> Result<(), VfioUserPciDeviceError> { for mmio_region in &mut self.common.mmio_regions { let region_flags = self .client @@ -158,43 +159,39 @@ impl VfioUserPciDevice { sparse_areas }; - for s in mmaps.iter() { - // SAFETY: FFI call with correct arguments - let host_addr = unsafe { - libc::mmap( - null_mut(), - s.size as usize, - prot, - libc::MAP_SHARED, - file_offset.as_ref().unwrap().file().as_raw_fd(), - file_offset.as_ref().unwrap().start() as libc::off_t - + s.offset as libc::off_t, - ) - }; + let file_offset = file_offset.as_ref().unwrap(); - if std::ptr::eq(host_addr, libc::MAP_FAILED) { - error!( - "Could not mmap regions, error:{}", - std::io::Error::last_os_error() - ); - continue; - } + for s in mmaps.iter() { + let mapping = match MmapRegion::mmap( + s.size, + prot, + file_offset.file().as_fd(), + file_offset.start(), + s.offset, + ) { + Ok(mapping) => Arc::new(mapping), + Err(e) => { + error!( + "Could not mmap sparse area (offset = 0x{:x}, size = 0x{:x}): {}", + s.offset, s.size, e + ); + return Err(VfioUserPciDeviceError::Other(e)); + } + }; let user_memory_region = UserMemoryRegion { slot: self.memory_slot_allocator.next_memory_slot(), start: mmio_region.start.0 + s.offset, - size: s.size, - host_addr: host_addr as u64, + mapping, }; - // SAFETY: host_addr was just allocated with mmap() - // and points to size bytes of valid address. 
+ // SAFETY: validity of len and host_addr guaranteed by hypervisor::mmap::MmapRegion unsafe { self.vm.create_user_memory_region( user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + user_memory_region.mapping.len(), + user_memory_region.mapping.addr(), false, false, ) @@ -209,17 +206,17 @@ impl VfioUserPciDevice { Ok(()) } - pub fn unmap_mmio_regions(&mut self) { - for mmio_region in self.common.mmio_regions.iter() { - for user_memory_region in mmio_region.user_memory_regions.iter() { + fn unmap_mmio_regions(&mut self) { + for mmio_region in self.common.mmio_regions.iter_mut() { + for user_memory_region in mmio_region.user_memory_regions.drain(..) { // Remove region - // SAFETY: only valid regions are in user_memory_regions + // SAFETY: guaranteed by hypervisor::mmap::MmapRegion invariants if let Err(e) = unsafe { self.vm.remove_user_memory_region( user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + user_memory_region.mapping.len(), + user_memory_region.mapping.addr(), false, false, ) @@ -229,22 +226,7 @@ impl VfioUserPciDevice { self.memory_slot_allocator .free_memory_slot(user_memory_region.slot); - - // Remove mmaps - // SAFETY: FFI call with correct arguments - let ret = unsafe { - libc::munmap( - user_memory_region.host_addr as *mut libc::c_void, - user_memory_region.size as usize, - ) - }; - if ret != 0 { - error!( - "Could not unmap region {}, error:{}", - mmio_region.index, - std::io::Error::last_os_error() - ); - } + // memory will be unmapped on drop } } } @@ -465,8 +447,8 @@ impl PciDevice for VfioUserPciDevice { self.vm.remove_user_memory_region( user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + user_memory_region.mapping.len(), + user_memory_region.mapping.addr(), false, false, ) @@ -486,8 +468,8 @@ impl PciDevice for VfioUserPciDevice { self.vm.create_user_memory_region( 
user_memory_region.slot, user_memory_region.start, - user_memory_region.size, - user_memory_region.host_addr, + user_memory_region.mapping.len(), + user_memory_region.mapping.addr(), false, false, ) diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs index a30ad6064..cd9a8fea7 100644 --- a/virtio-devices/src/device.rs +++ b/virtio-devices/src/device.rs @@ -16,13 +16,13 @@ use std::thread; use libc::EFD_NONBLOCK; use virtio_queue::Queue; use vm_device::UserspaceMapping; -use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestUsize}; +use vm_memory::{GuestAddress, GuestMemoryAtomic}; use vm_migration::{MigratableError, Pausable}; use vm_virtio::{AccessPlatform, VirtioDeviceType}; use vmm_sys_util::eventfd::EventFd; use crate::{ - ActivateError, ActivateResult, Error, GuestMemoryMmap, GuestRegionMmap, + ActivateError, ActivateResult, Error, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VIRTIO_F_RING_INDIRECT_DESC, }; @@ -46,10 +46,9 @@ pub struct VirtioSharedMemory { #[derive(Clone)] pub struct VirtioSharedMemoryList { - pub host_addr: u64, pub mem_slot: u32, pub addr: GuestAddress, - pub len: GuestUsize, + pub mapping: Arc, pub region_list: Vec, } diff --git a/virtio-devices/src/pmem.rs b/virtio-devices/src/pmem.rs index d76655847..ff8ad296b 100644 --- a/virtio-devices/src/pmem.rs +++ b/virtio-devices/src/pmem.rs @@ -34,7 +34,7 @@ use super::{ }; use crate::seccomp_filters::Thread; use crate::thread_helper::spawn_virtio_thread; -use crate::{GuestMemoryMmap, MmapRegion, VirtioInterrupt, VirtioInterruptType}; +use crate::{GuestMemoryMmap, VirtioInterrupt, VirtioInterruptType}; const QUEUE_SIZE: u16 = 256; const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE]; @@ -267,10 +267,6 @@ pub struct Pmem { mapping: UserspaceMapping, seccomp_action: SeccompAction, exit_evt: EventFd, - - // Hold ownership of the memory that is allocated for the device - // which will be automatically dropped when the device is dropped - _region: MmapRegion, } #[derive(Serialize, 
Deserialize)] @@ -287,7 +283,6 @@ impl Pmem { disk: File, addr: GuestAddress, mapping: UserspaceMapping, - _region: MmapRegion, iommu: bool, seccomp_action: SeccompAction, exit_evt: EventFd, @@ -304,7 +299,7 @@ impl Pmem { } else { let config = VirtioPmemConfig { start: addr.raw_value().to_le(), - size: (_region.size() as u64).to_le(), + size: (mapping.mapping.size() as u64).to_le(), }; let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; @@ -331,7 +326,6 @@ impl Pmem { config, mapping, seccomp_action, - _region, exit_evt, }) } diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs index da54ef64d..ca60c2c06 100644 --- a/virtio-devices/src/transport/pci_device.rs +++ b/virtio-devices/src/transport/pci_device.rs @@ -1028,7 +1028,7 @@ impl PciDevice for VirtioPciDevice { let bar = PciBarConfiguration::default() .set_index(VIRTIO_SHM_BAR_INDEX) .set_address(shm_list.addr.raw_value()) - .set_size(shm_list.len); + .set_size(shm_list.mapping.size() as _); // The creation of the PCI BAR and its associated capabilities must // happen only during the creation of a brand new VM. 
When a VM is diff --git a/virtio-devices/src/vhost_user/fs.rs b/virtio-devices/src/vhost_user/fs.rs index 2374a1cc7..b8d728920 100644 --- a/virtio-devices/src/vhost_user/fs.rs +++ b/virtio-devices/src/vhost_user/fs.rs @@ -356,10 +356,9 @@ impl VirtioDevice for Fs { let mut mappings = Vec::new(); if let Some(cache) = self.cache.as_ref() { mappings.push(UserspaceMapping { - host_addr: cache.0.host_addr, mem_slot: cache.0.mem_slot, addr: cache.0.addr, - len: cache.0.len, + mapping: cache.0.mapping.clone(), mergeable: false, }); } diff --git a/vm-device/src/lib.rs b/vm-device/src/lib.rs index f484e9f14..61e765950 100644 --- a/vm-device/src/lib.rs +++ b/vm-device/src/lib.rs @@ -3,8 +3,11 @@ // SPDX-License-Identifier: Apache-2.0 // +use std::sync::Arc; + use serde::{Deserialize, Serialize}; -use vm_memory::{GuestAddress, GuestUsize}; +use vm_memory::bitmap::AtomicBitmap; +use vm_memory::{GuestAddress, MmapRegion}; mod bus; pub mod dma_mapping; @@ -62,9 +65,8 @@ pub enum Resource { #[derive(Clone)] pub struct UserspaceMapping { - pub host_addr: u64, pub mem_slot: u32, pub addr: GuestAddress, - pub len: GuestUsize, + pub mapping: Arc>, pub mergeable: bool, } diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 1293a0daa..622c183f0 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -97,8 +97,10 @@ use vm_device::interrupt::{ InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, }; use vm_device::{Bus, BusDevice, BusDeviceSync, Resource, UserspaceMapping}; +#[cfg(feature = "ivshmem")] +use vm_memory::bitmap::AtomicBitmap; use vm_memory::guest_memory::FileOffset; -use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion}; +use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion, VolatileMemory}; #[cfg(target_arch = "x86_64")] use vm_memory::{GuestAddressSpace, GuestMemory}; use vm_migration::protocol::MemoryRangeTable; @@ -817,15 +819,15 @@ impl DeviceRelocation 
for AddressManager { if let Some(mut shm_regions) = virtio_dev.get_shm_regions() && shm_regions.addr.raw_value() == old_base { - // SAFETY: TODO what are the invariants here? + // SAFETY: guaranteed by MmapRegion invariants unsafe { // Remove old mapping self.vm .remove_user_memory_region( shm_regions.mem_slot, old_base, - shm_regions.len, - shm_regions.host_addr, + shm_regions.mapping.len(), + shm_regions.mapping.as_ptr(), false, false, ) @@ -834,17 +836,14 @@ impl DeviceRelocation for AddressManager { "failed to remove user memory region: {e:?}" )) })?; - } - // SAFETY: TODO what are the invariants here? - unsafe { // Create new mapping by inserting new region to KVM. self.vm .create_user_memory_region( shm_regions.mem_slot, new_base, - shm_regions.len, - shm_regions.host_addr, + shm_regions.mapping.len(), + shm_regions.mapping.as_ptr(), false, false, ) @@ -3249,10 +3248,11 @@ impl DeviceManager { }, ) .map_err(DeviceManagerError::NewMmapRegion)?; - let host_addr: u64 = mmap_region.as_ptr() as u64; + let host_addr = mmap_region.as_ptr(); // SAFETY: host_addr points to region_size bytes of mmap-allocated memory. 
let mem_slot = unsafe { + let region_size = region_size.try_into().unwrap(); self.memory_manager .lock() .unwrap() @@ -3261,10 +3261,9 @@ impl DeviceManager { }?; let mapping = UserspaceMapping { - host_addr, mem_slot, addr: GuestAddress(region_base), - len: region_size, + mapping: Arc::new(mmap_region), mergeable: false, }; @@ -3274,7 +3273,6 @@ impl DeviceManager { file, GuestAddress(region_base), mapping, - mmap_region, self.force_iommu | pmem_cfg.iommu, self.seccomp_action.clone(), self.exit_evt @@ -4010,16 +4008,13 @@ impl DeviceManager { resources, )?; - // SAFETY: TODO // Note it is required to call 'add_pci_device()' in advance to have the list of // mmio regions provisioned correctly - unsafe { - vfio_user_pci_device - .lock() - .unwrap() - .map_mmio_regions() - .map_err(DeviceManagerError::VfioUserMapRegion) - }?; + vfio_user_pci_device + .lock() + .unwrap() + .map_mmio_regions() + .map_err(DeviceManagerError::VfioUserMapRegion)?; let mut node = device_node!(vfio_user_name, vfio_user_pci_device); @@ -4747,8 +4742,8 @@ impl DeviceManager { .unwrap() .remove_userspace_mapping( mapping.addr.raw_value(), - mapping.len, - mapping.host_addr, + mapping.mapping.size(), + mapping.mapping.as_ptr() as _, mapping.mergeable, mapping.mem_slot, ) @@ -5002,13 +4997,12 @@ impl IvshmemOps for IvshmemHandler { start_addr: u64, size: usize, backing_file: Option, - ) -> Result<(Arc, UserspaceMapping), IvshmemError> { + ) -> Result<(Arc>, UserspaceMapping), IvshmemError> { info!("Creating ivshmem mem region at 0x{start_addr:x}"); - let region: Arc = MemoryManager::create_ram_region( + let region = MemoryManager::create_ram_region_raw( &backing_file, 0, - GuestAddress(start_addr), size, false, true, @@ -5021,12 +5015,12 @@ impl IvshmemOps for IvshmemHandler { .map_err(|_| IvshmemError::CreateUserMemoryRegion)?; let mem_slot = { let mut manager = self.memory_manager.lock().unwrap(); - // SAFETY: guaranteed by GuestRegionMmap invariants + // SAFETY: guaranteed by MmapRegion 
invariants unsafe { manager.create_userspace_mapping( - region.start_addr().0, + start_addr, region.len(), - region.as_ptr() as u64, + region.as_ptr(), false, false, false, @@ -5034,11 +5028,11 @@ impl IvshmemOps for IvshmemHandler { } } .map_err(|_| IvshmemError::CreateUserspaceMapping)?; + let region = Arc::new(region); let mapping = UserspaceMapping { - host_addr: region.as_ptr() as u64, + mapping: region.clone(), mem_slot, - addr: GuestAddress(region.start_addr().0), - len: region.len(), + addr: GuestAddress(start_addr), mergeable: false, }; Ok((region, mapping)) @@ -5050,8 +5044,8 @@ impl IvshmemOps for IvshmemHandler { unsafe { manager.remove_userspace_mapping( mapping.addr.raw_value(), - mapping.len, - mapping.host_addr, + mapping.mapping.len(), + mapping.mapping.as_ptr(), mapping.mergeable, mapping.mem_slot, ) diff --git a/vmm/src/memory_manager.rs b/vmm/src/memory_manager.rs index 146ea610e..a9efbe99a 100644 --- a/vmm/src/memory_manager.rs +++ b/vmm/src/memory_manager.rs @@ -8,7 +8,7 @@ use std::collections::BTreeMap; use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io::{self}; -use std::ops::{BitAnd, Deref, Not, Sub}; +use std::ops::{BitAnd, Not, Sub}; #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] use std::os::fd::AsFd; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; @@ -900,13 +900,12 @@ impl MemoryManager { for (zone_id, regions) in list { for (region, virtio_mem) in regions { - // SAFETY: regions only holds valid addresses. - // TODO: encapsulate this unsafety in a small part of the file. 
+ // SAFETY: guaranteed by GuestRegionMmap invariants let slot = unsafe { self.create_userspace_mapping( region.start_addr().raw_value(), - region.len(), - region.as_ptr() as u64, + region.len().try_into().unwrap(), + region.as_ptr(), self.mergeable, false, self.log_dirty, @@ -962,13 +961,16 @@ impl MemoryManager { arch::layout::UEFI_START, ) .unwrap(); + const _: () = assert!(core::mem::size_of::() == core::mem::size_of::()); + + // SAFETY: guaranteed by GuestRegionMmap unsafe { self.vm .create_user_memory_region( uefi_mem_slot, uefi_region.start_addr().raw_value(), - uefi_region.len(), - uefi_region.as_ptr() as u64, + uefi_region.len() as usize, + uefi_region.as_ptr(), false, false, ) @@ -1364,10 +1366,9 @@ impl MemoryManager { } #[allow(clippy::too_many_arguments)] - pub fn create_ram_region( + pub fn create_ram_region_raw( backing_file: &Option, file_offset: u64, - start_addr: GuestAddress, size: usize, prefault: bool, shared: bool, @@ -1376,7 +1377,7 @@ impl MemoryManager { host_numa_node: Option, existing_memory_file: Option, thp: bool, - ) -> Result, Error> { + ) -> Result, Error> { let mut mmap_flags = libc::MAP_NORESERVE; // The duplication of mmap_flags ORing here is unfortunate but it also makes @@ -1403,17 +1404,13 @@ impl MemoryManager { None }; - let region = GuestRegionMmap::new( - MmapRegion::build(fo, size, libc::PROT_READ | libc::PROT_WRITE, mmap_flags) - .map_err(Error::GuestMemoryRegion)?, - start_addr, - ) - .map_err(Error::GuestMemory)?; + let region = MmapRegion::build(fo, size, libc::PROT_READ | libc::PROT_WRITE, mmap_flags) + .map_err(Error::GuestMemoryRegion)?; // Apply NUMA policy if needed. 
if let Some(node) = host_numa_node { - let addr = region.deref().as_ptr(); - let len = region.deref().size() as u64; + let addr = region.as_ptr(); + let len = region.size() as u64; let mode = MPOL_BIND; let mut nodemask: Vec = Vec::new(); let flags = MPOL_MF_STRICT | MPOL_MF_MOVE; @@ -1498,7 +1495,39 @@ impl MemoryManager { } } - Ok(Arc::new(region)) + Ok(region) + } + + #[allow(clippy::too_many_arguments)] + pub fn create_ram_region( + backing_file: &Option, + file_offset: u64, + start_addr: GuestAddress, + size: usize, + prefault: bool, + shared: bool, + hugepages: bool, + hugepage_size: Option, + host_numa_node: Option, + existing_memory_file: Option, + thp: bool, + ) -> Result, Error> { + let r = Self::create_ram_region_raw( + backing_file, + file_offset, + size, + prefault, + shared, + hugepages, + hugepage_size, + host_numa_node, + existing_memory_file, + thp, + )?; + + Ok(Arc::new( + GuestRegionMmap::new(r, start_addr).map_err(Error::GuestMemory)?, + )) } // Duplicate of `memory_zone_get_align_size` that does not require a `zone` @@ -1612,12 +1641,12 @@ impl MemoryManager { )?; // Map it into the guest - // SAFETY: create_ram_region only produces valid mappings. 
+ // SAFETY: guaranteed by GuestMmapRegion invariants let slot = unsafe { self.create_userspace_mapping( region.start_addr().0, - region.len(), - region.as_ptr() as u64, + region.len().try_into().unwrap(), + region.as_ptr(), self.mergeable, false, self.log_dirty, @@ -1722,8 +1751,8 @@ impl MemoryManager { pub unsafe fn create_userspace_mapping( &mut self, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, mergeable: bool, readonly: bool, log_dirty: bool, @@ -1731,10 +1760,11 @@ impl MemoryManager { let slot = self.allocate_memory_slot(); info!( - "Creating userspace mapping: {guest_phys_addr:x} -> {userspace_addr:x} {memory_size:x}, slot {slot}" + "Creating userspace mapping: {guest_phys_addr:x} -> {userspace_addr_:x} {memory_size:x}, slot {slot}", + userspace_addr_ = userspace_addr as u64 ); - // SAFETY: promised by caller + // SAFETY: caller promises parameters are correct. unsafe { self.vm .create_user_memory_region( @@ -1788,7 +1818,8 @@ impl MemoryManager { } info!( - "Created userspace mapping: {guest_phys_addr:x} -> {userspace_addr:x} {memory_size:x}" + "Created userspace mapping: {guest_phys_addr:x} -> {userspace_addr_:x} {memory_size:x}", + userspace_addr_ = userspace_addr as u64 ); Ok(slot) @@ -1806,12 +1837,12 @@ impl MemoryManager { pub unsafe fn remove_userspace_mapping( &mut self, guest_phys_addr: u64, - memory_size: u64, - userspace_addr: u64, + memory_size: usize, + userspace_addr: *mut u8, mergeable: bool, slot: u32, ) -> Result<(), Error> { - // SAFETY: The caller promises that the parameters are correct. + // SAFETY: Caller promises parameters are correct. 
unsafe { self.vm .remove_user_memory_region( @@ -1852,7 +1883,8 @@ impl MemoryManager { } info!( - "Removed userspace mapping: {guest_phys_addr:x} -> {userspace_addr:x} {memory_size:x}" + "Removed userspace mapping: {guest_phys_addr:x} -> {userspace_addr_:x} {memory_size:x}", + userspace_addr_ = userspace_addr as u64 ); Ok(()) diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 89255e28b..8803f68aa 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -3469,8 +3469,8 @@ mod unit_tests { vm.create_user_memory_region( index as u32, region.start_addr().raw_value(), - region.len(), - region.as_ptr() as u64, + region.len().try_into().unwrap(), + region.as_ptr(), false, false, ) @@ -3607,8 +3607,8 @@ pub fn test_vm() { vm.create_user_memory_region( index as u32, region.start_addr().raw_value(), - region.len(), - region.as_ptr() as u64, + region.len().try_into().unwrap(), + region.as_ptr() as _, false, false, )