From c22c15b9636c8ad66fe9fe62b1bd9f3e265d6cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=AD=C3=B0=20Steinn=20Geirsson?= Date: Wed, 18 Mar 2026 17:46:55 +0000 Subject: [PATCH] virtio-devices: add vhost-user GPU device Add a vhost-user frontend for virtio-gpu using the current vhost-user protocol (GET_SHMEM_CONFIG=44, SHMEM_MAP/UNMAP=9/10, SHMEM=0x0020_0000). The GPU device queries the backend for shared memory regions via get_shmem_config(), allocates a PCI BAR for the shmem region, and implements a BackendReqHandler that handles SHMEM_MAP/UNMAP requests by mmapping file descriptors from the backend into the shared memory region. To support non-contiguous shared memory region IDs (needed for VIRTIO_GPU_SHM_ID_HOST_VISIBLE=1), VirtioSharedMemoryList.region_list is changed from Vec to BTreeMap, and the PCI BAR code uses the map key as the PCI capability shmid. Includes full VMM plumbing: GpuConfig, --gpu CLI, API, device_manager, seccomp rules, and hotplug support. Co-Authored-By: Claude Opus 4.6 (1M context) --- cloud-hypervisor/src/main.rs | 1 + virtio-devices/src/device.rs | 4 +- virtio-devices/src/lib.rs | 4 +- virtio-devices/src/seccomp_filters.rs | 13 + virtio-devices/src/transport/pci_device.rs | 4 +- virtio-devices/src/vhost_user/gpu.rs | 460 +++++++++++++++++++++ virtio-devices/src/vhost_user/mod.rs | 6 + vmm/src/api/mod.rs | 47 ++- vmm/src/config.rs | 81 ++++ vmm/src/device_manager.rs | 141 ++++++- vmm/src/lib.rs | 28 +- vmm/src/vm.rs | 26 +- vmm/src/vm_config.rs | 23 ++ 13 files changed, 824 insertions(+), 14 deletions(-) create mode 100644 virtio-devices/src/vhost_user/gpu.rs diff --git a/cloud-hypervisor/src/main.rs b/cloud-hypervisor/src/main.rs index 3abc382f3..0f4eb5a60 100644 --- a/cloud-hypervisor/src/main.rs +++ b/cloud-hypervisor/src/main.rs @@ -998,6 +998,7 @@ mod unit_tests { }, balloon: None, fs: None, + gpu: None, pmem: None, serial: ConsoleConfig { file: None, diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs index f0ed28f51..892bf08ee 100644 --- a/virtio-devices/src/device.rs +++ b/virtio-devices/src/device.rs @@ -6,7 +6,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::io::Write; use std::num::Wrapping; use std::sync::atomic::{AtomicBool, Ordering}; @@ -50,7 +50,7 @@ pub struct VirtioSharedMemoryList { pub mem_slot: u32, pub addr: GuestAddress, pub mapping: Arc, - pub region_list: Vec, + pub region_list: BTreeMap, } /// Trait for virtio devices to be driven by a virtio transport. diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs index b52282921..7efc4bf8c 100644 --- a/virtio-devices/src/lib.rs +++ b/virtio-devices/src/lib.rs @@ -43,7 +43,7 @@ pub use self::block::{Block, BlockState}; pub use self::console::{Console, ConsoleResizer, Endpoint}; pub use self::device::{ DmaRemapping, VirtioCommon, VirtioDevice, VirtioInterrupt, VirtioInterruptType, - VirtioSharedMemoryList, + VirtioSharedMemory, VirtioSharedMemoryList, }; pub use self::epoll_helper::{ EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler, @@ -90,6 +90,8 @@ pub enum ActivateError { VhostUserFsSetup(#[source] vhost_user::Error), #[error("Failed to setup vhost-user daemon")] VhostUserSetup(#[source] vhost_user::Error), + #[error("Failed to setup vhost-user-gpu daemon")] + VhostUserGpuSetup(#[source] vhost_user::Error), #[error("Failed to create seccomp filter")] CreateSeccompFilter(#[source] seccompiler::Error), #[error("Failed to create rate limiter")] diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs index 5afd056a6..59f364320 100644 --- a/virtio-devices/src/seccomp_filters.rs +++ b/virtio-devices/src/seccomp_filters.rs @@ -24,6 +24,7 @@ pub enum Thread { VirtioRng, VirtioVhostBlock, VirtioVhostFs, + VirtioVhostGpu, VirtioVhostNet, VirtioVhostNetCtl, VirtioVsock, @@ -192,6 +193,17 @@ fn virtio_vhost_fs_thread_rules() -> Vec<(i64, Vec)> { ] } +fn virtio_vhost_gpu_thread_rules() -> Vec<(i64, Vec)> { + vec![ + (libc::SYS_clock_nanosleep, vec![]), + (libc::SYS_connect, vec![]), + (libc::SYS_nanosleep, vec![]), + (libc::SYS_recvmsg, vec![]), + (libc::SYS_sendmsg, vec![]), + (libc::SYS_socket, vec![]), + ] +} + fn virtio_vhost_net_ctl_thread_rules() -> Vec<(i64, Vec)> { vec![] } @@ -271,6 +283,7 @@ fn get_seccomp_rules(thread_type: Thread) -> Vec<(i64, Vec)> { Thread::VirtioRng => virtio_rng_thread_rules(), Thread::VirtioVhostBlock => virtio_vhost_block_thread_rules(), Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(), + Thread::VirtioVhostGpu => virtio_vhost_gpu_thread_rules(), Thread::VirtioVhostNet => virtio_vhost_net_thread_rules(), Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(), Thread::VirtioVsock => virtio_vsock_thread_rules(), diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs index 408611e29..6aaffcef7 100644 --- a/virtio-devices/src/transport/pci_device.rs +++ b/virtio-devices/src/transport/pci_device.rs @@ -1036,11 +1036,11 @@ impl PciDevice for VirtioPciDevice { PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e) })?; - for (idx, shm) in shm_list.region_list.iter().enumerate() { + for (&shmid, shm) in shm_list.region_list.iter() { let shm_cap = VirtioPciCap64::new( PciCapabilityType::SharedMemory, VIRTIO_SHM_BAR_INDEX as u8, - idx as u8, + shmid, shm.offset, shm.len, ); diff --git a/virtio-devices/src/vhost_user/gpu.rs b/virtio-devices/src/vhost_user/gpu.rs new file mode 100644 index 000000000..497081bbf --- /dev/null +++ b/virtio-devices/src/vhost_user/gpu.rs @@ -0,0 +1,460 @@ +// Copyright © 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +use std::io; +use std::os::unix::io::AsRawFd; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Barrier, Mutex}; +use std::{result, thread}; + +use event_monitor::event; +use log::error; +use seccompiler::SeccompAction; +use vhost::vhost_user::message::{ + VhostUserConfigFlags, VhostUserMMap, VhostUserMMapFlags, VhostUserProtocolFeatures, + VhostUserVirtioFeatures, +}; +use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler}; +use virtio_queue::Queue; +use vm_device::UserspaceMapping; +use vm_memory::GuestMemoryAtomic; +use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable}; +use vmm_sys_util::eventfd::EventFd; + +use super::vu_common_ctrl::VhostUserHandle; +use super::{DEFAULT_VIRTIO_FEATURES, Error, Result}; +use crate::seccomp_filters::Thread; +use crate::thread_helper::spawn_virtio_thread; +use crate::vhost_user::VhostUserCommon; +use crate::{ + ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VIRTIO_F_IOMMU_PLATFORM, + VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList, +}; + +const DEFAULT_QUEUE_NUMBER: usize = 2; +const DEFAULT_QUEUE_SIZE: u16 = 256; + +struct BackendReqHandler { + mapping: Arc, +} + +impl BackendReqHandler { + /// Validate and compute the target pointer and length within the shared memory region. + fn checked_region( + &self, + offset: u64, + len: u64, + ) -> io::Result<(*mut u8, usize)> { + let offset: usize = offset + .try_into() + .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?; + let len: usize = len + .try_into() + .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?; + let region_size = self.mapping.size(); + + if offset.checked_add(len).is_none_or(|end| end > region_size) { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + + // SAFETY: offset is within the mapped region + let ptr = unsafe { self.mapping.as_ptr().add(offset) }; + Ok((ptr, len)) + } +} + +impl VhostUserFrontendReqHandler for BackendReqHandler { + fn shmem_map( + &self, + req: &VhostUserMMap, + fd: &dyn AsRawFd, + ) -> vhost::vhost_user::HandlerResult { + let (ptr, len) = self.checked_region(req.shm_offset, req.len)?; + + let writable = VhostUserMMapFlags::from_bits_truncate(req.flags) + .contains(VhostUserMMapFlags::WRITABLE); + let prot = if writable { + libc::PROT_READ | libc::PROT_WRITE + } else { + libc::PROT_READ + }; + + // SAFETY: we've checked we're only giving addr and length within the + // region, and are passing MAP_FIXED to ensure they are respected. + let ret = unsafe { + libc::mmap( + ptr.cast(), + len, + prot, + // https://bugzilla.kernel.org/show_bug.cgi?id=217238 + if writable { + libc::MAP_SHARED + } else { + libc::MAP_PRIVATE + } | libc::MAP_FIXED, + fd.as_raw_fd(), + req.fd_offset as libc::off_t, + ) + }; + + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + Ok(0) + } + + fn shmem_unmap(&self, req: &VhostUserMMap) -> vhost::vhost_user::HandlerResult { + let (ptr, len) = self.checked_region(req.shm_offset, req.len)?; + + // SAFETY: we've checked we're only giving addr and length within the + // region, and are passing MAP_FIXED to ensure they are respected. + let ret = unsafe { + libc::mmap( + ptr.cast(), + len, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED, + -1, + 0, + ) + }; + + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + Ok(0) + } +} + +/// Shared memory region descriptor returned from the backend. +pub struct ShmemRegion { + pub id: u8, + pub length: u64, +} + +pub struct Gpu { + common: VirtioCommon, + vu_common: VhostUserCommon, + id: String, + cache: Option, + backend_req_support: bool, + seccomp_action: SeccompAction, + guest_memory: Option>, + epoll_thread: Option>, + exit_evt: EventFd, + iommu: bool, +} + +impl Gpu { + /// Create a new virtio-gpu device. + pub fn new( + id: String, + path: &str, + seccomp_action: SeccompAction, + exit_evt: EventFd, + iommu: bool, + ) -> Result<(Gpu, ShmemRegion)> { + let num_queues = DEFAULT_QUEUE_NUMBER; + + // Connect to the vhost-user socket. + let mut vu = + VhostUserHandle::connect_vhost_user(false, path, num_queues as u64, false)?; + + // Filling device and vring features VMM supports. + let avail_features = DEFAULT_VIRTIO_FEATURES; + + let avail_protocol_features = VhostUserProtocolFeatures::CONFIG + | VhostUserProtocolFeatures::BACKEND_REQ + | VhostUserProtocolFeatures::SHMEM + | VhostUserProtocolFeatures::REPLY_ACK + | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS; + + let (acked_features, acked_protocol_features) = + vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?; + + let backend_req_support = + acked_protocol_features & VhostUserProtocolFeatures::BACKEND_REQ.bits() != 0; + + // Query shared memory regions. + let shm_config = vu + .socket_handle() + .get_shmem_config() + .map_err(Error::VhostUserGetShmemConfig)?; + + // Decode the sparse region array. + let regions: Vec<(u8, u64)> = shm_config + .memory_sizes + .iter() + .enumerate() + .filter(|&(_, &size)| size != 0) + .take(shm_config.nregions as usize) + .map(|(id, &size)| (id as u8, size)) + .collect(); + + if regions.len() != 1 { + error!( + "Expected exactly 1 shared memory region from GPU backend, got {}", + regions.len() + ); + return Err(Error::VhostUserUnexpectedShmemRegionCount(1, regions.len())); + } + let (shm_id, shm_length) = regions[0]; + + let gpu = Gpu { + common: VirtioCommon { + device_type: VirtioDeviceType::Gpu as u32, + avail_features: acked_features, + acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(), + queue_sizes: vec![DEFAULT_QUEUE_SIZE; num_queues], + paused_sync: Some(Arc::new(Barrier::new(2))), + min_queues: DEFAULT_QUEUE_NUMBER as u16, + paused: Arc::new(AtomicBool::new(false)), + ..Default::default() + }, + vu_common: VhostUserCommon { + vu: Some(Arc::new(Mutex::new(vu))), + acked_protocol_features, + socket_path: path.to_string(), + vu_num_queues: num_queues, + ..Default::default() + }, + id, + cache: None, + backend_req_support, + seccomp_action, + guest_memory: None, + epoll_thread: None, + exit_evt, + iommu, + }; + + Ok(( + gpu, + ShmemRegion { + id: shm_id, + length: shm_length, + }, + )) + } + + pub fn set_cache(&mut self, cache: VirtioSharedMemoryList) { + self.cache = Some(cache); + } +} + +impl Drop for Gpu { + fn drop(&mut self) { + if let Some(kill_evt) = self.common.kill_evt.take() { + let _ = kill_evt.write(1); + } + self.common.wait_for_epoll_threads(); + if let Some(thread) = self.epoll_thread.take() + && let Err(e) = thread.join() + { + error!("Error joining thread: {e:?}"); + } + } +} + +impl VirtioDevice for Gpu { + fn device_type(&self) -> u32 { + self.common.device_type + } + + fn queue_max_sizes(&self) -> &[u16] { + &self.common.queue_sizes + } + + fn features(&self) -> u64 { + let mut features = self.common.avail_features; + if self.iommu { + features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM; + } + features + } + + fn ack_features(&mut self, value: u64) { + self.common.ack_features(value); + } + + fn read_config(&self, offset: u64, mut data: &mut [u8]) { + if let Some(vu) = &self.vu_common.vu + && let Err(e) = vu + .lock() + .unwrap() + .socket_handle() + .get_config( + offset as u32, + data.len() as u32, + VhostUserConfigFlags::WRITABLE, + data, + ) + .map_err(|e| format!("{e:?}")) + .and_then(|(_, config)| { + use std::io::Write; + data.write_all(&config).map_err(|e| format!("{e:?}")) + }) + { + error!("Failed getting vhost-user-gpu configuration: {e:?}"); + } + } + + fn activate( + &mut self, + mem: GuestMemoryAtomic, + interrupt_cb: Arc, + queues: Vec<(usize, Queue, EventFd)>, + ) -> ActivateResult { + self.common.activate(&queues, interrupt_cb.clone())?; + self.guest_memory = Some(mem.clone()); + + // Initialize backend communication. + let backend_req_handler = if self.backend_req_support { + if let Some(cache) = self.cache.as_ref() { + let vu_frontend_req_handler = Arc::new(BackendReqHandler { + mapping: cache.mapping.clone(), + }); + + let mut req_handler = + FrontendReqHandler::new(vu_frontend_req_handler).map_err(|e| { + crate::ActivateError::VhostUserGpuSetup(Error::FrontendReqHandlerCreation( + e, + )) + })?; + + if self.vu_common.acked_protocol_features + & VhostUserProtocolFeatures::REPLY_ACK.bits() + != 0 + { + req_handler.set_reply_ack_flag(true); + } + + Some(req_handler) + } else { + None + } + } else { + None + }; + + // Run a dedicated thread for handling potential reconnections with + // the backend. + let (kill_evt, pause_evt) = self.common.dup_eventfds(); + + let mut handler = self.vu_common.activate( + mem, + &queues, + interrupt_cb, + self.common.acked_features, + backend_req_handler, + kill_evt, + pause_evt, + )?; + + let paused = self.common.paused.clone(); + let paused_sync = self.common.paused_sync.clone(); + + let mut epoll_threads = Vec::new(); + spawn_virtio_thread( + &self.id, + &self.seccomp_action, + Thread::VirtioVhostGpu, + &mut epoll_threads, + &self.exit_evt, + move || handler.run(&paused, paused_sync.as_ref().unwrap()), + )?; + self.epoll_thread = Some(epoll_threads.remove(0)); + + event!("virtio-device", "activated", "id", &self.id); + Ok(()) + } + + fn reset(&mut self) -> Option> { + if self.common.pause_evt.take().is_some() { + self.common.resume().ok()?; + } + + if let Some(vu) = &self.vu_common.vu + && let Err(e) = vu.lock().unwrap().reset_vhost_user() + { + error!("Failed to reset vhost-user daemon: {e:?}"); + return None; + } + + if let Some(kill_evt) = self.common.kill_evt.take() { + let _ = kill_evt.write(1); + } + + event!("virtio-device", "reset", "id", &self.id); + + Some(self.common.interrupt_cb.take().unwrap()) + } + + fn shutdown(&mut self) { + self.vu_common.shutdown(); + } + + fn get_shm_regions(&self) -> Option { + self.cache.clone() + } + + fn set_shm_regions( + &mut self, + shm_regions: VirtioSharedMemoryList, + ) -> std::result::Result<(), crate::Error> { + if let Some(cache) = self.cache.as_mut() { + *cache = shm_regions; + Ok(()) + } else { + Err(crate::Error::SetShmRegionsNotSupported) + } + } + + fn add_memory_region( + &mut self, + region: &Arc, + ) -> std::result::Result<(), crate::Error> { + self.vu_common.add_memory_region(&self.guest_memory, region) + } + + fn userspace_mappings(&self) -> Vec { + let mut mappings = Vec::new(); + if let Some(cache) = self.cache.as_ref() { + mappings.push(UserspaceMapping { + mem_slot: cache.mem_slot, + addr: cache.addr, + mapping: cache.mapping.clone(), + mergeable: false, + }); + } + + mappings + } +} + +impl Pausable for Gpu { + fn pause(&mut self) -> result::Result<(), MigratableError> { + self.vu_common.pause()?; + self.common.pause() + } + + fn resume(&mut self) -> result::Result<(), MigratableError> { + self.common.resume()?; + + if let Some(epoll_thread) = &self.epoll_thread { + epoll_thread.thread().unpark(); + } + + self.vu_common.resume() + } +} + +impl Snapshottable for Gpu { + fn id(&self) -> String { + self.id.clone() + } +} +impl Transportable for Gpu {} +impl Migratable for Gpu {} diff --git a/virtio-devices/src/vhost_user/mod.rs b/virtio-devices/src/vhost_user/mod.rs index cd5976988..ac651de94 100644 --- a/virtio-devices/src/vhost_user/mod.rs +++ b/virtio-devices/src/vhost_user/mod.rs @@ -35,11 +35,13 @@ use crate::{ pub mod blk; pub mod fs; +pub mod gpu; pub mod net; pub mod vu_common_ctrl; pub use self::blk::Blk; pub use self::fs::*; +pub use self::gpu::Gpu; pub use self::net::Net; pub use self::vu_common_ctrl::VhostUserConfig; @@ -147,6 +149,10 @@ pub enum Error { NewMmapRegion(#[source] MmapRegionError), #[error("Could not find the shm log region")] MissingShmLogRegion, + #[error("Get shared memory config failed")] + VhostUserGetShmemConfig(#[source] VhostError), + #[error("Expected {0} shared memory regions; got {1}")] + VhostUserUnexpectedShmemRegionCount(usize, usize), } type Result = std::result::Result; diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 12ca6b987..18e66b96b 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -51,8 +51,8 @@ use crate::config::RestoreConfig; use crate::device_tree::DeviceTree; use crate::vm::{Error as VmError, VmState}; use crate::vm_config::{ - DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, - VmConfig, VsockConfig, + DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig, + VdpaConfig, VmConfig, VsockConfig, }; /// API errors are sent back from the VMM API server through the ApiResponse. @@ -170,6 +170,10 @@ pub enum ApiError { #[error("The fs could not be added to the VM")] VmAddFs(#[source] VmError), + /// The gpu could not be added to the VM. + #[error("The gpu could not be added to the VM")] + VmAddGpu(#[source] VmError), + /// The pmem device could not be added to the VM. #[error("The pmem device could not be added to the VM")] VmAddPmem(#[source] VmError), @@ -340,6 +344,8 @@ pub trait RequestHandler { fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> Result>, VmError>; + fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> Result>, VmError>; + fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> Result>, VmError>; fn vm_add_net(&mut self, net_cfg: NetConfig) -> Result>, VmError>; @@ -539,6 +545,43 @@ impl ApiAction for VmAddFs { } } +pub struct VmAddGpu; + +impl ApiAction for VmAddGpu { + type RequestBody = GpuConfig; + type ResponseBody = Option; + + fn request( + &self, + config: Self::RequestBody, + response_sender: Sender, + ) -> ApiRequest { + Box::new(move |vmm| { + info!("API request event: VmAddGpu {config:?}"); + + let response = vmm + .vm_add_gpu(config) + .map_err(ApiError::VmAddGpu) + .map(ApiResponsePayload::VmAction); + + response_sender + .send(response) + .map_err(VmmError::ApiResponseSend)?; + + Ok(false) + }) + } + + fn send( + &self, + api_evt: EventFd, + api_sender: Sender, + data: Self::RequestBody, + ) -> ApiResult { + get_response_body(self, api_evt, api_sender, data) + } +} + pub struct VmAddPmem; impl ApiAction for VmAddPmem { diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 16089e558..1216b0cd4 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -90,6 +90,12 @@ pub enum Error { /// Error parsing filesystem parameters #[error("Error parsing --fs")] ParseFileSystem(#[source] OptionParserError), + /// GPU socket is missing + #[error("Error parsing --gpu: socket missing")] + ParseGpuSockMissing, + /// Error parsing GPU parameters + #[error("Error parsing --gpu")] + ParseGpu(#[source] OptionParserError), /// Error parsing persistent memory parameters #[error("Error parsing --pmem")] ParsePersistentMemory(#[source] OptionParserError), @@ -393,6 +399,7 @@ pub struct VmParams<'a> { pub rng: &'a str, pub balloon: Option<&'a str>, pub fs: Option>, + pub gpu: Option>, pub pmem: Option>, pub serial: &'a str, pub console: &'a str, @@ -454,6 +461,9 @@ impl<'a> VmParams<'a> { let fs: Option> = args .get_many::("fs") .map(|x| x.map(|y| y as &str).collect()); + let gpu: Option> = args + .get_many::("gpu") + .map(|x| x.map(|y| y as &str).collect()); let pmem: Option> = args .get_many::("pmem") .map(|x| x.map(|y| y as &str).collect()); @@ -508,6 +518,7 @@ impl<'a> VmParams<'a> { rng, balloon, fs, + gpu, pmem, serial, console, @@ -1701,6 +1712,49 @@ impl FsConfig { } } +impl GpuConfig { + pub const SYNTAX: &'static str = "virtio-gpu parameters \ + \"socket=,id=,pci_segment=\""; + + pub fn parse(gpu: &str) -> Result { + let mut parser = OptionParser::new(); + parser.add("socket").add("id").add("pci_segment"); + parser.parse(gpu).map_err(Error::ParseGpu)?; + + let socket = PathBuf::from(parser.get("socket").ok_or(Error::ParseGpuSockMissing)?); + let id = parser.get("id"); + + let pci_segment = parser + .convert("pci_segment") + .map_err(Error::ParseGpu)? + .unwrap_or_default(); + + Ok(GpuConfig { + socket, + id, + pci_segment, + }) + } + + pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> { + if let Some(platform_config) = vm_config.platform.as_ref() { + if self.pci_segment >= platform_config.num_pci_segments { + return Err(ValidationError::InvalidPciSegment(self.pci_segment)); + } + + if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() + && iommu_segments.contains(&self.pci_segment) + { + return Err(ValidationError::IommuNotSupportedOnSegment( + self.pci_segment, + )); + } + } + + Ok(()) + } +} + #[cfg(feature = "fw_cfg")] impl FwCfgConfig { pub const SYNTAX: &'static str = "Boot params to pass to FW CFG device \ @@ -2723,6 +2777,13 @@ impl VmConfig { } } + if let Some(gpus) = &self.gpu { + for gpu in gpus { + gpu.validate(self)?; + Self::validate_identifier(&mut id_list, &gpu.id)?; + } + } + if let Some(pmems) = &self.pmem { for pmem in pmems { pmem.validate(self)?; @@ -2976,6 +3037,15 @@ impl VmConfig { fs = Some(fs_config_list); } + let mut gpu: Option> = None; + if let Some(gpu_list) = &vm_params.gpu { + let mut gpu_config_list = Vec::new(); + for item in gpu_list.iter() { + gpu_config_list.push(GpuConfig::parse(item)?); + } + gpu = Some(gpu_config_list); + } + let mut pmem: Option> = None; if let Some(pmem_list) = &vm_params.pmem { let mut pmem_config_list = Vec::new(); @@ -3112,6 +3182,7 @@ impl VmConfig { rng, balloon, fs, + gpu, pmem, serial, console, @@ -3173,6 +3244,13 @@ impl VmConfig { removed |= fs.len() != len; } + // Remove if gpu device + if let Some(gpu_list) = self.gpu.as_mut() { + let len = gpu_list.len(); + gpu_list.retain(|dev| dev.id.as_ref().map(|id| id.as_ref()) != Some(id)); + removed |= gpu_list.len() != len; + } + // Remove if net device if let Some(net) = self.net.as_mut() { let len = net.len(); @@ -3245,6 +3323,7 @@ impl Clone for VmConfig { #[cfg(feature = "pvmemcontrol")] pvmemcontrol: self.pvmemcontrol.clone(), fs: self.fs.clone(), + gpu: self.gpu.clone(), pmem: self.pmem.clone(), serial: self.serial.clone(), console: self.console.clone(), @@ -4153,6 +4232,7 @@ mod unit_tests { rng: RngConfig::default(), balloon: None, fs: None, + gpu: None, pmem: None, serial: default_serial(), console: default_console(), @@ -4356,6 +4436,7 @@ mod unit_tests { }, balloon: None, fs: None, + gpu: None, pmem: None, serial: ConsoleConfig { file: None, diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 752da6e51..40f27d4ff 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -94,7 +94,7 @@ use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, Virti use virtio_devices::vhost_user::VhostUserConfig; use virtio_devices::{ AccessPlatformMapping, ActivateError, Block, Endpoint, IommuMapping, VdpaDmaMapping, - VirtioMemMappingSource, + VirtioMemMappingSource, VirtioSharedMemory, VirtioSharedMemoryList, }; use vm_allocator::{AddressAllocator, SystemAllocator}; use vm_device::dma_mapping::ExternalDmaMapping; @@ -127,8 +127,8 @@ use crate::serial_manager::{Error as SerialManagerError, SerialManager}; use crate::vm_config::IvshmemConfig; use crate::vm_config::{ ConsoleOutputMode, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, - DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, - VhostMode, VmConfig, VsockConfig, + DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig, + VdpaConfig, VhostMode, VmConfig, VsockConfig, }; use crate::{DEVICE_MANAGER_SNAPSHOT_ID, GuestRegionMmap, PciDeviceInfo, device_node}; @@ -157,6 +157,7 @@ const IVSHMEM_DEVICE_NAME: &str = "__ivshmem"; // identifiers if the user doesn't give one const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; const FS_DEVICE_NAME_PREFIX: &str = "_fs"; +const GPU_DEVICE_NAME_PREFIX: &str = "_gpu"; const NET_DEVICE_NAME_PREFIX: &str = "_net"; const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; @@ -205,6 +206,18 @@ pub enum DeviceManagerError { #[error("Virtio-fs device was created without a socket")] NoVirtioFsSock, + /// Cannot create virtio-gpu device + #[error("Cannot create virtio-gpu device")] + CreateVirtioGpu(#[source] virtio_devices::vhost_user::Error), + + /// Virtio-gpu device was created without a socket. + #[error("Virtio-gpu device was created without a socket")] + NoVirtioGpuSock, + + /// Cannot find a memory range for virtio-gpu + #[error("Cannot find a memory range for virtio-gpu")] + GpuRangeAllocation, + /// Cannot create vhost-user-blk device #[error("Cannot create vhost-user-blk device")] CreateVhostUserBlk(#[source] virtio_devices::vhost_user::Error), @@ -2548,6 +2561,9 @@ impl DeviceManager { // Add virtio-fs if required self.make_virtio_fs_devices()?; + // Add virtio-gpu if required + self.make_virtio_gpu_devices()?; + // Add virtio-pmem if required self.make_virtio_pmem_devices()?; @@ -3146,6 +3162,118 @@ impl DeviceManager { Ok(()) } + fn make_virtio_gpu_device( + &mut self, + gpu_cfg: &mut GpuConfig, + ) -> DeviceManagerResult { + let id = if let Some(id) = &gpu_cfg.id { + id.clone() + } else { + let id = self.next_device_name(GPU_DEVICE_NAME_PREFIX)?; + gpu_cfg.id = Some(id.clone()); + id + }; + + info!("Creating virtio-gpu device: {gpu_cfg:?}"); + + let mut node = device_node!(id); + + if let Some(gpu_socket) = gpu_cfg.socket.to_str() { + let (mut virtio_gpu_device, region) = virtio_devices::vhost_user::Gpu::new( + id.clone(), + gpu_socket, + self.seccomp_action.clone(), + self.exit_evt + .try_clone() + .map_err(DeviceManagerError::EventFd)?, + self.force_iommu, + ) + .map_err(DeviceManagerError::CreateVirtioGpu)?; + + // Allocate the shared memory BAR region. + let cache_base = self.pci_segments[gpu_cfg.pci_segment as usize] + .mem64_allocator + .lock() + .unwrap() + .allocate(None, region.length, Some(region.length)) + .ok_or(DeviceManagerError::GpuRangeAllocation)? + .raw_value(); + + node.resources.push(Resource::MmioAddressRange { + base: cache_base, + size: region.length, + }); + + let mmap_region = MmapRegion::build( + None, + region.length as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, + ) + .map_err(DeviceManagerError::NewMmapRegion)?; + + // SAFETY: mmap_region.size() and mmap_region.as_ptr() refer to a valid allocation. + let mem_slot = unsafe { + self.memory_manager + .lock() + .unwrap() + .create_userspace_mapping( + cache_base, + mmap_region.size(), + mmap_region.as_ptr(), + false, + false, + false, + ) + .map_err(DeviceManagerError::MemoryManager)? + }; + + let region_list = std::iter::once(( + region.id, + VirtioSharedMemory { + offset: 0, + len: region.length, + }, + )) + .collect(); + + virtio_gpu_device.set_cache(VirtioSharedMemoryList { + mapping: Arc::new(mmap_region), + mem_slot, + addr: GuestAddress(cache_base), + region_list, + }); + + let virtio_gpu_device = Arc::new(Mutex::new(virtio_gpu_device)); + + self.device_tree.lock().unwrap().insert(id.clone(), node); + + Ok(MetaVirtioDevice { + virtio_device: Arc::clone(&virtio_gpu_device) + as Arc>, + iommu: false, + id, + pci_segment: gpu_cfg.pci_segment, + dma_handler: None, + }) + } else { + Err(DeviceManagerError::NoVirtioGpuSock) + } + } + + fn make_virtio_gpu_devices(&mut self) -> DeviceManagerResult<()> { + let mut gpu_devices = self.config.lock().unwrap().gpu.take(); + if let Some(gpu_list_cfg) = &mut gpu_devices { + for gpu_cfg in gpu_list_cfg.iter_mut() { + let device = self.make_virtio_gpu_device(gpu_cfg)?; + self.virtio_devices.push(device); + } + } + self.config.lock().unwrap().gpu = gpu_devices; + + Ok(()) + } + fn make_virtio_pmem_device( &mut self, pmem_cfg: &mut PmemConfig, @@ -4876,6 +5004,13 @@ impl DeviceManager { self.hotplug_virtio_pci_device(device) } + pub fn add_gpu(&mut self, gpu_cfg: &mut GpuConfig) -> DeviceManagerResult { + self.validate_identifier(&gpu_cfg.id)?; + + let device = self.make_virtio_gpu_device(gpu_cfg)?; + self.hotplug_virtio_pci_device(device) + } + pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult { self.validate_identifier(&pmem_cfg.id)?; diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index dcf6614b2..1d5109d5b 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -59,8 +59,8 @@ use crate::migration::{recv_vm_config, recv_vm_state}; use crate::seccomp_filters::{Thread, get_seccomp_filter}; use crate::vm::{Error as VmError, Vm, VmState}; use crate::vm_config::{ - DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig, - VmConfig, VsockConfig, + DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig, + VdpaConfig, VmConfig, VsockConfig, }; mod acpi; @@ -2125,6 +2125,29 @@ impl RequestHandler for Vmm { } } + fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> result::Result>, VmError> { + self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; + + { + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone(); + add_to_config(&mut config.gpu, gpu_cfg.clone()); + config.validate().map_err(VmError::ConfigValidation)?; + } + + if let Some(ref mut vm) = self.vm { + let info = vm.add_gpu(gpu_cfg).inspect_err(|e| { + error!("Error when adding new gpu to the VM: {e:?}"); + })?; + serde_json::to_vec(&info) + .map(Some) + .map_err(VmError::SerializeJson) + } else { + let mut config = self.vm_config.as_ref().unwrap().lock().unwrap(); + add_to_config(&mut config.gpu, gpu_cfg); + Ok(None) + } + } + fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result>, VmError> { self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?; @@ -2443,6 +2466,7 @@ mod unit_tests { }, balloon: None, fs: None, + gpu: None, pmem: None, serial: ConsoleConfig { file: None, diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index 4e1f8c3b8..b32b3f5d6 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -100,8 +100,8 @@ use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path}; #[cfg(feature = "fw_cfg")] use crate::vm_config::FwCfgConfig; use crate::vm_config::{ - DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, NumaConfig, PayloadConfig, - PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, + DeviceConfig, DiskConfig, FsConfig, GpuConfig, HotplugMethod, NetConfig, NumaConfig, + PayloadConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig, }; use crate::{ CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, GuestMemoryMmap, @@ -2136,6 +2136,28 @@ impl Vm { Ok(pci_device_info) } + pub fn add_gpu(&mut self, mut gpu_cfg: GpuConfig) -> Result { + let pci_device_info = self + .device_manager + .lock() + .unwrap() + .add_gpu(&mut gpu_cfg) + .map_err(Error::DeviceManager)?; + + { + let mut config = self.config.lock().unwrap(); + add_to_config(&mut config.gpu, gpu_cfg); + } + + self.device_manager + .lock() + .unwrap() + .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED) + .map_err(Error::DeviceManager)?; + + Ok(pci_device_info) + } + pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result { let pci_device_info = self .device_manager diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs index 407d4e491..4052d935d 100644 --- a/vmm/src/vm_config.rs +++ b/vmm/src/vm_config.rs @@ -469,6 +469,22 @@ impl ApplyLandlock for FsConfig { } } +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct GpuConfig { + pub socket: PathBuf, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +impl ApplyLandlock for GpuConfig { + fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> { + landlock.add_rule_with_access(&self.socket, "rw")?; + Ok(()) + } +} + #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct PmemConfig { pub file: PathBuf, @@ -922,6 +938,7 @@ pub struct VmConfig { pub rng: RngConfig, pub balloon: Option, pub fs: Option>, + pub gpu: Option>, pub pmem: Option>, #[serde(default = "default_serial")] pub serial: ConsoleConfig, @@ -997,6 +1014,12 @@ impl VmConfig { } } + if let Some(gpu_configs) = &self.gpu { + for gpu_config in gpu_configs.iter() { + gpu_config.apply_landlock(&mut landlock)?; + } + } + if let Some(pmem_configs) = &self.pmem { for pmem_config in pmem_configs.iter() { pmem_config.apply_landlock(&mut landlock)?;