virtio-devices: add vhost-user GPU device
Some checks failed
Cloud Hypervisor Tests (Metrics) / Tests (Metrics) (push) Has been cancelled
REUSE Compliance Check / REUSE Compliance Check (push) Has been cancelled
Shell scripts check / Check shell scripts (push) Has been cancelled

Add a vhost-user frontend for virtio-gpu using the current vhost-user
protocol (GET_SHMEM_CONFIG=44, SHMEM_MAP/UNMAP=9/10, SHMEM=0x0020_0000).

The GPU device queries the backend for shared memory regions via
get_shmem_config(), allocates a PCI BAR for the shmem region, and
implements a BackendReqHandler that handles SHMEM_MAP/UNMAP requests
by mmapping file descriptors from the backend into the shared memory
region.

To support non-contiguous shared memory region IDs (needed for
VIRTIO_GPU_SHM_ID_HOST_VISIBLE=1), VirtioSharedMemoryList.region_list
is changed from Vec<VirtioSharedMemory> to BTreeMap<u8, VirtioSharedMemory>,
and the PCI BAR code uses the map key as the PCI capability shmid.

Includes full VMM plumbing: GpuConfig, --gpu CLI, API, device_manager,
seccomp rules, and hotplug support.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Davíð Steinn Geirsson 2026-03-18 17:46:55 +00:00
parent e3372a22f6
commit c22c15b963
13 changed files with 824 additions and 14 deletions

View file

@ -998,6 +998,7 @@ mod unit_tests {
},
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: ConsoleConfig {
file: None,

View file

@ -6,7 +6,7 @@
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::io::Write;
use std::num::Wrapping;
use std::sync::atomic::{AtomicBool, Ordering};
@ -50,7 +50,7 @@ pub struct VirtioSharedMemoryList {
pub mem_slot: u32,
pub addr: GuestAddress,
pub mapping: Arc<MmapRegion>,
pub region_list: Vec<VirtioSharedMemory>,
pub region_list: BTreeMap<u8, VirtioSharedMemory>,
}
/// Trait for virtio devices to be driven by a virtio transport.

View file

@ -43,7 +43,7 @@ pub use self::block::{Block, BlockState};
pub use self::console::{Console, ConsoleResizer, Endpoint};
pub use self::device::{
DmaRemapping, VirtioCommon, VirtioDevice, VirtioInterrupt, VirtioInterruptType,
VirtioSharedMemoryList,
VirtioSharedMemory, VirtioSharedMemoryList,
};
pub use self::epoll_helper::{
EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler,
@ -90,6 +90,8 @@ pub enum ActivateError {
VhostUserFsSetup(#[source] vhost_user::Error),
#[error("Failed to setup vhost-user daemon")]
VhostUserSetup(#[source] vhost_user::Error),
#[error("Failed to setup vhost-user-gpu daemon")]
VhostUserGpuSetup(#[source] vhost_user::Error),
#[error("Failed to create seccomp filter")]
CreateSeccompFilter(#[source] seccompiler::Error),
#[error("Failed to create rate limiter")]

View file

@ -24,6 +24,7 @@ pub enum Thread {
VirtioRng,
VirtioVhostBlock,
VirtioVhostFs,
VirtioVhostGpu,
VirtioVhostNet,
VirtioVhostNetCtl,
VirtioVsock,
@ -192,6 +193,17 @@ fn virtio_vhost_fs_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
]
}
/// Seccomp rules for the vhost-user-gpu worker thread.
///
/// Each listed syscall is allowed unconditionally (empty rule vector);
/// anything else falls through to the filter's default action.
fn virtio_vhost_gpu_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
    [
        libc::SYS_clock_nanosleep,
        libc::SYS_connect,
        libc::SYS_nanosleep,
        libc::SYS_recvmsg,
        libc::SYS_sendmsg,
        libc::SYS_socket,
    ]
    .into_iter()
    .map(|syscall| (syscall, Vec::new()))
    .collect()
}
fn virtio_vhost_net_ctl_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
vec![]
}
@ -271,6 +283,7 @@ fn get_seccomp_rules(thread_type: Thread) -> Vec<(i64, Vec<SeccompRule>)> {
Thread::VirtioRng => virtio_rng_thread_rules(),
Thread::VirtioVhostBlock => virtio_vhost_block_thread_rules(),
Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(),
Thread::VirtioVhostGpu => virtio_vhost_gpu_thread_rules(),
Thread::VirtioVhostNet => virtio_vhost_net_thread_rules(),
Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(),
Thread::VirtioVsock => virtio_vsock_thread_rules(),

View file

@ -1036,11 +1036,11 @@ impl PciDevice for VirtioPciDevice {
PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e)
})?;
for (idx, shm) in shm_list.region_list.iter().enumerate() {
for (&shmid, shm) in shm_list.region_list.iter() {
let shm_cap = VirtioPciCap64::new(
PciCapabilityType::SharedMemory,
VIRTIO_SHM_BAR_INDEX as u8,
idx as u8,
shmid,
shm.offset,
shm.len,
);

View file

@ -0,0 +1,460 @@
// Copyright © 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
use std::io;
use std::os::unix::io::AsRawFd;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier, Mutex};
use std::{result, thread};
use event_monitor::event;
use log::error;
use seccompiler::SeccompAction;
use vhost::vhost_user::message::{
VhostUserConfigFlags, VhostUserMMap, VhostUserMMapFlags, VhostUserProtocolFeatures,
VhostUserVirtioFeatures,
};
use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
use virtio_queue::Queue;
use vm_device::UserspaceMapping;
use vm_memory::GuestMemoryAtomic;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;
use super::vu_common_ctrl::VhostUserHandle;
use super::{DEFAULT_VIRTIO_FEATURES, Error, Result};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::vhost_user::VhostUserCommon;
use crate::{
ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VIRTIO_F_IOMMU_PLATFORM,
VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList,
};
const DEFAULT_QUEUE_NUMBER: usize = 2;
const DEFAULT_QUEUE_SIZE: u16 = 256;
/// Handler for requests initiated by the vhost-user backend
/// (SHMEM_MAP / SHMEM_UNMAP), operating on the device's shared
/// memory region.
struct BackendReqHandler {
    // Host mapping backing the virtio shared memory region exposed
    // through the device's PCI BAR.
    mapping: Arc<MmapRegion>,
}
impl BackendReqHandler {
    /// Validate and compute the target pointer and length within the shared memory region.
    ///
    /// Returns `EINVAL` if `offset` or `len` do not fit in `usize`, or if the
    /// requested `[offset, offset + len)` window overflows or extends past the
    /// end of the mapped region. On success, returns the host pointer at
    /// `offset` inside the mapping together with the validated length.
    fn checked_region(
        &self,
        offset: u64,
        len: u64,
    ) -> io::Result<(*mut u8, usize)> {
        let offset: usize = offset
            .try_into()
            .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
        let len: usize = len
            .try_into()
            .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
        let region_size = self.mapping.size();
        // Reject windows that overflow usize or run past the region's end.
        if offset.checked_add(len).is_none_or(|end| end > region_size) {
            return Err(io::Error::from_raw_os_error(libc::EINVAL));
        }
        // SAFETY: offset is within the mapped region
        let ptr = unsafe { self.mapping.as_ptr().add(offset) };
        Ok((ptr, len))
    }
}
impl VhostUserFrontendReqHandler for BackendReqHandler {
    /// Handle a SHMEM_MAP request: map the backend-provided fd into the
    /// shared memory region at `req.shm_offset`, overwriting whatever
    /// mapping was there (MAP_FIXED).
    fn shmem_map(
        &self,
        req: &VhostUserMMap,
        fd: &dyn AsRawFd,
    ) -> vhost::vhost_user::HandlerResult<u64> {
        // Bounds-check the requested window before touching the mapping.
        let (ptr, len) = self.checked_region(req.shm_offset, req.len)?;
        let writable = VhostUserMMapFlags::from_bits_truncate(req.flags)
            .contains(VhostUserMMapFlags::WRITABLE);
        let prot = if writable {
            libc::PROT_READ | libc::PROT_WRITE
        } else {
            libc::PROT_READ
        };
        // SAFETY: we've checked we're only giving addr and length within the
        // region, and are passing MAP_FIXED to ensure they are respected.
        let ret = unsafe {
            libc::mmap(
                ptr.cast(),
                len,
                prot,
                // Read-only mappings use MAP_PRIVATE to work around
                // https://bugzilla.kernel.org/show_bug.cgi?id=217238
                if writable {
                    libc::MAP_SHARED
                } else {
                    libc::MAP_PRIVATE
                } | libc::MAP_FIXED,
                fd.as_raw_fd(),
                // NOTE(review): `as` cast wraps negative for fd_offset >
                // i64::MAX — presumably backends never send such offsets;
                // confirm, or reject with EINVAL.
                req.fd_offset as libc::off_t,
            )
        };
        if ret == libc::MAP_FAILED {
            return Err(io::Error::last_os_error());
        }
        Ok(0)
    }

    /// Handle a SHMEM_UNMAP request: replace the window with a fresh
    /// anonymous PROT_NONE mapping, so the old file contents are no
    /// longer reachable and later guest accesses fault.
    fn shmem_unmap(&self, req: &VhostUserMMap) -> vhost::vhost_user::HandlerResult<u64> {
        let (ptr, len) = self.checked_region(req.shm_offset, req.len)?;
        // SAFETY: we've checked we're only giving addr and length within the
        // region, and are passing MAP_FIXED to ensure they are respected.
        let ret = unsafe {
            libc::mmap(
                ptr.cast(),
                len,
                libc::PROT_NONE,
                libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED,
                -1,
                0,
            )
        };
        if ret == libc::MAP_FAILED {
            return Err(io::Error::last_os_error());
        }
        Ok(0)
    }
}
/// Shared memory region descriptor returned from the backend.
pub struct ShmemRegion {
    /// Shared memory region id; used as the `shmid` of the PCI shared
    /// memory capability (e.g. VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1).
    pub id: u8,
    /// Region length in bytes.
    pub length: u64,
}
/// vhost-user virtio-gpu frontend device.
pub struct Gpu {
    // Generic virtio device state (features, queues, pause/kill eventfds).
    common: VirtioCommon,
    // Shared vhost-user state (socket handle, acked protocol features).
    vu_common: VhostUserCommon,
    // Device identifier.
    id: String,
    // Shared memory region exposed through a PCI BAR; the backend maps
    // buffers into it via SHMEM_MAP requests.
    cache: Option<VirtioSharedMemoryList>,
    // Whether the backend negotiated BACKEND_REQ (required for the
    // backend to send SHMEM_MAP/UNMAP requests).
    backend_req_support: bool,
    seccomp_action: SeccompAction,
    // Guest memory retained for later add_memory_region() calls.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    // Worker thread handling backend activity, spawned in activate().
    epoll_thread: Option<thread::JoinHandle<()>>,
    exit_evt: EventFd,
    // When true, advertise VIRTIO_F_IOMMU_PLATFORM.
    iommu: bool,
}
impl Gpu {
    /// Create a new virtio-gpu device.
    ///
    /// Connects to the vhost-user backend at `path`, negotiates virtio and
    /// vhost-user protocol features, and queries the backend's shared memory
    /// configuration. Exactly one shared memory region is expected; its id
    /// and length are returned alongside the device so the caller can
    /// allocate a PCI BAR range for it.
    pub fn new(
        id: String,
        path: &str,
        seccomp_action: SeccompAction,
        exit_evt: EventFd,
        iommu: bool,
    ) -> Result<(Gpu, ShmemRegion)> {
        let num_queues = DEFAULT_QUEUE_NUMBER;

        // Connect to the vhost-user socket.
        let mut vu =
            VhostUserHandle::connect_vhost_user(false, path, num_queues as u64, false)?;

        // Filling device and vring features VMM supports.
        let avail_features = DEFAULT_VIRTIO_FEATURES;
        let avail_protocol_features = VhostUserProtocolFeatures::CONFIG
            | VhostUserProtocolFeatures::BACKEND_REQ
            | VhostUserProtocolFeatures::SHMEM
            | VhostUserProtocolFeatures::REPLY_ACK
            | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS;
        let (acked_features, acked_protocol_features) =
            vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
        let backend_req_support =
            acked_protocol_features & VhostUserProtocolFeatures::BACKEND_REQ.bits() != 0;

        // Query shared memory regions.
        let shm_config = vu
            .socket_handle()
            .get_shmem_config()
            .map_err(Error::VhostUserGetShmemConfig)?;
        // Decode the sparse region array: the array index is the shm id,
        // and a zero size marks an unused slot.
        let regions: Vec<(u8, u64)> = shm_config
            .memory_sizes
            .iter()
            .enumerate()
            .filter(|&(_, &size)| size != 0)
            .take(shm_config.nregions as usize)
            .map(|(id, &size)| (id as u8, size))
            .collect();
        if regions.len() != 1 {
            error!(
                "Expected exactly 1 shared memory region from GPU backend, got {}",
                regions.len()
            );
            return Err(Error::VhostUserUnexpectedShmemRegionCount(1, regions.len()));
        }
        let (shm_id, shm_length) = regions[0];

        let gpu = Gpu {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Gpu as u32,
                avail_features: acked_features,
                // NOTE(review): only the PROTOCOL_FEATURES bit is pre-acked
                // here, mirroring the other vhost-user devices; the guest
                // acks the remaining feature bits at activation time.
                acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
                queue_sizes: vec![DEFAULT_QUEUE_SIZE; num_queues],
                paused_sync: Some(Arc::new(Barrier::new(2))),
                min_queues: DEFAULT_QUEUE_NUMBER as u16,
                paused: Arc::new(AtomicBool::new(false)),
                ..Default::default()
            },
            vu_common: VhostUserCommon {
                vu: Some(Arc::new(Mutex::new(vu))),
                acked_protocol_features,
                socket_path: path.to_string(),
                vu_num_queues: num_queues,
                ..Default::default()
            },
            id,
            cache: None,
            backend_req_support,
            seccomp_action,
            guest_memory: None,
            epoll_thread: None,
            exit_evt,
            iommu,
        };

        Ok((
            gpu,
            ShmemRegion {
                id: shm_id,
                length: shm_length,
            },
        ))
    }

    /// Attach the shared memory region (allocated by the device manager)
    /// that backs the device's PCI shared memory BAR.
    pub fn set_cache(&mut self, cache: VirtioSharedMemoryList) {
        self.cache = Some(cache);
    }
}
impl Drop for Gpu {
fn drop(&mut self) {
if let Some(kill_evt) = self.common.kill_evt.take() {
let _ = kill_evt.write(1);
}
self.common.wait_for_epoll_threads();
if let Some(thread) = self.epoll_thread.take()
&& let Err(e) = thread.join()
{
error!("Error joining thread: {e:?}");
}
}
}
impl VirtioDevice for Gpu {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    /// Advertise the backend-negotiated features, plus
    /// VIRTIO_F_IOMMU_PLATFORM when the device sits behind an IOMMU.
    fn features(&self) -> u64 {
        let mut features = self.common.avail_features;
        if self.iommu {
            features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
        }
        features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value);
    }

    /// Read the device config space by forwarding the request to the
    /// vhost-user backend; failures are logged, not propagated.
    fn read_config(&self, offset: u64, mut data: &mut [u8]) {
        if let Some(vu) = &self.vu_common.vu
            && let Err(e) = vu
                .lock()
                .unwrap()
                .socket_handle()
                .get_config(
                    offset as u32,
                    data.len() as u32,
                    VhostUserConfigFlags::WRITABLE,
                    data,
                )
                .map_err(|e| format!("{e:?}"))
                .and_then(|(_, config)| {
                    use std::io::Write;
                    data.write_all(&config).map_err(|e| format!("{e:?}"))
                })
        {
            error!("Failed getting vhost-user-gpu configuration: {e:?}");
        }
    }

    /// Activate the device: wire up the backend request handler (for
    /// SHMEM_MAP/UNMAP), hand the queues to the backend, and spawn the
    /// worker thread.
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, interrupt_cb.clone())?;
        self.guest_memory = Some(mem.clone());

        // Initialize backend communication. The handler is only created
        // when the backend negotiated BACKEND_REQ and the shared memory
        // region has been set up.
        let backend_req_handler = if self.backend_req_support {
            if let Some(cache) = self.cache.as_ref() {
                let vu_frontend_req_handler = Arc::new(BackendReqHandler {
                    mapping: cache.mapping.clone(),
                });
                let mut req_handler =
                    FrontendReqHandler::new(vu_frontend_req_handler).map_err(|e| {
                        crate::ActivateError::VhostUserGpuSetup(Error::FrontendReqHandlerCreation(
                            e,
                        ))
                    })?;
                if self.vu_common.acked_protocol_features
                    & VhostUserProtocolFeatures::REPLY_ACK.bits()
                    != 0
                {
                    req_handler.set_reply_ack_flag(true);
                }
                Some(req_handler)
            } else {
                None
            }
        } else {
            None
        };

        // Run a dedicated thread for handling potential reconnections with
        // the backend.
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        let mut handler = self.vu_common.activate(
            mem,
            &queues,
            interrupt_cb,
            self.common.acked_features,
            backend_req_handler,
            kill_evt,
            pause_evt,
        )?;

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();

        let mut epoll_threads = Vec::new();
        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioVhostGpu,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(&paused, paused_sync.as_ref().unwrap()),
        )?;
        self.epoll_thread = Some(epoll_threads.remove(0));

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    /// Reset the device: resume if paused, reset the backend, and stop the
    /// worker thread. Returns the interrupt callback for reuse.
    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        if self.common.pause_evt.take().is_some() {
            self.common.resume().ok()?;
        }

        if let Some(vu) = &self.vu_common.vu
            && let Err(e) = vu.lock().unwrap().reset_vhost_user()
        {
            error!("Failed to reset vhost-user daemon: {e:?}");
            return None;
        }

        if let Some(kill_evt) = self.common.kill_evt.take() {
            let _ = kill_evt.write(1);
        }

        event!("virtio-device", "reset", "id", &self.id);

        Some(self.common.interrupt_cb.take().unwrap())
    }

    fn shutdown(&mut self) {
        self.vu_common.shutdown();
    }

    /// Expose the shared memory region list so the transport can build the
    /// PCI shared memory BAR and capabilities.
    fn get_shm_regions(&self) -> Option<VirtioSharedMemoryList> {
        self.cache.clone()
    }

    /// Replace the shared memory region list (e.g. after restore); only
    /// valid once a region was set via set_cache().
    fn set_shm_regions(
        &mut self,
        shm_regions: VirtioSharedMemoryList,
    ) -> std::result::Result<(), crate::Error> {
        if let Some(cache) = self.cache.as_mut() {
            *cache = shm_regions;
            Ok(())
        } else {
            Err(crate::Error::SetShmRegionsNotSupported)
        }
    }

    /// Forward a newly hotplugged guest memory region to the backend.
    fn add_memory_region(
        &mut self,
        region: &Arc<GuestRegionMmap>,
    ) -> std::result::Result<(), crate::Error> {
        self.vu_common.add_memory_region(&self.guest_memory, region)
    }

    /// Report the shared memory mapping so it can be tracked (it is not
    /// mergeable guest RAM).
    fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
        let mut mappings = Vec::new();
        if let Some(cache) = self.cache.as_ref() {
            mappings.push(UserspaceMapping {
                mem_slot: cache.mem_slot,
                addr: cache.addr,
                mapping: cache.mapping.clone(),
                mergeable: false,
            });
        }
        mappings
    }
}
impl Pausable for Gpu {
    /// Pause the vhost-user backend first, then the virtio device state.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.vu_common.pause()?;
        self.common.pause()
    }

    /// Resume the virtio device state, wake the worker thread, then
    /// resume the vhost-user backend.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()?;

        // Wake the worker thread in case it parked itself while paused.
        if let Some(epoll_thread) = &self.epoll_thread {
            epoll_thread.thread().unpark();
        }

        self.vu_common.resume()
    }
}
impl Snapshottable for Gpu {
    /// Identifier used for this device's snapshot node.
    fn id(&self) -> String {
        self.id.to_owned()
    }
}
// Marker impls: rely on the traits' default method implementations.
impl Transportable for Gpu {}
impl Migratable for Gpu {}

View file

@ -35,11 +35,13 @@ use crate::{
pub mod blk;
pub mod fs;
pub mod gpu;
pub mod net;
pub mod vu_common_ctrl;
pub use self::blk::Blk;
pub use self::fs::*;
pub use self::gpu::Gpu;
pub use self::net::Net;
pub use self::vu_common_ctrl::VhostUserConfig;
@ -147,6 +149,10 @@ pub enum Error {
NewMmapRegion(#[source] MmapRegionError),
#[error("Could not find the shm log region")]
MissingShmLogRegion,
#[error("Get shared memory config failed")]
VhostUserGetShmemConfig(#[source] VhostError),
#[error("Expected {0} shared memory regions; got {1}")]
VhostUserUnexpectedShmemRegionCount(usize, usize),
}
type Result<T> = std::result::Result<T, Error>;

View file

@ -51,8 +51,8 @@ use crate::config::RestoreConfig;
use crate::device_tree::DeviceTree;
use crate::vm::{Error as VmError, VmState};
use crate::vm_config::{
DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
VdpaConfig, VmConfig, VsockConfig,
};
/// API errors are sent back from the VMM API server through the ApiResponse.
@ -170,6 +170,10 @@ pub enum ApiError {
#[error("The fs could not be added to the VM")]
VmAddFs(#[source] VmError),
/// The gpu could not be added to the VM.
#[error("The gpu could not be added to the VM")]
VmAddGpu(#[source] VmError),
/// The pmem device could not be added to the VM.
#[error("The pmem device could not be added to the VM")]
VmAddPmem(#[source] VmError),
@ -340,6 +344,8 @@ pub trait RequestHandler {
fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> Result<Option<Vec<u8>>, VmError>;
fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> Result<Option<Vec<u8>>, VmError>;
fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> Result<Option<Vec<u8>>, VmError>;
fn vm_add_net(&mut self, net_cfg: NetConfig) -> Result<Option<Vec<u8>>, VmError>;
@ -539,6 +545,43 @@ impl ApiAction for VmAddFs {
}
}
/// API action: add a virtio-gpu device to a VM.
pub struct VmAddGpu;

impl ApiAction for VmAddGpu {
    type RequestBody = GpuConfig;
    type ResponseBody = Option<Body>;

    /// Build the boxed request closure that the VMM thread executes.
    fn request(
        &self,
        config: Self::RequestBody,
        response_sender: Sender<ApiResponse>,
    ) -> ApiRequest {
        Box::new(move |vmm| {
            info!("API request event: VmAddGpu {config:?}");

            let response = vmm
                .vm_add_gpu(config)
                .map_err(ApiError::VmAddGpu)
                .map(ApiResponsePayload::VmAction);

            response_sender
                .send(response)
                .map_err(VmmError::ApiResponseSend)?;

            Ok(false)
        })
    }

    /// Send the request over the API channel and wait for the response body.
    fn send(
        &self,
        api_evt: EventFd,
        api_sender: Sender<ApiRequest>,
        data: Self::RequestBody,
    ) -> ApiResult<Self::ResponseBody> {
        get_response_body(self, api_evt, api_sender, data)
    }
}
pub struct VmAddPmem;
impl ApiAction for VmAddPmem {

View file

@ -90,6 +90,12 @@ pub enum Error {
/// Error parsing filesystem parameters
#[error("Error parsing --fs")]
ParseFileSystem(#[source] OptionParserError),
/// GPU socket is missing
#[error("Error parsing --gpu: socket missing")]
ParseGpuSockMissing,
/// Error parsing GPU parameters
#[error("Error parsing --gpu")]
ParseGpu(#[source] OptionParserError),
/// Error parsing persistent memory parameters
#[error("Error parsing --pmem")]
ParsePersistentMemory(#[source] OptionParserError),
@ -393,6 +399,7 @@ pub struct VmParams<'a> {
pub rng: &'a str,
pub balloon: Option<&'a str>,
pub fs: Option<Vec<&'a str>>,
pub gpu: Option<Vec<&'a str>>,
pub pmem: Option<Vec<&'a str>>,
pub serial: &'a str,
pub console: &'a str,
@ -454,6 +461,9 @@ impl<'a> VmParams<'a> {
let fs: Option<Vec<&str>> = args
.get_many::<String>("fs")
.map(|x| x.map(|y| y as &str).collect());
let gpu: Option<Vec<&str>> = args
.get_many::<String>("gpu")
.map(|x| x.map(|y| y as &str).collect());
let pmem: Option<Vec<&str>> = args
.get_many::<String>("pmem")
.map(|x| x.map(|y| y as &str).collect());
@ -508,6 +518,7 @@ impl<'a> VmParams<'a> {
rng,
balloon,
fs,
gpu,
pmem,
serial,
console,
@ -1701,6 +1712,49 @@ impl FsConfig {
}
}
impl GpuConfig {
    /// Help text describing the accepted `--gpu` option syntax.
    pub const SYNTAX: &'static str = "virtio-gpu parameters \
    \"socket=<socket_path>,id=<device_id>,pci_segment=<segment_id>\"";

    /// Parse a `--gpu` option string into a `GpuConfig`.
    ///
    /// `socket` is mandatory; `id` is optional; `pci_segment` defaults to 0.
    pub fn parse(gpu: &str) -> Result<Self> {
        let mut parser = OptionParser::new();
        parser.add("socket").add("id").add("pci_segment");
        parser.parse(gpu).map_err(Error::ParseGpu)?;

        let socket = PathBuf::from(parser.get("socket").ok_or(Error::ParseGpuSockMissing)?);
        let id = parser.get("id");
        let pci_segment = parser
            .convert("pci_segment")
            .map_err(Error::ParseGpu)?
            .unwrap_or_default();

        Ok(GpuConfig {
            socket,
            id,
            pci_segment,
        })
    }

    /// Validate the device against platform-level PCI segment constraints:
    /// the segment must exist and must not be handled by the virtual IOMMU.
    pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> {
        if let Some(platform_config) = vm_config.platform.as_ref() {
            if self.pci_segment >= platform_config.num_pci_segments {
                return Err(ValidationError::InvalidPciSegment(self.pci_segment));
            }

            if let Some(iommu_segments) = platform_config.iommu_segments.as_ref()
                && iommu_segments.contains(&self.pci_segment)
            {
                return Err(ValidationError::IommuNotSupportedOnSegment(
                    self.pci_segment,
                ));
            }
        }

        Ok(())
    }
}
#[cfg(feature = "fw_cfg")]
impl FwCfgConfig {
pub const SYNTAX: &'static str = "Boot params to pass to FW CFG device \
@ -2723,6 +2777,13 @@ impl VmConfig {
}
}
if let Some(gpus) = &self.gpu {
for gpu in gpus {
gpu.validate(self)?;
Self::validate_identifier(&mut id_list, &gpu.id)?;
}
}
if let Some(pmems) = &self.pmem {
for pmem in pmems {
pmem.validate(self)?;
@ -2976,6 +3037,15 @@ impl VmConfig {
fs = Some(fs_config_list);
}
let mut gpu: Option<Vec<GpuConfig>> = None;
if let Some(gpu_list) = &vm_params.gpu {
let mut gpu_config_list = Vec::new();
for item in gpu_list.iter() {
gpu_config_list.push(GpuConfig::parse(item)?);
}
gpu = Some(gpu_config_list);
}
let mut pmem: Option<Vec<PmemConfig>> = None;
if let Some(pmem_list) = &vm_params.pmem {
let mut pmem_config_list = Vec::new();
@ -3112,6 +3182,7 @@ impl VmConfig {
rng,
balloon,
fs,
gpu,
pmem,
serial,
console,
@ -3173,6 +3244,13 @@ impl VmConfig {
removed |= fs.len() != len;
}
// Remove if gpu device
if let Some(gpu_list) = self.gpu.as_mut() {
let len = gpu_list.len();
gpu_list.retain(|dev| dev.id.as_ref().map(|id| id.as_ref()) != Some(id));
removed |= gpu_list.len() != len;
}
// Remove if net device
if let Some(net) = self.net.as_mut() {
let len = net.len();
@ -3245,6 +3323,7 @@ impl Clone for VmConfig {
#[cfg(feature = "pvmemcontrol")]
pvmemcontrol: self.pvmemcontrol.clone(),
fs: self.fs.clone(),
gpu: self.gpu.clone(),
pmem: self.pmem.clone(),
serial: self.serial.clone(),
console: self.console.clone(),
@ -4153,6 +4232,7 @@ mod unit_tests {
rng: RngConfig::default(),
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: default_serial(),
console: default_console(),
@ -4356,6 +4436,7 @@ mod unit_tests {
},
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: ConsoleConfig {
file: None,

View file

@ -94,7 +94,7 @@ use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, Virti
use virtio_devices::vhost_user::VhostUserConfig;
use virtio_devices::{
AccessPlatformMapping, ActivateError, Block, Endpoint, IommuMapping, VdpaDmaMapping,
VirtioMemMappingSource,
VirtioMemMappingSource, VirtioSharedMemory, VirtioSharedMemoryList,
};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::ExternalDmaMapping;
@ -127,8 +127,8 @@ use crate::serial_manager::{Error as SerialManagerError, SerialManager};
use crate::vm_config::IvshmemConfig;
use crate::vm_config::{
ConsoleOutputMode, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT,
DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
VhostMode, VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
VdpaConfig, VhostMode, VmConfig, VsockConfig,
};
use crate::{DEVICE_MANAGER_SNAPSHOT_ID, GuestRegionMmap, PciDeviceInfo, device_node};
@ -157,6 +157,7 @@ const IVSHMEM_DEVICE_NAME: &str = "__ivshmem";
// identifiers if the user doesn't give one
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const GPU_DEVICE_NAME_PREFIX: &str = "_gpu";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
@ -205,6 +206,18 @@ pub enum DeviceManagerError {
#[error("Virtio-fs device was created without a socket")]
NoVirtioFsSock,
/// Cannot create virtio-gpu device
#[error("Cannot create virtio-gpu device")]
CreateVirtioGpu(#[source] virtio_devices::vhost_user::Error),
/// Virtio-gpu device was created without a socket.
#[error("Virtio-gpu device was created without a socket")]
NoVirtioGpuSock,
/// Cannot find a memory range for virtio-gpu
#[error("Cannot find a memory range for virtio-gpu")]
GpuRangeAllocation,
/// Cannot create vhost-user-blk device
#[error("Cannot create vhost-user-blk device")]
CreateVhostUserBlk(#[source] virtio_devices::vhost_user::Error),
@ -2548,6 +2561,9 @@ impl DeviceManager {
// Add virtio-fs if required
self.make_virtio_fs_devices()?;
// Add virtio-gpu if required
self.make_virtio_gpu_devices()?;
// Add virtio-pmem if required
self.make_virtio_pmem_devices()?;
@ -3146,6 +3162,118 @@ impl DeviceManager {
Ok(())
}
/// Create a single vhost-user GPU device from `gpu_cfg`.
///
/// Connects to the backend, allocates a 64-bit PCI memory range for the
/// backend-reported shared memory region, registers a PROT_NONE placeholder
/// mapping for it with the memory manager (the backend later populates it
/// via SHMEM_MAP requests), and records the device in the device tree.
fn make_virtio_gpu_device(
    &mut self,
    gpu_cfg: &mut GpuConfig,
) -> DeviceManagerResult<MetaVirtioDevice> {
    // Use the configured id or generate a unique "_gpuN" one, writing it
    // back into the config so it persists.
    let id = if let Some(id) = &gpu_cfg.id {
        id.clone()
    } else {
        let id = self.next_device_name(GPU_DEVICE_NAME_PREFIX)?;
        gpu_cfg.id = Some(id.clone());
        id
    };

    info!("Creating virtio-gpu device: {gpu_cfg:?}");

    let mut node = device_node!(id);

    if let Some(gpu_socket) = gpu_cfg.socket.to_str() {
        let (mut virtio_gpu_device, region) = virtio_devices::vhost_user::Gpu::new(
            id.clone(),
            gpu_socket,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            self.force_iommu,
        )
        .map_err(DeviceManagerError::CreateVirtioGpu)?;

        // Allocate the shared memory BAR region.
        let cache_base = self.pci_segments[gpu_cfg.pci_segment as usize]
            .mem64_allocator
            .lock()
            .unwrap()
            .allocate(None, region.length, Some(region.length))
            .ok_or(DeviceManagerError::GpuRangeAllocation)?
            .raw_value();

        node.resources.push(Resource::MmioAddressRange {
            base: cache_base,
            size: region.length,
        });

        // Reserve the host virtual address range with an inaccessible
        // anonymous mapping; the backend maps real buffers into it later
        // through SHMEM_MAP.
        let mmap_region = MmapRegion::build(
            None,
            region.length as usize,
            libc::PROT_NONE,
            libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;

        // SAFETY: mmap_region.size() and mmap_region.as_ptr() refer to a valid allocation.
        let mem_slot = unsafe {
            self.memory_manager
                .lock()
                .unwrap()
                .create_userspace_mapping(
                    cache_base,
                    mmap_region.size(),
                    mmap_region.as_ptr(),
                    false,
                    false,
                    false,
                )
                .map_err(DeviceManagerError::MemoryManager)?
        };

        // Single shared memory region at offset 0, keyed by the
        // backend-reported shm id.
        let region_list = std::iter::once((
            region.id,
            VirtioSharedMemory {
                offset: 0,
                len: region.length,
            },
        ))
        .collect();

        virtio_gpu_device.set_cache(VirtioSharedMemoryList {
            mapping: Arc::new(mmap_region),
            mem_slot,
            addr: GuestAddress(cache_base),
            region_list,
        });

        let virtio_gpu_device = Arc::new(Mutex::new(virtio_gpu_device));

        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_gpu_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id,
            pci_segment: gpu_cfg.pci_segment,
            dma_handler: None,
        })
    } else {
        Err(DeviceManagerError::NoVirtioGpuSock)
    }
}
/// Create every virtio-gpu device listed in the VM configuration.
fn make_virtio_gpu_devices(&mut self) -> DeviceManagerResult<()> {
    // Take the GPU configs out of the shared VmConfig so its lock is not
    // held while each device is instantiated.
    let mut gpu_configs = self.config.lock().unwrap().gpu.take();
    if let Some(configs) = gpu_configs.as_mut() {
        for cfg in configs.iter_mut() {
            let device = self.make_virtio_gpu_device(cfg)?;
            self.virtio_devices.push(device);
        }
    }
    // Store the configs back; ids may have been filled in during creation.
    self.config.lock().unwrap().gpu = gpu_configs;
    Ok(())
}
fn make_virtio_pmem_device(
&mut self,
pmem_cfg: &mut PmemConfig,
@ -4876,6 +5004,13 @@ impl DeviceManager {
self.hotplug_virtio_pci_device(device)
}
/// Hotplug a virtio-gpu device (entry point for the add-gpu API).
pub fn add_gpu(&mut self, gpu_cfg: &mut GpuConfig) -> DeviceManagerResult<PciDeviceInfo> {
    // Reject duplicate user-supplied device identifiers up front.
    self.validate_identifier(&gpu_cfg.id)?;

    let device = self.make_virtio_gpu_device(gpu_cfg)?;
    self.hotplug_virtio_pci_device(device)
}
pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
self.validate_identifier(&pmem_cfg.id)?;

View file

@ -59,8 +59,8 @@ use crate::migration::{recv_vm_config, recv_vm_state};
use crate::seccomp_filters::{Thread, get_seccomp_filter};
use crate::vm::{Error as VmError, Vm, VmState};
use crate::vm_config::{
DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
VdpaConfig, VmConfig, VsockConfig,
};
mod acpi;
@ -2125,6 +2125,29 @@ impl RequestHandler for Vmm {
}
}
/// API handler: add a virtio-gpu device to the VM.
///
/// Validates the updated configuration on a clone first; if the VM is
/// running the device is hotplugged, otherwise the config change is only
/// recorded for the next boot.
fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> result::Result<Option<Vec<u8>>, VmError> {
    self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

    {
        // Validate against a clone so a rejected request leaves the
        // stored configuration untouched.
        let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
        add_to_config(&mut config.gpu, gpu_cfg.clone());
        config.validate().map_err(VmError::ConfigValidation)?;
    }

    if let Some(ref mut vm) = self.vm {
        let info = vm.add_gpu(gpu_cfg).inspect_err(|e| {
            error!("Error when adding new gpu to the VM: {e:?}");
        })?;
        serde_json::to_vec(&info)
            .map(Some)
            .map_err(VmError::SerializeJson)
    } else {
        // VM not started yet: just record the device in the config.
        let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
        add_to_config(&mut config.gpu, gpu_cfg);
        Ok(None)
    }
}
fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
@ -2443,6 +2466,7 @@ mod unit_tests {
},
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: ConsoleConfig {
file: None,

View file

@ -100,8 +100,8 @@ use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path};
#[cfg(feature = "fw_cfg")]
use crate::vm_config::FwCfgConfig;
use crate::vm_config::{
DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, NumaConfig, PayloadConfig,
PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, HotplugMethod, NetConfig, NumaConfig,
PayloadConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
};
use crate::{
CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, GuestMemoryMmap,
@ -2136,6 +2136,28 @@ impl Vm {
Ok(pci_device_info)
}
/// Hotplug a virtio-gpu device into the running VM, record it in the VM
/// configuration, and notify the guest of the PCI topology change.
pub fn add_gpu(&mut self, mut gpu_cfg: GpuConfig) -> Result<PciDeviceInfo> {
    let pci_device_info = self
        .device_manager
        .lock()
        .unwrap()
        .add_gpu(&mut gpu_cfg)
        .map_err(Error::DeviceManager)?;

    // Update the VM config (gpu_cfg may have had its id filled in by the
    // device manager) inside a short-lived lock scope.
    {
        let mut config = self.config.lock().unwrap();
        add_to_config(&mut config.gpu, gpu_cfg);
    }

    self.device_manager
        .lock()
        .unwrap()
        .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
        .map_err(Error::DeviceManager)?;

    Ok(pci_device_info)
}
pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result<PciDeviceInfo> {
let pci_device_info = self
.device_manager

View file

@ -469,6 +469,22 @@ impl ApplyLandlock for FsConfig {
}
}
/// User-facing configuration for a vhost-user virtio-gpu device.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct GpuConfig {
    /// Path to the backend's vhost-user Unix socket.
    pub socket: PathBuf,
    /// Optional device identifier; auto-generated when absent.
    #[serde(default)]
    pub id: Option<String>,
    /// PCI segment the device is placed on (defaults to 0).
    #[serde(default)]
    pub pci_segment: u16,
}
impl ApplyLandlock for GpuConfig {
    /// Grant read/write access to the vhost-user socket path under Landlock.
    fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> {
        landlock.add_rule_with_access(&self.socket, "rw")?;
        Ok(())
    }
}
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct PmemConfig {
pub file: PathBuf,
@ -922,6 +938,7 @@ pub struct VmConfig {
pub rng: RngConfig,
pub balloon: Option<BalloonConfig>,
pub fs: Option<Vec<FsConfig>>,
pub gpu: Option<Vec<GpuConfig>>,
pub pmem: Option<Vec<PmemConfig>>,
#[serde(default = "default_serial")]
pub serial: ConsoleConfig,
@ -997,6 +1014,12 @@ impl VmConfig {
}
}
if let Some(gpu_configs) = &self.gpu {
for gpu_config in gpu_configs.iter() {
gpu_config.apply_landlock(&mut landlock)?;
}
}
if let Some(pmem_configs) = &self.pmem {
for pmem_config in pmem_configs.iter() {
pmem_config.apply_landlock(&mut landlock)?;