virtio-devices: add vhost-user GPU device
Some checks failed
Cloud Hypervisor Tests (Metrics) / Tests (Metrics) (push) Has been cancelled
REUSE Compliance Check / REUSE Compliance Check (push) Has been cancelled
Shell scripts check / Check shell scripts (push) Has been cancelled

Add a vhost-user frontend for virtio-gpu using the current vhost-user
protocol (GET_SHMEM_CONFIG=44, SHMEM_MAP/UNMAP=9/10, SHMEM=0x0020_0000).

The GPU device queries the backend for shared memory regions via
get_shmem_config(), allocates a PCI BAR for the shmem region, and
implements a BackendReqHandler that handles SHMEM_MAP/UNMAP requests
by mmapping file descriptors from the backend into the shared memory
region.

To support non-contiguous shared memory region IDs (needed for
VIRTIO_GPU_SHM_ID_HOST_VISIBLE=1), VirtioSharedMemoryList.region_list
is changed from Vec<VirtioSharedMemory> to BTreeMap<u8, VirtioSharedMemory>,
and the PCI BAR code uses the map key as the PCI capability shmid.

Includes full VMM plumbing: GpuConfig, --gpu CLI, API, device_manager,
seccomp rules, and hotplug support.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Davíð Steinn Geirsson 2026-03-18 17:46:55 +00:00
parent e3372a22f6
commit c22c15b963
13 changed files with 824 additions and 14 deletions

View file

@ -998,6 +998,7 @@ mod unit_tests {
},
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: ConsoleConfig {
file: None,

View file

@ -6,7 +6,7 @@
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::io::Write;
use std::num::Wrapping;
use std::sync::atomic::{AtomicBool, Ordering};
@ -50,7 +50,7 @@ pub struct VirtioSharedMemoryList {
pub mem_slot: u32,
pub addr: GuestAddress,
pub mapping: Arc<MmapRegion>,
pub region_list: Vec<VirtioSharedMemory>,
pub region_list: BTreeMap<u8, VirtioSharedMemory>,
}
/// Trait for virtio devices to be driven by a virtio transport.

View file

@ -43,7 +43,7 @@ pub use self::block::{Block, BlockState};
pub use self::console::{Console, ConsoleResizer, Endpoint};
pub use self::device::{
DmaRemapping, VirtioCommon, VirtioDevice, VirtioInterrupt, VirtioInterruptType,
VirtioSharedMemoryList,
VirtioSharedMemory, VirtioSharedMemoryList,
};
pub use self::epoll_helper::{
EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler,
@ -90,6 +90,8 @@ pub enum ActivateError {
VhostUserFsSetup(#[source] vhost_user::Error),
#[error("Failed to setup vhost-user daemon")]
VhostUserSetup(#[source] vhost_user::Error),
#[error("Failed to setup vhost-user-gpu daemon")]
VhostUserGpuSetup(#[source] vhost_user::Error),
#[error("Failed to create seccomp filter")]
CreateSeccompFilter(#[source] seccompiler::Error),
#[error("Failed to create rate limiter")]

View file

@ -24,6 +24,7 @@ pub enum Thread {
VirtioRng,
VirtioVhostBlock,
VirtioVhostFs,
VirtioVhostGpu,
VirtioVhostNet,
VirtioVhostNetCtl,
VirtioVsock,
@ -192,6 +193,17 @@ fn virtio_vhost_fs_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
]
}
/// Seccomp rules for the vhost-user-gpu worker thread.
///
/// Each listed syscall is allowed unconditionally (empty rule vector);
/// anything else falls through to the filter's default action.
fn virtio_vhost_gpu_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
    [
        libc::SYS_clock_nanosleep,
        libc::SYS_connect,
        libc::SYS_nanosleep,
        libc::SYS_recvmsg,
        libc::SYS_sendmsg,
        libc::SYS_socket,
    ]
    .into_iter()
    .map(|syscall| (syscall, Vec::new()))
    .collect()
}
fn virtio_vhost_net_ctl_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
vec![]
}
@ -271,6 +283,7 @@ fn get_seccomp_rules(thread_type: Thread) -> Vec<(i64, Vec<SeccompRule>)> {
Thread::VirtioRng => virtio_rng_thread_rules(),
Thread::VirtioVhostBlock => virtio_vhost_block_thread_rules(),
Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(),
Thread::VirtioVhostGpu => virtio_vhost_gpu_thread_rules(),
Thread::VirtioVhostNet => virtio_vhost_net_thread_rules(),
Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(),
Thread::VirtioVsock => virtio_vsock_thread_rules(),

View file

@ -1036,11 +1036,11 @@ impl PciDevice for VirtioPciDevice {
PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e)
})?;
for (idx, shm) in shm_list.region_list.iter().enumerate() {
for (&shmid, shm) in shm_list.region_list.iter() {
let shm_cap = VirtioPciCap64::new(
PciCapabilityType::SharedMemory,
VIRTIO_SHM_BAR_INDEX as u8,
idx as u8,
shmid,
shm.offset,
shm.len,
);

View file

@ -0,0 +1,460 @@
// Copyright © 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
use std::io;
use std::os::unix::io::AsRawFd;
use std::sync::atomic::AtomicBool;
use std::sync::{Arc, Barrier, Mutex};
use std::{result, thread};
use event_monitor::event;
use log::error;
use seccompiler::SeccompAction;
use vhost::vhost_user::message::{
VhostUserConfigFlags, VhostUserMMap, VhostUserMMapFlags, VhostUserProtocolFeatures,
VhostUserVirtioFeatures,
};
use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
use virtio_queue::Queue;
use vm_device::UserspaceMapping;
use vm_memory::GuestMemoryAtomic;
use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable};
use vmm_sys_util::eventfd::EventFd;
use super::vu_common_ctrl::VhostUserHandle;
use super::{DEFAULT_VIRTIO_FEATURES, Error, Result};
use crate::seccomp_filters::Thread;
use crate::thread_helper::spawn_virtio_thread;
use crate::vhost_user::VhostUserCommon;
use crate::{
ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VIRTIO_F_IOMMU_PLATFORM,
VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList,
};
const DEFAULT_QUEUE_NUMBER: usize = 2;
const DEFAULT_QUEUE_SIZE: u16 = 256;
/// Handler for requests initiated by the vhost-user backend
/// (SHMEM_MAP / SHMEM_UNMAP), operating on the device's shared
/// memory region.
struct BackendReqHandler {
    // Host mapping backing the virtio shared memory region exposed
    // through the device's PCI BAR.
    mapping: Arc<MmapRegion>,
}
impl BackendReqHandler {
    /// Validate and compute the target pointer and length within the shared memory region.
    ///
    /// Returns `EINVAL` if `offset` or `len` do not fit in `usize`, or if the
    /// requested `[offset, offset + len)` window overflows or extends past the
    /// end of the mapped region. On success, returns the host pointer at
    /// `offset` inside the mapping together with the validated length.
    fn checked_region(
        &self,
        offset: u64,
        len: u64,
    ) -> io::Result<(*mut u8, usize)> {
        let offset: usize = offset
            .try_into()
            .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
        let len: usize = len
            .try_into()
            .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
        let region_size = self.mapping.size();
        // Reject windows that overflow usize or run past the region's end.
        if offset.checked_add(len).is_none_or(|end| end > region_size) {
            return Err(io::Error::from_raw_os_error(libc::EINVAL));
        }
        // SAFETY: offset is within the mapped region
        let ptr = unsafe { self.mapping.as_ptr().add(offset) };
        Ok((ptr, len))
    }
}
impl VhostUserFrontendReqHandler for BackendReqHandler {
    /// Handle a SHMEM_MAP request: map the backend-provided fd into the
    /// shared memory region at `req.shm_offset`, overwriting whatever
    /// mapping was there (MAP_FIXED).
    fn shmem_map(
        &self,
        req: &VhostUserMMap,
        fd: &dyn AsRawFd,
    ) -> vhost::vhost_user::HandlerResult<u64> {
        // Bounds-check the requested window before touching the mapping.
        let (ptr, len) = self.checked_region(req.shm_offset, req.len)?;
        let writable = VhostUserMMapFlags::from_bits_truncate(req.flags)
            .contains(VhostUserMMapFlags::WRITABLE);
        let prot = if writable {
            libc::PROT_READ | libc::PROT_WRITE
        } else {
            libc::PROT_READ
        };
        // SAFETY: we've checked we're only giving addr and length within the
        // region, and are passing MAP_FIXED to ensure they are respected.
        let ret = unsafe {
            libc::mmap(
                ptr.cast(),
                len,
                prot,
                // Read-only mappings use MAP_PRIVATE to work around
                // https://bugzilla.kernel.org/show_bug.cgi?id=217238
                if writable {
                    libc::MAP_SHARED
                } else {
                    libc::MAP_PRIVATE
                } | libc::MAP_FIXED,
                fd.as_raw_fd(),
                // NOTE(review): `as` cast wraps negative for fd_offset >
                // i64::MAX — presumably backends never send such offsets;
                // confirm, or reject with EINVAL.
                req.fd_offset as libc::off_t,
            )
        };
        if ret == libc::MAP_FAILED {
            return Err(io::Error::last_os_error());
        }
        Ok(0)
    }

    /// Handle a SHMEM_UNMAP request: replace the window with a fresh
    /// anonymous PROT_NONE mapping, so the old file contents are no
    /// longer reachable and later guest accesses fault.
    fn shmem_unmap(&self, req: &VhostUserMMap) -> vhost::vhost_user::HandlerResult<u64> {
        let (ptr, len) = self.checked_region(req.shm_offset, req.len)?;
        // SAFETY: we've checked we're only giving addr and length within the
        // region, and are passing MAP_FIXED to ensure they are respected.
        let ret = unsafe {
            libc::mmap(
                ptr.cast(),
                len,
                libc::PROT_NONE,
                libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED,
                -1,
                0,
            )
        };
        if ret == libc::MAP_FAILED {
            return Err(io::Error::last_os_error());
        }
        Ok(0)
    }
}
/// Shared memory region descriptor returned from the backend.
pub struct ShmemRegion {
    /// Shared memory region id; used as the `shmid` of the PCI shared
    /// memory capability (e.g. VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1).
    pub id: u8,
    /// Region length in bytes.
    pub length: u64,
}
/// vhost-user virtio-gpu frontend device.
pub struct Gpu {
    // Generic virtio device state (features, queues, pause/kill eventfds).
    common: VirtioCommon,
    // Shared vhost-user state (socket handle, acked protocol features).
    vu_common: VhostUserCommon,
    // Device identifier.
    id: String,
    // Shared memory region exposed through a PCI BAR; the backend maps
    // buffers into it via SHMEM_MAP requests.
    cache: Option<VirtioSharedMemoryList>,
    // Whether the backend negotiated BACKEND_REQ (required for the
    // backend to send SHMEM_MAP/UNMAP requests).
    backend_req_support: bool,
    seccomp_action: SeccompAction,
    // Guest memory retained for later add_memory_region() calls.
    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
    // Worker thread handling backend activity, spawned in activate().
    epoll_thread: Option<thread::JoinHandle<()>>,
    exit_evt: EventFd,
    // When true, advertise VIRTIO_F_IOMMU_PLATFORM.
    iommu: bool,
}
impl Gpu {
    /// Create a new virtio-gpu device.
    ///
    /// Connects to the vhost-user backend at `path`, negotiates virtio and
    /// vhost-user protocol features, and queries the backend's shared memory
    /// configuration. Exactly one shared memory region is expected; its id
    /// and length are returned alongside the device so the caller can
    /// allocate a PCI BAR range for it.
    pub fn new(
        id: String,
        path: &str,
        seccomp_action: SeccompAction,
        exit_evt: EventFd,
        iommu: bool,
    ) -> Result<(Gpu, ShmemRegion)> {
        let num_queues = DEFAULT_QUEUE_NUMBER;

        // Connect to the vhost-user socket.
        let mut vu =
            VhostUserHandle::connect_vhost_user(false, path, num_queues as u64, false)?;

        // Filling device and vring features VMM supports.
        let avail_features = DEFAULT_VIRTIO_FEATURES;
        let avail_protocol_features = VhostUserProtocolFeatures::CONFIG
            | VhostUserProtocolFeatures::BACKEND_REQ
            | VhostUserProtocolFeatures::SHMEM
            | VhostUserProtocolFeatures::REPLY_ACK
            | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS;
        let (acked_features, acked_protocol_features) =
            vu.negotiate_features_vhost_user(avail_features, avail_protocol_features)?;
        let backend_req_support =
            acked_protocol_features & VhostUserProtocolFeatures::BACKEND_REQ.bits() != 0;

        // Query shared memory regions.
        let shm_config = vu
            .socket_handle()
            .get_shmem_config()
            .map_err(Error::VhostUserGetShmemConfig)?;
        // Decode the sparse region array: the array index is the shm id,
        // and a zero size marks an unused slot.
        let regions: Vec<(u8, u64)> = shm_config
            .memory_sizes
            .iter()
            .enumerate()
            .filter(|&(_, &size)| size != 0)
            .take(shm_config.nregions as usize)
            .map(|(id, &size)| (id as u8, size))
            .collect();
        if regions.len() != 1 {
            error!(
                "Expected exactly 1 shared memory region from GPU backend, got {}",
                regions.len()
            );
            return Err(Error::VhostUserUnexpectedShmemRegionCount(1, regions.len()));
        }
        let (shm_id, shm_length) = regions[0];

        let gpu = Gpu {
            common: VirtioCommon {
                device_type: VirtioDeviceType::Gpu as u32,
                avail_features: acked_features,
                // NOTE(review): only the PROTOCOL_FEATURES bit is pre-acked
                // here, mirroring the other vhost-user devices; the guest
                // acks the remaining feature bits at activation time.
                acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
                queue_sizes: vec![DEFAULT_QUEUE_SIZE; num_queues],
                paused_sync: Some(Arc::new(Barrier::new(2))),
                min_queues: DEFAULT_QUEUE_NUMBER as u16,
                paused: Arc::new(AtomicBool::new(false)),
                ..Default::default()
            },
            vu_common: VhostUserCommon {
                vu: Some(Arc::new(Mutex::new(vu))),
                acked_protocol_features,
                socket_path: path.to_string(),
                vu_num_queues: num_queues,
                ..Default::default()
            },
            id,
            cache: None,
            backend_req_support,
            seccomp_action,
            guest_memory: None,
            epoll_thread: None,
            exit_evt,
            iommu,
        };

        Ok((
            gpu,
            ShmemRegion {
                id: shm_id,
                length: shm_length,
            },
        ))
    }

    /// Attach the shared memory region (allocated by the device manager)
    /// that backs the device's PCI shared memory BAR.
    pub fn set_cache(&mut self, cache: VirtioSharedMemoryList) {
        self.cache = Some(cache);
    }
}
impl Drop for Gpu {
fn drop(&mut self) {
if let Some(kill_evt) = self.common.kill_evt.take() {
let _ = kill_evt.write(1);
}
self.common.wait_for_epoll_threads();
if let Some(thread) = self.epoll_thread.take()
&& let Err(e) = thread.join()
{
error!("Error joining thread: {e:?}");
}
}
}
impl VirtioDevice for Gpu {
    fn device_type(&self) -> u32 {
        self.common.device_type
    }

    fn queue_max_sizes(&self) -> &[u16] {
        &self.common.queue_sizes
    }

    /// Advertise the backend-negotiated features, plus
    /// VIRTIO_F_IOMMU_PLATFORM when the device sits behind an IOMMU.
    fn features(&self) -> u64 {
        let mut features = self.common.avail_features;
        if self.iommu {
            features |= 1u64 << VIRTIO_F_IOMMU_PLATFORM;
        }
        features
    }

    fn ack_features(&mut self, value: u64) {
        self.common.ack_features(value);
    }

    /// Read the device config space by forwarding the request to the
    /// vhost-user backend; failures are logged, not propagated.
    fn read_config(&self, offset: u64, mut data: &mut [u8]) {
        if let Some(vu) = &self.vu_common.vu
            && let Err(e) = vu
                .lock()
                .unwrap()
                .socket_handle()
                .get_config(
                    offset as u32,
                    data.len() as u32,
                    VhostUserConfigFlags::WRITABLE,
                    data,
                )
                .map_err(|e| format!("{e:?}"))
                .and_then(|(_, config)| {
                    use std::io::Write;
                    data.write_all(&config).map_err(|e| format!("{e:?}"))
                })
        {
            error!("Failed getting vhost-user-gpu configuration: {e:?}");
        }
    }

    /// Activate the device: wire up the backend request handler (for
    /// SHMEM_MAP/UNMAP), hand the queues to the backend, and spawn the
    /// worker thread.
    fn activate(
        &mut self,
        mem: GuestMemoryAtomic<GuestMemoryMmap>,
        interrupt_cb: Arc<dyn VirtioInterrupt>,
        queues: Vec<(usize, Queue, EventFd)>,
    ) -> ActivateResult {
        self.common.activate(&queues, interrupt_cb.clone())?;
        self.guest_memory = Some(mem.clone());

        // Initialize backend communication. The handler is only created
        // when the backend negotiated BACKEND_REQ and the shared memory
        // region has been set up.
        let backend_req_handler = if self.backend_req_support {
            if let Some(cache) = self.cache.as_ref() {
                let vu_frontend_req_handler = Arc::new(BackendReqHandler {
                    mapping: cache.mapping.clone(),
                });
                let mut req_handler =
                    FrontendReqHandler::new(vu_frontend_req_handler).map_err(|e| {
                        crate::ActivateError::VhostUserGpuSetup(Error::FrontendReqHandlerCreation(
                            e,
                        ))
                    })?;
                if self.vu_common.acked_protocol_features
                    & VhostUserProtocolFeatures::REPLY_ACK.bits()
                    != 0
                {
                    req_handler.set_reply_ack_flag(true);
                }
                Some(req_handler)
            } else {
                None
            }
        } else {
            None
        };

        // Run a dedicated thread for handling potential reconnections with
        // the backend.
        let (kill_evt, pause_evt) = self.common.dup_eventfds();
        let mut handler = self.vu_common.activate(
            mem,
            &queues,
            interrupt_cb,
            self.common.acked_features,
            backend_req_handler,
            kill_evt,
            pause_evt,
        )?;

        let paused = self.common.paused.clone();
        let paused_sync = self.common.paused_sync.clone();

        let mut epoll_threads = Vec::new();
        spawn_virtio_thread(
            &self.id,
            &self.seccomp_action,
            Thread::VirtioVhostGpu,
            &mut epoll_threads,
            &self.exit_evt,
            move || handler.run(&paused, paused_sync.as_ref().unwrap()),
        )?;
        self.epoll_thread = Some(epoll_threads.remove(0));

        event!("virtio-device", "activated", "id", &self.id);
        Ok(())
    }

    /// Reset the device: resume if paused, reset the backend, and stop the
    /// worker thread. Returns the interrupt callback for reuse.
    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
        if self.common.pause_evt.take().is_some() {
            self.common.resume().ok()?;
        }

        if let Some(vu) = &self.vu_common.vu
            && let Err(e) = vu.lock().unwrap().reset_vhost_user()
        {
            error!("Failed to reset vhost-user daemon: {e:?}");
            return None;
        }

        if let Some(kill_evt) = self.common.kill_evt.take() {
            let _ = kill_evt.write(1);
        }

        event!("virtio-device", "reset", "id", &self.id);

        Some(self.common.interrupt_cb.take().unwrap())
    }

    fn shutdown(&mut self) {
        self.vu_common.shutdown();
    }

    /// Expose the shared memory region list so the transport can build the
    /// PCI shared memory BAR and capabilities.
    fn get_shm_regions(&self) -> Option<VirtioSharedMemoryList> {
        self.cache.clone()
    }

    /// Replace the shared memory region list (e.g. after restore); only
    /// valid once a region was set via set_cache().
    fn set_shm_regions(
        &mut self,
        shm_regions: VirtioSharedMemoryList,
    ) -> std::result::Result<(), crate::Error> {
        if let Some(cache) = self.cache.as_mut() {
            *cache = shm_regions;
            Ok(())
        } else {
            Err(crate::Error::SetShmRegionsNotSupported)
        }
    }

    /// Forward a newly hotplugged guest memory region to the backend.
    fn add_memory_region(
        &mut self,
        region: &Arc<GuestRegionMmap>,
    ) -> std::result::Result<(), crate::Error> {
        self.vu_common.add_memory_region(&self.guest_memory, region)
    }

    /// Report the shared memory mapping so it can be tracked (it is not
    /// mergeable guest RAM).
    fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
        let mut mappings = Vec::new();
        if let Some(cache) = self.cache.as_ref() {
            mappings.push(UserspaceMapping {
                mem_slot: cache.mem_slot,
                addr: cache.addr,
                mapping: cache.mapping.clone(),
                mergeable: false,
            });
        }
        mappings
    }
}
impl Pausable for Gpu {
    /// Pause the vhost-user backend first, then the virtio device state.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        self.vu_common.pause()?;
        self.common.pause()
    }

    /// Resume the virtio device state, wake the worker thread, then
    /// resume the vhost-user backend.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        self.common.resume()?;

        // Wake the worker thread in case it parked itself while paused.
        if let Some(epoll_thread) = &self.epoll_thread {
            epoll_thread.thread().unpark();
        }

        self.vu_common.resume()
    }
}
impl Snapshottable for Gpu {
    /// Identifier used for this device's snapshot node.
    fn id(&self) -> String {
        self.id.to_owned()
    }
}
// Marker impls: rely on the traits' default method implementations.
impl Transportable for Gpu {}
impl Migratable for Gpu {}

View file

@ -35,11 +35,13 @@ use crate::{
pub mod blk;
pub mod fs;
pub mod gpu;
pub mod net;
pub mod vu_common_ctrl;
pub use self::blk::Blk;
pub use self::fs::*;
pub use self::gpu::Gpu;
pub use self::net::Net;
pub use self::vu_common_ctrl::VhostUserConfig;
@ -147,6 +149,10 @@ pub enum Error {
NewMmapRegion(#[source] MmapRegionError),
#[error("Could not find the shm log region")]
MissingShmLogRegion,
#[error("Get shared memory config failed")]
VhostUserGetShmemConfig(#[source] VhostError),
#[error("Expected {0} shared memory regions; got {1}")]
VhostUserUnexpectedShmemRegionCount(usize, usize),
}
type Result<T> = std::result::Result<T, Error>;

View file

@ -51,8 +51,8 @@ use crate::config::RestoreConfig;
use crate::device_tree::DeviceTree;
use crate::vm::{Error as VmError, VmState};
use crate::vm_config::{
DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
VdpaConfig, VmConfig, VsockConfig,
};
/// API errors are sent back from the VMM API server through the ApiResponse.
@ -170,6 +170,10 @@ pub enum ApiError {
#[error("The fs could not be added to the VM")]
VmAddFs(#[source] VmError),
/// The gpu could not be added to the VM.
#[error("The gpu could not be added to the VM")]
VmAddGpu(#[source] VmError),
/// The pmem device could not be added to the VM.
#[error("The pmem device could not be added to the VM")]
VmAddPmem(#[source] VmError),
@ -340,6 +344,8 @@ pub trait RequestHandler {
fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> Result<Option<Vec<u8>>, VmError>;
fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> Result<Option<Vec<u8>>, VmError>;
fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> Result<Option<Vec<u8>>, VmError>;
fn vm_add_net(&mut self, net_cfg: NetConfig) -> Result<Option<Vec<u8>>, VmError>;
@ -539,6 +545,43 @@ impl ApiAction for VmAddFs {
}
}
/// API action: add a virtio-gpu device to a VM.
pub struct VmAddGpu;

impl ApiAction for VmAddGpu {
    type RequestBody = GpuConfig;
    type ResponseBody = Option<Body>;

    /// Build the boxed request closure that the VMM thread executes.
    fn request(
        &self,
        config: Self::RequestBody,
        response_sender: Sender<ApiResponse>,
    ) -> ApiRequest {
        Box::new(move |vmm| {
            info!("API request event: VmAddGpu {config:?}");

            let response = vmm
                .vm_add_gpu(config)
                .map_err(ApiError::VmAddGpu)
                .map(ApiResponsePayload::VmAction);

            response_sender
                .send(response)
                .map_err(VmmError::ApiResponseSend)?;

            Ok(false)
        })
    }

    /// Send the request over the API channel and wait for the response body.
    fn send(
        &self,
        api_evt: EventFd,
        api_sender: Sender<ApiRequest>,
        data: Self::RequestBody,
    ) -> ApiResult<Self::ResponseBody> {
        get_response_body(self, api_evt, api_sender, data)
    }
}
pub struct VmAddPmem;
impl ApiAction for VmAddPmem {

View file

@ -90,6 +90,12 @@ pub enum Error {
/// Error parsing filesystem parameters
#[error("Error parsing --fs")]
ParseFileSystem(#[source] OptionParserError),
/// GPU socket is missing
#[error("Error parsing --gpu: socket missing")]
ParseGpuSockMissing,
/// Error parsing GPU parameters
#[error("Error parsing --gpu")]
ParseGpu(#[source] OptionParserError),
/// Error parsing persistent memory parameters
#[error("Error parsing --pmem")]
ParsePersistentMemory(#[source] OptionParserError),
@ -393,6 +399,7 @@ pub struct VmParams<'a> {
pub rng: &'a str,
pub balloon: Option<&'a str>,
pub fs: Option<Vec<&'a str>>,
pub gpu: Option<Vec<&'a str>>,
pub pmem: Option<Vec<&'a str>>,
pub serial: &'a str,
pub console: &'a str,
@ -454,6 +461,9 @@ impl<'a> VmParams<'a> {
let fs: Option<Vec<&str>> = args
.get_many::<String>("fs")
.map(|x| x.map(|y| y as &str).collect());
let gpu: Option<Vec<&str>> = args
.get_many::<String>("gpu")
.map(|x| x.map(|y| y as &str).collect());
let pmem: Option<Vec<&str>> = args
.get_many::<String>("pmem")
.map(|x| x.map(|y| y as &str).collect());
@ -508,6 +518,7 @@ impl<'a> VmParams<'a> {
rng,
balloon,
fs,
gpu,
pmem,
serial,
console,
@ -1701,6 +1712,49 @@ impl FsConfig {
}
}
impl GpuConfig {
    /// Help text describing the accepted `--gpu` option syntax.
    pub const SYNTAX: &'static str = "virtio-gpu parameters \
    \"socket=<socket_path>,id=<device_id>,pci_segment=<segment_id>\"";

    /// Parse a `--gpu` option string into a `GpuConfig`.
    ///
    /// `socket` is mandatory; `id` is optional; `pci_segment` defaults to 0.
    pub fn parse(gpu: &str) -> Result<Self> {
        let mut parser = OptionParser::new();
        parser.add("socket").add("id").add("pci_segment");
        parser.parse(gpu).map_err(Error::ParseGpu)?;

        let socket = PathBuf::from(parser.get("socket").ok_or(Error::ParseGpuSockMissing)?);
        let id = parser.get("id");
        let pci_segment = parser
            .convert("pci_segment")
            .map_err(Error::ParseGpu)?
            .unwrap_or_default();

        Ok(GpuConfig {
            socket,
            id,
            pci_segment,
        })
    }

    /// Validate the device against platform-level PCI segment constraints:
    /// the segment must exist and must not be handled by the virtual IOMMU.
    pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> {
        if let Some(platform_config) = vm_config.platform.as_ref() {
            if self.pci_segment >= platform_config.num_pci_segments {
                return Err(ValidationError::InvalidPciSegment(self.pci_segment));
            }

            if let Some(iommu_segments) = platform_config.iommu_segments.as_ref()
                && iommu_segments.contains(&self.pci_segment)
            {
                return Err(ValidationError::IommuNotSupportedOnSegment(
                    self.pci_segment,
                ));
            }
        }

        Ok(())
    }
}
#[cfg(feature = "fw_cfg")]
impl FwCfgConfig {
pub const SYNTAX: &'static str = "Boot params to pass to FW CFG device \
@ -2723,6 +2777,13 @@ impl VmConfig {
}
}
if let Some(gpus) = &self.gpu {
for gpu in gpus {
gpu.validate(self)?;
Self::validate_identifier(&mut id_list, &gpu.id)?;
}
}
if let Some(pmems) = &self.pmem {
for pmem in pmems {
pmem.validate(self)?;
@ -2976,6 +3037,15 @@ impl VmConfig {
fs = Some(fs_config_list);
}
let mut gpu: Option<Vec<GpuConfig>> = None;
if let Some(gpu_list) = &vm_params.gpu {
let mut gpu_config_list = Vec::new();
for item in gpu_list.iter() {
gpu_config_list.push(GpuConfig::parse(item)?);
}
gpu = Some(gpu_config_list);
}
let mut pmem: Option<Vec<PmemConfig>> = None;
if let Some(pmem_list) = &vm_params.pmem {
let mut pmem_config_list = Vec::new();
@ -3112,6 +3182,7 @@ impl VmConfig {
rng,
balloon,
fs,
gpu,
pmem,
serial,
console,
@ -3173,6 +3244,13 @@ impl VmConfig {
removed |= fs.len() != len;
}
// Remove if gpu device
if let Some(gpu_list) = self.gpu.as_mut() {
let len = gpu_list.len();
gpu_list.retain(|dev| dev.id.as_ref().map(|id| id.as_ref()) != Some(id));
removed |= gpu_list.len() != len;
}
// Remove if net device
if let Some(net) = self.net.as_mut() {
let len = net.len();
@ -3245,6 +3323,7 @@ impl Clone for VmConfig {
#[cfg(feature = "pvmemcontrol")]
pvmemcontrol: self.pvmemcontrol.clone(),
fs: self.fs.clone(),
gpu: self.gpu.clone(),
pmem: self.pmem.clone(),
serial: self.serial.clone(),
console: self.console.clone(),
@ -4153,6 +4232,7 @@ mod unit_tests {
rng: RngConfig::default(),
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: default_serial(),
console: default_console(),
@ -4356,6 +4436,7 @@ mod unit_tests {
},
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: ConsoleConfig {
file: None,

View file

@ -94,7 +94,7 @@ use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, Virti
use virtio_devices::vhost_user::VhostUserConfig;
use virtio_devices::{
AccessPlatformMapping, ActivateError, Block, Endpoint, IommuMapping, VdpaDmaMapping,
VirtioMemMappingSource,
VirtioMemMappingSource, VirtioSharedMemory, VirtioSharedMemoryList,
};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::ExternalDmaMapping;
@ -127,8 +127,8 @@ use crate::serial_manager::{Error as SerialManagerError, SerialManager};
use crate::vm_config::IvshmemConfig;
use crate::vm_config::{
ConsoleOutputMode, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT,
DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
VhostMode, VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
VdpaConfig, VhostMode, VmConfig, VsockConfig,
};
use crate::{DEVICE_MANAGER_SNAPSHOT_ID, GuestRegionMmap, PciDeviceInfo, device_node};
@ -157,6 +157,7 @@ const IVSHMEM_DEVICE_NAME: &str = "__ivshmem";
// identifiers if the user doesn't give one
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const GPU_DEVICE_NAME_PREFIX: &str = "_gpu";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
@ -205,6 +206,18 @@ pub enum DeviceManagerError {
#[error("Virtio-fs device was created without a socket")]
NoVirtioFsSock,
/// Cannot create virtio-gpu device
#[error("Cannot create virtio-gpu device")]
CreateVirtioGpu(#[source] virtio_devices::vhost_user::Error),
/// Virtio-gpu device was created without a socket.
#[error("Virtio-gpu device was created without a socket")]
NoVirtioGpuSock,
/// Cannot find a memory range for virtio-gpu
#[error("Cannot find a memory range for virtio-gpu")]
GpuRangeAllocation,
/// Cannot create vhost-user-blk device
#[error("Cannot create vhost-user-blk device")]
CreateVhostUserBlk(#[source] virtio_devices::vhost_user::Error),
@ -2548,6 +2561,9 @@ impl DeviceManager {
// Add virtio-fs if required
self.make_virtio_fs_devices()?;
// Add virtio-gpu if required
self.make_virtio_gpu_devices()?;
// Add virtio-pmem if required
self.make_virtio_pmem_devices()?;
@ -3146,6 +3162,118 @@ impl DeviceManager {
Ok(())
}
/// Create a single vhost-user GPU device from `gpu_cfg`.
///
/// Connects to the backend, allocates a 64-bit PCI memory range for the
/// backend-reported shared memory region, registers a PROT_NONE placeholder
/// mapping for it with the memory manager (the backend later populates it
/// via SHMEM_MAP requests), and records the device in the device tree.
fn make_virtio_gpu_device(
    &mut self,
    gpu_cfg: &mut GpuConfig,
) -> DeviceManagerResult<MetaVirtioDevice> {
    // Use the configured id or generate a unique "_gpuN" one, writing it
    // back into the config so it persists.
    let id = if let Some(id) = &gpu_cfg.id {
        id.clone()
    } else {
        let id = self.next_device_name(GPU_DEVICE_NAME_PREFIX)?;
        gpu_cfg.id = Some(id.clone());
        id
    };

    info!("Creating virtio-gpu device: {gpu_cfg:?}");

    let mut node = device_node!(id);

    if let Some(gpu_socket) = gpu_cfg.socket.to_str() {
        let (mut virtio_gpu_device, region) = virtio_devices::vhost_user::Gpu::new(
            id.clone(),
            gpu_socket,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            self.force_iommu,
        )
        .map_err(DeviceManagerError::CreateVirtioGpu)?;

        // Allocate the shared memory BAR region.
        let cache_base = self.pci_segments[gpu_cfg.pci_segment as usize]
            .mem64_allocator
            .lock()
            .unwrap()
            .allocate(None, region.length, Some(region.length))
            .ok_or(DeviceManagerError::GpuRangeAllocation)?
            .raw_value();

        node.resources.push(Resource::MmioAddressRange {
            base: cache_base,
            size: region.length,
        });

        // Reserve the host virtual address range with an inaccessible
        // anonymous mapping; the backend maps real buffers into it later
        // through SHMEM_MAP.
        let mmap_region = MmapRegion::build(
            None,
            region.length as usize,
            libc::PROT_NONE,
            libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;

        // SAFETY: mmap_region.size() and mmap_region.as_ptr() refer to a valid allocation.
        let mem_slot = unsafe {
            self.memory_manager
                .lock()
                .unwrap()
                .create_userspace_mapping(
                    cache_base,
                    mmap_region.size(),
                    mmap_region.as_ptr(),
                    false,
                    false,
                    false,
                )
                .map_err(DeviceManagerError::MemoryManager)?
        };

        // Single shared memory region at offset 0, keyed by the
        // backend-reported shm id.
        let region_list = std::iter::once((
            region.id,
            VirtioSharedMemory {
                offset: 0,
                len: region.length,
            },
        ))
        .collect();

        virtio_gpu_device.set_cache(VirtioSharedMemoryList {
            mapping: Arc::new(mmap_region),
            mem_slot,
            addr: GuestAddress(cache_base),
            region_list,
        });

        let virtio_gpu_device = Arc::new(Mutex::new(virtio_gpu_device));

        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_gpu_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id,
            pci_segment: gpu_cfg.pci_segment,
            dma_handler: None,
        })
    } else {
        Err(DeviceManagerError::NoVirtioGpuSock)
    }
}
/// Create every virtio-gpu device listed in the VM configuration.
fn make_virtio_gpu_devices(&mut self) -> DeviceManagerResult<()> {
    // Take the GPU configs out of the shared VmConfig so its lock is not
    // held while each device is instantiated.
    let mut gpu_configs = self.config.lock().unwrap().gpu.take();
    if let Some(configs) = gpu_configs.as_mut() {
        for cfg in configs.iter_mut() {
            let device = self.make_virtio_gpu_device(cfg)?;
            self.virtio_devices.push(device);
        }
    }
    // Store the configs back; ids may have been filled in during creation.
    self.config.lock().unwrap().gpu = gpu_configs;
    Ok(())
}
fn make_virtio_pmem_device(
&mut self,
pmem_cfg: &mut PmemConfig,
@ -4876,6 +5004,13 @@ impl DeviceManager {
self.hotplug_virtio_pci_device(device)
}
/// Hotplug a virtio-gpu device (entry point for the add-gpu API).
pub fn add_gpu(&mut self, gpu_cfg: &mut GpuConfig) -> DeviceManagerResult<PciDeviceInfo> {
    // Reject duplicate user-supplied device identifiers up front.
    self.validate_identifier(&gpu_cfg.id)?;

    let device = self.make_virtio_gpu_device(gpu_cfg)?;
    self.hotplug_virtio_pci_device(device)
}
pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
self.validate_identifier(&pmem_cfg.id)?;

View file

@ -59,8 +59,8 @@ use crate::migration::{recv_vm_config, recv_vm_state};
use crate::seccomp_filters::{Thread, get_seccomp_filter};
use crate::vm::{Error as VmError, Vm, VmState};
use crate::vm_config::{
DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
VdpaConfig, VmConfig, VsockConfig,
};
mod acpi;
@ -2125,6 +2125,29 @@ impl RequestHandler for Vmm {
}
}
/// API handler: add a virtio-gpu device to the VM.
///
/// Validates the updated configuration on a clone first; if the VM is
/// running the device is hotplugged, otherwise the config change is only
/// recorded for the next boot.
fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> result::Result<Option<Vec<u8>>, VmError> {
    self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;

    {
        // Validate against a clone so a rejected request leaves the
        // stored configuration untouched.
        let mut config = self.vm_config.as_ref().unwrap().lock().unwrap().clone();
        add_to_config(&mut config.gpu, gpu_cfg.clone());
        config.validate().map_err(VmError::ConfigValidation)?;
    }

    if let Some(ref mut vm) = self.vm {
        let info = vm.add_gpu(gpu_cfg).inspect_err(|e| {
            error!("Error when adding new gpu to the VM: {e:?}");
        })?;
        serde_json::to_vec(&info)
            .map(Some)
            .map_err(VmError::SerializeJson)
    } else {
        // VM not started yet: just record the device in the config.
        let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
        add_to_config(&mut config.gpu, gpu_cfg);
        Ok(None)
    }
}
fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
@ -2443,6 +2466,7 @@ mod unit_tests {
},
balloon: None,
fs: None,
gpu: None,
pmem: None,
serial: ConsoleConfig {
file: None,

View file

@ -100,8 +100,8 @@ use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path};
#[cfg(feature = "fw_cfg")]
use crate::vm_config::FwCfgConfig;
use crate::vm_config::{
DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, NumaConfig, PayloadConfig,
PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
DeviceConfig, DiskConfig, FsConfig, GpuConfig, HotplugMethod, NetConfig, NumaConfig,
PayloadConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
};
use crate::{
CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, GuestMemoryMmap,
@ -2136,6 +2136,28 @@ impl Vm {
Ok(pci_device_info)
}
/// Hotplug a virtio-gpu device into the running VM, record it in the VM
/// configuration, and notify the guest of the PCI topology change.
pub fn add_gpu(&mut self, mut gpu_cfg: GpuConfig) -> Result<PciDeviceInfo> {
    let pci_device_info = self
        .device_manager
        .lock()
        .unwrap()
        .add_gpu(&mut gpu_cfg)
        .map_err(Error::DeviceManager)?;

    // Update the VM config (gpu_cfg may have had its id filled in by the
    // device manager) inside a short-lived lock scope.
    {
        let mut config = self.config.lock().unwrap();
        add_to_config(&mut config.gpu, gpu_cfg);
    }

    self.device_manager
        .lock()
        .unwrap()
        .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
        .map_err(Error::DeviceManager)?;

    Ok(pci_device_info)
}
pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result<PciDeviceInfo> {
let pci_device_info = self
.device_manager

View file

@ -469,6 +469,22 @@ impl ApplyLandlock for FsConfig {
}
}
/// User-facing configuration for a vhost-user virtio-gpu device.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct GpuConfig {
    /// Path to the backend's vhost-user Unix socket.
    pub socket: PathBuf,
    /// Optional device identifier; auto-generated when absent.
    #[serde(default)]
    pub id: Option<String>,
    /// PCI segment the device is placed on (defaults to 0).
    #[serde(default)]
    pub pci_segment: u16,
}
impl ApplyLandlock for GpuConfig {
    /// Grant read/write access to the vhost-user socket path under Landlock.
    fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> {
        landlock.add_rule_with_access(&self.socket, "rw")?;
        Ok(())
    }
}
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct PmemConfig {
pub file: PathBuf,
@ -922,6 +938,7 @@ pub struct VmConfig {
pub rng: RngConfig,
pub balloon: Option<BalloonConfig>,
pub fs: Option<Vec<FsConfig>>,
pub gpu: Option<Vec<GpuConfig>>,
pub pmem: Option<Vec<PmemConfig>>,
#[serde(default = "default_serial")]
pub serial: ConsoleConfig,
@ -997,6 +1014,12 @@ impl VmConfig {
}
}
if let Some(gpu_configs) = &self.gpu {
for gpu_config in gpu_configs.iter() {
gpu_config.apply_landlock(&mut landlock)?;
}
}
if let Some(pmem_configs) = &self.pmem {
for pmem_config in pmem_configs.iter() {
pmem_config.apply_landlock(&mut landlock)?;