From c22c15b9636c8ad66fe9fe62b1bd9f3e265d6cce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dav=C3=AD=C3=B0=20Steinn=20Geirsson?= <david@dsg.is>
Date: Wed, 18 Mar 2026 17:46:55 +0000
Subject: [PATCH] virtio-devices: add vhost-user GPU device

Add a vhost-user frontend for virtio-gpu using the current vhost-user
protocol (GET_SHMEM_CONFIG=44, SHMEM_MAP/UNMAP=9/10, SHMEM=0x0020_0000).

The GPU device queries the backend for shared memory regions via
get_shmem_config(), allocates a PCI BAR for the shmem region, and
implements a BackendReqHandler that handles SHMEM_MAP/UNMAP requests
by mmapping file descriptors from the backend into the shared memory
region.

To support non-contiguous shared memory region IDs (needed for
VIRTIO_GPU_SHM_ID_HOST_VISIBLE=1), VirtioSharedMemoryList.region_list
is changed from Vec<VirtioSharedMemory> to BTreeMap<u8, VirtioSharedMemory>,
and the PCI BAR code uses the map key as the PCI capability shmid.

Includes full VMM plumbing: GpuConfig, --gpu CLI, API, device_manager,
seccomp rules, and hotplug support.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cloud-hypervisor/src/main.rs               |   1 +
 virtio-devices/src/device.rs               |   4 +-
 virtio-devices/src/lib.rs                  |   4 +-
 virtio-devices/src/seccomp_filters.rs      |  13 +
 virtio-devices/src/transport/pci_device.rs |   4 +-
 virtio-devices/src/vhost_user/gpu.rs       | 460 +++++++++++++++++++++
 virtio-devices/src/vhost_user/mod.rs       |   6 +
 vmm/src/api/mod.rs                         |  47 ++-
 vmm/src/config.rs                          |  81 ++++
 vmm/src/device_manager.rs                  | 141 ++++++-
 vmm/src/lib.rs                             |  28 +-
 vmm/src/vm.rs                              |  26 +-
 vmm/src/vm_config.rs                       |  23 ++
 13 files changed, 824 insertions(+), 14 deletions(-)
 create mode 100644 virtio-devices/src/vhost_user/gpu.rs
diff --git a/cloud-hypervisor/src/main.rs b/cloud-hypervisor/src/main.rs
index 3abc382f3..0f4eb5a60 100644
--- a/cloud-hypervisor/src/main.rs
+++ b/cloud-hypervisor/src/main.rs
@@ -998,6 +998,7 @@ mod unit_tests {
             },
             balloon: None,
             fs: None,
+            gpu: None,
             pmem: None,
             serial: ConsoleConfig {
                 file: None,
diff --git a/virtio-devices/src/device.rs b/virtio-devices/src/device.rs
index f0ed28f51..892bf08ee 100644
--- a/virtio-devices/src/device.rs
+++ b/virtio-devices/src/device.rs
@@ -6,7 +6,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::io::Write;
 use std::num::Wrapping;
 use std::sync::atomic::{AtomicBool, Ordering};
@@ -50,7 +50,7 @@ pub struct VirtioSharedMemoryList {
     pub mem_slot: u32,
     pub addr: GuestAddress,
     pub mapping: Arc<MmapRegion>,
-    pub region_list: Vec<VirtioSharedMemory>,
+    pub region_list: BTreeMap<u8, VirtioSharedMemory>,
 }
 
 /// Trait for virtio devices to be driven by a virtio transport.
diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs
index b52282921..7efc4bf8c 100644
--- a/virtio-devices/src/lib.rs
+++ b/virtio-devices/src/lib.rs
@@ -43,7 +43,7 @@ pub use self::block::{Block, BlockState};
 pub use self::console::{Console, ConsoleResizer, Endpoint};
 pub use self::device::{
     DmaRemapping, VirtioCommon, VirtioDevice, VirtioInterrupt, VirtioInterruptType,
-    VirtioSharedMemoryList,
+    VirtioSharedMemory, VirtioSharedMemoryList,
 };
 pub use self::epoll_helper::{
     EPOLL_HELPER_EVENT_LAST, EpollHelper, EpollHelperError, EpollHelperHandler,
@@ -90,6 +90,8 @@ pub enum ActivateError {
     VhostUserFsSetup(#[source] vhost_user::Error),
     #[error("Failed to setup vhost-user daemon")]
     VhostUserSetup(#[source] vhost_user::Error),
+    #[error("Failed to setup vhost-user-gpu daemon")]
+    VhostUserGpuSetup(#[source] vhost_user::Error),
     #[error("Failed to create seccomp filter")]
     CreateSeccompFilter(#[source] seccompiler::Error),
     #[error("Failed to create rate limiter")]
diff --git a/virtio-devices/src/seccomp_filters.rs b/virtio-devices/src/seccomp_filters.rs
index 5afd056a6..59f364320 100644
--- a/virtio-devices/src/seccomp_filters.rs
+++ b/virtio-devices/src/seccomp_filters.rs
@@ -24,6 +24,7 @@ pub enum Thread {
     VirtioRng,
     VirtioVhostBlock,
     VirtioVhostFs,
+    VirtioVhostGpu,
     VirtioVhostNet,
     VirtioVhostNetCtl,
     VirtioVsock,
@@ -192,6 +193,17 @@ fn virtio_vhost_fs_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
     ]
 }
 
+fn virtio_vhost_gpu_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
+    vec![
+        (libc::SYS_clock_nanosleep, vec![]),
+        (libc::SYS_connect, vec![]),
+        (libc::SYS_nanosleep, vec![]),
+        (libc::SYS_recvmsg, vec![]),
+        (libc::SYS_sendmsg, vec![]),
+        (libc::SYS_socket, vec![]),
+    ]
+}
+
 fn virtio_vhost_net_ctl_thread_rules() -> Vec<(i64, Vec<SeccompRule>)> {
     vec![]
 }
@@ -271,6 +283,7 @@ fn get_seccomp_rules(thread_type: Thread) -> Vec<(i64, Vec<SeccompRule>)> {
         Thread::VirtioRng => virtio_rng_thread_rules(),
         Thread::VirtioVhostBlock => virtio_vhost_block_thread_rules(),
         Thread::VirtioVhostFs => virtio_vhost_fs_thread_rules(),
+        Thread::VirtioVhostGpu => virtio_vhost_gpu_thread_rules(),
         Thread::VirtioVhostNet => virtio_vhost_net_thread_rules(),
         Thread::VirtioVhostNetCtl => virtio_vhost_net_ctl_thread_rules(),
         Thread::VirtioVsock => virtio_vsock_thread_rules(),
diff --git a/virtio-devices/src/transport/pci_device.rs b/virtio-devices/src/transport/pci_device.rs
index 408611e29..6aaffcef7 100644
--- a/virtio-devices/src/transport/pci_device.rs
+++ b/virtio-devices/src/transport/pci_device.rs
@@ -1036,11 +1036,11 @@ impl PciDevice for VirtioPciDevice {
                     PciDeviceError::IoRegistrationFailed(shm_list.addr.raw_value(), e)
                 })?;
 
-                for (idx, shm) in shm_list.region_list.iter().enumerate() {
+                for (&shmid, shm) in shm_list.region_list.iter() {
                     let shm_cap = VirtioPciCap64::new(
                         PciCapabilityType::SharedMemory,
                         VIRTIO_SHM_BAR_INDEX as u8,
-                        idx as u8,
+                        shmid,
                         shm.offset,
                         shm.len,
                     );
diff --git a/virtio-devices/src/vhost_user/gpu.rs b/virtio-devices/src/vhost_user/gpu.rs
new file mode 100644
index 000000000..497081bbf
--- /dev/null
+++ b/virtio-devices/src/vhost_user/gpu.rs
@@ -0,0 +1,460 @@
+// Copyright © 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+use std::io;
+use std::os::unix::io::AsRawFd;
+use std::sync::atomic::AtomicBool;
+use std::sync::{Arc, Barrier, Mutex};
+use std::{result, thread};
+
+use event_monitor::event;
+use log::error;
+use seccompiler::SeccompAction;
+use vhost::vhost_user::message::{
+    VhostUserConfigFlags, VhostUserMMap, VhostUserMMapFlags, VhostUserProtocolFeatures,
+    VhostUserVirtioFeatures,
+};
+use vhost::vhost_user::{FrontendReqHandler, VhostUserFrontend, VhostUserFrontendReqHandler};
+use virtio_queue::Queue;
+use vm_device::UserspaceMapping;
+use vm_memory::GuestMemoryAtomic;
+use vm_migration::{Migratable, MigratableError, Pausable, Snapshottable, Transportable};
+use vmm_sys_util::eventfd::EventFd;
+
+use super::vu_common_ctrl::VhostUserHandle;
+use super::{DEFAULT_VIRTIO_FEATURES, Error, Result};
+use crate::seccomp_filters::Thread;
+use crate::thread_helper::spawn_virtio_thread;
+use crate::vhost_user::VhostUserCommon;
+use crate::{
+    ActivateResult, GuestMemoryMmap, GuestRegionMmap, MmapRegion, VIRTIO_F_IOMMU_PLATFORM,
+    VirtioCommon, VirtioDevice, VirtioDeviceType, VirtioInterrupt, VirtioSharedMemoryList,
+};
+
+const DEFAULT_QUEUE_NUMBER: usize = 2;
+const DEFAULT_QUEUE_SIZE: u16 = 256;
+
+struct BackendReqHandler {
+    mapping: Arc<MmapRegion>,
+}
+
+impl BackendReqHandler {
+    /// Translates a backend-supplied `(offset, len)` window into a pointer and length.
+    ///
+    /// The pointer is `self.mapping.as_ptr()` advanced by `offset`; the length is `len`
+    /// converted to `usize`.
+    ///
+    /// Fails with `EINVAL` when either value does not fit in `usize`, when their sum
+    /// overflows, or when the window ends past `self.mapping.size()`.
+    fn checked_region(&self, offset: u64, len: u64) -> io::Result<(*mut u8, usize)> {
+        let einval = || io::Error::from_raw_os_error(libc::EINVAL);
+        let start = usize::try_from(offset).map_err(|_| einval())?;
+        let size = usize::try_from(len).map_err(|_| einval())?;
+
+        match start.checked_add(size) {
+            Some(end) if end <= self.mapping.size() => {
+                // SAFETY: `start <= end <= self.mapping.size()`, so the offset pointer
+                // stays within (or one past the end of) the mapped region.
+                let ptr = unsafe { self.mapping.as_ptr().add(start) };
+                Ok((ptr, size))
+            }
+            _ => Err(einval()),
+        }
+    }
+}
+
+impl VhostUserFrontendReqHandler for BackendReqHandler {
+    /// Handles a backend map request by mapping `fd` into the shared memory region.
+    ///
+    /// The target window (`req.shm_offset`, `req.len`) is validated by `checked_region`
+    /// and `fd` is then mapped over it with `MAP_FIXED`, starting at `req.fd_offset`.
+    /// The mapping is read-write when `req.flags` has `WRITABLE` set, read-only otherwise.
+    ///
+    /// Returns `Ok(0)` on success. Fails with `EINVAL` for an invalid window or a file
+    /// offset that does not fit in `off_t`, or with the OS error reported for `mmap`.
+    fn shmem_map(
+        &self,
+        req: &VhostUserMMap,
+        fd: &dyn AsRawFd,
+    ) -> vhost::vhost_user::HandlerResult<u64> {
+        let (ptr, len) = self.checked_region(req.shm_offset, req.len)?;
+
+        // `fd_offset` comes from the backend: reject values that do not fit in `off_t`
+        // rather than letting an `as` cast silently wrap them.
+        let fd_offset = libc::off_t::try_from(req.fd_offset)
+            .map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
+
+        let writable = VhostUserMMapFlags::from_bits_truncate(req.flags)
+            .contains(VhostUserMMapFlags::WRITABLE);
+        // MAP_SHARED is only used for writable mappings, see
+        // https://bugzilla.kernel.org/show_bug.cgi?id=217238
+        let (prot, flags) = if writable {
+            (libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED | libc::MAP_FIXED)
+        } else {
+            (libc::PROT_READ, libc::MAP_PRIVATE | libc::MAP_FIXED)
+        };
+
+        // SAFETY: we've checked we're only giving addr and length within the
+        // region, and are passing MAP_FIXED to ensure they are respected.
+        let ret = unsafe { libc::mmap(ptr.cast(), len, prot, flags, fd.as_raw_fd(), fd_offset) };
+
+        if ret == libc::MAP_FAILED {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(0)
+    }
+
+    /// Handles a backend unmap request for a window of the shared memory region.
+    ///
+    /// The window (`req.shm_offset`, `req.len`) is validated by `checked_region` and then
+    /// replaced, via `MAP_FIXED`, with an anonymous private `PROT_NONE` mapping instead
+    /// of being unmapped with `munmap`.
+    ///
+    /// Returns `Ok(0)` on success, `EINVAL` for an invalid window, or the `mmap` OS error.
+    fn shmem_unmap(&self, req: &VhostUserMMap) -> vhost::vhost_user::HandlerResult<u64> {
+        let (ptr, len) = self.checked_region(req.shm_offset, req.len)?;
+        let flags = libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED;
+
+        // SAFETY: we've checked we're only giving addr and length within the
+        // region, and are passing MAP_FIXED to ensure they are respected.
+        let ret = unsafe { libc::mmap(ptr.cast(), len, libc::PROT_NONE, flags, -1, 0) };
+
+        if ret == libc::MAP_FAILED {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(0)
+    }
+}
+
+/// Id and length of the backend's shared memory region, returned by `Gpu::new`.
+pub struct ShmemRegion {
+    pub id: u8,
+    pub length: u64,
+}
+
+pub struct Gpu {
+    common: VirtioCommon,
+    vu_common: VhostUserCommon,
+    id: String,
+    cache: Option<VirtioSharedMemoryList>,
+    backend_req_support: bool,
+    seccomp_action: SeccompAction,
+    guest_memory: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
+    epoll_thread: Option<thread::JoinHandle<()>>,
+    exit_evt: EventFd,
+    iommu: bool,
+}
+
+impl Gpu {
+    /// Create a new virtio-gpu device backed by the vhost-user socket at `path`.
+    ///
+    /// Connects to the backend, negotiates virtio and vhost-user protocol features and
+    /// queries the backend's shared memory configuration. Exactly one non-empty shared
+    /// memory region is expected, otherwise `VhostUserUnexpectedShmemRegionCount` is
+    /// returned; on success its id and length are handed back next to the device.
+    pub fn new(
+        id: String,
+        path: &str,
+        seccomp_action: SeccompAction,
+        exit_evt: EventFd,
+        iommu: bool,
+    ) -> Result<(Gpu, ShmemRegion)> {
+        let queue_count = DEFAULT_QUEUE_NUMBER;
+        // Connect to the vhost-user socket.
+        let mut vu = VhostUserHandle::connect_vhost_user(false, path, queue_count as u64, false)?;
+
+        // Protocol features the VMM offers to the backend.
+        let offered_protocol_features = VhostUserProtocolFeatures::CONFIG
+            | VhostUserProtocolFeatures::BACKEND_REQ
+            | VhostUserProtocolFeatures::SHMEM
+            | VhostUserProtocolFeatures::REPLY_ACK
+            | VhostUserProtocolFeatures::CONFIGURE_MEM_SLOTS;
+
+        let (acked_features, acked_protocol_features) =
+            vu.negotiate_features_vhost_user(DEFAULT_VIRTIO_FEATURES, offered_protocol_features)?;
+        let backend_req_support =
+            (acked_protocol_features & VhostUserProtocolFeatures::BACKEND_REQ.bits()) != 0;
+
+        // Query shared memory regions.
+        let shm_config = vu
+            .socket_handle()
+            .get_shmem_config()
+            .map_err(Error::VhostUserGetShmemConfig)?;
+
+        // Decode the sparse region array: each non-zero size is a region whose id is its index.
+        let regions: Vec<(u8, u64)> = shm_config
+            .memory_sizes
+            .iter()
+            .enumerate()
+            .filter_map(|(id, &size)| (size != 0).then_some((id as u8, size)))
+            .take(shm_config.nregions as usize)
+            .collect();
+
+        let &[(shm_id, shm_length)] = regions.as_slice() else {
+            error!(
+                "Expected exactly 1 shared memory region from GPU backend, got {}",
+                regions.len()
+            );
+            return Err(Error::VhostUserUnexpectedShmemRegionCount(1, regions.len()));
+        };
+        let region = ShmemRegion {
+            id: shm_id,
+            length: shm_length,
+        };
+
+        let common = VirtioCommon {
+            device_type: VirtioDeviceType::Gpu as u32,
+            avail_features: acked_features,
+            // Only the PROTOCOL_FEATURES bit of the negotiated set starts out acked.
+            acked_features: acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(),
+            queue_sizes: vec![DEFAULT_QUEUE_SIZE; queue_count],
+            paused_sync: Some(Arc::new(Barrier::new(2))),
+            min_queues: DEFAULT_QUEUE_NUMBER as u16,
+            paused: Arc::new(AtomicBool::new(false)),
+            ..Default::default()
+        };
+        let vu_common = VhostUserCommon {
+            vu: Some(Arc::new(Mutex::new(vu))),
+            acked_protocol_features,
+            socket_path: path.to_string(),
+            vu_num_queues: queue_count,
+            ..Default::default()
+        };
+
+        let gpu = Gpu {
+            common,
+            vu_common,
+            id,
+            cache: None,
+            backend_req_support,
+            seccomp_action,
+            guest_memory: None,
+            epoll_thread: None,
+            exit_evt,
+            iommu,
+        };
+        Ok((gpu, region))
+    }
+
+    /// Stores `cache` as the device's shared memory region list.
+    pub fn set_cache(&mut self, cache: VirtioSharedMemoryList) {
+        self.cache = Some(cache);
+    }
+}
+
+impl Drop for Gpu {
+    /// Writes to the kill event if it is still present, calls `wait_for_epoll_threads`
+    /// on the common state, then joins the device's own epoll thread, logging a failure.
+    fn drop(&mut self) {
+        if let Some(kill_evt) = self.common.kill_evt.take() {
+            let _ = kill_evt.write(1);
+        }
+        self.common.wait_for_epoll_threads();
+        if let Some(Err(e)) = self.epoll_thread.take().map(thread::JoinHandle::join) {
+            error!("Error joining thread: {e:?}");
+        }
+    }
+}
+
+impl VirtioDevice for Gpu {
+    fn device_type(&self) -> u32 {
+        self.common.device_type
+    }
+
+    fn queue_max_sizes(&self) -> &[u16] {
+        &self.common.queue_sizes
+    }
+
+    /// Returns `avail_features`, plus `VIRTIO_F_IOMMU_PLATFORM` when `self.iommu` is set.
+    fn features(&self) -> u64 {
+        let iommu_bit = u64::from(self.iommu) << VIRTIO_F_IOMMU_PLATFORM;
+        self.common.avail_features | iommu_bit
+    }
+
+    fn ack_features(&mut self, value: u64) {
+        self.common.ack_features(value);
+    }
+
+    /// Fetches device configuration from the backend into `data`; failures are only logged.
+    /// Does nothing when no vhost-user handle is set.
+    fn read_config(&self, offset: u64, mut data: &mut [u8]) {
+        use std::io::Write;
+
+        let Some(vu) = &self.vu_common.vu else {
+            return;
+        };
+        let outcome = vu
+            .lock()
+            .unwrap()
+            .socket_handle()
+            .get_config(
+                offset as u32,
+                data.len() as u32,
+                VhostUserConfigFlags::WRITABLE,
+                data,
+            )
+            .map_err(|e| format!("{e:?}"))
+            .and_then(|(_, config)| data.write_all(&config).map_err(|e| format!("{e:?}")));
+        if let Err(e) = outcome {
+            error!("Failed getting vhost-user-gpu configuration: {e:?}");
+        }
+    }
+
+    /// Activates the device and starts the thread running the vhost-user handler.
+    ///
+    /// When `backend_req_support` is set and a shared memory cache is present, a
+    /// `BackendReqHandler` over the cache mapping is created to serve backend requests.
+    fn activate(
+        &mut self,
+        mem: GuestMemoryAtomic<GuestMemoryMmap>,
+        interrupt_cb: Arc<dyn VirtioInterrupt>,
+        queues: Vec<(usize, Queue, EventFd)>,
+    ) -> ActivateResult {
+        self.common.activate(&queues, interrupt_cb.clone())?;
+        self.guest_memory = Some(mem.clone());
+
+        // Initialize backend communication.
+        let backend_req_handler = match self.cache.as_ref() {
+            Some(cache) if self.backend_req_support => {
+                let shmem_handler = Arc::new(BackendReqHandler {
+                    mapping: cache.mapping.clone(),
+                });
+                let mut req_handler = FrontendReqHandler::new(shmem_handler).map_err(|e| {
+                    crate::ActivateError::VhostUserGpuSetup(Error::FrontendReqHandlerCreation(e))
+                })?;
+
+                let reply_ack = VhostUserProtocolFeatures::REPLY_ACK.bits();
+                if self.vu_common.acked_protocol_features & reply_ack != 0 {
+                    req_handler.set_reply_ack_flag(true);
+                }
+
+                Some(req_handler)
+            }
+            _ => None,
+        };
+
+        // Run a dedicated thread for handling potential reconnections with
+        // the backend.
+        let (kill_evt, pause_evt) = self.common.dup_eventfds();
+
+        let mut handler = self.vu_common.activate(
+            mem,
+            &queues,
+            interrupt_cb,
+            self.common.acked_features,
+            backend_req_handler,
+            kill_evt,
+            pause_evt,
+        )?;
+
+        let paused = self.common.paused.clone();
+        let paused_sync = self.common.paused_sync.clone();
+
+        let mut epoll_threads = Vec::new();
+        spawn_virtio_thread(
+            &self.id,
+            &self.seccomp_action,
+            Thread::VirtioVhostGpu,
+            &mut epoll_threads,
+            &self.exit_evt,
+            move || handler.run(&paused, paused_sync.as_ref().unwrap()),
+        )?;
+        self.epoll_thread = Some(epoll_threads.remove(0));
+
+        event!("virtio-device", "activated", "id", &self.id);
+        Ok(())
+    }
+
+    /// Resets the vhost-user backend and returns the interrupt callback; yields `None`
+    /// when `self.common.resume()` or the backend reset fails.
+    fn reset(&mut self) -> Option<Arc<dyn VirtioInterrupt>> {
+        if self.common.pause_evt.take().is_some() {
+            self.common.resume().ok()?;
+        }
+
+        let reset_outcome = self
+            .vu_common
+            .vu
+            .as_ref()
+            .map(|vu| vu.lock().unwrap().reset_vhost_user());
+        if let Some(Err(e)) = reset_outcome {
+            error!("Failed to reset vhost-user daemon: {e:?}");
+            return None;
+        }
+
+        if let Some(kill_evt) = self.common.kill_evt.take() {
+            let _ = kill_evt.write(1);
+        }
+
+        event!("virtio-device", "reset", "id", &self.id);
+
+        Some(self.common.interrupt_cb.take().unwrap())
+    }
+
+    fn shutdown(&mut self) {
+        self.vu_common.shutdown();
+    }
+
+    fn get_shm_regions(&self) -> Option<VirtioSharedMemoryList> {
+        self.cache.clone()
+    }
+
+    /// Replaces the existing shared memory cache; fails if no cache has been set.
+    fn set_shm_regions(
+        &mut self,
+        shm_regions: VirtioSharedMemoryList,
+    ) -> std::result::Result<(), crate::Error> {
+        let Some(cache) = self.cache.as_mut() else {
+            return Err(crate::Error::SetShmRegionsNotSupported);
+        };
+        *cache = shm_regions;
+        Ok(())
+    }
+
+    fn add_memory_region(
+        &mut self,
+        region: &Arc<GuestRegionMmap>,
+    ) -> std::result::Result<(), crate::Error> {
+        self.vu_common.add_memory_region(&self.guest_memory, region)
+    }
+
+    /// Exposes the shared memory cache, if one is set, as a single userspace mapping.
+    fn userspace_mappings(&self) -> Vec<UserspaceMapping> {
+        self.cache
+            .iter()
+            .map(|cache| UserspaceMapping {
+                mem_slot: cache.mem_slot,
+                addr: cache.addr,
+                mapping: cache.mapping.clone(),
+                mergeable: false,
+            })
+            .collect()
+    }
+}
+
+impl Pausable for Gpu {
+    /// Pauses the vhost-user side first and, if that succeeds, the common virtio state.
+    fn pause(&mut self) -> result::Result<(), MigratableError> {
+        self.vu_common.pause()?;
+        self.common.pause()
+    }
+
+    /// Resumes the common state, unparks the epoll thread (if any), then the vhost-user side.
+    fn resume(&mut self) -> result::Result<(), MigratableError> {
+        self.common.resume()?;
+        if let Some(worker) = self.epoll_thread.as_ref() {
+            worker.thread().unpark();
+        }
+        self.vu_common.resume()
+    }
+}
+
+impl Snapshottable for Gpu {
+    fn id(&self) -> String {
+        self.id.clone()
+    }
+}
+impl Transportable for Gpu {}
+impl Migratable for Gpu {}
diff --git a/virtio-devices/src/vhost_user/mod.rs b/virtio-devices/src/vhost_user/mod.rs
index cd5976988..ac651de94 100644
--- a/virtio-devices/src/vhost_user/mod.rs
+++ b/virtio-devices/src/vhost_user/mod.rs
@@ -35,11 +35,13 @@ use crate::{
 
 pub mod blk;
 pub mod fs;
+pub mod gpu;
 pub mod net;
 pub mod vu_common_ctrl;
 
 pub use self::blk::Blk;
 pub use self::fs::*;
+pub use self::gpu::Gpu;
 pub use self::net::Net;
 pub use self::vu_common_ctrl::VhostUserConfig;
 
@@ -147,6 +149,10 @@ pub enum Error {
     NewMmapRegion(#[source] MmapRegionError),
     #[error("Could not find the shm log region")]
     MissingShmLogRegion,
+    #[error("Get shared memory config failed")]
+    VhostUserGetShmemConfig(#[source] VhostError),
+    #[error("Expected {0} shared memory regions; got {1}")]
+    VhostUserUnexpectedShmemRegionCount(usize, usize),
 }
 type Result<T> = std::result::Result<T, Error>;
 
diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs
index 12ca6b987..18e66b96b 100644
--- a/vmm/src/api/mod.rs
+++ b/vmm/src/api/mod.rs
@@ -51,8 +51,8 @@ use crate::config::RestoreConfig;
 use crate::device_tree::DeviceTree;
 use crate::vm::{Error as VmError, VmState};
 use crate::vm_config::{
-    DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
-    VmConfig, VsockConfig,
+    DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
+    VdpaConfig, VmConfig, VsockConfig,
 };
 
 /// API errors are sent back from the VMM API server through the ApiResponse.
@@ -170,6 +170,10 @@ pub enum ApiError {
     #[error("The fs could not be added to the VM")]
     VmAddFs(#[source] VmError),
 
+    /// The gpu could not be added to the VM.
+    #[error("The gpu could not be added to the VM")]
+    VmAddGpu(#[source] VmError),
+
     /// The pmem device could not be added to the VM.
     #[error("The pmem device could not be added to the VM")]
     VmAddPmem(#[source] VmError),
@@ -340,6 +344,8 @@ pub trait RequestHandler {
 
     fn vm_add_fs(&mut self, fs_cfg: FsConfig) -> Result<Option<Vec<u8>>, VmError>;
 
+    fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> Result<Option<Vec<u8>>, VmError>;
+
     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> Result<Option<Vec<u8>>, VmError>;
 
     fn vm_add_net(&mut self, net_cfg: NetConfig) -> Result<Option<Vec<u8>>, VmError>;
@@ -539,6 +545,43 @@ impl ApiAction for VmAddFs {
     }
 }
 
+pub struct VmAddGpu;
+
+impl ApiAction for VmAddGpu {
+    type RequestBody = GpuConfig;
+    type ResponseBody = Option<Body>;
+
+    /// Builds the closure that runs `vm_add_gpu` and sends its result to `response_sender`.
+    fn request(
+        &self,
+        config: Self::RequestBody,
+        response_sender: Sender<ApiResponse>,
+    ) -> ApiRequest {
+        Box::new(move |vmm| {
+            info!("API request event: VmAddGpu {config:?}");
+
+            let response = match vmm.vm_add_gpu(config) {
+                Ok(payload) => Ok(ApiResponsePayload::VmAction(payload)),
+                Err(e) => Err(ApiError::VmAddGpu(e)),
+            };
+            response_sender
+                .send(response)
+                .map_err(VmmError::ApiResponseSend)?;
+            Ok(false)
+        })
+    }
+
+    /// Delegates to `get_response_body` with this action and the given channel endpoints.
+    fn send(
+        &self,
+        api_evt: EventFd,
+        api_sender: Sender<ApiRequest>,
+        data: Self::RequestBody,
+    ) -> ApiResult<Self::ResponseBody> {
+        get_response_body(self, api_evt, api_sender, data)
+    }
+}
+
 pub struct VmAddPmem;
 
 impl ApiAction for VmAddPmem {
diff --git a/vmm/src/config.rs b/vmm/src/config.rs
index 16089e558..1216b0cd4 100644
--- a/vmm/src/config.rs
+++ b/vmm/src/config.rs
@@ -90,6 +90,12 @@ pub enum Error {
     /// Error parsing filesystem parameters
     #[error("Error parsing --fs")]
     ParseFileSystem(#[source] OptionParserError),
+    /// GPU socket is missing
+    #[error("Error parsing --gpu: socket missing")]
+    ParseGpuSockMissing,
+    /// Error parsing GPU parameters
+    #[error("Error parsing --gpu")]
+    ParseGpu(#[source] OptionParserError),
     /// Error parsing persistent memory parameters
     #[error("Error parsing --pmem")]
     ParsePersistentMemory(#[source] OptionParserError),
@@ -393,6 +399,7 @@ pub struct VmParams<'a> {
     pub rng: &'a str,
     pub balloon: Option<&'a str>,
     pub fs: Option<Vec<&'a str>>,
+    pub gpu: Option<Vec<&'a str>>,
     pub pmem: Option<Vec<&'a str>>,
     pub serial: &'a str,
     pub console: &'a str,
@@ -454,6 +461,9 @@ impl<'a> VmParams<'a> {
         let fs: Option<Vec<&str>> = args
             .get_many::<String>("fs")
             .map(|x| x.map(|y| y as &str).collect());
+        let gpu: Option<Vec<&str>> = args
+            .get_many::<String>("gpu")
+            .map(|x| x.map(|y| y as &str).collect());
         let pmem: Option<Vec<&str>> = args
             .get_many::<String>("pmem")
             .map(|x| x.map(|y| y as &str).collect());
@@ -508,6 +518,7 @@ impl<'a> VmParams<'a> {
             rng,
             balloon,
             fs,
+            gpu,
             pmem,
             serial,
             console,
@@ -1701,6 +1712,49 @@ impl FsConfig {
     }
 }
 
+impl GpuConfig {
+    pub const SYNTAX: &'static str = "virtio-gpu parameters \
+    \"socket=<socket_path>,id=<device_id>,pci_segment=<segment_id>\"";
+
+    /// Parses a `--gpu` argument; `socket` is mandatory, the other options are optional.
+    pub fn parse(gpu: &str) -> Result<Self> {
+        let mut parser = OptionParser::new();
+        parser.add("socket").add("id").add("pci_segment");
+        parser.parse(gpu).map_err(Error::ParseGpu)?;
+
+        let Some(socket) = parser.get("socket") else {
+            return Err(Error::ParseGpuSockMissing);
+        };
+        let pci_segment = parser
+            .convert("pci_segment")
+            .map_err(Error::ParseGpu)?
+            .unwrap_or_default();
+
+        Ok(GpuConfig {
+            socket: PathBuf::from(socket),
+            id: parser.get("id"),
+            pci_segment,
+        })
+    }
+
+    /// Rejects a `pci_segment` that is out of range or listed in the platform's `iommu_segments`.
+    pub fn validate(&self, vm_config: &VmConfig) -> ValidationResult<()> {
+        let Some(platform_config) = vm_config.platform.as_ref() else {
+            return Ok(());
+        };
+        let segment = self.pci_segment;
+        if segment >= platform_config.num_pci_segments {
+            return Err(ValidationError::InvalidPciSegment(segment));
+        }
+
+        match &platform_config.iommu_segments {
+            Some(segments) if segments.contains(&segment) => {
+                Err(ValidationError::IommuNotSupportedOnSegment(segment))
+            }
+            _ => Ok(()),
+        }
+    }
+}
+
 #[cfg(feature = "fw_cfg")]
 impl FwCfgConfig {
     pub const SYNTAX: &'static str = "Boot params to pass to FW CFG device \
@@ -2723,6 +2777,13 @@ impl VmConfig {
             }
         }
 
+        if let Some(gpus) = &self.gpu {
+            for gpu in gpus {
+                gpu.validate(self)?;
+                Self::validate_identifier(&mut id_list, &gpu.id)?;
+            }
+        }
+
         if let Some(pmems) = &self.pmem {
             for pmem in pmems {
                 pmem.validate(self)?;
@@ -2976,6 +3037,15 @@ impl VmConfig {
             fs = Some(fs_config_list);
         }
 
+        let mut gpu: Option<Vec<GpuConfig>> = None;
+        if let Some(gpu_list) = &vm_params.gpu {
+            let mut gpu_config_list = Vec::new();
+            for item in gpu_list.iter() {
+                gpu_config_list.push(GpuConfig::parse(item)?);
+            }
+            gpu = Some(gpu_config_list);
+        }
+
         let mut pmem: Option<Vec<PmemConfig>> = None;
         if let Some(pmem_list) = &vm_params.pmem {
             let mut pmem_config_list = Vec::new();
@@ -3112,6 +3182,7 @@ impl VmConfig {
             rng,
             balloon,
             fs,
+            gpu,
             pmem,
             serial,
             console,
@@ -3173,6 +3244,13 @@ impl VmConfig {
             removed |= fs.len() != len;
         }
 
+        // Remove if gpu device
+        if let Some(gpu_list) = self.gpu.as_mut() {
+            let len = gpu_list.len();
+            gpu_list.retain(|dev| dev.id.as_ref().map(|id| id.as_ref()) != Some(id));
+            removed |= gpu_list.len() != len;
+        }
+
         // Remove if net device
         if let Some(net) = self.net.as_mut() {
             let len = net.len();
@@ -3245,6 +3323,7 @@ impl Clone for VmConfig {
             #[cfg(feature = "pvmemcontrol")]
             pvmemcontrol: self.pvmemcontrol.clone(),
             fs: self.fs.clone(),
+            gpu: self.gpu.clone(),
             pmem: self.pmem.clone(),
             serial: self.serial.clone(),
             console: self.console.clone(),
@@ -4153,6 +4232,7 @@ mod unit_tests {
             rng: RngConfig::default(),
             balloon: None,
             fs: None,
+            gpu: None,
             pmem: None,
             serial: default_serial(),
             console: default_console(),
@@ -4356,6 +4436,7 @@ mod unit_tests {
             },
             balloon: None,
             fs: None,
+            gpu: None,
             pmem: None,
             serial: ConsoleConfig {
                 file: None,
diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs
index 752da6e51..40f27d4ff 100644
--- a/vmm/src/device_manager.rs
+++ b/vmm/src/device_manager.rs
@@ -94,7 +94,7 @@ use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, Virti
 use virtio_devices::vhost_user::VhostUserConfig;
 use virtio_devices::{
     AccessPlatformMapping, ActivateError, Block, Endpoint, IommuMapping, VdpaDmaMapping,
-    VirtioMemMappingSource,
+    VirtioMemMappingSource, VirtioSharedMemory, VirtioSharedMemoryList,
 };
 use vm_allocator::{AddressAllocator, SystemAllocator};
 use vm_device::dma_mapping::ExternalDmaMapping;
@@ -127,8 +127,8 @@ use crate::serial_manager::{Error as SerialManagerError, SerialManager};
 use crate::vm_config::IvshmemConfig;
 use crate::vm_config::{
     ConsoleOutputMode, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT,
-    DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
-    VhostMode, VmConfig, VsockConfig,
+    DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
+    VdpaConfig, VhostMode, VmConfig, VsockConfig,
 };
 use crate::{DEVICE_MANAGER_SNAPSHOT_ID, GuestRegionMmap, PciDeviceInfo, device_node};
 
@@ -157,6 +157,7 @@ const IVSHMEM_DEVICE_NAME: &str = "__ivshmem";
 // identifiers if the user doesn't give one
 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
+const GPU_DEVICE_NAME_PREFIX: &str = "_gpu";
 const NET_DEVICE_NAME_PREFIX: &str = "_net";
 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
@@ -205,6 +206,18 @@ pub enum DeviceManagerError {
     #[error("Virtio-fs device was created without a socket")]
     NoVirtioFsSock,
 
+    /// Creating the vhost-user GPU device (`vhost_user::Gpu::new`) failed.
+    #[error("Cannot create virtio-gpu device")]
+    CreateVirtioGpu(#[source] virtio_devices::vhost_user::Error),
+
+    /// The virtio-gpu socket path could not be converted to a `&str`.
+    #[error("Virtio-gpu device was created without a socket")]
+    NoVirtioGpuSock,
+
+    /// No address range could be allocated for the virtio-gpu shared memory region.
+    #[error("Cannot find a memory range for virtio-gpu")]
+    GpuRangeAllocation,
+
     /// Cannot create vhost-user-blk device
     #[error("Cannot create vhost-user-blk device")]
     CreateVhostUserBlk(#[source] virtio_devices::vhost_user::Error),
@@ -2548,6 +2561,9 @@ impl DeviceManager {
         // Add virtio-fs if required
         self.make_virtio_fs_devices()?;
 
+        // Add virtio-gpu if required
+        self.make_virtio_gpu_devices()?;
+
         // Add virtio-pmem if required
         self.make_virtio_pmem_devices()?;
 
@@ -3146,6 +3162,118 @@ impl DeviceManager {
         Ok(())
     }
 
+    /// Builds one vhost-user GPU device from `gpu_cfg` and maps its shared memory region.
+    /// Fills in `gpu_cfg.id` when it is unset; fails if the socket path is not a valid `&str`.
+    fn make_virtio_gpu_device(
+        &mut self,
+        gpu_cfg: &mut GpuConfig,
+    ) -> DeviceManagerResult<MetaVirtioDevice> {
+        let id = match gpu_cfg.id.clone() {
+            Some(id) => id,
+            None => {
+                let generated = self.next_device_name(GPU_DEVICE_NAME_PREFIX)?;
+                gpu_cfg.id = Some(generated.clone());
+                generated
+            }
+        };
+
+        info!("Creating virtio-gpu device: {gpu_cfg:?}");
+        let mut node = device_node!(id);
+
+        let Some(gpu_socket) = gpu_cfg.socket.to_str() else {
+            return Err(DeviceManagerError::NoVirtioGpuSock);
+        };
+
+        let exit_evt = self
+            .exit_evt
+            .try_clone()
+            .map_err(DeviceManagerError::EventFd)?;
+        let (mut virtio_gpu_device, region) = virtio_devices::vhost_user::Gpu::new(
+            id.clone(),
+            gpu_socket,
+            self.seccomp_action.clone(),
+            exit_evt,
+            self.force_iommu,
+        )
+        .map_err(DeviceManagerError::CreateVirtioGpu)?;
+        let shm_len = region.length;
+
+        // Allocate the shared memory BAR range from the PCI segment's `mem64_allocator`.
+        let cache_base = self.pci_segments[gpu_cfg.pci_segment as usize]
+            .mem64_allocator
+            .lock()
+            .unwrap()
+            .allocate(None, shm_len, Some(shm_len))
+            .ok_or(DeviceManagerError::GpuRangeAllocation)?
+            .raw_value();
+
+        node.resources.push(Resource::MmioAddressRange {
+            base: cache_base,
+            size: shm_len,
+        });
+
+        let mmap_region = MmapRegion::build(
+            None,
+            shm_len as usize,
+            libc::PROT_NONE,
+            libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
+        )
+        .map_err(DeviceManagerError::NewMmapRegion)?;
+
+        // SAFETY: mmap_region.size() and mmap_region.as_ptr() refer to a valid allocation.
+        let mem_slot = unsafe {
+            self.memory_manager
+                .lock()
+                .unwrap()
+                .create_userspace_mapping(
+                    cache_base,
+                    mmap_region.size(),
+                    mmap_region.as_ptr(),
+                    false,
+                    false,
+                    false,
+                )
+                .map_err(DeviceManagerError::MemoryManager)?
+        };
+
+        // Single-entry region list keyed by `region.id`.
+        let shm_entry = VirtioSharedMemory {
+            offset: 0,
+            len: shm_len,
+        };
+        virtio_gpu_device.set_cache(VirtioSharedMemoryList {
+            mapping: Arc::new(mmap_region),
+            mem_slot,
+            addr: GuestAddress(cache_base),
+            region_list: std::iter::once((region.id, shm_entry)).collect(),
+        });
+
+        let virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>> =
+            Arc::new(Mutex::new(virtio_gpu_device));
+        self.device_tree.lock().unwrap().insert(id.clone(), node);
+
+        Ok(MetaVirtioDevice {
+            virtio_device,
+            iommu: false,
+            id,
+            pci_segment: gpu_cfg.pci_segment,
+            dma_handler: None,
+        })
+    }
+
+    /// Creates every configured vhost-user GPU device and appends it to `self.virtio_devices`.
+    fn make_virtio_gpu_devices(&mut self) -> DeviceManagerResult<()> {
+        // Work on a copy so the configuration stays intact if a device fails to be created.
+        let mut gpu_devices = self.config.lock().unwrap().gpu.clone();
+        for gpu_cfg in gpu_devices.iter_mut().flatten() {
+            let device = self.make_virtio_gpu_device(gpu_cfg)?;
+            self.virtio_devices.push(device);
+        }
+        // Store the list back: device creation may have filled in generated ids.
+        self.config.lock().unwrap().gpu = gpu_devices;
+        Ok(())
+    }
+
     fn make_virtio_pmem_device(
         &mut self,
         pmem_cfg: &mut PmemConfig,
@@ -4876,6 +5004,13 @@ impl DeviceManager {
         self.hotplug_virtio_pci_device(device)
     }
 
+    /// Validates the identifier in `gpu_cfg`, builds the GPU device and hotplugs it on PCI.
+    pub fn add_gpu(&mut self, gpu_cfg: &mut GpuConfig) -> DeviceManagerResult<PciDeviceInfo> {
+        self.validate_identifier(&gpu_cfg.id)?;
+        let gpu_device = self.make_virtio_gpu_device(gpu_cfg)?;
+        self.hotplug_virtio_pci_device(gpu_device)
+    }
+
     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
         self.validate_identifier(&pmem_cfg.id)?;
 
diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs
index dcf6614b2..1d5109d5b 100644
--- a/vmm/src/lib.rs
+++ b/vmm/src/lib.rs
@@ -59,8 +59,8 @@ use crate::migration::{recv_vm_config, recv_vm_state};
 use crate::seccomp_filters::{Thread, get_seccomp_filter};
 use crate::vm::{Error as VmError, Vm, VmState};
 use crate::vm_config::{
-    DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, VdpaConfig,
-    VmConfig, VsockConfig,
+    DeviceConfig, DiskConfig, FsConfig, GpuConfig, NetConfig, PmemConfig, UserDeviceConfig,
+    VdpaConfig, VmConfig, VsockConfig,
 };
 
 mod acpi;
@@ -2125,6 +2125,29 @@ impl RequestHandler for Vmm {
         }
     }
 
+    /// Adds a vhost-user GPU: hotplugs it when a VM exists, else only stores it in the config.
+    fn vm_add_gpu(&mut self, gpu_cfg: GpuConfig) -> result::Result<Option<Vec<u8>>, VmError> {
+        let vm_config = self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
+        {
+            // Validate on a scratch copy so a rejected device never reaches the real config.
+            let mut scratch = vm_config.lock().unwrap().clone();
+            add_to_config(&mut scratch.gpu, gpu_cfg.clone());
+            scratch.validate().map_err(VmError::ConfigValidation)?;
+        }
+
+        let Some(vm) = self.vm.as_mut() else {
+            // No VM object yet: only record the device in the configuration.
+            add_to_config(&mut vm_config.lock().unwrap().gpu, gpu_cfg);
+            return Ok(None);
+        };
+        let info = vm.add_gpu(gpu_cfg).inspect_err(|e| {
+            error!("Error when adding new gpu to the VM: {e:?}");
+        })?;
+        serde_json::to_vec(&info)
+            .map(Some)
+            .map_err(VmError::SerializeJson)
+    }
+
     fn vm_add_pmem(&mut self, pmem_cfg: PmemConfig) -> result::Result<Option<Vec<u8>>, VmError> {
         self.vm_config.as_ref().ok_or(VmError::VmNotCreated)?;
 
@@ -2443,6 +2466,7 @@ mod unit_tests {
             },
             balloon: None,
             fs: None,
+            gpu: None,
             pmem: None,
             serial: ConsoleConfig {
                 file: None,
diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs
index 4e1f8c3b8..b32b3f5d6 100644
--- a/vmm/src/vm.rs
+++ b/vmm/src/vm.rs
@@ -100,8 +100,8 @@ use crate::migration::{SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE, url_to_path};
 #[cfg(feature = "fw_cfg")]
 use crate::vm_config::FwCfgConfig;
 use crate::vm_config::{
-    DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, NumaConfig, PayloadConfig,
-    PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
+    DeviceConfig, DiskConfig, FsConfig, GpuConfig, HotplugMethod, NetConfig, NumaConfig,
+    PayloadConfig, PmemConfig, UserDeviceConfig, VdpaConfig, VmConfig, VsockConfig,
 };
 use crate::{
     CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, GuestMemoryMmap,
@@ -2136,6 +2136,28 @@ impl Vm {
         Ok(pci_device_info)
     }
 
+    /// Hotplugs a vhost-user GPU device and records it in the VM configuration.
+    /// Returns the PCI information the device manager produced for the new device.
+    pub fn add_gpu(&mut self, mut gpu_cfg: GpuConfig) -> Result<PciDeviceInfo> {
+        let pci_device_info = {
+            let mut device_manager = self.device_manager.lock().unwrap();
+            device_manager
+                .add_gpu(&mut gpu_cfg)
+                .map_err(Error::DeviceManager)?
+        };
+
+        // `gpu_cfg` is stored only after the device manager accepted the device.
+        add_to_config(&mut self.config.lock().unwrap().gpu, gpu_cfg);
+
+        self.device_manager
+            .lock()
+            .unwrap()
+            .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
+            .map_err(Error::DeviceManager)?;
+
+        Ok(pci_device_info)
+    }
+
     pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result<PciDeviceInfo> {
         let pci_device_info = self
             .device_manager
diff --git a/vmm/src/vm_config.rs b/vmm/src/vm_config.rs
index 407d4e491..4052d935d 100644
--- a/vmm/src/vm_config.rs
+++ b/vmm/src/vm_config.rs
@@ -469,6 +469,22 @@ impl ApplyLandlock for FsConfig {
     }
 }
 
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
+pub struct GpuConfig {
+    pub socket: PathBuf, // Path of the vhost-user GPU backend socket.
+    #[serde(default)]
+    pub id: Option<String>, // Device identifier; the device manager generates one when `None`.
+    #[serde(default)]
+    pub pci_segment: u16, // Index of the PCI segment the device is placed on (defaults to 0).
+}
+
+impl ApplyLandlock for GpuConfig {
+    fn apply_landlock(&self, landlock: &mut Landlock) -> LandlockResult<()> {
+        landlock.add_rule_with_access(&self.socket, "rw")?; // Register the socket path with "rw" access.
+        Ok(())
+    }
+}
+
 #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
 pub struct PmemConfig {
     pub file: PathBuf,
@@ -922,6 +938,7 @@ pub struct VmConfig {
     pub rng: RngConfig,
     pub balloon: Option<BalloonConfig>,
     pub fs: Option<Vec<FsConfig>>,
+    pub gpu: Option<Vec<GpuConfig>>,
     pub pmem: Option<Vec<PmemConfig>>,
     #[serde(default = "default_serial")]
     pub serial: ConsoleConfig,
@@ -997,6 +1014,12 @@ impl VmConfig {
             }
         }
 
+        if let Some(gpu_configs) = &self.gpu {
+            for gpu_config in gpu_configs.iter() {
+                gpu_config.apply_landlock(&mut landlock)?;
+            }
+        }
+
         if let Some(pmem_configs) = &self.pmem {
             for pmem_config in pmem_configs.iter() {
                 pmem_config.apply_landlock(&mut landlock)?;