From 480bfb70950059b843312f2c2280eaa10b17563d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dav=C3=AD=C3=B0=20Steinn=20Geirsson?=
Date: Sun, 22 Mar 2026 10:05:19 +0000
Subject: [PATCH] virtio-devices: balloon: add statistics virtqueue support

Implement the virtio balloon statistics virtqueue per the VIRTIO spec,
exposing guest memory statistics through a new /vm.balloon-statistics
API endpoint.

Device layer: add stats virtqueue constants and types, wire up the
stats queue in the balloon handler, implement stats request/response
handlers, and expose a public request_balloon_statistics function.

VMM layer: plumb statistics through device manager, VM, and VMM, add
HTTP and D-Bus API endpoints, add ServiceUnavailable (503) to
api_client, add balloon-statistics subcommand to ch-remote, and add the
endpoint to the OpenAPI spec.

The response includes both guest-reported stats and host-known values
(balloon_actual_bytes, balloon_target_bytes, balloon_total_ram_bytes)
so consumers can understand balloon utilization without querying
/vm.info separately.

Also fixes pre-existing rustfmt issues and adds clippy+rustfmt to the
flake devShell.

Co-Authored-By: Claude Opus 4.6 (1M context) --- api_client/src/lib.rs | 2 + cloud-hypervisor/src/bin/ch-remote.rs | 9 + flake.nix | 17 ++ virtio-devices/src/balloon.rs | 285 +++++++++++++++++++++- virtio-devices/src/lib.rs | 1 + vmm/src/api/dbus/mod.rs | 10 +- vmm/src/api/http/http_endpoint.rs | 27 +- vmm/src/api/http/mod.rs | 18 +- vmm/src/api/mod.rs | 83 +++++++ vmm/src/api/openapi/cloud-hypervisor.yaml | 82 +++++++ vmm/src/device_manager.rs | 44 +++- vmm/src/lib.rs | 13 + vmm/src/vm.rs | 12 +- 13 files changed, 586 insertions(+), 17 deletions(-) diff --git a/api_client/src/lib.rs b/api_client/src/lib.rs index f5bdd1302..c60ff4b65 100644 --- a/api_client/src/lib.rs +++ b/api_client/src/lib.rs @@ -41,6 +41,7 @@ pub enum StatusCode { TooManyRequests, InternalServerError, NotImplemented, + ServiceUnavailable, Unknown, } @@ -55,6 +56,7 @@ impl StatusCode { 429 => StatusCode::TooManyRequests, 500 => StatusCode::InternalServerError, 501 => StatusCode::NotImplemented, + 503 => StatusCode::ServiceUnavailable, _ => StatusCode::Unknown, } } diff --git a/cloud-hypervisor/src/bin/ch-remote.rs b/cloud-hypervisor/src/bin/ch-remote.rs index fd48ffab1..edc3fc9bb 100644 --- a/cloud-hypervisor/src/bin/ch-remote.rs +++ b/cloud-hypervisor/src/bin/ch-remote.rs @@ -91,6 +91,7 @@ trait DBusApi1 { fn vm_boot(&self) -> zbus::Result<()>; fn vm_coredump(&self, vm_coredump_data: &str) -> zbus::Result<()>; fn vm_counters(&self) -> zbus::Result>; + fn vm_balloon_statistics(&self) -> zbus::Result>; fn vm_create(&self, vm_config: &str) -> zbus::Result<()>; fn vm_delete(&self) -> zbus::Result<()>; fn vm_info(&self) -> zbus::Result; @@ -188,6 +189,10 @@ impl<'a> DBusApi1ProxyBlocking<'a> { self.print_response(self.vm_counters()) } + fn api_vm_balloon_statistics(&self) -> ApiResult { + self.print_response(self.vm_balloon_statistics()) + } + fn api_vm_create(&self, vm_config: &str) -> ApiResult { self.vm_create(vm_config).map_err(Error::DBusApiClient) } @@ -295,6 +300,8 @@ fn 
rest_api_do_command(matches: &ArgMatches, socket: &mut UnixStream) -> ApiResu Some("counters") => { simple_api_command(socket, "GET", "counters", None).map_err(Error::HttpApiClient) } + Some("balloon-statistics") => simple_api_command(socket, "GET", "balloon-statistics", None) + .map_err(Error::HttpApiClient), Some("ping") => { simple_api_full_command(socket, "GET", "vmm.ping", None).map_err(Error::HttpApiClient) } @@ -543,6 +550,7 @@ fn dbus_api_do_command(matches: &ArgMatches, proxy: &DBusApi1ProxyBlocking<'_>) Some("pause") => proxy.api_vm_pause(), Some("info") => proxy.api_vm_info(), Some("counters") => proxy.api_vm_counters(), + Some("balloon-statistics") => proxy.api_vm_balloon_statistics(), Some("ping") => proxy.api_vmm_ping(), Some("shutdown") => proxy.api_vm_shutdown(), Some("resize") => { @@ -1004,6 +1012,7 @@ fn get_cli_commands_sorted() -> Box<[Command]> { Command::new("add-vsock") .about("Add vsock device") .arg(Arg::new("vsock_config").index(1).help(VsockConfig::SYNTAX)), + Command::new("balloon-statistics").about("Balloon statistics from the VM"), Command::new("boot").about("Boot a created VM"), Command::new("coredump") .about("Create a coredump from VM") diff --git a/flake.nix b/flake.nix index ba2779119..e12dd02dd 100644 --- a/flake.nix +++ b/flake.nix @@ -79,5 +79,22 @@ }); } ); + + devShells = forAllSystems ( + system: + let + pkgs = nixpkgs.legacyPackages.${system}; + inherit (pkgs) lib; + in + { + default = pkgs.mkShell { + inputsFrom = [ self.packages.${system}.cloud-hypervisor ]; + nativeBuildInputs = with pkgs; [ + clippy + rustfmt + ]; + }; + } + ); }; } diff --git a/virtio-devices/src/balloon.rs b/virtio-devices/src/balloon.rs index 3db683261..2cefe4d5a 100644 --- a/virtio-devices/src/balloon.rs +++ b/virtio-devices/src/balloon.rs @@ -19,7 +19,8 @@ use std::mem::size_of; use std::os::unix::io::AsRawFd; use std::result; use std::sync::atomic::AtomicBool; -use std::sync::{Arc, Barrier}; +use std::sync::{Arc, Barrier, Condvar, Mutex}; +use 
std::time::Duration; use anyhow::anyhow; use event_monitor::event; @@ -52,18 +53,40 @@ const MIN_NUM_QUEUES: usize = 2; const INFLATE_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 1; // Deflate virtio queue event. const DEFLATE_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 2; +// Stats virtio queue event. +const STATS_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3; +// Stats request event (triggered by host to request stats from guest). +const STATS_REQUEST_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 5; // Reporting virtio queue event. -const REPORTING_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 3; +const REPORTING_QUEUE_EVENT: u16 = EPOLL_HELPER_EVENT_LAST + 4; + +// Stats queue constants. +const STATS_QUEUE_SIZE: u16 = 2; +const STATS_QUEUE_INDEX: usize = 2; // Size of a PFN in the balloon interface. const VIRTIO_BALLOON_PFN_SHIFT: u64 = 12; +// Enable the stats virtqueue. +const VIRTIO_BALLOON_F_STATS_VQ: u64 = 1; // Deflate balloon on OOM const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u64 = 2; // Enable an additional virtqueue to let the guest notify the host about free // pages. const VIRTIO_BALLOON_F_REPORTING: u64 = 5; +// Stats tags as defined in the virtio spec. 
+const VIRTIO_BALLOON_S_SWAP_IN: u16 = 0; +const VIRTIO_BALLOON_S_SWAP_OUT: u16 = 1; +const VIRTIO_BALLOON_S_MAJFLT: u16 = 2; +const VIRTIO_BALLOON_S_MINFLT: u16 = 3; +const VIRTIO_BALLOON_S_MEMFREE: u16 = 4; +const VIRTIO_BALLOON_S_MEMTOT: u16 = 5; +const VIRTIO_BALLOON_S_AVAIL: u16 = 6; +const VIRTIO_BALLOON_S_CACHES: u16 = 7; +const VIRTIO_BALLOON_S_HTLB_PGALLOC: u16 = 8; +const VIRTIO_BALLOON_S_HTLB_PGFAIL: u16 = 9; + #[derive(Error, Debug)] pub enum Error { #[error("Guest gave us bad memory addresses.")] @@ -88,6 +111,12 @@ pub enum Error { QueueAddUsed(#[source] virtio_queue::Error), #[error("Failed creating an iterator over the queue")] QueueIterator(#[source] virtio_queue::Error), + #[error("Stats feature not negotiated by guest")] + StatsFeatureNotNegotiated, + #[error("Stats request timed out")] + StatsTimeout, + #[error("Failed to parse stats buffer")] + StatsDescriptorParse, } // Got from include/uapi/linux/virtio_balloon.h @@ -150,12 +179,49 @@ const CONFIG_ACTUAL_SIZE: usize = 4; // SAFETY: it only has data and has no implicit padding. 
unsafe impl ByteValued for VirtioBalloonConfig {} +#[derive(Clone, Default, Debug, Serialize, Deserialize)] +pub struct BalloonStatistics { + #[serde(skip_serializing_if = "Option::is_none")] + pub swap_in: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub swap_out: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub major_faults: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub minor_faults: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub free_memory: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub total_memory: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub available_memory: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub disk_caches: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub hugetlb_allocations: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub hugetlb_failures: Option, +} + +pub struct BalloonStatsState { + pub stats: Option, + pub pending: bool, + pub generation: u64, +} + +pub type SharedStatsState = Arc<(Mutex, Condvar)>; + struct BalloonEpollHandler { mem: GuestMemoryAtomic, queues: Vec, interrupt_cb: Arc, inflate_queue_evt: EventFd, deflate_queue_evt: EventFd, + stats_queue_evt: Option, + stats_request_evt: Option, + stats_state: SharedStatsState, + stats_desc_index: Option, + stats_generation: u64, reporting_queue_evt: Option, kill_evt: EventFd, pause_evt: EventFd, @@ -340,6 +406,107 @@ impl BalloonEpollHandler { } } + fn parse_stats_buffer( + memory: &GuestMemoryMmap, + addr: GuestAddress, + len: u32, + ) -> result::Result { + let entry_size = size_of::() + size_of::(); // 10 bytes per entry + let mut stats = BalloonStatistics::default(); + let mut offset = 0u64; + while offset + entry_size as u64 <= len as u64 { + let tag_addr = addr + .checked_add(offset) + .ok_or(Error::StatsDescriptorParse)?; + let val_addr = addr + .checked_add(offset + size_of::() as u64) + 
.ok_or(Error::StatsDescriptorParse)?; + + let mut tag_bytes = [0u8; 2]; + memory + .read_slice(&mut tag_bytes, tag_addr) + .map_err(|_| Error::StatsDescriptorParse)?; + let tag = u16::from_le_bytes(tag_bytes); + + let mut val_bytes = [0u8; 8]; + memory + .read_slice(&mut val_bytes, val_addr) + .map_err(|_| Error::StatsDescriptorParse)?; + let val = u64::from_le_bytes(val_bytes); + + match tag { + VIRTIO_BALLOON_S_SWAP_IN => stats.swap_in = Some(val), + VIRTIO_BALLOON_S_SWAP_OUT => stats.swap_out = Some(val), + VIRTIO_BALLOON_S_MAJFLT => stats.major_faults = Some(val), + VIRTIO_BALLOON_S_MINFLT => stats.minor_faults = Some(val), + VIRTIO_BALLOON_S_MEMFREE => stats.free_memory = Some(val), + VIRTIO_BALLOON_S_MEMTOT => stats.total_memory = Some(val), + VIRTIO_BALLOON_S_AVAIL => stats.available_memory = Some(val), + VIRTIO_BALLOON_S_CACHES => stats.disk_caches = Some(val), + VIRTIO_BALLOON_S_HTLB_PGALLOC => stats.hugetlb_allocations = Some(val), + VIRTIO_BALLOON_S_HTLB_PGFAIL => stats.hugetlb_failures = Some(val), + _ => { + log::warn!("Unknown balloon stats tag: {tag}"); + } + } + offset += entry_size as u64; + } + Ok(stats) + } + + fn process_stats_queue(&mut self) -> result::Result<(), Error> { + while let Some(mut desc_chain) = + self.queues[STATS_QUEUE_INDEX].pop_descriptor_chain(self.mem.memory()) + { + let desc = desc_chain.next().ok_or(Error::DescriptorChainTooShort)?; + let head_index = desc_chain.head_index(); + + let parsed = Self::parse_stats_buffer(desc_chain.memory(), desc.addr(), desc.len()); + match parsed { + Ok(new_stats) => { + let (mutex, condvar) = self.stats_state.as_ref(); + let mut state = mutex.lock().unwrap(); + if state.pending && state.generation == self.stats_generation { + state.stats = Some(new_stats); + state.pending = false; + condvar.notify_all(); + } + } + Err(e) => { + log::error!("Failed to parse balloon stats buffer: {e:?}"); + } + } + // Retain the descriptor index so we can re-use it for the next request. 
+ self.stats_desc_index = Some(head_index); + } + Ok(()) + } + + fn request_stats(&mut self) -> result::Result<(), Error> { + { + let (mutex, _) = self.stats_state.as_ref(); + let state = mutex.lock().unwrap(); + self.stats_generation = state.generation; + } + if let Some(head_index) = self.stats_desc_index.take() { + let mem = self.mem.memory(); + self.queues[STATS_QUEUE_INDEX] + .add_used(&*mem, head_index, 0) + .map_err(Error::QueueAddUsed)?; + self.signal(VirtioInterruptType::Queue(STATS_QUEUE_INDEX as u16))?; + } else if let Some(desc_chain) = + self.queues[STATS_QUEUE_INDEX].pop_descriptor_chain(self.mem.memory()) + { + self.queues[STATS_QUEUE_INDEX] + .add_used(desc_chain.memory(), desc_chain.head_index(), 0) + .map_err(Error::QueueAddUsed)?; + self.signal(VirtioInterruptType::Queue(STATS_QUEUE_INDEX as u16))?; + } else { + log::warn!("Stats queue empty: guest hasn't placed init buffer yet"); + } + Ok(()) + } + fn run( &mut self, paused: &AtomicBool, @@ -348,6 +515,12 @@ impl BalloonEpollHandler { let mut helper = EpollHelper::new(&self.kill_evt, &self.pause_evt)?; helper.add_event(self.inflate_queue_evt.as_raw_fd(), INFLATE_QUEUE_EVENT)?; helper.add_event(self.deflate_queue_evt.as_raw_fd(), DEFLATE_QUEUE_EVENT)?; + if let Some(stats_queue_evt) = self.stats_queue_evt.as_ref() { + helper.add_event(stats_queue_evt.as_raw_fd(), STATS_QUEUE_EVENT)?; + } + if let Some(stats_request_evt) = self.stats_request_evt.as_ref() { + helper.add_event(stats_request_evt.as_raw_fd(), STATS_REQUEST_EVENT)?; + } if let Some(reporting_queue_evt) = self.reporting_queue_evt.as_ref() { helper.add_event(reporting_queue_evt.as_raw_fd(), REPORTING_QUEUE_EVENT)?; } @@ -389,6 +562,40 @@ impl EpollHelperHandler for BalloonEpollHandler { )) })?; } + STATS_QUEUE_EVENT => { + if let Some(stats_queue_evt) = self.stats_queue_evt.as_ref() { + stats_queue_evt.read().map_err(|e| { + EpollHelperError::HandleEvent(anyhow!( + "Failed to get stats queue event: {e:?}" + )) + })?; + 
self.process_stats_queue().map_err(|e| { + EpollHelperError::HandleEvent(anyhow!( + "Failed to process stats queue: {e:?}" + )) + })?; + } else { + return Err(EpollHelperError::HandleEvent(anyhow!( + "Invalid stats queue event as no eventfd registered" + ))); + } + } + STATS_REQUEST_EVENT => { + if let Some(stats_request_evt) = self.stats_request_evt.as_ref() { + stats_request_evt.read().map_err(|e| { + EpollHelperError::HandleEvent(anyhow!( + "Failed to get stats request event: {e:?}" + )) + })?; + self.request_stats().map_err(|e| { + EpollHelperError::HandleEvent(anyhow!("Failed to request stats: {e:?}")) + })?; + } else { + return Err(EpollHelperError::HandleEvent(anyhow!( + "Invalid stats request event as no eventfd registered" + ))); + } + } REPORTING_QUEUE_EVENT => { if let Some(reporting_queue_evt) = self.reporting_queue_evt.as_ref() { reporting_queue_evt.read().map_err(|e| { @@ -396,7 +603,7 @@ impl EpollHelperHandler for BalloonEpollHandler { "Failed to get reporting queue event: {e:?}" )) })?; - self.process_reporting_queue(2).map_err(|e| { + self.process_reporting_queue(3).map_err(|e| { EpollHelperError::HandleEvent(anyhow!( "Failed to signal used inflate queue: {e:?}" )) @@ -433,6 +640,8 @@ pub struct Balloon { seccomp_action: SeccompAction, exit_evt: EventFd, interrupt_cb: Option>, + stats_state: SharedStatsState, + stats_request_evt: EventFd, } impl Balloon { @@ -458,6 +667,7 @@ impl Balloon { ) } else { let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; + avail_features |= 1u64 << VIRTIO_BALLOON_F_STATS_VQ; if deflate_on_oom { avail_features |= 1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM; } @@ -473,6 +683,8 @@ impl Balloon { (avail_features, 0, config, false) }; + queue_sizes.push(STATS_QUEUE_SIZE); + if free_page_reporting { queue_sizes.push(REPORTING_QUEUE_SIZE); } @@ -493,9 +705,32 @@ impl Balloon { seccomp_action, exit_evt, interrupt_cb: None, + stats_state: Arc::new(( + Mutex::new(BalloonStatsState { + stats: None, + pending: false, + generation: 
0, + }), + Condvar::new(), + )), + stats_request_evt: EventFd::new(libc::EFD_NONBLOCK) + .map_err(|e| io::Error::other(format!("Failed to create stats EventFd: {e}")))?, }) } + pub fn stats_resources(&self) -> Option<(SharedStatsState, EventFd)> { + if self.common.feature_acked(VIRTIO_BALLOON_F_STATS_VQ) { + Some(( + self.stats_state.clone(), + self.stats_request_evt + .try_clone() + .expect("failed to clone stats_request_evt"), + )) + } else { + None + } + } + pub fn resize(&mut self, size: u64) -> Result<(), Error> { self.config.num_pages = (size >> VIRTIO_BALLOON_PFN_SHIFT) as u32; @@ -513,6 +748,11 @@ impl Balloon { (self.config.actual as u64) << VIRTIO_BALLOON_PFN_SHIFT } + // Get the target size of the virtio-balloon. + pub fn get_target(&self) -> u64 { + (self.config.num_pages as u64) << VIRTIO_BALLOON_PFN_SHIFT + } + fn state(&self) -> BalloonState { BalloonState { avail_features: self.common.avail_features, @@ -606,6 +846,18 @@ impl VirtioDevice for Balloon { let (_, queue, queue_evt) = queues.remove(0); virtqueues.push(queue); let deflate_queue_evt = queue_evt; + let (stats_queue_evt, stats_request_evt) = + if self.common.feature_acked(VIRTIO_BALLOON_F_STATS_VQ) && !queues.is_empty() { + let (_, queue, queue_evt) = queues.remove(0); + virtqueues.push(queue); + let req_evt = self + .stats_request_evt + .try_clone() + .map_err(crate::ActivateError::CloneExitEventFd)?; + (Some(queue_evt), Some(req_evt)) + } else { + (None, None) + }; let reporting_queue_evt = if self.common.feature_acked(VIRTIO_BALLOON_F_REPORTING) && !queues.is_empty() { let (_, queue, queue_evt) = queues.remove(0); @@ -623,6 +875,11 @@ impl VirtioDevice for Balloon { interrupt_cb, inflate_queue_evt, deflate_queue_evt, + stats_queue_evt, + stats_request_evt, + stats_state: self.stats_state.clone(), + stats_desc_index: None, + stats_generation: 0, reporting_queue_evt, kill_evt, pause_evt, @@ -675,3 +932,25 @@ impl Snapshottable for Balloon { } impl Transportable for Balloon {} impl 
Migratable for Balloon {} + +pub fn request_balloon_statistics( + stats_state: &SharedStatsState, + request_evt: &EventFd, +) -> result::Result { + let (mutex, condvar) = stats_state.as_ref(); + let mut state = mutex.lock().unwrap(); + state.generation = state.generation.wrapping_add(1); + let expected_gen = state.generation; + state.pending = true; + state.stats = None; + request_evt.write(1).map_err(Error::EventFdWriteFail)?; + let (state, timeout_result) = condvar + .wait_timeout_while(state, Duration::from_millis(500), |s| { + s.pending && s.generation == expected_gen + }) + .unwrap(); + if timeout_result.timed_out() || state.generation != expected_gen { + return Err(Error::StatsTimeout); + } + state.stats.clone().ok_or(Error::StatsTimeout) +} diff --git a/virtio-devices/src/lib.rs b/virtio-devices/src/lib.rs index 7efc4bf8c..a1c7d2e00 100644 --- a/virtio-devices/src/lib.rs +++ b/virtio-devices/src/lib.rs @@ -39,6 +39,7 @@ use vm_memory::{GuestAddress, GuestMemory}; use vm_virtio::VirtioDeviceType; pub use self::balloon::Balloon; +pub use self::balloon::BalloonStatistics; pub use self::block::{Block, BlockState}; pub use self::console::{Console, ConsoleResizer, Endpoint}; pub use self::device::{ diff --git a/vmm/src/api/dbus/mod.rs b/vmm/src/api/dbus/mod.rs index 6f75fb5cd..36ab073ce 100644 --- a/vmm/src/api/dbus/mod.rs +++ b/vmm/src/api/dbus/mod.rs @@ -23,9 +23,9 @@ use super::{ApiAction, ApiRequest}; use crate::api::VmCoredump; use crate::api::{ AddDisk, Body, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, VmAddUserDevice, VmAddVdpa, - VmAddVsock, VmBoot, VmCounters, VmCreate, VmDelete, VmInfo, VmPause, VmPowerButton, VmReboot, - VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeZone, VmRestore, VmResume, - VmSendMigration, VmShutdown, VmSnapshot, VmmPing, VmmShutdown, + VmAddVsock, VmBalloonStatistics, VmBoot, VmCounters, VmCreate, VmDelete, VmInfo, VmPause, + VmPowerButton, VmReboot, VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeZone, VmRestore, + 
VmResume, VmSendMigration, VmShutdown, VmSnapshot, VmmPing, VmmShutdown, }; use crate::seccomp_filters::{Thread, get_seccomp_filter}; use crate::{Error as VmmError, NetConfig, Result as VmmResult, VmConfig}; @@ -200,6 +200,10 @@ impl DBusApi { self.vm_action(&VmCounters, ()).await } + async fn vm_balloon_statistics(&self) -> Result> { + self.vm_action(&VmBalloonStatistics, ()).await + } + async fn vm_create(&self, vm_config: String) -> Result<()> { let api_sender = self.clone_api_sender().await; let api_notifier = self.clone_api_notifier()?; diff --git a/vmm/src/api/http/http_endpoint.rs b/vmm/src/api/http/http_endpoint.rs index e463a2081..9e7d9b6d5 100644 --- a/vmm/src/api/http/http_endpoint.rs +++ b/vmm/src/api/http/http_endpoint.rs @@ -46,12 +46,14 @@ use crate::api::http::http_endpoint::fds_helper::{attach_fds_to_cfg, attach_fds_ use crate::api::http::{EndpointHandler, HttpError, error_response}; use crate::api::{ AddDisk, ApiAction, ApiError, ApiRequest, NetConfig, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, - VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBoot, VmConfig, VmCounters, VmDelete, VmNmi, VmPause, - VmPowerButton, VmReboot, VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeDisk, - VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot, + VmAddUserDevice, VmAddVdpa, VmAddVsock, VmBalloonStatistics, VmBoot, VmConfig, VmCounters, + VmDelete, VmNmi, VmPause, VmPowerButton, VmReboot, VmReceiveMigration, VmRemoveDevice, + VmResize, VmResizeDisk, VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, + VmSnapshot, }; use crate::config::RestoreConfig; use crate::cpu::Error as CpuError; +use crate::device_manager::DeviceManagerError; use crate::vm::Error as VmError; /// Helper module for attaching externally opened FDs to config objects. 
@@ -485,6 +487,25 @@ impl PutHandler for VmResize { impl GetHandler for VmResize {} +impl GetHandler for VmBalloonStatistics { + fn handle_request( + &'static self, + api_notifier: EventFd, + api_sender: Sender, + ) -> std::result::Result, HttpError> { + self.send(api_notifier, api_sender, ()) + .map_err(|e| match e { + ApiError::VmBalloonStatistics(VmError::DeviceManager( + DeviceManagerError::MissingVirtioBalloon, + )) => HttpError::NotFound, + ApiError::VmBalloonStatistics(_) => HttpError::ServiceUnavailable, + _ => HttpError::ApiError(e), + }) + } +} + +impl PutHandler for VmBalloonStatistics {} + // Special handling for virtio-net devices backed by network FDs. // See module description for more info. impl PutHandler for VmRestore { diff --git a/vmm/src/api/http/mod.rs b/vmm/src/api/http/mod.rs index 2aa52e8e3..d354db92b 100644 --- a/vmm/src/api/http/mod.rs +++ b/vmm/src/api/http/mod.rs @@ -29,9 +29,9 @@ use self::http_endpoint::{VmActionHandler, VmCreate, VmInfo, VmmPing, VmmShutdow use crate::api::VmCoredump; use crate::api::{ AddDisk, ApiError, ApiRequest, VmAddDevice, VmAddFs, VmAddNet, VmAddPmem, VmAddUserDevice, - VmAddVdpa, VmAddVsock, VmBoot, VmCounters, VmDelete, VmNmi, VmPause, VmPowerButton, VmReboot, - VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeDisk, VmResizeZone, VmRestore, VmResume, - VmSendMigration, VmShutdown, VmSnapshot, + VmAddVdpa, VmAddVsock, VmBalloonStatistics, VmBoot, VmCounters, VmDelete, VmNmi, VmPause, + VmPowerButton, VmReboot, VmReceiveMigration, VmRemoveDevice, VmResize, VmResizeDisk, + VmResizeZone, VmRestore, VmResume, VmSendMigration, VmShutdown, VmSnapshot, }; use crate::landlock::Landlock; use crate::seccomp_filters::{Thread, get_seccomp_filter}; @@ -64,6 +64,10 @@ pub enum HttpError { #[error("Internal Server Error")] InternalServerError, + /// Service Unavailable + #[error("Service Unavailable")] + ServiceUnavailable, + /// Error from internal API #[error("Error from API")] ApiError(#[source] ApiError), @@ -138,6 
+142,10 @@ pub trait EndpointHandler { error_response(e, StatusCode::BadRequest) } Err(e @ HttpError::TooManyRequests) => error_response(e, StatusCode::TooManyRequests), + Err(e @ HttpError::NotFound) => error_response(e, StatusCode::NotFound), + Err(e @ HttpError::ServiceUnavailable) => { + error_response(e, StatusCode::ServiceUnavailable) + } Err(e) => error_response(e, StatusCode::InternalServerError), } } @@ -220,6 +228,10 @@ pub static HTTP_ROUTES: LazyLock = LazyLock::new(|| { endpoint!("/vm.counters"), Box::new(VmActionHandler::new(&VmCounters)), ); + r.routes.insert( + endpoint!("/vm.balloon-statistics"), + Box::new(VmActionHandler::new(&VmBalloonStatistics)), + ); r.routes .insert(endpoint!("/vm.create"), Box::new(VmCreate {})); r.routes.insert( diff --git a/vmm/src/api/mod.rs b/vmm/src/api/mod.rs index 18e66b96b..837740328 100644 --- a/vmm/src/api/mod.rs +++ b/vmm/src/api/mod.rs @@ -205,6 +205,10 @@ pub enum ApiError { /// Error triggering NMI #[error("Error triggering NMI")] VmNmi(#[source] VmError), + + /// Failed to get balloon statistics + #[error("Failed to get balloon statistics")] + VmBalloonStatistics(#[source] VmError), } pub type ApiResult = Result; @@ -356,6 +360,8 @@ pub trait RequestHandler { fn vm_counters(&mut self) -> Result>, VmError>; + fn vm_balloon_statistics(&mut self) -> Result>, VmError>; + fn vm_power_button(&mut self) -> Result<(), VmError>; fn vm_receive_migration( @@ -872,6 +878,45 @@ impl ApiAction for VmCounters { } } +#[derive(Clone, Debug, Serialize)] +pub struct BalloonStatisticsResponse { + #[serde(flatten)] + pub stats: virtio_devices::BalloonStatistics, + pub balloon_actual_bytes: u64, + pub balloon_target_bytes: u64, + pub balloon_total_ram_bytes: u64, +} + +pub struct VmBalloonStatistics; + +impl ApiAction for VmBalloonStatistics { + type RequestBody = (); + type ResponseBody = Option; + + fn request(&self, _: Self::RequestBody, response_sender: Sender) -> ApiRequest { + Box::new(move |vmm| { + info!("API request 
event: VmBalloonStatistics"); + let response = vmm + .vm_balloon_statistics() + .map_err(ApiError::VmBalloonStatistics) + .map(ApiResponsePayload::VmAction); + response_sender + .send(response) + .map_err(VmmError::ApiResponseSend)?; + Ok(false) + }) + } + + fn send( + &self, + api_evt: EventFd, + api_sender: Sender, + data: Self::RequestBody, + ) -> ApiResult { + get_response_body(self, api_evt, api_sender, data) + } +} + pub struct VmCreate; impl ApiAction for VmCreate { @@ -1538,3 +1583,41 @@ impl ApiAction for VmNmi { get_response_body(self, api_evt, api_sender, data) } } + +#[cfg(test)] +mod tests { + use super::*; + use virtio_devices::BalloonStatistics; + + #[test] + fn test_balloon_statistics_response_serialization() { + let response = BalloonStatisticsResponse { + stats: BalloonStatistics { + free_memory: Some(1024), + total_memory: Some(4096), + ..Default::default() + }, + balloon_actual_bytes: 2048, + balloon_target_bytes: 2048, + balloon_total_ram_bytes: 4096, + }; + + let json = serde_json::to_value(&response).unwrap(); + + // Verify flatten works: guest stats appear at top level, not nested + assert_eq!(json["free_memory"], 1024); + assert_eq!(json["total_memory"], 4096); + + // Verify new fields are present + assert_eq!(json["balloon_actual_bytes"], 2048); + assert_eq!(json["balloon_target_bytes"], 2048); + assert_eq!(json["balloon_total_ram_bytes"], 4096); + + // Verify None fields are omitted (skip_serializing_if works with flatten) + assert!(json.get("swap_in").is_none()); + assert!(json.get("major_faults").is_none()); + + // Verify no nested "stats" key + assert!(json.get("stats").is_none()); + } +} diff --git a/vmm/src/api/openapi/cloud-hypervisor.yaml b/vmm/src/api/openapi/cloud-hypervisor.yaml index 01c1b9ed9..7011dc50e 100644 --- a/vmm/src/api/openapi/cloud-hypervisor.yaml +++ b/vmm/src/api/openapi/cloud-hypervisor.yaml @@ -52,6 +52,22 @@ paths: schema: $ref: "#/components/schemas/VmCounters" + /vm.balloon-statistics: + get: + summary: Get 
balloon statistics from the VM + operationId: vm.balloon-statistics + responses: + 200: + description: The balloon statistics + content: + application/json: + schema: + $ref: '#/components/schemas/BalloonStatistics' + 404: + description: No balloon device configured + 503: + description: Balloon statistics not available + /vm.create: put: summary: Create the cloud-hypervisor Virtual Machine (VM) instance. The instance is not booted, only created. @@ -1029,6 +1045,72 @@ components: default: false description: Enable guest to report free pages. + BalloonStatistics: + type: object + properties: + swap_in: + description: Amount of memory swapped in (bytes) + type: integer + format: int64 + nullable: true + swap_out: + description: Amount of memory swapped out (bytes) + type: integer + format: int64 + nullable: true + major_faults: + description: Number of major page faults + type: integer + format: int64 + nullable: true + minor_faults: + description: Number of minor page faults + type: integer + format: int64 + nullable: true + free_memory: + description: Amount of free memory (bytes) + type: integer + format: int64 + nullable: true + total_memory: + description: Total memory available (bytes) + type: integer + format: int64 + nullable: true + available_memory: + description: Estimated available memory for new applications (bytes) + type: integer + format: int64 + nullable: true + disk_caches: + description: Amount of memory used for disk caches (bytes) + type: integer + format: int64 + nullable: true + hugetlb_allocations: + description: Number of successful hugetlb page allocations + type: integer + format: int64 + nullable: true + hugetlb_failures: + description: Number of failed hugetlb page allocations + type: integer + format: int64 + nullable: true + balloon_actual_bytes: + description: Current balloon size in bytes (guest-confirmed) + type: integer + format: int64 + balloon_target_bytes: + description: Target balloon size in bytes (host-requested) + type: 
integer + format: int64 + balloon_total_ram_bytes: + description: Total VM RAM in bytes (includes hotplugged memory) + type: integer + format: int64 + FsConfig: required: - num_queues diff --git a/vmm/src/device_manager.rs b/vmm/src/device_manager.rs index 40f27d4ff..abd0aff14 100644 --- a/vmm/src/device_manager.rs +++ b/vmm/src/device_manager.rs @@ -116,6 +116,7 @@ use vm_migration::{ use vm_virtio::{AccessPlatform, VirtioDeviceType}; use vmm_sys_util::eventfd::EventFd; +use crate::api::BalloonStatisticsResponse; use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput}; use crate::cpu::{CPU_MANAGER_ACPI_SIZE, CpuManager}; use crate::device_tree::{DeviceNode, DeviceTree}; @@ -556,6 +557,11 @@ pub enum DeviceManagerError { #[error("Missing virtio-balloon, can't proceed as expected")] MissingVirtioBalloon, + #[error("Balloon statistics feature not negotiated by guest")] + VirtioBalloonStatsNotNegotiated, + #[error("Failed to get balloon statistics")] + VirtioBalloonStatistics(#[source] virtio_devices::balloon::Error), + /// Missing virtual IOMMU device #[error("Missing virtual IOMMU device")] MissingVirtualIommu, @@ -3882,8 +3888,11 @@ impl DeviceManager { vfio_container }; - let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container) as Arc) - .map_err(DeviceManagerError::VfioCreate)?; + let vfio_device = VfioDevice::new( + &device_cfg.path, + Arc::clone(&vfio_container) as Arc, + ) + .map_err(DeviceManagerError::VfioCreate)?; if needs_dma_mapping { // Register DMA mapping in IOMMU. 
@@ -5081,6 +5090,37 @@ impl DeviceManager { Err(DeviceManagerError::MissingVirtioBalloon) } + pub fn balloon_statistics(&self) -> DeviceManagerResult { + if let Some(balloon) = &self.balloon { + let balloon_locked = balloon.lock().unwrap(); + + let (stats_state, request_evt) = balloon_locked + .stats_resources() + .ok_or(DeviceManagerError::VirtioBalloonStatsNotNegotiated)?; + + let balloon_actual_bytes = balloon_locked.get_actual(); + let balloon_target_bytes = balloon_locked.get_target(); + + // Drop the lock before the blocking stats request + drop(balloon_locked); + + let stats = + virtio_devices::balloon::request_balloon_statistics(&stats_state, &request_evt) + .map_err(DeviceManagerError::VirtioBalloonStatistics)?; + + let balloon_total_ram_bytes = self.config.lock().unwrap().memory.total_size(); + + Ok(BalloonStatisticsResponse { + stats, + balloon_actual_bytes, + balloon_target_bytes, + balloon_total_ram_bytes, + }) + } else { + Err(DeviceManagerError::MissingVirtioBalloon) + } + } + pub fn balloon_size(&self) -> u64 { if let Some(balloon) = &self.balloon { return balloon.lock().unwrap().get_actual(); diff --git a/vmm/src/lib.rs b/vmm/src/lib.rs index 1d5109d5b..78ea74fb0 100644 --- a/vmm/src/lib.rs +++ b/vmm/src/lib.rs @@ -2266,6 +2266,19 @@ impl RequestHandler for Vmm { } } + fn vm_balloon_statistics(&mut self) -> result::Result>, VmError> { + if let Some(ref mut vm) = self.vm { + let stats = vm.balloon_statistics().inspect_err(|e| { + error!("Error getting balloon statistics: {e:?}"); + })?; + serde_json::to_vec(&stats) + .map(Some) + .map_err(VmError::SerializeJson) + } else { + Err(VmError::VmNotRunning) + } + } + fn vm_power_button(&mut self) -> result::Result<(), VmError> { if let Some(ref mut vm) = self.vm { vm.power_button() diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index b32b3f5d6..e8b4930f5 100644 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -66,9 +66,7 @@ use tracer::trace_scoped; use vm_device::Bus; #[cfg(feature = "tdx")] use 
vm_memory::{Address, ByteValued, GuestMemoryRegion, ReadVolatile}; -use vm_memory::{ - Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, WriteVolatile, -}; +use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic, WriteVolatile}; use vm_migration::protocol::{MemoryRangeTable, Request, Response}; use vm_migration::{ Migratable, MigratableError, Pausable, Snapshot, Snapshottable, Transportable, snapshot_from_id, @@ -2258,6 +2256,14 @@ impl Vm { Ok(self.device_manager.lock().unwrap().counters()) } + pub fn balloon_statistics(&self) -> Result { + self.device_manager + .lock() + .unwrap() + .balloon_statistics() + .map_err(Error::DeviceManager) + } + #[cfg(feature = "tdx")] fn extract_tdvf_sections(&mut self) -> Result<(Vec, bool)> { use arch::x86_64::tdx::*;