diff --git a/block/src/lib.rs b/block/src/lib.rs index 72210302a..f6c990ce7 100644 --- a/block/src/lib.rs +++ b/block/src/lib.rs @@ -42,7 +42,7 @@ use std::{cmp, result}; #[cfg(feature = "io_uring")] use io_uring::{IoUring, Probe, opcode}; use libc::{S_IFBLK, S_IFMT, ioctl}; -use log::{error, info, warn}; +use log::{debug, error, info, warn}; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use thiserror::Error; @@ -684,6 +684,109 @@ pub fn block_io_uring_is_supported() -> bool { } } +/// Probe whether the file/device supports punch hole and zero range +pub fn probe_sparse_support(file: &File) -> bool { + let fd = file.as_raw_fd(); + + let is_block_device = { + let mut stat = std::mem::MaybeUninit::::uninit(); + // SAFETY: FFI call with valid fd and buffer + let ret = unsafe { libc::fstat(fd, stat.as_mut_ptr()) }; + if ret != 0 { + warn!( + "Failed to stat file descriptor for sparse probe: {}", + io::Error::last_os_error() + ); + return false; + } + // SAFETY: stat result is valid at this point + unsafe { (*stat.as_ptr()).st_mode & S_IFMT == S_IFBLK } + }; + + if is_block_device { + probe_block_device_sparse_support(fd) + } else { + probe_file_sparse_support(fd) + } +} + +/// Probe sparse support for a regular file using fallocate(). +fn probe_file_sparse_support(fd: libc::c_int) -> bool { + const FALLOC_FL_KEEP_SIZE: libc::c_int = 0x01; + const FALLOC_FL_PUNCH_HOLE: libc::c_int = 0x02; + const FALLOC_FL_ZERO_RANGE: libc::c_int = 0x10; + + // SAFETY: FFI call with valid fd + let file_size = unsafe { libc::lseek(fd, 0, libc::SEEK_END) }; + if file_size < 0 { + let err = io::Error::last_os_error(); + warn!("Failed to get file size for sparse probe: {err}"); + return false; + } + + // SAFETY: FFI call with valid fd, probing past EOF is safe with KEEP_SIZE + let punch_hole = + unsafe { libc::fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, file_size, 1) } + == 0; + + if !punch_hole { + let err = io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EOPNOTSUPP) { + debug!("File does not support FALLOC_FL_PUNCH_HOLE: {err}"); + } else { + debug!("PUNCH_HOLE probe returned unexpected error: {err}"); + } + } + + // SAFETY: FFI call with valid fd, probing past EOF is safe with KEEP_SIZE + let zero_range = + unsafe { libc::fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, file_size, 1) } + == 0; + + if !zero_range { + let err = io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::EOPNOTSUPP) { + debug!("File does not support FALLOC_FL_ZERO_RANGE: {err}"); + } + } + + let supported = punch_hole || zero_range; + info!( + "Probed file sparse support: punch_hole={punch_hole}, zero_range={zero_range} => {supported}" + ); + supported +} + +/// Probe sparse support for a block device using ioctls. +fn probe_block_device_sparse_support(fd: libc::c_int) -> bool { + ioctl_io_nr!(BLKDISCARD, 0x12, 119); + ioctl_io_nr!(BLKZEROOUT, 0x12, 127); + + let range: [u64; 2] = [0, 0]; + + // SAFETY: FFI call with valid fd and valid range buffer + let punch_hole = unsafe { ioctl(fd, BLKDISCARD() as _, &range) } == 0; + + if !punch_hole { + let err = io::Error::last_os_error(); + debug!("Block device BLKDISCARD probe returned: {err}"); + } + + // SAFETY: FFI call with valid fd and valid range buffer + let zero_range = unsafe { ioctl(fd, BLKZEROOUT() as _, &range) } == 0; + + if !zero_range { + let err = io::Error::last_os_error(); + debug!("Block device BLKZEROOUT probe returned: {err}"); + } + + let supported = punch_hole || zero_range; + info!( + "Probed block device sparse support: punch_hole={punch_hole}, zero_range={zero_range} => {supported}" + ); + supported +} + pub trait AsyncAdaptor { fn read_vectored_sync( &mut self, diff --git a/vmm/src/seccomp_filters.rs b/vmm/src/seccomp_filters.rs index 55e153111..bb40b99f6 100644 --- a/vmm/src/seccomp_filters.rs +++ b/vmm/src/seccomp_filters.rs @@ -108,6 +108,10 @@ mod kvm { pub const KVM_SET_NESTED_STATE: u64 = 1082175167; } +// Block device ioctls for sparse support probing (not exported by libc) +const BLKDISCARD: u64 = 0x1277; // _IO(0x12, 119) +const BLKZEROOUT: u64 = 0x127f; // _IO(0x12, 127) + // MSHV IOCTL code. This is unstable until the kernel code has been declared stable. #[cfg(feature = "mshv")] use hypervisor::mshv::mshv_ioctls::*; @@ -259,6 +263,8 @@ fn create_vmm_ioctl_seccomp_rule_common( and![Cond::new(1, ArgLen::Dword, Eq, BLKPBSZGET as _)?], and![Cond::new(1, ArgLen::Dword, Eq, BLKIOMIN as _)?], and![Cond::new(1, ArgLen::Dword, Eq, BLKIOOPT as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, BLKDISCARD as _)?], + and![Cond::new(1, ArgLen::Dword, Eq, BLKZEROOUT as _)?], and![Cond::new(1, ArgLen::Dword, Eq, FIOCLEX as _)?], and![Cond::new(1, ArgLen::Dword, Eq, FIONBIO as _)?], and![Cond::new(1, ArgLen::Dword, Eq, SIOCGIFFLAGS)?],