diff --git a/hypervisor/src/arch/x86/mod.rs b/hypervisor/src/arch/x86/mod.rs index b1b2957f6..78e4d7cc5 100644 --- a/hypervisor/src/arch/x86/mod.rs +++ b/hypervisor/src/arch/x86/mod.rs @@ -315,12 +315,19 @@ pub struct MsrEntry { pub struct XsaveState { #[serde_as(as = "[_; 1024usize]")] pub region: [u32; 1024usize], + + // extra data to support xsave2 + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub extra: Vec, } impl Default for XsaveState { fn default() -> Self { - // SAFETY: this is plain old data structure - unsafe { ::std::mem::zeroed() } + Self { + // SAFETY: this is plain old data structure + region: unsafe { std::mem::zeroed() }, + extra: Vec::new(), + } } } diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 0398f0136..259009151 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -107,11 +107,13 @@ use kvm_bindings::{ use kvm_bindings::{KVM_REG_RISCV_CORE, kvm_riscv_core}; #[cfg(feature = "tdx")] use kvm_bindings::{KVM_X86_DEFAULT_VM, KVM_X86_SW_PROTECTED_VM, KVMIO, kvm_run__bindgen_ty_1}; +#[cfg(target_arch = "x86_64")] +use kvm_bindings::{Xsave as xsave2, kvm_xsave2}; pub use kvm_ioctls::{Cap, Kvm, VcpuExit}; use thiserror::Error; use vfio_ioctls::VfioDeviceFd; #[cfg(target_arch = "x86_64")] -use vmm_sys_util::ioctl_io_nr; +use vmm_sys_util::{fam::FamStruct, ioctl_io_nr}; #[cfg(feature = "tdx")] use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_iowr_nr}; pub use {kvm_bindings, kvm_ioctls}; @@ -120,6 +122,9 @@ pub use {kvm_bindings, kvm_ioctls}; use crate::RegList; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::regs; +#[cfg(target_arch = "x86_64")] +use crate::kvm::x86_64::XsaveStateError; + #[cfg(target_arch = "x86_64")] ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); @@ -564,6 +569,17 @@ impl vm::Vm for KvmVm { .fd .create_vcpu(id as u64) .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; + + #[cfg(target_arch = "x86_64")] + // Safety: `xsave_size` will not change after vcpu 
creation because: + // 1. `xsave_size` depends on cpuid + // 2. The only factor that affects cpuid is xsave permission, obtained via + // `ARCH_GET_XCOMP_GUEST_PERM` + // 3. This permission is already acquired before vcpu creation + // Therefore, cpuid remains unchanged after vcpu creation, and so does `xsave_size`. + // + // First vCPU allocation locks the permissions of `ARCH_GET_XCOMP_GUEST_PERM`. + let xsave_size = self.fd.check_extension_int(Cap::Xsave2); let vcpu = KvmVcpu { fd, #[cfg(target_arch = "x86_64")] @@ -571,6 +587,8 @@ impl vm::Vm for KvmVm { vm_ops, #[cfg(target_arch = "x86_64")] hyperv_synic: AtomicBool::new(false), + #[cfg(target_arch = "x86_64")] + xsave_size, }; Ok(Box::new(vcpu)) } @@ -1332,6 +1350,8 @@ pub struct KvmVcpu { vm_ops: Option>, #[cfg(target_arch = "x86_64")] hyperv_synic: AtomicBool, + #[cfg(target_arch = "x86_64")] + xsave_size: i32, } /// Implementation of Vcpu trait for KVM @@ -2329,7 +2349,11 @@ impl cpu::Vcpu for KvmVcpu { let mp_state = self.get_mp_state()?.into(); let regs = self.get_regs()?; let sregs = self.get_sregs()?; - let xsave = self.get_xsave()?; + let xsave = if self.xsave_size > 0 { + self.get_xsave2()? + } else { + self.get_xsave()? + }; let xcrs = self.get_xcrs()?; let lapic_state = self.get_lapic()?; let fpu = self.get_fpu()?; @@ -2566,7 +2590,11 @@ impl cpu::Vcpu for KvmVcpu { self.set_mp_state(state.mp_state.into())?; self.set_regs(&state.regs.into())?; self.set_sregs(&state.sregs.into())?; - self.set_xsave(&state.xsave)?; + if self.xsave_size > 0 { + self.set_xsave2(&state.xsave)?; + } else { + self.set_xsave(&state.xsave)?; + } self.set_xcrs(&state.xcrs)?; self.set_lapic(&state.lapic_state)?; self.set_fpu(&state.fpu)?; @@ -2856,7 +2884,10 @@ impl KvmVcpu { /// X86 specific call that sets the vcpu's current "xsave struct". 
/// fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> { - let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into(); + let xsave: kvm_bindings::kvm_xsave = (*xsave) + .clone() + .try_into() + .map_err(|e: XsaveStateError| cpu::HypervisorCpuError::GetXsaveState(e.into()))?; // SAFETY: Here we trust the kernel not to read past the end of the kvm_xsave struct // when calling the kvm-ioctl library function. unsafe { @@ -2866,6 +2897,53 @@ impl KvmVcpu { } } + #[cfg(target_arch = "x86_64")] + /// X86 specific call that returns the vcpu's current "xsave struct" using the extended + /// xsave2 interface which supports larger state buffers (>4KB) for features like Intel AMX. + /// + /// This method requires KVM_CAP_XSAVE2 capability and uses KVM_GET_XSAVE2 ioctl. + /// The xsave buffer is allocated internally, sized from `xsave_size`, the value + /// returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2). + pub fn get_xsave2(&self) -> cpu::Result { + assert!( + self.xsave_size > 0, + "'xsave_size' must be initialized via 'KVM_CAP_XSAVE2' first" + ); + let fam_size = (self.xsave_size as usize - size_of::()) + .div_ceil(size_of::<::Entry>()); + let mut xsave = + xsave2::new(fam_size).map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?; + // SAFETY: `xsave` was allocated just above, sized from `xsave_size` + unsafe { + self.fd + .get_xsave2(&mut xsave) + .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?; + } + Ok((&xsave).into()) + } + + #[cfg(target_arch = "x86_64")] + /// X86 specific call that sets the vcpu's current "xsave struct" using the extended + /// xsave2 interface which supports larger state buffers (>4KB) for features like Intel AMX. + /// + /// This method uses KVM_SET_XSAVE ioctl but with extended buffer support when + /// KVM_CAP_XSAVE2 is available. 
+ pub fn set_xsave2(&self, xsave_state: &XsaveState) -> cpu::Result<()> { + assert!( + self.xsave_size > 0, + "'xsave_size' must be initialized via 'KVM_CAP_XSAVE2' first" + ); + let xsave = xsave_state + .to_xsave2() + .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))?; + // SAFETY: The caller guarantees that xsave contains valid data + unsafe { + self.fd + .set_xsave2(&xsave) + .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) + } + } + #[cfg(target_arch = "x86_64")] /// /// X86 specific call that returns the vcpu's current "xcrs". diff --git a/hypervisor/src/kvm/x86_64/mod.rs b/hypervisor/src/kvm/x86_64/mod.rs index 39db4a994..e338346c3 100644 --- a/hypervisor/src/kvm/x86_64/mod.rs +++ b/hypervisor/src/kvm/x86_64/mod.rs @@ -8,14 +8,17 @@ // // +use log::error; use serde::{Deserialize, Serialize}; +use thiserror::Error; /// /// Export generically-named wrappers of kvm-bindings for Unix-based platforms /// pub use { kvm_bindings::CpuId, kvm_bindings::KVM_CPUID_FLAG_SIGNIFCANT_INDEX, kvm_bindings::MsrList, - kvm_bindings::Msrs as MsrEntries, kvm_bindings::kvm_cpuid_entry2, kvm_bindings::kvm_dtable, - kvm_bindings::kvm_fpu, kvm_bindings::kvm_lapic_state, kvm_bindings::kvm_mp_state as MpState, + kvm_bindings::Msrs as MsrEntries, kvm_bindings::Xsave as xsave2, + kvm_bindings::kvm_cpuid_entry2, kvm_bindings::kvm_dtable, kvm_bindings::kvm_fpu, + kvm_bindings::kvm_lapic_state, kvm_bindings::kvm_mp_state as MpState, kvm_bindings::kvm_msr_entry, kvm_bindings::kvm_regs, kvm_bindings::kvm_segment, kvm_bindings::kvm_sregs, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_bindings::kvm_xcrs as ExtendedControlRegisters, kvm_bindings::kvm_xsave, @@ -294,17 +297,65 @@ impl From for kvm_msr_entry { } } -impl From for XsaveState { - fn from(s: kvm_xsave) -> Self { - Self { region: s.region } - } +#[derive(Error, Debug)] +pub enum XsaveStateError { + #[error("kvm_xsave extra field is not empty")] + XsaveExtraFieldNotEmpty, } -impl From for kvm_xsave { - fn 
from(s: XsaveState) -> Self { +impl From for XsaveState { + fn from(value: kvm_xsave) -> Self { + // Check if kvm_xsave struct size is larger than region size, indicating extra data exists + assert_eq!( + size_of_val(&value), + size_of_val(&value.region), + "kvm_xsave extra field is not empty" + ); Self { - region: s.region, - extra: Default::default(), + region: value.region, + extra: Vec::new(), } } } + +impl TryFrom for kvm_xsave { + type Error = XsaveStateError; + fn try_from(value: XsaveState) -> Result { + if !value.extra.is_empty() { + error!("XsaveState extra field is not empty"); + return Err(XsaveStateError::XsaveExtraFieldNotEmpty); + } + Ok(Self { + region: value.region, + extra: Default::default(), + }) + } +} + +impl From<&xsave2> for XsaveState { + fn from(xsave: &xsave2) -> Self { + // SAFETY: `xsave` is a valid reference with properly initialized FAM structure. + let region = unsafe { + let ptr = xsave.as_fam_struct_ptr(); + (*ptr).xsave.region + }; + Self { + region, + extra: xsave.as_slice().to_vec(), + } + } +} + +impl XsaveState { + pub fn to_xsave2(&self) -> Result { + let mut xsave = xsave2::new(self.extra.len())?; + // SAFETY: `xsave` was just created via `Xsave::new()` with valid allocated memory. + unsafe { + let ptr = xsave.as_mut_fam_struct_ptr(); + (*ptr).xsave.region = self.region; + } + let extra_slice = xsave.as_mut_slice(); + extra_slice.copy_from_slice(&self.extra); + Ok(xsave) + } +}