// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause // // Copyright © 2020, Microsoft Corporation // use std::any::Any; use std::collections::HashMap; #[cfg(feature = "sev_snp")] use std::num::NonZeroUsize; use std::sync::{Arc, RwLock}; #[cfg(feature = "sev_snp")] use arc_swap::ArcSwap; use mshv_bindings::*; #[cfg(target_arch = "x86_64")] use mshv_ioctls::InterruptRequest; use mshv_ioctls::{Mshv, NoDatamatch, VcpuFd, VmFd, VmType, set_registers_64}; use vfio_ioctls::VfioDeviceFd; use vm::DataMatch; #[cfg(feature = "sev_snp")] use vm_memory::bitmap::AtomicBitmap; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::regs::{ AARCH64_ARCH_TIMER_VIRT_IRQ, AARCH64_MIN_PPI_IRQ, AARCH64_PMU_IRQ, }; #[cfg(target_arch = "x86_64")] use crate::arch::emulator::PlatformEmulator; #[cfg(target_arch = "x86_64")] use crate::arch::x86::emulator::Emulator; #[cfg(target_arch = "aarch64")] use crate::mshv::aarch64::emulator; use crate::mshv::emulator::MshvEmulatorContext; use crate::vm::{self, InterruptSourceConfig, VmOps}; use crate::{HypervisorType, HypervisorVmConfig, cpu, hypervisor, vec_with_array_field}; #[cfg(feature = "sev_snp")] mod snp_constants; // x86_64 dependencies #[cfg(target_arch = "x86_64")] pub mod x86_64; // aarch64 dependencies #[cfg(target_arch = "aarch64")] pub mod aarch64; use std::os::unix::io::AsRawFd; #[cfg(target_arch = "aarch64")] use std::sync::Mutex; #[cfg(target_arch = "aarch64")] pub use aarch64::VcpuMshvState; #[cfg(target_arch = "aarch64")] use aarch64::gic::{BASE_SPI_IRQ, MshvGicV2M}; #[cfg(feature = "sev_snp")] use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; #[cfg(feature = "sev_snp")] use snp_constants::*; use vmm_sys_util::eventfd::EventFd; #[cfg(target_arch = "x86_64")] pub use x86_64::*; #[cfg(target_arch = "x86_64")] pub use x86_64::{VcpuMshvState, emulator}; /// /// Export generically-named wrappers of mshv-bindings for Unix-based platforms /// pub use { mshv_bindings::mshv_create_device as CreateDevice, mshv_bindings::mshv_device_attr as 
DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd,
};

#[cfg(target_arch = "x86_64")]
use crate::ClockData;
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::regs;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};
use crate::{CpuState, IoEventAddress, IrqRoutingEntry, MpState};

/// Number of bits a byte address is shifted right to obtain a page number
/// (`1 << 12` = 4096-byte pages).
pub const PAGE_SHIFT: usize = 12;

// ---------------------------------------------------------------------------
// Conversions between the hypervisor-agnostic wrapper enums of this crate and
// the MSHV-specific types. Wrapping (`Mshv type -> crate enum`) is infallible;
// unwrapping (`crate enum -> Mshv type`) panics if the enum carries a variant
// that belongs to another hypervisor.
// ---------------------------------------------------------------------------

#[cfg(target_arch = "x86_64")]
impl From<MshvClockData> for ClockData {
    fn from(d: MshvClockData) -> Self {
        ClockData::Mshv(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for MshvClockData {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => unreachable!("MSHV clock data is not valid"),
        }
    }
}

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters {
    fn from(s: mshv_bindings::StandardRegisters) -> Self {
        crate::StandardRegisters::Mshv(s)
    }
}

impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<mshv_user_irq_entry> for IrqRoutingEntry {
    fn from(s: mshv_user_irq_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_user_irq_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<mshv_bindings::MshvRegList> for crate::RegList {
    fn from(s: mshv_bindings::MshvRegList) -> Self {
        crate::RegList::Mshv(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::RegList> for mshv_bindings::MshvRegList {
    fn from(e: crate::RegList) -> Self {
        match e {
            crate::RegList::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<mshv_bindings::MshvVcpuInit> for crate::VcpuInit {
    fn from(s: mshv_bindings::MshvVcpuInit) -> Self {
        crate::VcpuInit::Mshv(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for mshv_bindings::MshvVcpuInit {
    fn from(e: crate::VcpuInit) -> Self {
        match e {
            crate::VcpuInit::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}

// NOTE(review): presumably one record per memory slot that has dirty-page
// logging enabled; the users of this type are outside this chunk — confirm.
struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
///
    fn get_msr_list(&self) -> hypervisor::Result<Vec<u32>> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    ///
    /// # Errors
    ///
    /// Returns `HypervisorError::HypervisorCreate` when `Mshv::new()` fails.
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }

    /// Check if the hypervisor is available
    ///
    /// Returns `Ok(true)` when `/dev/mshv` exists, `Ok(false)` when it does
    /// not, and `HypervisorError::HypervisorAvailableCheck` for any other
    /// error reported while querying the path.
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for Mshv
///
/// # Examples
///
/// ```
/// use hypervisor::mshv::MshvHypervisor;
/// # use hypervisor::HypervisorVmConfig;
/// use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm(HypervisorVmConfig::default()).expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }

    /// Create a mshv vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # extern crate hypervisor;
    /// use hypervisor::mshv::MshvHypervisor;
    /// use hypervisor::mshv::MshvVm;
    /// use hypervisor::HypervisorVmConfig;
    /// let config = HypervisorVmConfig::default();
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm(config).unwrap();
    /// ```
    fn create_vm(&self, _config: HypervisorVmConfig) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        // `mut` and the reassignment are only exercised when `sev_snp` is enabled.
        #[allow(unused_mut)]
        #[allow(unused_assignments)]
        let mut mshv_vm_type = VmType::Normal; // Create with default platform type
        #[cfg(feature = "sev_snp")]
        {
            mshv_vm_type = if _config.sev_snp_enabled {
                VmType::Snp
            } else {
                VmType::Normal
            };
        }

        let fd: VmFd = loop {
            match self.mshv.create_vm_with_type(mshv_vm_type) {
                Ok(res) => break res,
                // If the error returned is EINTR, which means the
                // ioctl has been interrupted, we have to retry as
                // this can't be considered as a regular error.
                Err(e) if e.errno() == libc::EINTR => continue,
                Err(e) => return Err(hypervisor::HypervisorError::VmCreate(e.into())),
            }
        };

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // One default-initialised entry per supported MSR index.
            let msrs: Vec<MsrEntry> = self
                .get_msr_list()?
                .iter()
                .map(|&index| MsrEntry {
                    index,
                    ..Default::default()
                })
                .collect();

            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
                #[cfg(feature = "sev_snp")]
                sev_snp_enabled: mshv_vm_type == VmType::Snp,
                #[cfg(feature = "sev_snp")]
                host_access_pages: ArcSwap::new(
                    AtomicBitmap::new(
                        _config.mem_size as usize,
                        NonZeroUsize::new(HV_PAGE_SIZE).unwrap(),
                    )
                    .into(),
                ),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the supported CpuID
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let mut cpuid = Vec::new();
        let functions: [u32; 2] = [0x1, 0xb];

        for function in functions {
            cpuid.push(CpuIdEntry {
                function,
                ..Default::default()
            });
        }
        Ok(cpuid)
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        // TODO: Using HV_MAXIMUM_PROCESSORS would be better
        // but the ioctl API is limited to u8
        256
    }

    /// Number of hardware breakpoints available for guest debugging
    /// (always 0 in this implementation).
    fn get_guest_debug_hw_bps(&self) -> usize {
        0
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by MSHV.
/// fn get_host_ipa_limit(&self) -> i32 { let host_ipa = self.mshv.get_host_partition_property( hv_partition_property_code_HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH, ); match host_ipa { Ok(ipa) => ipa.try_into().unwrap(), Err(e) => { panic!("Failed to get host IPA limit: {e:?}"); } } } } #[cfg(feature = "sev_snp")] struct Ghcb(*mut svm_ghcb_base); #[cfg(feature = "sev_snp")] // SAFETY: struct is based on GHCB page in the hypervisor, // safe to Send across threads unsafe impl Send for Ghcb {} #[cfg(feature = "sev_snp")] // SAFETY: struct is based on GHCB page in the hypervisor, // safe to Sync across threads as this is only required for Vcpu trait // functionally not used anyway unsafe impl Sync for Ghcb {} /// Vcpu struct for Microsoft Hypervisor #[allow(dead_code)] pub struct MshvVcpu { fd: VcpuFd, vp_index: u8, #[cfg(target_arch = "x86_64")] cpuid: Vec, #[cfg(target_arch = "x86_64")] msrs: Vec, vm_ops: Option>, vm_fd: Arc, #[cfg(feature = "sev_snp")] ghcb: Option, #[cfg(feature = "sev_snp")] host_access_pages: ArcSwap, } /// Implementation of Vcpu trait for Microsoft Hypervisor /// /// # Examples /// /// ``` /// use hypervisor::mshv::MshvHypervisor; /// use hypervisor::HypervisorVmConfig; /// use std::sync::Arc; /// let mshv = MshvHypervisor::new().unwrap(); /// let hypervisor = Arc::new(mshv); /// let vm = hypervisor.create_vm(HypervisorVmConfig::default()).expect("new VM fd creation failed"); /// let vcpu = vm.create_vcpu(0, None).unwrap(); /// ``` impl cpu::Vcpu for MshvVcpu { /// /// Returns StandardRegisters with default value set /// fn create_standard_regs(&self) -> crate::StandardRegisters { mshv_bindings::StandardRegisters::default().into() } /// /// Returns the vCPU general purpose registers. /// fn get_regs(&self) -> cpu::Result { Ok(self .fd .get_regs() .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))? .into()) } /// /// Sets the vCPU general purpose registers. 
///
    fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
        // Unwrap the hypervisor-agnostic enum into the MSHV register struct.
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    /// The first `n` entries of `msrs` are overwritten with the values read
    /// back, where `n` is the count returned by the underlying call; `n` is
    /// also the return value.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
/// fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result { let mshv_msrs: Vec = msrs.iter().map(|e| (*e).into()).collect(); let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap(); self.fd .set_msrs(&mshv_msrs) .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) } #[cfg(target_arch = "x86_64")] /// /// X86 specific call to enable HyperV SynIC /// fn enable_hyperv_synic(&self) -> cpu::Result<()> { /* We always have SynIC enabled on MSHV */ Ok(()) } #[allow(non_upper_case_globals)] fn run(&mut self) -> std::result::Result { match self.fd.run() { Ok(x) => match x.header.message_type { hv_message_type_HVMSG_X64_HALT => { debug!("HALT"); Ok(cpu::VmExit::Reset) } #[cfg(target_arch = "aarch64")] hv_message_type_HVMSG_ARM64_RESET_INTERCEPT => { let reset_msg = x.to_reset_intercept_msg().unwrap(); match reset_msg.reset_type { hv_arm64_reset_type_HV_ARM64_RESET_TYPE_REBOOT => Ok(cpu::VmExit::Reset), hv_arm64_reset_type_HV_ARM64_RESET_TYPE_POWER_OFF => { Ok(cpu::VmExit::Shutdown) } _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( "Unhandled VCPU exit (RESET_INTERCEPT): reset type: {:?}", reset_msg.reset_type ))), } } hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => { warn!("TRIPLE FAULT"); Ok(cpu::VmExit::Shutdown) } #[cfg(target_arch = "x86_64")] hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => { let info = x.to_ioport_info().unwrap(); let access_info = info.access_info; // SAFETY: access_info is valid, otherwise we won't be here let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize; let is_write = info.header.intercept_access_type == 1; let port = info.port_number; let mut data: [u8; 4] = [0; 4]; let mut ret_rax = info.rax; /* * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports. * * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402 * under the "fwdebug" feature flag. But that feature is not enabled by default * and is considered legacy. * * OVMF unconditionally pokes these IO ports with string IO. 
* * Instead of trying to implement string IO support now which does not do much * now, skip those ports explicitly to avoid panicking. * * Proper string IO support can be added once we gain the ability to translate * guest virtual addresses to guest physical addresses on MSHV. */ match port { 0x402 | 0x510 | 0x511 | 0x514 => { self.advance_rip_update_rax(&info, ret_rax)?; return Ok(cpu::VmExit::Ignore); } _ => {} } assert!( // SAFETY: access_info is valid, otherwise we won't be here (unsafe { access_info.__bindgen_anon_1.string_op() } != 1), "String IN/OUT not supported" ); assert!( // SAFETY: access_info is valid, otherwise we won't be here (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1), "Rep IN/OUT not supported" ); if is_write { let data = (info.rax as u32).to_le_bytes(); if let Some(vm_ops) = &self.vm_ops { vm_ops .pio_write(port.into(), &data[0..len]) .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; } } else { if let Some(vm_ops) = &self.vm_ops { vm_ops .pio_read(port.into(), &mut data[0..len]) .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; } let v = u32::from_le_bytes(data); /* Preserve high bits in EAX but clear out high bits in RAX */ let mask = 0xffffffff >> (32 - len * 8); let eax = (info.rax as u32 & !mask) | (v & mask); ret_rax = eax as u64; } self.advance_rip_update_rax(&info, ret_rax)?; Ok(cpu::VmExit::Ignore) } #[cfg(target_arch = "aarch64")] hv_message_type_HVMSG_UNMAPPED_GPA => { let info = x.to_memory_info().unwrap(); let gva = info.guest_virtual_address; let gpa = info.guest_physical_address; debug!("Unmapped GPA exit: GVA {gva:x} GPA {gpa:x}"); let context = MshvEmulatorContext { vcpu: self, map: (gva, gpa), syndrome: info.syndrome, instruction_bytes: info.instruction_bytes, instruction_byte_count: info.instruction_byte_count, // SAFETY: Accessing a union element from bindgen generated bindings. 
interruption_pending: unsafe { info.header .execution_state .__bindgen_anon_1 .interruption_pending() != 0 }, pc: info.header.pc, }; let mut emulator = emulator::Emulator::new(context); emulator .emulate() .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; Ok(cpu::VmExit::Ignore) } #[cfg(target_arch = "x86_64")] msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA | hv_message_type_HVMSG_GPA_INTERCEPT) => { let info = x.to_memory_info().unwrap(); let insn_len = info.instruction_byte_count as usize; let gva = info.guest_virtual_address; let gpa = info.guest_physical_address; debug!("Exit ({msg_type:?}) GVA {gva:x} GPA {gpa:x}"); let mut context = MshvEmulatorContext { vcpu: self, map: (gva, gpa), }; // Create a new emulator. let mut emul = Emulator::new(&mut context); // Emulate the trapped instruction, and only the first one. let new_state = emul .emulate_first_insn( self.vp_index as usize, &info.instruction_bytes[..insn_len], ) .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; // Set CPU state back. 
context .set_cpu_state(self.vp_index as usize, new_state) .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; Ok(cpu::VmExit::Ignore) } #[cfg(feature = "sev_snp")] hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => { let info = x.to_gpa_attribute_info().unwrap(); let host_vis = info.__bindgen_anon_1.host_visibility(); if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE { warn!("Ignored attribute intercept with full host visibility"); return Ok(cpu::VmExit::Ignore); } let num_ranges = info.__bindgen_anon_1.range_count(); assert!(num_ranges >= 1); if num_ranges > 1 { return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {num_ranges:?}" ))); } // TODO: we could also deny the request with HvCallCompleteIntercept let mut gpas = Vec::new(); let ranges = info.ranges; let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap(); debug!("Releasing pages: gfn_start: {gfn_start:x?}, gfn_count: {gfn_count:?}"); let gpa_start = gfn_start * HV_PAGE_SIZE as u64; for i in 0..gfn_count { gpas.push(gpa_start + i * HV_PAGE_SIZE as u64); } let mut gpa_list = vec_with_array_field::(gpas.len()); gpa_list[0].page_count = gpas.len() as u64; gpa_list[0].flags = 0; if host_vis & HV_MAP_GPA_READABLE != 0 { gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE; } if host_vis & HV_MAP_GPA_WRITABLE != 0 { gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE; } // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold // everything from gpas. 
unsafe { let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len()); gpas_slice.copy_from_slice(gpas.as_slice()); } self.vm_fd .modify_gpa_host_access(&gpa_list[0]) .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!( "Unhandled VCPU exit: attribute intercept - couldn't modify host access {e}" )))?; // Guest is revoking the shared access, so we need to update the bitmap self.host_access_pages.rcu(|_bitmap| { let bm = self.host_access_pages.load().as_ref().clone(); bm.reset_addr_range(gpa_start as usize, gfn_count as usize); bm }); Ok(cpu::VmExit::Ignore) } #[cfg(target_arch = "x86_64")] hv_message_type_HVMSG_UNACCEPTED_GPA => { let info = x.to_memory_info().unwrap(); let gva = info.guest_virtual_address; let gpa = info.guest_physical_address; Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( "Unhandled VCPU exit: Unaccepted GPA({gpa:x}) found at GVA({gva:x})", ))) } #[cfg(target_arch = "x86_64")] hv_message_type_HVMSG_X64_CPUID_INTERCEPT => { let info = x.to_cpuid_info().unwrap(); debug!("cpuid eax: {:x}", { info.rax }); Ok(cpu::VmExit::Ignore) } #[cfg(target_arch = "x86_64")] hv_message_type_HVMSG_X64_MSR_INTERCEPT => { let info = x.to_msr_info().unwrap(); if info.header.intercept_access_type == 0 { debug!("msr read: {:x}", { info.msr_number }); } else { debug!("msr write: {:x}", { info.msr_number }); } Ok(cpu::VmExit::Ignore) } #[cfg(target_arch = "x86_64")] hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => { //TODO: Handler for VMCALL here. let info = x.to_exception_info().unwrap(); debug!("Exception Info {:?}", { info.exception_vector }); Ok(cpu::VmExit::Ignore) } #[cfg(target_arch = "x86_64")] hv_message_type_HVMSG_X64_APIC_EOI => { let info = x.to_apic_eoi_info().unwrap(); // The kernel should dispatch the EOI to the correct thread. // Check the VP index is the same as the one we have. assert!(info.vp_index == self.vp_index as u32); // The interrupt vector in info is u32, but x86 only supports 256 vectors. 
// There is no good way to recover from this if the hypervisor messes around. // Just unwrap. Ok(cpu::VmExit::IoapicEoi( info.interrupt_vector.try_into().unwrap(), )) } #[cfg(feature = "sev_snp")] hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => { let info = x.to_vmg_intercept_info().unwrap(); let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH; let ghcb_msr = svm_ghcb_msr { as_uint64: info.ghcb_msr, }; // Safe to use unwrap, for sev_snp guest we already have the // GHCB pointer wrapped in the option, otherwise this place is not reached. let ghcb = self.ghcb.as_ref().unwrap().0; // SAFETY: Accessing a union element from bindgen generated bindings. let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 }; // Sanity check on the header fields before handling other operations. assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8); match ghcb_op { GHCB_INFO_HYP_FEATURE_REQUEST => { // Pre-condition: GHCB data must be zero assert!(ghcb_data == 0); let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64; // Indicate support for basic SEV-SNP features ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64; // Indicate support for SEV-SNP AP creation ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION << GHCB_INFO_BIT_WIDTH) as u64; debug!( "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {ghcb_response:0x}" ); let arr_reg_name_value = [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)]; set_registers_64!(self.fd, arr_reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; } GHCB_INFO_REGISTER_REQUEST => { let mut ghcb_gpa = hv_x64_register_sev_ghcb::default(); // Disable the previously used GHCB page. self.disable_prev_ghcb_page()?; // SAFETY: Accessing a union element from bindgen generated bindings. 
unsafe { ghcb_gpa.__bindgen_anon_1.set_enabled(1); ghcb_gpa .__bindgen_anon_1 .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number()); } // SAFETY: Accessing a union element from bindgen generated bindings. let reg_name_value = unsafe { [( hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, ghcb_gpa.as_uint64, )] }; set_registers_64!(self.fd, reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; let mut resp_ghcb_msr = svm_ghcb_msr::default(); // SAFETY: Accessing a union element from bindgen generated bindings. unsafe { resp_ghcb_msr .__bindgen_anon_2 .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64); resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number( ghcb_msr.__bindgen_anon_2.gpa_page_number(), ); debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64); } // SAFETY: Accessing a union element from bindgen generated bindings. let reg_name_value = unsafe { [( hv_register_name_HV_X64_REGISTER_GHCB, resp_ghcb_msr.as_uint64, )] }; set_registers_64!(self.fd, reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; } GHCB_INFO_SEV_INFO_REQUEST => { let sev_cpuid_function = 0x8000_001F; let cpu_leaf = self .fd .get_cpuid_values(sev_cpuid_function, 0, 0, 0) .unwrap(); let ebx = cpu_leaf[1]; // First 6-byte of EBX represents page table encryption bit number let pbit_encryption = (ebx & 0x3f) as u8; let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64; // GHCBData[63:48] specifies the maximum GHCB protocol version supported ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48; // GHCBData[47:32] specifies the minimum GHCB protocol version supported ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32; // GHCBData[31:24] specifies the SEV page table encryption bit number. 
ghcb_response |= (pbit_encryption as u64) << 24; let arr_reg_name_value = [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)]; set_registers_64!(self.fd, arr_reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; } GHCB_INFO_NORMAL => { let exit_code = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32; match exit_code { SVM_EXITCODE_HV_DOORBELL_PAGE => { let exit_info1 = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32; match exit_info1 { SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => { // Hypervisor does not have any preference for doorbell GPA. let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF; set_svm_field_u64_ptr!( ghcb, exit_info2, preferred_doorbell_gpa ); } SVM_NAE_HV_DOORBELL_PAGE_SET => { let exit_info2 = info .__bindgen_anon_2 .__bindgen_anon_1 .sw_exit_info2; let mut ghcb_doorbell_gpa = hv_x64_register_sev_hv_doorbell::default(); // SAFETY: Accessing a union element from bindgen generated bindings. unsafe { ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1); ghcb_doorbell_gpa .__bindgen_anon_1 .set_page_number(exit_info2 >> PAGE_SHIFT); } // SAFETY: Accessing a union element from bindgen generated bindings. let reg_names = unsafe { [( hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA, ghcb_doorbell_gpa.as_uint64, )] }; set_registers_64!(self.fd, reg_names).map_err(|e| { cpu::HypervisorCpuError::SetRegister(e.into()) })?; set_svm_field_u64_ptr!(ghcb, exit_info2, exit_info2); // Clear the SW_EXIT_INFO1 register to indicate no error self.clear_swexit_info1()?; } SVM_NAE_HV_DOORBELL_PAGE_QUERY => { let mut reg_assocs = [ hv_register_assoc { name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA, ..Default::default() } ]; self.fd.get_reg(&mut reg_assocs).unwrap(); // SAFETY: Accessing a union element from bindgen generated bindings. 
let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 }; set_svm_field_u64_ptr!(ghcb, exit_info2, doorbell_gpa); // Clear the SW_EXIT_INFO1 register to indicate no error self.clear_swexit_info1()?; } SVM_NAE_HV_DOORBELL_PAGE_CLEAR => { set_svm_field_u64_ptr!(ghcb, exit_info2, 0); } _ => { panic!( "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {exit_info1:0x}" ); } } } SVM_EXITCODE_IOIO_PROT => { let exit_info1 = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32; let port_info = hv_sev_vmgexit_port_info { as_uint32: exit_info1, }; let port = // SAFETY: Accessing a union element from bindgen generated bindings. unsafe { port_info.__bindgen_anon_1.intercepted_port() }; let mut len = 4; // SAFETY: Accessing a union element from bindgen generated bindings. unsafe { if port_info.__bindgen_anon_1.operand_size_16bit() == 1 { len = 2; } else if port_info.__bindgen_anon_1.operand_size_8bit() == 1 { len = 1; } } let is_write = // SAFETY: Accessing a union element from bindgen generated bindings. unsafe { port_info.__bindgen_anon_1.access_type() == 0 }; // SAFETY: Accessing the field from a mapped address let mut data = unsafe { (*ghcb).rax.to_le_bytes() }; if is_write { if let Some(vm_ops) = &self.vm_ops { vm_ops.pio_write(port.into(), &data[..len]).map_err( |e| cpu::HypervisorCpuError::RunVcpu(e.into()), )?; } } else { if let Some(vm_ops) = &self.vm_ops { vm_ops .pio_read(port.into(), &mut data[..len]) .map_err(|e| { cpu::HypervisorCpuError::RunVcpu(e.into()) })?; } set_svm_field_u64_ptr!(ghcb, rax, u64::from_le_bytes(data)); } // Clear the SW_EXIT_INFO1 register to indicate no error self.clear_swexit_info1()?; } SVM_EXITCODE_MMIO_READ => { let src_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; let data_len = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2 as usize; // Sanity check to make sure data len is within supported range. 
assert!(data_len <= 0x8); let mut data: Vec = vec![0; data_len]; if let Some(vm_ops) = &self.vm_ops { vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| { cpu::HypervisorCpuError::RunVcpu(e.into()) })?; } // Copy the data to the shared buffer of the GHCB page let mut buffer_data = [0; 8]; buffer_data[..data_len].copy_from_slice(&data[..data_len]); // SAFETY: Updating the value of mapped area unsafe { (*ghcb).shared[0] = u64::from_le_bytes(buffer_data) }; // Clear the SW_EXIT_INFO1 register to indicate no error self.clear_swexit_info1()?; } SVM_EXITCODE_MMIO_WRITE => { let dst_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; let data_len = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2 as usize; // Sanity check to make sure data len is within supported range. assert!(data_len <= 0x8); let mut data = vec![0; data_len]; // SAFETY: Accessing data from a mapped address let bytes_shared_ghcb = unsafe { (*ghcb).shared[0].to_le_bytes() }; data.copy_from_slice(&bytes_shared_ghcb[..data_len]); if let Some(vm_ops) = &self.vm_ops { vm_ops.mmio_write(dst_gpa, &data).map_err(|e| { cpu::HypervisorCpuError::RunVcpu(e.into()) })?; } // Clear the SW_EXIT_INFO1 register to indicate no error self.clear_swexit_info1()?; } SVM_EXITCODE_SNP_GUEST_REQUEST | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => { if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST { info!("Fetching extended guest request is not supported"); // We don't support extended guest request, so we just write empty data. // This matches the behavior of KVM in Linux 6.11. // Read RBX from the GHCB. // SAFETY: Accessing data from a mapped address let data_gpa = unsafe { (*ghcb).rax }; // SAFETY: Accessing data from a mapped address let data_npages = unsafe { (*ghcb).rbx }; if data_npages > 0 { // The certificates are terminated by 24 zero bytes. 
// TODO: Need to check if data_gpa is the address of the shared buffer in the GHCB page // in that case we should clear the shared buffer(24 bytes) self.gpa_write(data_gpa, &[0; 24])?; } } let req_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; let rsp_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2; let mshv_psp_req = mshv_issue_psp_guest_request { req_gpa, rsp_gpa }; self.vm_fd .psp_issue_guest_request(&mshv_psp_req) .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; debug!( "SNP guest request: req_gpa {req_gpa:0x} rsp_gpa {rsp_gpa:0x}" ); set_svm_field_u64_ptr!(ghcb, exit_info2, 0); } SVM_EXITCODE_SNP_AP_CREATION => { let vmsa_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2; let apic_id = info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32; debug!( "SNP AP CREATE REQUEST with VMSA GPA {vmsa_gpa:0x}, and APIC ID {apic_id:?}" ); let mshv_ap_create_req = mshv_sev_snp_ap_create { vp_id: apic_id, vmsa_gpa, }; self.vm_fd .sev_snp_ap_create(&mshv_ap_create_req) .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; // Clear the SW_EXIT_INFO1 register to indicate no error self.clear_swexit_info1()?; } _ => { panic!("GHCB_INFO_NORMAL: Unhandled exit code: {exit_code:0x}") } } } _ => panic!("Unsupported VMGEXIT operation: {ghcb_op:0x}"), } Ok(cpu::VmExit::Ignore) } exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( "Unhandled VCPU exit {exit:?}" ))), }, Err(e) => match e.errno() { libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( "VCPU error {e:?}" ))), }, } } #[cfg(target_arch = "aarch64")] fn init_pmu(&self, _irq: u32) -> cpu::Result<()> { Ok(()) } #[cfg(target_arch = "aarch64")] fn has_pmu_support(&self) -> bool { true } #[cfg(target_arch = "aarch64")] fn setup_regs(&self, cpu_id: u32, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { let arr_reg_name_value = [( hv_register_name_HV_ARM64_REGISTER_PSTATE, regs::PSTATE_FAULT_BITS_64, )]; 
set_registers_64!(self.fd, arr_reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; if cpu_id == 0 { let arr_reg_name_value = [ (hv_register_name_HV_ARM64_REGISTER_PC, boot_ip), (hv_register_name_HV_ARM64_REGISTER_X0, fdt_start), ]; set_registers_64!(self.fd, arr_reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; } Ok(()) } #[cfg(target_arch = "aarch64")] fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result { let mshv_reg = self.sys_reg_to_mshv_reg(sys_reg)?; let mut reg_assocs = [hv_register_assoc { name: mshv_reg, ..Default::default() }]; self.fd .get_reg(&mut reg_assocs) .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))?; // SAFETY: Accessing a union element from bindgen generated definition. let res = unsafe { reg_assocs[0].value.reg64 }; Ok(res) } #[cfg(target_arch = "aarch64")] fn get_reg_list(&self, _reg_list: &mut crate::RegList) -> cpu::Result<()> { unimplemented!() } #[cfg(target_arch = "aarch64")] fn vcpu_init(&self, _kvi: &crate::VcpuInit) -> cpu::Result<()> { Ok(()) } #[cfg(target_arch = "aarch64")] fn vcpu_finalize(&self, _feature: i32) -> cpu::Result<()> { Ok(()) } #[cfg(target_arch = "aarch64")] fn vcpu_get_finalized_features(&self) -> i32 { 0 } #[cfg(target_arch = "aarch64")] fn vcpu_set_processor_features( &self, _vm: &dyn crate::Vm, _kvi: &mut crate::VcpuInit, _id: u32, ) -> cpu::Result<()> { Ok(()) } #[cfg(target_arch = "aarch64")] fn create_vcpu_init(&self) -> crate::VcpuInit { MshvVcpuInit {}.into() } #[cfg(target_arch = "x86_64")] /// /// X86 specific call to setup the CPUID registers. 
///
    /// Each generic `CpuIdEntry` is converted with `.into()`, the converted entries are
    /// wrapped via `from_entries`, and the result is handed to
    /// `register_intercept_result_cpuid` on the vCPU fd. Both failure paths are reported
    /// as `HypervisorCpuError::SetCpuid`.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec = cpuid.iter().map(|e| (*e).into()).collect();
        // The underlying conversion error is discarded; only a fixed message is reported.
        let mshv_cpuid = ::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .register_intercept_result_cpuid(&mshv_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    /// Returns a clone of the `cpuid` vector cached on this vCPU object; no hypervisor call
    /// is made and `_num_entries` is ignored.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result> {
        Ok(self.cpuid.clone())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve cpuid leaf
    ///
    /// Forwards `function`, `index`, `xfem` and `xss` unchanged to the vCPU fd and returns
    /// the four 32-bit values it reports. Failures are wrapped in
    /// `HypervisorCpuError::GetCpuidVales`.
    ///
    fn get_cpuid_values(
        &self,
        function: u32,
        index: u32,
        xfem: u64,
        xss: u64,
    ) -> cpu::Result<[u32; 4]> {
        self.fd
            .get_cpuid_values(function, index, xfem, xss)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    /// The MSHV LAPIC state read from the vCPU fd is converted into the return type with
    /// `.into()`. Failures are wrapped in `HypervisorCpuError::GetlapicState`.
    ///
    fn get_lapic(&self) -> cpu::Result {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    /// Failures are wrapped in `HypervisorCpuError::SetLapicState`.
    ///
    fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
        // Clone the caller's state so it can be converted by value into the MSHV type.
        let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
        self.fd
            .set_lapic(&lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    /// Always returns `MpState::Mshv`; nothing is queried from the hypervisor.
    ///
    fn get_mp_state(&self) -> cpu::Result {
        Ok(MpState::Mshv)
    }

    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    /// This is a no-op: `_mp_state` is ignored and `Ok(())` is returned.
    ///
    fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set CPU state for x86_64 guest.
///
    /// The generic `CpuState` is cloned and converted into a `VcpuMshvState`, then applied
    /// piece by piece: MSRs, vCPU events, standard and special registers, FPU, XCRs, misc
    /// registers (first vCPU only), debug registers and finally the VP state components.
    /// The first failing step aborts the restore and its error is returned.
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // Mutable because `set_all_vp_state_components` takes `&mut state.vp_states`.
        let mut state: VcpuMshvState = state.clone().into();
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        // These registers are global and only need to be set for the first VCPU,
        // as Microsoft Hypervisor allows setting this register for only one VCPU
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?;
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        self.fd
            .set_all_vp_state_components(&mut state.vp_states)
            .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Set CPU state for aarch64 guest.
    ///
    /// # Panics
    ///
    /// Not implemented: calling this always panics via `unimplemented!()`.
    ///
    fn set_state(&self, _state: &CpuState) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get CPU State for x86_64 guest
    ///
    /// Collects standard/special registers, XCRs, FPU state, pending vCPU events, the MSRs
    /// listed in `self.msrs`, misc and debug registers and all VP state components, and
    /// packs them into a `VcpuMshvState` that is converted into the return type.
    ///
    fn state(&self) -> cpu::Result {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        // Start from a copy of the vCPU's MSR list and let `get_msrs` fill in the values.
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
        let vp_states = self
            .fd
            .get_all_vp_state_components()
            .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?;

        Ok(VcpuMshvState {
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            xcrs,
            dbg,
            misc,
            vp_states,
        }
        .into())
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Get CPU state for aarch64 guest.
///
    /// # Panics
    ///
    /// Not implemented: calling this always panics via `unimplemented!()`.
    ///
    fn state(&self) -> cpu::Result {
        unimplemented!()
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    /// Returns the translated guest physical address together with the `result_code`
    /// reported by the hypervisor for the translation. The ioctl itself failing is reported
    /// as `HypervisorCpuError::TranslateVirtualAddress`; the result code is returned to the
    /// caller uninterpreted.
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    /// The list is fixed: the SYSENTER, STAR/CSTAR/LSTAR, KERNEL_GS_BASE and SYSCALL_MASK
    /// entries are built with `msr!`, and `MSR_MTRRdefType` is built with `msr_data!` carrying
    /// `MTRR_ENABLE | MTRR_MEM_TYPE_WB`.
    ///
    fn boot_msr_entries(&self) -> Vec {
        use crate::arch::x86::{MTRR_ENABLE, MTRR_MEM_TYPE_WB, msr_index};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    ///
    /// Sets the AMD specific vcpu's sev control register.
    ///
    /// The register value is derived from `vmsa_pfn` by `snp::get_sev_control_register`.
    /// Failures are wrapped in `HypervisorCpuError::SetSevControlRegister`.
    ///
    #[cfg(feature = "sev_snp")]
    fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> {
        let sev_control_reg = snp::get_sev_control_register(vmsa_pfn);

        self.fd
            .set_sev_control_register(sev_control_reg)
            .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    /// Requests a virtual interrupt of type NMI through the VM fd, using this vCPU's
    /// `vp_index` as the destination APIC id. Failures are wrapped in
    /// `HypervisorCpuError::Nmi`.
    ///
    fn nmi(&self) -> cpu::Result<()> {
        let cfg = InterruptRequest {
            interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI,
            // NOTE(review): assumes the APIC id equals the VP index — confirm.
            apic_id: self.vp_index as u64,
            level_triggered: false,
            vector: 0,
            logical_destination_mode: false,
            long_mode: false,
        };
        self.vm_fd
            .request_virtual_interrupt(&cfg)
            .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into()))
    }

    ///
    /// Set the GICR base address for the vcpu.
/// #[cfg(target_arch = "aarch64")] fn set_gic_redistributor_addr(&self, gicr_base_addr: u64) -> cpu::Result<()> { debug!( "Setting GICR base address to: {:#x}, for vp_index: {:?}", gicr_base_addr, self.vp_index ); let arr_reg_name_value = [( hv_register_name_HV_ARM64_REGISTER_GICR_BASE_GPA, gicr_base_addr, )]; set_registers_64!(self.fd, arr_reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; Ok(()) } } impl MshvVcpu { /// /// Deactivate previously used GHCB page. /// #[cfg(feature = "sev_snp")] fn disable_prev_ghcb_page(&self) -> cpu::Result<()> { let mut reg_assocs = [hv_register_assoc { name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, ..Default::default() }]; self.fd.get_reg(&mut reg_assocs).unwrap(); // SAFETY: Accessing a union element from bindgen generated bindings. let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 }; debug!("Prev GHCB GPA is {prev_ghcb_gpa:x}"); let mut ghcb_gpa = hv_x64_register_sev_ghcb::default(); // SAFETY: Accessing a union element from bindgen generated bindings. unsafe { ghcb_gpa.__bindgen_anon_1.set_enabled(0); ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa); } // SAFETY: Accessing a union element from bindgen generated bindings. let reg_name_value = unsafe { [( hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, ghcb_gpa.as_uint64, )] }; set_registers_64!(self.fd, reg_name_value) .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; Ok(()) } #[cfg(target_arch = "x86_64")] /// /// X86 specific call that returns the vcpu's current "xcrs". /// fn get_xcrs(&self) -> cpu::Result { self.fd .get_xcrs() .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into())) } #[cfg(target_arch = "x86_64")] /// /// X86 specific call that sets the vcpu's current "xcrs". 
///
    /// Failures are wrapped in `HypervisorCpuError::SetXcsr`.
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    /// Failures are wrapped in `HypervisorCpuError::GetVcpuEvents`.
    ///
    fn get_vcpu_events(&self) -> cpu::Result {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    /// Failures are wrapped in `HypervisorCpuError::SetVcpuEvents`.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }

    ///
    /// Clear SW_EXIT_INFO1 register for SEV-SNP guests.
    ///
    /// Writes zero into the `exit_info1` field of the mapped GHCB page and returns
    /// `VmExit::Ignore`.
    ///
    /// # Panics
    ///
    /// Panics if `self.ghcb` is `None`.
    ///
    #[cfg(feature = "sev_snp")]
    fn clear_swexit_info1(&self) -> std::result::Result {
        // Clear the SW_EXIT_INFO1 register to indicate no error
        // Safe to use unwrap, for sev_snp guest we already have the
        // GHCB pointer wrapped in the option, otherwise this place is not reached.
        let ghcb = self.ghcb.as_ref().unwrap().0;
        set_svm_field_u64_ptr!(ghcb, exit_info1, 0);

        Ok(cpu::VmExit::Ignore)
    }

    ///
    /// Writes `data` to guest physical memory starting at `gpa`.
    ///
    /// The data is split into chunks of at most `HV_READ_WRITE_GPA_MAX_SIZE` bytes and one
    /// `gpa_write` call is issued per chunk. The first failing chunk aborts the write and is
    /// reported as `HypervisorCpuError::GpaWrite`; earlier chunks stay written.
    ///
    #[cfg(feature = "sev_snp")]
    fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> {
        // Pair every chunk with its destination address: the address iterator advances by
        // the chunk size, so chunk `i` lands at `gpa + i * HV_READ_WRITE_GPA_MAX_SIZE`.
        for (gpa, chunk) in (gpa..)
            .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
            .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize))
        {
            // Fixed-size, zero-padded staging buffer; `byte_count` tells the hypervisor how
            // many leading bytes are valid.
            let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize];
            data[..chunk.len()].copy_from_slice(chunk);

            let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
                base_gpa: gpa,
                byte_count: chunk.len() as u32,
                data,
                ..Default::default()
            };
            self.fd
                .gpa_write(&mut rw_gpa_arg)
                .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?;
        }

        Ok(())
    }

    ///
    /// Completes an intercepted port I/O instruction by advancing RIP past it and storing
    /// `ret_rax` into RAX.
    ///
    /// The new RIP is `info.header.rip` plus the instruction length taken from the
    /// intercept header.
    ///
    #[cfg(target_arch = "x86_64")]
    fn advance_rip_update_rax(
        &self,
        info: &hv_x64_io_port_intercept_message,
        ret_rax: u64,
    ) -> cpu::Result<()> {
        let insn_len = info.header.instruction_length() as u64;
        /*
         * Advance RIP and update RAX
         * First, try to update the registers using VP register page
         * which is mapped into user space for faster access.
         * If the register page is not available, fall back to regular
         * IOCTL to update the registers.
         */
        if let Some(reg_page) = self.fd.get_vp_reg_page() {
            let vp_reg_page = reg_page.0;
            set_gp_regs_field_ptr!(vp_reg_page, rax, ret_rax);
            // SAFETY: access raw pointer to reg page, access union fields
            unsafe {
                (*vp_reg_page).__bindgen_anon_1.__bindgen_anon_1.rip = info.header.rip + insn_len;
                // Mark both register classes dirty so the updated RIP and RAX are picked up.
                (*vp_reg_page).dirty |= 1 << HV_X64_REGISTER_CLASS_IP;
                (*vp_reg_page).dirty |= 1 << HV_X64_REGISTER_CLASS_GENERAL;
            }
        } else {
            let arr_reg_name_value = [
                (
                    hv_register_name_HV_X64_REGISTER_RIP,
                    info.header.rip + insn_len,
                ),
                (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
            ];
            set_registers_64!(self.fd, arr_reg_name_value)
                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
        }
        Ok(())
    }

    ///
    /// Maps an aarch64 system register id to the corresponding MSHV register name.
    ///
    /// Only `MPIDR_EL1` is supported; any other id yields
    /// `HypervisorCpuError::UnsupportedSysReg`.
    ///
    #[cfg(target_arch = "aarch64")]
    fn sys_reg_to_mshv_reg(&self, sys_regs: u32) -> cpu::Result {
        match sys_regs {
            regs::MPIDR_EL1 => Ok(hv_register_name_HV_ARM64_REGISTER_MPIDR_EL1),
            _ => Err(cpu::HypervisorCpuError::UnsupportedSysReg(sys_regs)),
        }
    }
}

/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    // VM file descriptor; cloned into every vCPU created by `create_vcpu`.
    fd: Arc,
    // MSR list cloned into every vCPU created by `create_vcpu`.
    #[cfg(target_arch = "x86_64")]
    msrs: Vec,
    // Mapped memory regions keyed by guest PFN; walked by `stop_dirty_log`.
    dirty_log_slots: Arc>>,
    // When set, ioevent registration is skipped and `gain_page_access` becomes active.
    #[cfg(feature = "sev_snp")]
    sev_snp_enabled: bool,
    // Bitmap (one bit per guest page) of pages already acquired by `gain_page_access`.
    #[cfg(feature = "sev_snp")]
    host_access_pages: ArcSwap,
}

impl MshvVm {
    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    ///
    /// The device fd returned by the VM fd is wrapped with `VfioDeviceFd::new_from_mshv`.
    /// Failures are reported as `HypervisorVmError::CreateDevice`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_mshv(device_fd))
    }
}

///
/// Implementation of Vm trait for Mshv
///
/// # Examples
///
/// ```
/// extern crate hypervisor;
/// use hypervisor::mshv::MshvHypervisor;
/// use hypervisor::HypervisorVmConfig;
/// use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm(HypervisorVmConfig::default()).expect("new VM fd creation failed");
/// ```
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    /// No-op for MSHV: `_address` is ignored and `Ok(())` is returned.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    /// No-op for MSHV: `_offset` is ignored and `Ok(())` is returned.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Creates an in-kernel interrupt controller.
    ///
    /// No-op for MSHV: always returns `Ok(())`.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    /// Failures are reported as `HypervisorVmError::RegisterIrqFd`.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
///
    /// Failures are reported as `HypervisorVmError::UnregisterIrqFd`.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    /// # Errors
    ///
    /// Returns `HypervisorVmError::CreateVcpu` if `id` does not fit in a `u8` or if the
    /// hypervisor refuses to create the vCPU, and `HypervisorVmError::MmapToRoot` if the
    /// GHCB page of an SEV-SNP vCPU cannot be mapped.
    ///
    fn create_vcpu(
        &self,
        id: u32,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Box<dyn cpu::Vcpu>> {
        // The VP index is carried as a `u8`; report out-of-range ids as an error rather
        // than panicking on caller-provided input.
        let id = u8::try_from(id).map_err(|_| {
            vm::HypervisorVmError::CreateVcpu(anyhow!("vCPU id {id} does not fit in a u8"))
        })?;
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;

        /* Map the GHCB page to the VMM(root) address space
         * The map is available after the vcpu creation. This address is mapped
         * to the overlay ghcb page of the Microsoft Hypervisor, don't have
         * to worry about the scenario when a guest changes the GHCB mapping.
         */
        #[cfg(feature = "sev_snp")]
        let ghcb = if self.sev_snp_enabled {
            // SAFETY: Safe to call as VCPU has this map already available upon creation
            let addr = unsafe {
                libc::mmap(
                    std::ptr::null_mut(),
                    HV_PAGE_SIZE,
                    libc::PROT_READ | libc::PROT_WRITE,
                    libc::MAP_SHARED,
                    vcpu_fd.as_raw_fd(),
                    MSHV_VP_MMAP_OFFSET_GHCB as i64 * libc::sysconf(libc::_SC_PAGE_SIZE),
                )
            };
            if std::ptr::eq(addr, libc::MAP_FAILED) {
                // No point of continuing, without this mmap VMGEXIT will fail anyway
                // Return error
                return Err(vm::HypervisorVmError::MmapToRoot);
            }
            Some(Ghcb(addr as *mut svm_ghcb_base))
        } else {
            None
        };
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            #[cfg(target_arch = "x86_64")]
            cpuid: Vec::new(),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            vm_fd: self.fd.clone(),
            #[cfg(feature = "sev_snp")]
            ghcb,
            #[cfg(feature = "sev_snp")]
            host_access_pages: ArcSwap::new(self.host_access_pages.load().clone()),
        };
        Ok(Box::new(vcpu))
    }

    /// No-op for MSHV: always returns `Ok(())`.
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }

    /// Registers `fd` to be signaled on guest accesses to `addr`, optionally restricted to
    /// a 32- or 64-bit data match.
    ///
    /// For SEV-SNP VMs this is a no-op that returns `Ok(())`. Failures are reported as
    /// `HypervisorVmError::RegisterIoEvent`.
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        #[cfg(feature = "sev_snp")]
        if self.sev_snp_enabled {
            return Ok(());
        }

        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    /// Unregister an event from a certain address it has been previously registered to.
    ///
    /// For SEV-SNP VMs this is a no-op that returns `Ok(())`. Failures are reported as
    /// `HypervisorVmError::UnregisterIoEvent`.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        #[cfg(feature = "sev_snp")]
        if self.sev_snp_enabled {
            return Ok(());
        }

        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    ///
    /// The region is always mapped executable, and writable unless `readonly` is set.
    /// `_slot` and `_log_dirty_pages` are ignored. Once the hypervisor has accepted the
    /// mapping, the region is recorded in `dirty_log_slots` keyed by its guest PFN.
    ///
    /// # Safety
    ///
    /// `userspace_addr` must point to `memory_size` bytes of memory
    /// that will stay mapped until a successful call to
    /// `remove_user_memory_region().`  Freeing them with `munmap()`
    /// before then will cause undefined guest behavior but at least
    /// should not cause undefined behavior in the host.  In theory,
    /// at least.
    unsafe fn create_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> vm::Result<()> {
        let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        if !readonly {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }

        let guest_pfn = guest_phys_addr >> PAGE_SHIFT;
        let user_memory_region = mshv_user_mem_region {
            flags,
            guest_pfn,
            size: memory_size,
            userspace_addr,
            ..Default::default()
        };

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;

        // No matter read only or not we keep track the slots.
        // For readonly hypervisor can enable the dirty bits,
        // but a VM exit happens before setting the dirty bits
        //
        // The slot is recorded only after the mapping succeeded, so a failed mapping does
        // not leave a stale entry behind for `stop_dirty_log` to walk.
        self.dirty_log_slots.write().unwrap().insert(
            guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn,
                memory_size,
            },
        );

        Ok(())
    }

    /// Removes a guest physical memory region.
    ///
    /// `_slot` and `_log_dirty_pages` are ignored. The matching `dirty_log_slots` entry is
    /// dropped once the hypervisor has unmapped the region.
    ///
    /// # Safety
    ///
    /// `userspace_addr` must point to `memory_size` bytes of memory,
    /// and `add_user_memory_region()` must have been successfully called.
    unsafe fn remove_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> vm::Result<()> {
        let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        if !readonly {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }

        let guest_pfn = guest_phys_addr >> PAGE_SHIFT;
        let user_memory_region = mshv_user_mem_region {
            flags,
            guest_pfn,
            size: memory_size,
            userspace_addr,
            ..Default::default()
        };

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;

        // Remove the corresponding entry from "self.dirty_log_slots" if needed.
        // Done after the unmap so a region that is still mapped stays tracked.
        self.dirty_log_slots.write().unwrap().remove(&guest_pfn);

        Ok(())
    }

    /// Creates the in-kernel VFIO device used for device passthrough.
    ///
    /// Failures are reported as `HypervisorVmError::CreatePassthroughDevice`.
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = mshv_create_device {
            type_: MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    /// # Panics
    ///
    /// On x86_64 any source other than `InterruptSourceConfig::MsiIrq` hits
    /// `unreachable!()`.
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            #[cfg(target_arch = "x86_64")]
            _ => {
                unreachable!()
            }
            #[cfg(target_arch = "aarch64")]
            InterruptSourceConfig::LegacyIrq(cfg) => mshv_user_irq_entry {
                gsi,
                // In order to get IRQ line we need to add `BASE_SPI_IRQ` to the pin number
                // as `BASE_SPI_IRQ` is the base SPI interrupt number exposed via FDT to the
                // guest.
                data: cfg.pin + BASE_SPI_IRQ,
                ..Default::default()
            }
            .into(),
        }
    }

    /// Replaces the VM's MSI routing table with `entries`.
    ///
    /// Failures are reported as `HypervisorVmError::SetGsiRouting`.
    ///
    /// # Panics
    ///
    /// Panics if an entry is not an `IrqRoutingEntry::Mshv`.
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_user_irq_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                // Needed in case other hypervisors are enabled
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_user_irq_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }

    ///
    /// Stop logging dirty pages
    ///
    // NOTE(review): failures here are reported as `StartDirtyLog`; switch to a dedicated
    // "stop" variant if `HypervisorVmError` provides one.
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling the dirty page tracking we need
        // to set the dirty bits in the Hypervisor
        // This is a requirement from Microsoft Hypervisor
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(
                    s.guest_pfn,
                    s.memory_size as usize,
                    MSHV_GPAP_ACCESS_OP_SET as u8,
                )
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    /// The query uses the `CLEAR` access operation. `_slot` is ignored. Failures are
    /// reported as `HypervisorVmError::GetDirtyLog`.
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                MSHV_GPAP_ACCESS_OP_CLEAR as u8,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    /// Retrieve guest clock.
    ///
    /// Reads the partition's `REFERENCE_TIME` property. Failures are reported as
    /// `HypervisorVmError::GetClock`.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        let val = self
            .fd
            .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
        Ok(MshvClockData { ref_time: val }.into())
    }

    /// Set guest clock.
    ///
    /// Writes `data.ref_time` into the partition's `REFERENCE_TIME` property. Failures are
    /// reported as `HypervisorVmError::SetClock`.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data: MshvClockData = (*data).into();
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
                data.ref_time,
            )
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Downcast to the underlying MshvVm type
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Initialize the SEV-SNP VM
    ///
    /// Sets the partition's `ISOLATION_STATE` property to `SECURE`. Failures are reported
    /// as `HypervisorVmError::InitializeSevSnp`.
    #[cfg(feature = "sev_snp")]
    fn sev_snp_init(&self) -> vm::Result<()> {
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
                hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
            )
            .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
    }

    ///
    /// Importing isolated pages, these pages will be used
    /// for the PSP(Platform Security Processor) measurement.
    ///
    /// An empty `pages` slice is a no-op. Only 4 KiB pages are expected; this is checked
    /// with `debug_assert!` only. Failures are reported as
    /// `HypervisorVmError::ImportIsolatedPages`.
    #[cfg(feature = "sev_snp")]
    fn import_isolated_pages(
        &self,
        page_type: u32,
        page_size: u32,
        pages: &[u64],
    ) -> vm::Result<()> {
        debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
        if pages.is_empty() {
            return Ok(());
        }

        let mut isolated_pages =
            vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
        isolated_pages[0].page_type = page_type as u8;
        isolated_pages[0].page_count = pages.len() as u64;
        // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
        // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
        // everything from pages.
        unsafe {
            let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
            pages_slice.copy_from_slice(pages);
        }
        self.fd
            .import_isolated_pages(&isolated_pages[0])
            .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
    }

    ///
    /// Complete isolated import, telling the hypervisor that
    /// importing the pages to guest memory is complete.
    ///
    /// The ID block, its signature and the ID public key are copied from `snp_id_block`
    /// into the launch-finish data; the guest policy comes from
    /// `get_default_snp_guest_policy()` and `author_key_enabled` is always 0. Failures are
    /// reported as `HypervisorVmError::CompleteIsolatedImport`.
    ///
    #[cfg(feature = "sev_snp")]
    fn complete_isolated_import(
        &self,
        snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
        host_data: [u8; 32],
        id_block_enabled: u8,
    ) -> vm::Result<()> {
        let mut auth_info = hv_snp_id_auth_info {
            id_key_algorithm: snp_id_block.id_key_algorithm,
            auth_key_algorithm: snp_id_block.author_key_algorithm,
            ..Default::default()
        };
        // Each of r/s component is 576 bits long
        auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
        auth_info.id_block_signature
            [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
        auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
        auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
        auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());

        let data = mshv_complete_isolated_import {
            import_data: hv_partition_complete_isolated_import_data {
                psp_parameters: hv_psp_launch_finish_data {
                    id_block: hv_snp_id_block {
                        launch_digest: snp_id_block.ld,
                        family_id: snp_id_block.family_id,
                        image_id: snp_id_block.image_id,
                        version: snp_id_block.version,
                        guest_svn: snp_id_block.guest_svn,
                        policy: get_default_snp_guest_policy(),
                    },
                    id_auth_info: auth_info,
                    host_data,
                    id_block_enabled,
                    author_key_enabled: 0,
                },
            },
        };
        self.fd
            .complete_isolated_import(&data)
            .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
    }

    /// Creates the virtual GIC and registers its distributor (GICD) and ITS translater
    /// (GITS) base addresses with the hypervisor.
    ///
    /// Failures are reported as `HypervisorVmError::CreateVgic`.
    #[cfg(target_arch = "aarch64")]
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = MshvGicV2M::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {e:?}")))?;

        // Register GICD address with the hypervisor
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_GICD_BASE_ADDRESS,
                gic_device.dist_addr,
            )
            .map_err(|e| {
                vm::HypervisorVmError::CreateVgic(anyhow!("Failed to set GICD address: {e}"))
            })?;

        // Register GITS address with the hypervisor
        self.fd
            .set_partition_property(
                // spellchecker:disable-line
                hv_partition_property_code_HV_PARTITION_PROPERTY_GITS_TRANSLATER_BASE_ADDRESS,
                gic_device.gits_addr,
            )
            .map_err(|e| {
                vm::HypervisorVmError::CreateVgic(anyhow!("Failed to set GITS address: {e}"))
            })?;

        Ok(Arc::new(Mutex::new(gic_device)))
    }

    /// No-op for MSHV: `_kvi` is left untouched and `Ok(())` is returned.
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, _kvi: &mut crate::VcpuInit) -> vm::Result<()> {
        Ok(())
    }

    /// Pause the VM
    ///
    /// Sets the partition's `TIME_FREEZE` property to 1. Failures are reported as
    /// `HypervisorVmError::SetVmProperty`.
    fn pause(&self) -> vm::Result<()> {
        // Freeze the partition
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                1u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {e}"
                ))
            })
    }

    /// Resume the VM
    ///
    /// Sets the partition's `TIME_FREEZE` property to 0. Failures are reported as
    /// `HypervisorVmError::SetVmProperty`.
    fn resume(&self) -> vm::Result<()> {
        // Resuming the partition using TIME_FREEZE property
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                0u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {e}"
                ))
            })
    }

    /// Acquires host read/write access to every guest page touched by the byte range
    /// `[gpa, gpa + size)` that has not been acquired before.
    ///
    /// Does nothing when SEV-SNP is disabled or when `size` is 0. Pages already marked in
    /// `host_access_pages` are skipped; newly acquired pages are marked once the
    /// hypervisor call succeeds. Failures are reported as
    /// `HypervisorVmError::ModifyGpaHostAccess`.
    #[cfg(feature = "sev_snp")]
    fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
        use mshv_ioctls::set_bits;
        const ONE_GB: usize = 1024 * 1024 * 1024;

        if !self.sev_snp_enabled {
            return Ok(());
        }

        // A zero-sized access touches no page. Bailing out here also keeps the
        // `gpa + size - 1` computation below from underflowing when `gpa` is 0.
        if size == 0 {
            return Ok(());
        }

        let start_gpfn: u64 = gpa >> PAGE_SHIFT;
        let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;

        // Enlarge the bitmap if the PFN is greater than the bitmap length
        if end_gpfn >= self.host_access_pages.load().as_ref().len() as u64 {
            self.host_access_pages.rcu(|bitmap| {
                let mut bm = bitmap.as_ref().clone();
                bm.enlarge(ONE_GB);
                bm
            });
        }

        // Collect the page-aligned addresses of the pages that are not yet acquired.
        let gpas: Vec<u64> = (start_gpfn..=end_gpfn)
            .filter(|x| {
                !self
                    .host_access_pages
                    .load()
                    .as_ref()
                    .is_bit_set(*x as usize)
            })
            .map(|x| x << PAGE_SHIFT)
            .collect();

        if !gpas.is_empty() {
            let mut gpa_list =
                vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
            gpa_list[0].page_count = gpas.len() as u64;
            gpa_list[0].flags = set_bits!(
                u8,
                MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
                MSHV_GPA_HOST_ACCESS_BIT_READABLE,
                MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
            );

            // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
            // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
            // everything from gpas.
            unsafe {
                let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                gpas_slice.copy_from_slice(gpas.as_slice());
            }

            self.fd
                .modify_gpa_host_access(&gpa_list[0])
                .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;

            // Record the newly acquired pages so later calls skip them.
            for acquired_gpa in gpas {
                self.host_access_pages.rcu(|bitmap| {
                    let bm = bitmap.clone();
                    bm.set_bit((acquired_gpa >> PAGE_SHIFT) as usize);
                    bm
                });
            }
        }

        Ok(())
    }

    /// Initializes the partition and applies the partition properties this backend relies
    /// on.
    ///
    /// Order of operations: aarch64 GIC/PPI properties, `initialize()`, SEV-SNP isolation
    /// policy and VMGEXIT offloads (when enabled), the x86_64 unimplemented-MSR action, and
    /// finally `TIME_FREEZE = 1` so the partition starts frozen. Failures are reported as
    /// `HypervisorVmError::InitializeVm`.
    fn init(&self) -> vm::Result<()> {
        #[cfg(target_arch = "aarch64")]
        {
            self.fd
                .set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_GIC_LPI_INT_ID_BITS,
                    0,
                )
                .map_err(|e| {
                    vm::HypervisorVmError::InitializeVm(anyhow!(
                        "Failed to set GIC LPI support: {e}",
                    ))
                })?;

            self.fd
                .set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_GIC_PPI_OVERFLOW_INTERRUPT_FROM_CNTV,
                    (AARCH64_ARCH_TIMER_VIRT_IRQ + AARCH64_MIN_PPI_IRQ) as u64,
                )
                .map_err(|e| {
                    vm::HypervisorVmError::InitializeVm(anyhow!(
                        "Failed to set arch timer interrupt ID: {e}",
                    ))
                })?;

            self.fd
                .set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_GIC_PPI_PERFORMANCE_MONITORS_INTERRUPT,
                    (AARCH64_PMU_IRQ + AARCH64_MIN_PPI_IRQ) as u64,
                )
                .map_err(|e| {
                    vm::HypervisorVmError::InitializeVm(anyhow!(
                        "Failed to set PMU interrupt ID: {e}",
                    ))
                })?;
        }

        self.fd
            .initialize()
            .map_err(|e| vm::HypervisorVmError::InitializeVm(e.into()))?;

        // Set additional partition property for SEV-SNP partition.
        #[cfg(feature = "sev_snp")]
        if self.sev_snp_enabled {
            let snp_policy = snp::get_default_snp_guest_policy();
            let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
            // SAFETY: access union fields
            unsafe {
                debug!(
                    "Setting the partition isolation policy as: 0x{:x}",
                    snp_policy.as_uint64
                );
                self.fd
                    .set_partition_property(
                        hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
                        snp_policy.as_uint64,
                    )
                    .map_err(|e| vm::HypervisorVmError::InitializeVm(e.into()))?;
                debug!(
                    "Setting the partition property to enable VMGEXIT offloads as : 0x{:x}",
                    vmgexit_offloads.as_uint64
                );
                self.fd
                    .set_partition_property(
                        hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
                        vmgexit_offloads.as_uint64,
                    )
                    .map_err(|e| vm::HypervisorVmError::InitializeVm(e.into()))?;
            }
        }
        // Default Microsoft Hypervisor behavior for unimplemented MSR is to
        // send a fault to the guest if it tries to access it. It is possible
        // to override this behavior with a more suitable option i.e., ignore
        // writes from the guest and return zero in attempt to read unimplemented
        // MSR.
        #[cfg(target_arch = "x86_64")]
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
                hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO
                    as u64,
            )
            .map_err(|e| vm::HypervisorVmError::InitializeVm(e.into()))?;

        // Always create a frozen partition
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                1u64,
            )
            .map_err(|e| vm::HypervisorVmError::InitializeVm(e.into()))?;

        Ok(())
    }
}