vmm: fix CVM boot failure on MSHV

Recent changes related to arm64 support in MSHV exposed
inconsistencies in the VM initialization and CVM boot paths.
The VM creation flow currently diverges across multiple scenarios,
including regular MSHV, CVM, and arm64, with each path performing
guest initialization steps in a different order.
Certain platform-specific requirements further constrain the ordering
of operations, such as the timing of address space creation,
IGVM loading, interrupt controller setup, and payload loading. For
the CVM case, address-space creation must be done after IGVM loading
and PSP measurement. For the regular and arm64 cases, this memory
initialization must be done early. For MSHV, vm.init() and
vm.sev_snp_init() are called in different orders, selected
conditionally at both run time and build time.

Additionally, while the KVM initialization path differs slightly
from MSHV, it shares common logic that is currently split across
separate conditional and build-time code paths, contributing to
fragmentation of the overall flow.

This change restructures the VM creation and initialization sequence
to better align shared logic, enforce scenario-specific ordering
constraints, and ensure consistent and correct behavior across all
supported configurations. In doing so, it restores proper CVM boot
behavior and improves the maintainability of the initialization code.

Signed-off-by: Muminul Islam <muislam@microsoft.com>
This commit is contained in:
Muminul Islam 2025-12-07 18:08:19 -08:00 committed by Bo Chen
parent 25e8e64a01
commit c9cd82b52b
2 changed files with 122 additions and 46 deletions

View file

@ -27,7 +27,7 @@ use crate::GuestMemoryMmap;
use crate::cpu::CpuManager;
use crate::igvm::loader::Loader;
use crate::igvm::{BootPageAcceptance, HV_PAGE_SIZE, IgvmLoadedInfo, StartupMemoryType};
use crate::memory_manager::MemoryManager;
use crate::memory_manager::{Error as MemoryManagerError, MemoryManager};
#[derive(Debug, Error)]
pub enum Error {
@ -49,6 +49,8 @@ pub enum Error {
CompleteIsolatedImport(#[source] hypervisor::HypervisorVmError),
#[error("Error decoding host data")]
FailedToDecodeHostData(#[source] hex::FromHexError),
#[error("Error allocating address space")]
MemoryManager(MemoryManagerError),
}
#[allow(dead_code)]
@ -421,6 +423,11 @@ pub fn load_igvm(
#[cfg(feature = "sev_snp")]
{
memory_manager
.lock()
.unwrap()
.allocate_address_space()
.map_err(Error::MemoryManager)?;
use std::time::Instant;
let mut now = Instant::now();

View file

@ -667,51 +667,37 @@ impl Vm {
)
.map_err(Error::DeviceManager)?;
// For MSHV, we need to create the interrupt controller before we initialize the VM.
// Because we need to set the base address of GICD before we initialize the VM.
#[cfg(feature = "mshv")]
// Initialize the VM now that we have created the device manager.
// For MSHV on non-aarch64 targets, the VM must be initialized before creating vCPUs.
// For aarch64, the VM must be initialized after creating the interrupt controller,
// so the vm.init() call is pushed down after IC (Interrupt Controller) creation
// for MSHV aarch64.
#[cfg(all(feature = "mshv", not(target_arch = "aarch64")))]
{
if is_mshv {
let ic = device_manager
.lock()
.unwrap()
.create_interrupt_controller()
.map_err(Error::DeviceManager)?;
vm.init().map_err(Error::InitializeVm)?;
device_manager
.lock()
.unwrap()
.create_devices(
console_info.clone(),
console_resize_pipe.clone(),
original_termios.clone(),
ic,
)
.map_err(Error::DeviceManager)?;
}
}
#[cfg(feature = "sev_snp")]
if sev_snp_enabled {
cpu_manager
.lock()
.unwrap()
.create_boot_vcpus(snapshot_from_id(snapshot, CPU_MANAGER_SNAPSHOT_ID))
.map_err(Error::CpuManager)?;
memory_manager
.lock()
.unwrap()
.allocate_address_space()
.map_err(Error::MemoryManager)?;
#[cfg(target_arch = "aarch64")]
memory_manager
.lock()
.unwrap()
.add_uefi_flash()
.map_err(Error::MemoryManager)?;
// This initial SEV-SNP configuration must be done immediately after
// vCPUs are created. As part of this initialization we are
// transitioning the guest into secure state.
vm.sev_snp_init().map_err(Error::InitializeSevSnpVm)?;
}
#[cfg(feature = "sev_snp")]
// Loading the IGVM file is pushed down here because the IGVM
// parser needs cpu_manager to retrieve CPUID leaves. Currently,
// Microsoft Hypervisor does not provide any hypervisor-specific
// common CPUID, so we need to call get_cpuid_values per CPUID
// leaf through cpu_manager.
let load_payload_handle = if snapshot.is_none() {
let _load_payload_handle = if snapshot.is_none() && sev_snp_enabled {
Self::load_payload_async(
&memory_manager,
&config,
@ -724,11 +710,102 @@ impl Vm {
None
};
cpu_manager
#[cfg(feature = "mshv")]
{
if is_mshv {
let ic = device_manager
.lock()
.unwrap()
.create_interrupt_controller()
.map_err(Error::DeviceManager)?;
#[cfg(target_arch = "aarch64")]
vm.init().map_err(Error::InitializeVm)?;
device_manager
.lock()
.unwrap()
.create_devices(
console_info.clone(),
console_resize_pipe.clone(),
original_termios.clone(),
ic,
)
.map_err(Error::DeviceManager)?;
}
}
cfg_if::cfg_if! {
if #[cfg(feature = "sev_snp")] {
if !sev_snp_enabled {
memory_manager
.lock()
.unwrap()
.allocate_address_space()
.map_err(Error::MemoryManager)?;
}
} else {
memory_manager
.lock()
.unwrap()
.allocate_address_space()
.map_err(Error::MemoryManager)?;
}
}
#[cfg(target_arch = "aarch64")]
memory_manager
.lock()
.unwrap()
.create_boot_vcpus(snapshot_from_id(snapshot, CPU_MANAGER_SNAPSHOT_ID))
.map_err(Error::CpuManager)?;
.add_uefi_flash()
.map_err(Error::MemoryManager)?;
// The first case: sev_snp is enabled (compiled in) but a non-CVM guest
// boots at run time. The second case: sev_snp is not compiled in, i.e.
// a regular KVM or MSHV guest boot.
cfg_if::cfg_if! {
if #[cfg(feature = "sev_snp")] {
let _load_payload_handle = if snapshot.is_none() && !sev_snp_enabled {
Self::load_payload_async(
&memory_manager,
&config,
#[cfg(feature = "igvm")]
&cpu_manager,
#[cfg(feature = "sev_snp")]
sev_snp_enabled,
)?
} else {
None
};
} else {
let _load_payload_handle = if snapshot.is_none() {
Self::load_payload_async(
&memory_manager,
&config,
#[cfg(feature = "igvm")]
&cpu_manager,
)?
} else {
None
};
}
}
// The first case: sev_snp is enabled (compiled in) but a non-CVM guest
// boots at run time. The second case: sev_snp is not compiled in, i.e.
// a regular KVM or MSHV guest boot.
cfg_if::cfg_if! {
if #[cfg(feature = "sev_snp")] {
if !sev_snp_enabled {
cpu_manager
.lock()
.unwrap()
.create_boot_vcpus(snapshot_from_id(snapshot, CPU_MANAGER_SNAPSHOT_ID))
.map_err(Error::CpuManager)?;
}
} else {
cpu_manager
.lock()
.unwrap()
.create_boot_vcpus(snapshot_from_id(snapshot, CPU_MANAGER_SNAPSHOT_ID))
.map_err(Error::CpuManager)?;
}
}
// For KVM, we need to create interrupt controller after we create boot vcpus.
// Because we restore GIC state from the snapshot as part of boot vcpu creation.
@ -752,14 +829,6 @@ impl Vm {
}
}
// This initial SEV-SNP configuration must be done immediately after
// vCPUs are created. As part of this initialization we are
// transitioning the guest into secure state.
#[cfg(feature = "sev_snp")]
if sev_snp_enabled {
vm.sev_snp_init().map_err(Error::InitializeSevSnpVm)?;
}
#[cfg(feature = "fw_cfg")]
{
let fw_cfg_config = config
@ -830,7 +899,7 @@ impl Vm {
#[cfg(not(target_arch = "riscv64"))]
hypervisor,
stop_on_boot,
load_payload_handle,
load_payload_handle: _load_payload_handle,
})
}