vmm: raise the (v)CPU limit on kvm/x86_64

Raise the max number of supported (v)CPUs on kvm x86_64 hosts
to 8192 (the max allowed value of CONFIG_NR_CPUS in the Linux kernel).

Other platforms keep their existing CPU limits pending further
development and testing.

The change has been tested on Intel and AMD hosts.

Signed-off-by: Barret Rhoden <brho@google.com>
Signed-off-by: Neel Natu <neelnatu@google.com>
Signed-off-by: Ofir Weisse <oweisse@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
This commit is contained in:
Peter Oskolkov 2025-09-05 20:36:25 +00:00 committed by Bo Chen
parent 57bc78da4f
commit 05d222f0eb
6 changed files with 97 additions and 45 deletions

View file

@ -27,6 +27,11 @@ use crate::vm_config::*;
const MAX_NUM_PCI_SEGMENTS: u16 = 96;
const MAX_IOMMU_ADDRESS_WIDTH_BITS: u8 = 64;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
const MAX_SUPPORTED_CPUS: u32 = 8192;
#[cfg(not(all(feature = "kvm", target_arch = "x86_64")))]
const MAX_SUPPORTED_CPUS: u32 = 255;
/// Errors associated with VM configuration parameters.
#[derive(Debug, Error)]
pub enum Error {
@ -182,6 +187,9 @@ pub enum ValidationError {
/// Max is less than boot
#[error("Max CPUs lower than boot CPUs")]
CpusMaxLowerThanBoot,
/// Too many CPUs.
#[error("Too many CPUs: specified {0} but {MAX_SUPPORTED_CPUS} is the limit")]
TooManyCpus(u32 /* specified CPUs */),
/// Missing file value for debug-console
#[cfg(target_arch = "x86_64")]
#[error("Path missing when using file mode for debug console")]
@ -586,11 +594,11 @@ impl CpusConfig {
.add("features");
parser.parse(cpus).map_err(Error::ParseCpus)?;
let boot_vcpus: u8 = parser
let boot_vcpus: u32 = parser
.convert("boot")
.map_err(Error::ParseCpus)?
.unwrap_or(DEFAULT_VCPUS);
let max_vcpus: u8 = parser
let max_vcpus: u32 = parser
.convert("max")
.map_err(Error::ParseCpus)?
.unwrap_or(boot_vcpus);
@ -605,7 +613,7 @@ impl CpusConfig {
.map_err(Error::ParseCpus)?
.unwrap_or(DEFAULT_MAX_PHYS_BITS);
let affinity = parser
.convert::<Tuple<u8, Vec<usize>>>("affinity")
.convert::<Tuple<u32, Vec<usize>>>("affinity")
.map_err(Error::ParseCpus)?
.map(|v| {
v.0.iter()
@ -2147,7 +2155,7 @@ impl NumaConfig {
let cpus = parser
.convert::<IntegerList>("cpus")
.map_err(Error::ParseNuma)?
.map(|v| v.0.iter().map(|e| *e as u8).collect());
.map(|v| v.0.iter().map(|e| *e as u32).collect());
let distances = parser
.convert::<Tuple<u64, u64>>("distances")
.map_err(Error::ParseNuma)?
@ -2523,6 +2531,15 @@ impl VmConfig {
return Err(ValidationError::CpusMaxLowerThanBoot);
}
if self.cpus.max_vcpus > MAX_SUPPORTED_CPUS {
// Note: historically, Cloud Hypervisor did not support more than 255 (254 on x86_64)
// vCPUs: self.cpus.max_vcpus was of type u8, so 255 was the maximum;
// on x86_64, the legacy mptable/apic was limited to 254 CPUs.
//
// Now the limit is lifted on x86_64 targets. Other targets/archs: TBD.
return Err(ValidationError::TooManyCpus(self.cpus.max_vcpus));
}
if let Some(rate_limit_groups) = &self.rate_limit_groups {
for rate_limit_group in rate_limit_groups {
rate_limit_group.validate(self)?;
@ -2614,7 +2631,10 @@ impl VmConfig {
return Err(ValidationError::CpuTopologyDiesPerPackage);
}
let total = t.threads_per_core * t.cores_per_die * t.dies_per_package * t.packages;
let total: u32 = (t.threads_per_core as u32)
* (t.cores_per_die as u32)
* (t.dies_per_package as u32)
* (t.packages as u32);
if total != self.cpus.max_vcpus {
return Err(ValidationError::CpuTopologyCount);
}

View file

@ -196,8 +196,8 @@ pub enum Error {
#[error("Error setting up AMX")]
AmxEnable(#[source] anyhow::Error),
#[error("Maximum number of vCPUs exceeds host limit")]
MaximumVcpusExceeded,
#[error("Maximum number of vCPUs {0} exceeds host limit {1}")]
MaximumVcpusExceeded(u32, u32),
#[cfg(feature = "sev_snp")]
#[error("Failed to set sev control register")]
@ -698,12 +698,16 @@ impl CpuManager {
numa_nodes: &NumaNodes,
#[cfg(feature = "sev_snp")] sev_snp_enabled: bool,
) -> Result<Arc<Mutex<CpuManager>>> {
if u32::from(config.max_vcpus) > hypervisor.get_max_vcpus() {
return Err(Error::MaximumVcpusExceeded);
if config.max_vcpus > hypervisor.get_max_vcpus() {
return Err(Error::MaximumVcpusExceeded(
config.max_vcpus,
hypervisor.get_max_vcpus(),
));
}
let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
vcpu_states.resize_with(usize::from(config.max_vcpus), VcpuState::default);
let max_vcpus = usize::try_from(config.max_vcpus).unwrap();
let mut vcpu_states = Vec::with_capacity(max_vcpus);
vcpu_states.resize_with(max_vcpus, VcpuState::default);
let hypervisor_type = hypervisor.hypervisor_type();
#[cfg(target_arch = "x86_64")]
let cpu_vendor = hypervisor.get_cpu_vendor();
@ -755,7 +759,7 @@ impl CpuManager {
let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() {
cpu_affinity
.iter()
.map(|a| (a.vcpu as u32, a.host_cpus.clone()))
.map(|a| (a.vcpu, a.host_cpus.clone()))
.collect()
} else {
BTreeMap::new()
@ -781,7 +785,7 @@ impl CpuManager {
#[cfg(feature = "guest_debug")]
vm_debug_evt,
selected_cpu: 0,
vcpus: Vec::with_capacity(usize::from(config.max_vcpus)),
vcpus: Vec::with_capacity(max_vcpus),
seccomp_action,
vm_ops,
acpi_address: None,
@ -895,10 +899,10 @@ impl CpuManager {
},
|t| {
(
t.threads_per_core.into(),
t.cores_per_die.into(),
t.dies_per_package.into(),
t.packages.into(),
t.threads_per_core,
t.cores_per_die,
t.dies_per_package,
t.packages,
)
},
);
@ -934,7 +938,7 @@ impl CpuManager {
self.present_vcpus()
);
if desired_vcpus > self.config.max_vcpus as u32 {
if desired_vcpus > self.config.max_vcpus {
return Err(Error::DesiredVCpuCountExceedsMax);
}
@ -1245,7 +1249,7 @@ impl CpuManager {
inserting: bool,
paused: Option<bool>,
) -> Result<()> {
if desired_vcpus > self.config.max_vcpus as u32 {
if desired_vcpus > self.config.max_vcpus {
return Err(Error::DesiredVCpuCountExceedsMax);
}
@ -1418,11 +1422,11 @@ impl CpuManager {
}
pub fn boot_vcpus(&self) -> u32 {
self.config.boot_vcpus as u32
self.config.boot_vcpus
}
pub fn max_vcpus(&self) -> u32 {
self.config.max_vcpus as u32
self.config.max_vcpus
}
#[cfg(target_arch = "x86_64")]
@ -1456,10 +1460,10 @@ impl CpuManager {
pub fn get_vcpu_topology(&self) -> Option<(u16, u16, u16, u16)> {
self.config.topology.clone().map(|t| {
(
t.threads_per_core.into(),
t.cores_per_die.into(),
t.dies_per_package.into(),
t.packages.into(),
t.threads_per_core,
t.cores_per_die,
t.dies_per_package,
t.packages,
)
})
}
@ -1475,7 +1479,7 @@ impl CpuManager {
{
madt.write(36, arch::layout::APIC_START.0);
for cpu in 0..self.config.max_vcpus as u32 {
for cpu in 0..self.config.max_vcpus {
let x2apic_id = get_x2apic_id(cpu, self.get_vcpu_topology());
let lapic = LocalX2Apic {
@ -1483,7 +1487,7 @@ impl CpuManager {
length: 16,
processor_id: cpu,
apic_id: x2apic_id,
flags: if cpu < self.config.boot_vcpus as u32 {
flags: if cpu < self.config.boot_vcpus {
1 << MADT_CPU_ENABLE_FLAG
} else {
0
@ -1535,8 +1539,8 @@ impl CpuManager {
r#type: acpi::ACPI_APIC_GENERIC_CPU_INTERFACE,
length: 80,
reserved0: 0,
cpu_interface_number: cpu as u32,
uid: cpu as u32,
cpu_interface_number: cpu,
uid: cpu,
flags: 1,
parking_version: 0,
performance_interrupt: 0,
@ -2274,7 +2278,7 @@ impl Aml for CpuManager {
let uid = aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A05"));
// Bundle methods together under a common object
let methods = CpuMethods {
max_vcpus: self.config.max_vcpus as u32,
max_vcpus: self.config.max_vcpus,
dynamic: self.dynamic,
};
let mut cpu_data_inner: Vec<&dyn Aml> = vec![&hid, &uid, &methods];
@ -2282,7 +2286,7 @@ impl Aml for CpuManager {
#[cfg(target_arch = "x86_64")]
let topology = self.get_vcpu_topology();
let mut cpu_devices = Vec::new();
for cpu_id in 0..(self.config.max_vcpus as u32) {
for cpu_id in 0..self.config.max_vcpus {
let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0);
let cpu_device = Cpu {
cpu_id,

View file

@ -1694,7 +1694,7 @@ impl DeviceManager {
) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
gic::Gic::new(
self.config.lock().unwrap().cpus.boot_vcpus as u32,
self.config.lock().unwrap().cpus.boot_vcpus,
Arc::clone(&self.msi_interrupt_manager),
self.address_manager.vm.clone(),
)

View file

@ -27,6 +27,8 @@ use anyhow::anyhow;
#[cfg(feature = "dbus_api")]
use api::dbus::{DBusApiOptions, DBusApiShutdownChannels};
use api::http::HttpApiHandle;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
use arch::x86_64::MAX_SUPPORTED_CPUS_LEGACY;
use console_devices::{pre_create_console_devices, ConsoleInfo};
use landlock::LandlockError;
use libc::{tcsetattr, termios, EFD_NONBLOCK, SIGINT, SIGTERM, TCSANOW};
@ -888,6 +890,11 @@ impl Vmm {
))
})?;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
if config.lock().unwrap().max_apic_id() > MAX_SUPPORTED_CPUS_LEGACY {
vm.enable_x2apic_api().unwrap();
}
let phys_bits =
vm::physical_bits(&self.hypervisor, config.lock().unwrap().cpus.max_phys_bits);
@ -1822,7 +1829,7 @@ impl RequestHandler for Vmm {
} else {
let mut config = self.vm_config.as_ref().unwrap().lock().unwrap();
if let Some(desired_vcpus) = desired_vcpus {
config.cpus.boot_vcpus = desired_vcpus.try_into().unwrap();
config.cpus.boot_vcpus = desired_vcpus;
}
if let Some(desired_ram) = desired_ram {
config.memory.size = desired_ram;

View file

@ -29,6 +29,8 @@ use anyhow::anyhow;
use arch::layout::{KVM_IDENTITY_MAP_START, KVM_TSS_START};
#[cfg(feature = "tdx")]
use arch::x86_64::tdx::TdvfSection;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
use arch::x86_64::MAX_SUPPORTED_CPUS_LEGACY;
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
use arch::PciSpaceInfo;
use arch::{get_host_cpu_phys_bits, EntryPoint, NumaNode, NumaNodes};
@ -944,7 +946,7 @@ impl Vm {
}
if let Some(cpus) = &config.cpus {
node.cpus.extend(cpus.iter().map(|cpu| *cpu as u32));
node.cpus.extend(cpus);
}
if let Some(pci_segments) = &config.pci_segments {
@ -1022,6 +1024,11 @@ impl Vm {
vm_config.lock().unwrap().memory.total_size(),
)?;
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
if vm_config.lock().unwrap().max_apic_id() > MAX_SUPPORTED_CPUS_LEGACY {
vm.enable_x2apic_api().unwrap();
}
let phys_bits = physical_bits(&hypervisor, vm_config.lock().unwrap().cpus.max_phys_bits);
let memory_manager = if let Some(snapshot) =
@ -1655,7 +1662,7 @@ impl Vm {
.notify_hotplug(AcpiNotificationFlags::CPU_DEVICES_CHANGED)
.map_err(Error::DeviceManager)?;
}
self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus.try_into().unwrap();
self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus;
}
if let Some(desired_memory) = desired_memory {
@ -2709,7 +2716,7 @@ impl Vm {
&mut self,
destination_url: &str,
) -> std::result::Result<DumpState, GuestDebuggableError> {
let nr_cpus = self.config.lock().unwrap().cpus.boot_vcpus as u32;
let nr_cpus = self.config.lock().unwrap().cpus.boot_vcpus;
let elf_note_size = self.get_note_size(NoteDescType::ElfAndVmm, nr_cpus) as isize;
let mut elf_phdr_num = 1;
let elf_sh_info = 0;

View file

@ -26,7 +26,7 @@ pub(crate) trait ApplyLandlock {
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct CpuAffinity {
pub vcpu: u8,
pub vcpu: u32,
pub host_cpus: Vec<usize>,
}
@ -39,10 +39,10 @@ pub struct CpuFeatures {
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct CpuTopology {
pub threads_per_core: u8,
pub cores_per_die: u8,
pub dies_per_package: u8,
pub packages: u8,
pub threads_per_core: u16,
pub cores_per_die: u16,
pub dies_per_package: u16,
pub packages: u16,
}
// When booting with PVH boot the maximum physical addressable size
@ -56,8 +56,8 @@ pub fn default_cpuconfig_max_phys_bits() -> u8 {
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct CpusConfig {
pub boot_vcpus: u8,
pub max_vcpus: u8,
pub boot_vcpus: u32,
pub max_vcpus: u32,
#[serde(default)]
pub topology: Option<CpuTopology>,
#[serde(default)]
@ -70,7 +70,7 @@ pub struct CpusConfig {
pub features: CpuFeatures,
}
pub const DEFAULT_VCPUS: u8 = 1;
pub const DEFAULT_VCPUS: u32 = 1;
impl Default for CpusConfig {
fn default() -> Self {
@ -684,7 +684,7 @@ pub struct NumaConfig {
#[serde(default)]
pub guest_numa_id: u32,
#[serde(default)]
pub cpus: Option<Vec<u8>>,
pub cpus: Option<Vec<u32>>,
#[serde(default)]
pub distances: Option<Vec<NumaDistance>>,
#[serde(default)]
@ -1035,4 +1035,18 @@ impl VmConfig {
Ok(())
}
/// Returns the largest x2APIC id value this VM configuration can reach,
/// used to decide whether the KVM x2APIC API must be enabled (i.e. when
/// the value exceeds `MAX_SUPPORTED_CPUS_LEGACY`).
///
/// With an explicit CPU topology configured, the value is derived from the
/// topology tuple (threads_per_core, cores_per_die, dies_per_package,
/// packages) via `arch::x86_64::get_max_x2apic_id`, since topology-aware
/// APIC id packing can produce ids larger than the plain vCPU count.
#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
pub(crate) fn max_apic_id(&self) -> u32 {
if let Some(topology) = &self.cpus.topology {
arch::x86_64::get_max_x2apic_id((
topology.threads_per_core,
topology.cores_per_die,
topology.dies_per_package,
topology.packages,
))
} else {
// No topology: fall back to the configured vCPU count.
// NOTE(review): APIC ids are 0-based, so the maximum id with
// max_vcpus CPUs is arguably max_vcpus - 1; returning max_vcpus is
// conservative for the `> MAX_SUPPORTED_CPUS_LEGACY` check — confirm
// against get_max_x2apic_id's convention.
self.cpus.max_vcpus
}
}
}