vmm: Add ACPI Generic Initiator support
Support ACPI Generic Initiator Affinity to associate PCI devices with NUMA proximity domains.

- Add GenericInitiatorAffinity struct
- Add from_pci_bdf() to encode PCI Segment:Bus:Device.Function
- Add from_acpi_device() for ACPI device handles (future use)
- Generate SRAT Type 5 entries for nodes with device_id
- Improve create_slit_table() to check distance symmetry when forward distance is missing
- Track device ID to BDF mappings in DeviceManager

Includes comprehensive unit tests.

Signed-off-by: Saravanan D <saravanand@crusoe.ai>
This commit is contained in:
parent
fa43548975
commit
dc0c306dd9
3 changed files with 301 additions and 3 deletions
273
vmm/src/acpi.rs
273
vmm/src/acpi.rs
|
|
@ -16,7 +16,7 @@ use arch::NumaNodes;
|
|||
#[cfg(target_arch = "aarch64")]
|
||||
use arch::aarch64::DeviceInfoForFdt;
|
||||
use bitflags::bitflags;
|
||||
use log::info;
|
||||
use log::{info, warn};
|
||||
use pci::PciBdf;
|
||||
use tracer::trace_scoped;
|
||||
use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryRegion};
|
||||
|
|
@ -104,6 +104,82 @@ struct ProcessorGiccAffinity {
|
|||
pub clock_domain: u32,
|
||||
}
|
||||
|
||||
// ACPI 6.6 Section 5.2.16.6 - Generic Initiator Affinity Structure
|
||||
// Associates devices (e.g., GPUs, NVMe, accelerators) with NUMA proximity domains
|
||||
//
|
||||
// Device Handle Type values per ACPI 6.6 spec:
|
||||
// 0 = ACPI device handle (uses HID and UID)
|
||||
// 1 = PCI device handle (uses Segment and BDF)
|
||||
//
|
||||
// Note: Some older Linux kernel versions may incorrectly expect
|
||||
// device_handle_type=0 for PCI devices.
|
||||
#[allow(dead_code)]
|
||||
#[repr(C, packed)]
|
||||
#[derive(Default, IntoBytes, Immutable, FromBytes)]
|
||||
struct GenericInitiatorAffinity {
|
||||
pub type_: u8,
|
||||
pub length: u8,
|
||||
_reserved1: u8,
|
||||
pub device_handle_type: u8,
|
||||
pub proximity_domain: u32,
|
||||
pub device_handle: [u8; 16],
|
||||
pub flags: u32,
|
||||
_reserved2: u32,
|
||||
}
|
||||
|
||||
impl GenericInitiatorAffinity {
|
||||
#[allow(dead_code)]
|
||||
fn from_acpi_device(hid: u64, uid: u32, proximity_domain: u32) -> Self {
|
||||
let mut device_handle = [0u8; 16];
|
||||
// ACPI 6.6 Table 5-66: ACPI device handle
|
||||
// Bytes 0-7: Hardware ID (HID) as 64-bit value
|
||||
// Bytes 8-11: Unique ID (UID) as 32-bit value
|
||||
device_handle[0..8].copy_from_slice(&hid.to_le_bytes());
|
||||
device_handle[8..12].copy_from_slice(&uid.to_le_bytes());
|
||||
// Bytes 12-15: Reserved
|
||||
GenericInitiatorAffinity {
|
||||
type_: 5,
|
||||
length: 32,
|
||||
_reserved1: 0,
|
||||
device_handle_type: 0, // 0 = ACPI
|
||||
proximity_domain,
|
||||
device_handle,
|
||||
flags: 1,
|
||||
_reserved2: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_pci_bdf(bdf: PciBdf, proximity_domain: u32) -> Self {
|
||||
let mut device_handle = [0u8; 16];
|
||||
let segment = bdf.segment();
|
||||
let bus = bdf.bus();
|
||||
let device = bdf.device();
|
||||
let function = bdf.function();
|
||||
|
||||
// ACPI 6.6 Table 5-66: PCI Device Handle
|
||||
device_handle[0] = (segment & 0xff) as u8;
|
||||
device_handle[1] = ((segment >> 8) & 0xff) as u8;
|
||||
device_handle[2] = bus;
|
||||
device_handle[3] = bus;
|
||||
device_handle[4] = device;
|
||||
device_handle[5] = device;
|
||||
device_handle[6] = function;
|
||||
device_handle[7] = function;
|
||||
// Bytes 8-15 remain 0 (Reserved)
|
||||
|
||||
GenericInitiatorAffinity {
|
||||
type_: 5,
|
||||
length: 32,
|
||||
_reserved1: 0,
|
||||
device_handle_type: 1, // 1 = PCI
|
||||
proximity_domain,
|
||||
device_handle,
|
||||
flags: 1,
|
||||
_reserved2: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bitflags! {
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct MemAffinityFlags: u32 {
|
||||
|
|
@ -293,6 +369,7 @@ fn create_tpm2_table() -> Sdt {
|
|||
|
||||
fn create_srat_table(
|
||||
numa_nodes: &NumaNodes,
|
||||
device_manager: &Arc<Mutex<DeviceManager>>,
|
||||
#[cfg(target_arch = "x86_64")] topology: Option<(u16, u16, u16, u16)>,
|
||||
) -> Sdt {
|
||||
let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT ", 1);
|
||||
|
|
@ -302,7 +379,9 @@ fn create_srat_table(
|
|||
// Check the MemoryAffinity structure is the right size as expected by
|
||||
// the ACPI specification.
|
||||
assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);
|
||||
|
||||
// Confirm struct size matches ACPI 6.6 spec
|
||||
assert_eq!(std::mem::size_of::<GenericInitiatorAffinity>(), 32);
|
||||
let dm = device_manager.lock().unwrap();
|
||||
for (node_id, node) in numa_nodes.iter() {
|
||||
let proximity_domain = *node_id;
|
||||
|
||||
|
|
@ -353,6 +432,19 @@ fn create_srat_table(
|
|||
clock_domain: 0,
|
||||
});
|
||||
}
|
||||
|
||||
// Add Generic Initiator Affinity structures for device-only NUMA nodes
|
||||
if let Some(device_id) = &node.device_id {
|
||||
// Resolve device_id to guest BDF
|
||||
if let Some(bdf) = dm.get_device_bdf(device_id) {
|
||||
srat.append(GenericInitiatorAffinity::from_pci_bdf(
|
||||
bdf,
|
||||
proximity_domain,
|
||||
));
|
||||
} else {
|
||||
warn!("Generic Initiator: device_id '{device_id}' not found in device manager");
|
||||
}
|
||||
}
|
||||
}
|
||||
srat
|
||||
}
|
||||
|
|
@ -370,6 +462,10 @@ fn create_slit_table(numa_nodes: &NumaNodes) -> Sdt {
|
|||
10
|
||||
} else if let Some(distance) = distances.get(i) {
|
||||
*distance
|
||||
// When forward distance config is missing
|
||||
// we can derive it using distance symmetry
|
||||
} else if let Some(destination) = numa_nodes.get(i) {
|
||||
destination.distances.get(node_id).copied().unwrap_or(20)
|
||||
} else {
|
||||
20
|
||||
};
|
||||
|
|
@ -887,6 +983,7 @@ fn create_acpi_tables_internal(
|
|||
// SRAT
|
||||
let srat = create_srat_table(
|
||||
numa_nodes,
|
||||
device_manager,
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
topology,
|
||||
);
|
||||
|
|
@ -1074,6 +1171,7 @@ pub fn create_acpi_tables_tdx(
|
|||
// SRAT
|
||||
tables.push(create_srat_table(
|
||||
numa_nodes,
|
||||
device_manager,
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
topology,
|
||||
));
|
||||
|
|
@ -1090,3 +1188,174 @@ pub fn create_acpi_tables_tdx(
|
|||
|
||||
tables
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_generic_initiator_affinity_size() {
        // ACPI spec requires Generic Initiator Affinity Structure to be exactly 32 bytes
        assert_eq!(
            std::mem::size_of::<GenericInitiatorAffinity>(),
            32,
            "GenericInitiatorAffinity must be exactly 32 bytes per ACPI 6.6 spec"
        );
    }

    #[test]
    fn test_generic_initiator_from_pci_bdf() {
        // Test creating Generic Initiator from PCI BDF
        // segment:bus:device.function = 0000:00:05.0
        let bdf = PciBdf::new(0, 0, 5, 0);
        let proximity_domain = 1;

        let gi = GenericInitiatorAffinity::from_pci_bdf(bdf, proximity_domain);

        // Verify structure fields
        assert_eq!(gi.type_, 5, "Type must be 5 for Generic Initiator");
        assert_eq!(gi.length, 32, "Length must be 32 bytes");
        assert_eq!(gi._reserved1, 0, "Reserved field must be 0");
        assert_eq!(
            gi.device_handle_type, 1,
            "Device handle type must be 1 for PCI per ACPI 6.6 spec"
        );
        // Copy packed fields to local variables to avoid unaligned references
        let gi_proximity_domain = gi.proximity_domain;
        let gi_flags = gi.flags;
        let gi_reserved2 = gi._reserved2;
        assert_eq!(
            gi_proximity_domain, proximity_domain,
            "Proximity domain must match input"
        );
        assert_eq!(gi_flags, 1, "Flags must be 1 (enabled)");
        assert_eq!(gi_reserved2, 0, "Reserved field must be 0");

        // Verify PCI BDF encoding in device_handle.
        // ACPI 6.6 Section 5.2.16.6, "Device Handle - PCI":
        //   Bytes 0-1:  PCI Segment (little-endian)
        //   Byte 2:     PCI Bus Number
        //   Byte 3:     PCI Device Number (bits 7:3) | PCI Function Number (bits 2:0)
        //   Bytes 4-15: Reserved
        let expected_handle: [u8; 16] = [
            0, 0, // PCI Segment
            0, // PCI Bus Number
            5 << 3, // Device 5, function 0
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Reserved
        ];
        assert_eq!(
            gi.device_handle, expected_handle,
            "Device handle must encode PCI BDF correctly per ACPI 6.6 Table 5-66"
        );

        // Every component non-zero, so a swapped or misplaced field cannot
        // hide behind zeroes: 0102:03:04.5
        let gi = GenericInitiatorAffinity::from_pci_bdf(PciBdf::new(0x0102, 0x03, 0x04, 0x5), 0);
        let expected_handle: [u8; 16] = [
            0x02, 0x01, // PCI Segment 0x0102, little-endian
            0x03, // PCI Bus Number
            (0x04 << 3) | 0x5, // Device 4, function 5
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Reserved
        ];
        assert_eq!(
            gi.device_handle, expected_handle,
            "Device handle must encode PCI BDF correctly per ACPI 6.6 Table 5-66"
        );
    }

    #[test]
    fn test_generic_initiator_multiple_numa_nodes() {
        // Test Generic Initiators assigned to different NUMA nodes
        let bdf0 = PciBdf::new(0, 0, 4, 0);
        let bdf1 = PciBdf::new(0, 0, 5, 0);

        let gi0 = GenericInitiatorAffinity::from_pci_bdf(bdf0, 0);
        let gi1 = GenericInitiatorAffinity::from_pci_bdf(bdf1, 1);

        // Copy packed fields to local variables to avoid unaligned references
        let gi0_proximity_domain = gi0.proximity_domain;
        let gi1_proximity_domain = gi1.proximity_domain;
        assert_eq!(gi0_proximity_domain, 0);
        assert_eq!(gi1_proximity_domain, 1);

        // Verify both have correct type and length
        assert_eq!(gi0.type_, 5);
        assert_eq!(gi0.length, 32);
        assert_eq!(gi1.type_, 5);
        assert_eq!(gi1.length, 32);
    }

    #[test]
    fn test_generic_initiator_repr_c_layout() {
        // Verify the struct has correct C representation for ACPI table.
        // Each multi-byte field gets a distinct byte pattern so that a wrong
        // offset or a wrong byte order shows up as a mismatch.
        let mut device_handle = [0u8; 16];
        for (i, byte) in device_handle.iter_mut().enumerate() {
            *byte = 0xA0 + i as u8;
        }
        let gi = GenericInitiatorAffinity {
            type_: 5,
            length: 32,
            _reserved1: 0,
            device_handle_type: 1,
            proximity_domain: 0x0403_0201,
            device_handle,
            flags: 1,
            _reserved2: 0,
        };

        // The struct derives `IntoBytes` + `Immutable`, so the byte view is
        // available safely; no raw-pointer cast is needed.
        // NOTE(review): relies on the `IntoBytes` trait being in scope through
        // `use super::*` (the parent module names it in the derive) — confirm.
        let bytes = gi.as_bytes();
        assert_eq!(bytes.len(), 32, "Structure must serialize to 32 bytes");

        // Verify field positions per ACPI 6.6 spec
        assert_eq!(bytes[0], 5, "Offset 0: Type");
        assert_eq!(bytes[1], 32, "Offset 1: Length");
        assert_eq!(bytes[2], 0, "Offset 2: Reserved");
        assert_eq!(bytes[3], 1, "Offset 3: Device Handle Type (1=PCI per spec)");
        // Proximity domain at offset 4-7 (u32 little-endian)
        assert_eq!(
            &bytes[4..8],
            &[0x01, 0x02, 0x03, 0x04],
            "Offset 4-7: Proximity Domain"
        );
        // Device handle at offset 8-23 (16 bytes)
        assert_eq!(&bytes[8..24], &device_handle[..], "Offset 8-23: Device Handle");
        // Flags at offset 24-27 (u32 little-endian)
        assert_eq!(&bytes[24..28], &[1, 0, 0, 0], "Offset 24-27: Flags");
        // Reserved at offset 28-31
        assert_eq!(&bytes[28..32], &[0, 0, 0, 0], "Offset 28-31: Reserved");
    }

    #[test]
    fn test_generic_initiator_acpi_device_handle() {
        // Test ACPI device handle (device_handle_type=0) for completeness
        // This validates HID and UID encoding per ACPI 6.6 spec (Table 5.65)
        let hid: u64 = 0x0123456789ABCDEF;
        let uid: u32 = 0x12345678;
        let proximity_domain = 2;

        let gi = GenericInitiatorAffinity::from_acpi_device(hid, uid, proximity_domain);

        // Verify structure fields
        assert_eq!(gi.type_, 5, "Type must be 5 for Generic Initiator");
        assert_eq!(gi.length, 32, "Length must be 32 bytes");
        assert_eq!(gi._reserved1, 0, "Reserved field must be 0");
        assert_eq!(
            gi.device_handle_type, 0,
            "Device handle type must be 0 for ACPI per ACPI 6.6 spec"
        );
        // Copy packed fields to local variables to avoid unaligned references
        let gi_proximity_domain = gi.proximity_domain;
        let gi_flags = gi.flags;
        let gi_reserved2 = gi._reserved2;
        assert_eq!(
            gi_proximity_domain, proximity_domain,
            "Proximity domain must match input"
        );
        assert_eq!(gi_flags, 1, "Flags must be 1 (enabled)");
        assert_eq!(gi_reserved2, 0, "Reserved field must be 0");

        // Verify ACPI device handle encoding
        // Expected format per ACPI 6.6 Table 5.65:
        //   Bytes 0-7:   HID (64-bit, little-endian)
        //   Bytes 8-11:  UID (32-bit, little-endian)
        //   Bytes 12-15: Reserved
        let expected_handle: [u8; 16] = [
            0xEF, 0xCD, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01, // HID
            0x78, 0x56, 0x34, 0x12, // UID
            0, 0, 0, 0, // Reserved
        ];
        assert_eq!(
            gi.device_handle, expected_handle,
            "Device handle must encode HID and UID correctly"
        );
    }
}
|
||||
|
|
|
|||
|
|
@ -1051,6 +1051,10 @@ pub struct DeviceManager {
|
|||
// List of guest NUMA nodes.
|
||||
numa_nodes: NumaNodes,
|
||||
|
||||
// Mapping from device ID (e.g., "vfio0") to guest PCI BDF.
|
||||
// Used for Generic Initiator NUMA nodes to resolve device_id to BDF.
|
||||
device_id_to_bdf: HashMap<String, PciBdf>,
|
||||
|
||||
// Possible handle to the virtio-balloon device
|
||||
balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
|
||||
|
||||
|
|
@ -1348,6 +1352,7 @@ impl DeviceManager {
|
|||
id_to_dev_info: HashMap::new(),
|
||||
seccomp_action,
|
||||
numa_nodes,
|
||||
device_id_to_bdf: HashMap::new(),
|
||||
balloon: None,
|
||||
activate_evt: activate_evt
|
||||
.try_clone()
|
||||
|
|
@ -1645,6 +1650,9 @@ impl DeviceManager {
|
|||
handle.dma_handler,
|
||||
)?;
|
||||
|
||||
// Track device BDF for Generic Initiator support
|
||||
self.device_id_to_bdf.insert(handle.id.clone(), dev_id);
|
||||
|
||||
if handle.iommu {
|
||||
iommu_attached_devices.push(dev_id);
|
||||
}
|
||||
|
|
@ -3837,6 +3845,10 @@ impl DeviceManager {
|
|||
.unwrap()
|
||||
.insert(vfio_name.clone(), node);
|
||||
|
||||
// Track device ID → guest BDF mapping for Generic Initiator resolution
|
||||
self.device_id_to_bdf
|
||||
.insert(vfio_name.clone(), pci_device_bdf);
|
||||
|
||||
Ok((pci_device_bdf, vfio_name))
|
||||
}
|
||||
|
||||
|
|
@ -4018,6 +4030,10 @@ impl DeviceManager {
|
|||
.unwrap()
|
||||
.insert(vfio_user_name.clone(), node);
|
||||
|
||||
// Track device ID → guest BDF mapping for Generic Initiator resolution
|
||||
self.device_id_to_bdf
|
||||
.insert(vfio_user_name.clone(), pci_device_bdf);
|
||||
|
||||
Ok((pci_device_bdf, vfio_user_name))
|
||||
}
|
||||
|
||||
|
|
@ -4331,6 +4347,13 @@ impl DeviceManager {
|
|||
&self.pci_segments
|
||||
}
|
||||
|
||||
// Get the guest PCI BDF for a device ID.
|
||||
// Returns None if the device ID is not found.
|
||||
// Used for resolving Generic Initiator device_id to BDF in ACPI generation.
|
||||
pub fn get_device_bdf(&self, device_id: &str) -> Option<PciBdf> {
|
||||
self.device_id_to_bdf.get(device_id).copied()
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
|
||||
pub fn cmdline_additions(&self) -> &[String] {
|
||||
self.cmdline_additions.as_slice()
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ use linux_loader::loader::bzimage::BzImage;
|
|||
use linux_loader::loader::elf::PvhBootCapability::PvhEntryPresent;
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
|
||||
use linux_loader::loader::pe::Error::InvalidImageMagicNumber;
|
||||
use log::{error, info};
|
||||
use log::{error, info, warn};
|
||||
use seccompiler::SeccompAction;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
|
@ -1032,6 +1032,12 @@ impl Vm {
|
|||
let dest = distance.destination;
|
||||
let dist = distance.distance;
|
||||
|
||||
if dest == config.guest_numa_id && dist != 10 {
|
||||
warn!(
|
||||
"Ignoring self-distance {dest}@{dist} (must be 10 per ACPI spec)"
|
||||
);
|
||||
}
|
||||
|
||||
if !configs.iter().any(|cfg| cfg.guest_numa_id == dest) {
|
||||
error!("Unknown destination NUMA node {dest}");
|
||||
return Err(Error::InvalidNumaConfig);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue