arch: Handle Generic Initiator in ARM64 FDT

Update FDT generation to skip NUMA properties when Generic Initiator
nodes are present, preventing conflicts between FDT and ACPI NUMA
information. FDT cannot represent Generic Initiator nodes, so ACPI
(via SRAT Type 5) becomes the authoritative source for the entire
NUMA topology when Generic Initiators exist.

Skip FDT numa-node-id properties in CPU and memory nodes
when Generic Initiator is present

Distance map bug fix : iterate over actual NUMA node IDs instead
of 0..len()

Use distance symmetry to derive distance when forward config is
missing

Default to distance cost 20 when neither direction specified

Only create memory nodes if NUMA node has memory region

Added unit tests

ARM64 boot protocol:
https://docs.kernel.org/arch/arm64/booting.html

Signed-off-by: Saravanan D <saravanand@crusoe.ai>
This commit is contained in:
Saravanan D 2026-01-14 11:24:53 +00:00 committed by Rob Bradford
parent dc0c306dd9
commit df67c3690e

View file

@ -19,7 +19,7 @@ use hypervisor::arch::aarch64::regs::{
AARCH64_ARCH_TIMER_HYP_IRQ, AARCH64_ARCH_TIMER_PHYS_NONSECURE_IRQ,
AARCH64_ARCH_TIMER_PHYS_SECURE_IRQ, AARCH64_ARCH_TIMER_VIRT_IRQ, AARCH64_PMU_IRQ,
};
use log::{debug, warn};
use log::{debug, info, warn};
use thiserror::Error;
use vm_fdt::{FdtWriter, FdtWriterResult};
use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryRegion};
@ -345,6 +345,17 @@ fn create_cpu_nodes(
warn!("cache sysfs system does not exist.");
}
// Arm boot protocol requires a minimal Device Tree
// https://docs.kernel.org/arch/arm64/booting.html
// As Generic initiators are supported only in ACPI
// When a guest kernel does not boot under "acpi=force" mode it can
// hang due to conflicting numa information present in FDT which
// does not support Generic Initiators
let has_generic_initiator = numa_nodes.values().any(|node| node.device_id.is_some());
if has_generic_initiator {
info!("Skipping NUMA CPU node encoding in FDT with Generic Initiator devices");
}
for (cpu_id, mpidr) in vcpu_mpidr.iter().enumerate().take(num_cpus) {
let cpu_name = format!("cpu@{cpu_id:x}");
let cpu_node = fdt.begin_node(&cpu_name)?;
@ -359,8 +370,10 @@ fn create_cpu_nodes(
fdt.property_u32("reg", (mpidr & 0x7FFFFF) as u32)?;
fdt.property_u32("phandle", cpu_id as u32 + FIRST_VCPU_PHANDLE)?;
// Add `numa-node-id` property if there is any numa config.
if numa_nodes.len() > 1 {
// Skipping NUMA encoding in FDT when Generic Initiator devices
// are present allowed such guest kernels to boot properly and
// rely solely on ACPI tables to setup NUMA
if numa_nodes.len() > 1 && !has_generic_initiator {
for numa_node_idx in 0..numa_nodes.len() {
let numa_node = numa_nodes.get(&(numa_node_idx as u32));
if numa_node.unwrap().cpus.contains(&(cpu_id as u32)) {
@ -501,7 +514,14 @@ fn create_memory_node(
) -> FdtWriterResult<()> {
// See https://github.com/torvalds/linux/blob/58ae0b51506802713aa0e9956d1853ba4c722c98/Documentation/devicetree/bindings/numa.txt
// for NUMA setting in memory node.
if numa_nodes.len() > 1 {
let has_generic_initiator = numa_nodes.values().any(|node| node.device_id.is_some());
if has_generic_initiator {
info!("Skipping NUMA memory node encoding in FDT with Generic Initiator devices");
}
// Skipping NUMA encoding in FDT when Generic Initiator devices
// are present allowed guest kernels to boot and
// rely solely on ACPI tables to setup NUMA
if numa_nodes.len() > 1 && !has_generic_initiator {
for numa_node_idx in 0..numa_nodes.len() {
let numa_node = numa_nodes.get(&(numa_node_idx as u32));
let mut mem_reg_prop: Vec<u64> = Vec::new();
@ -518,12 +538,15 @@ fn create_memory_node(
node_memory_addr = memory_region_start_addr;
}
}
let memory_node_name = format!("memory@{node_memory_addr:x}");
let memory_node = fdt.begin_node(&memory_node_name)?;
fdt.property_string("device_type", "memory")?;
fdt.property_array_u64("reg", &mem_reg_prop)?;
fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
fdt.end_node(memory_node)?;
// Only create a memory node if this NUMA node has memory regions
if !mem_reg_prop.is_empty() {
let memory_node_name = format!("memory@{node_memory_addr:x}");
let memory_node = fdt.begin_node(&memory_node_name)?;
fdt.property_string("device_type", "memory")?;
fdt.property_array_u64("reg", &mem_reg_prop)?;
fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
fdt.end_node(memory_node)?;
}
}
} else {
// Note: memory regions from "GuestMemory" are sorted and non-zero sized.
@ -1044,6 +1067,22 @@ fn create_pci_nodes(
}
fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()> {
// When Generic Initiator nodes are present, skip ALL FDT NUMA information.
// Let ACPI (which supports Generic Initiator via SRAT Type 5) handle the entire NUMA topology.
// FDT cannot represent Generic Initiator nodes, and mixing FDT + ACPI NUMA info causes conflicts.
let has_generic_initiator = numa_nodes.values().any(|node| node.device_id.is_some());
if has_generic_initiator {
info!("Skipping NUMA distance map encoding in FDT with Generic Initiator devices");
return Ok(());
}
// At this point, we know there are no Generic Initiator nodes
let mut numa_ids: Vec<u32> = numa_nodes.keys().cloned().collect();
// If we only have one node, no distance map is needed
if numa_ids.len() <= 1 {
return Ok(());
}
let distance_map_node = fdt.begin_node("distance-map")?;
fdt.property_string("compatible", "numa-distance-map-v1")?;
// Construct the distance matrix.
@ -1056,26 +1095,33 @@ fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtW
// a value greater than 10.
// 4. distance-matrix should have entries in lexicographical ascending
// order of nodes.
numa_ids.sort_unstable(); // lexicographical order
let mut distance_matrix = Vec::new();
for numa_node_idx in 0..numa_nodes.len() {
let numa_node = numa_nodes.get(&(numa_node_idx as u32));
for dest_numa_node in 0..numa_node.unwrap().distances.len() + 1 {
if numa_node_idx == dest_numa_node {
distance_matrix.push(numa_node_idx as u32);
distance_matrix.push(dest_numa_node as u32);
// Iterate over actual numa IDs instead of 0..len()
for numa_id in numa_ids.iter() {
let numa_node = &numa_nodes[numa_id];
for dest_numa_id in numa_ids.iter() {
if *numa_id == *dest_numa_id {
distance_matrix.push(*numa_id);
distance_matrix.push(*dest_numa_id);
distance_matrix.push(10_u32);
continue;
}
distance_matrix.push(numa_node_idx as u32);
distance_matrix.push(dest_numa_node as u32);
distance_matrix.push(
*numa_node
.unwrap()
.distances
.get(&(dest_numa_node as u32))
.unwrap() as u32,
);
distance_matrix.push(*numa_id);
distance_matrix.push(*dest_numa_id);
// Use user-specified distance, checking both directions for symmetry
let distance = if let Some(&dist) = numa_node.distances.get(dest_numa_id) {
// Forward direction: current node -> dest node
dist
} else if let Some(dest_node) = numa_nodes.get(dest_numa_id) {
// Reverse direction for symmetry: dest node -> current node
dest_node.distances.get(numa_id).copied().unwrap_or(20)
} else {
// Default distance when neither direction is specified
20
};
distance_matrix.push(distance as u32);
}
}
fdt.property_array_u32("distance-matrix", distance_matrix.as_ref())?;
@ -1160,3 +1206,118 @@ fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) {
print_node(child, n_spaces + 2);
}
}
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use super::*;
use crate::NumaNode;
// Helper function to create a simple NumaNode for testing
fn create_test_numa_node(cpus: Vec<u32>, device_id: Option<String>) -> NumaNode {
NumaNode {
memory_regions: Vec::new(),
hotplug_regions: Vec::new(),
cpus,
pci_segments: Vec::new(),
distances: BTreeMap::new(),
memory_zones: Vec::new(),
device_id,
}
}
#[test]
fn test_fdt_generic_initiator_detection_and_skip() {
// No Generic Initiator - should not skip FDT NUMA
let mut numa_nodes = BTreeMap::new();
numa_nodes.insert(0, create_test_numa_node(vec![0, 1], None));
numa_nodes.insert(1, create_test_numa_node(vec![2, 3], None));
let has_gi = numa_nodes.values().any(|node| node.device_id.is_some());
assert!(
!has_gi,
"Should not detect Generic Initiator when none present"
);
// One Generic Initiator - should skip FDT NUMA
let mut numa_nodes = BTreeMap::new();
numa_nodes.insert(0, create_test_numa_node(vec![0, 1], None));
numa_nodes.insert(1, create_test_numa_node(vec![], Some("vfio0".to_string())));
let has_gi = numa_nodes.values().any(|node| node.device_id.is_some());
assert!(has_gi, "Should detect Generic Initiator when present");
let mut fdt = FdtWriter::new().unwrap();
let result = create_distance_map_node(&mut fdt, &numa_nodes);
assert!(result.is_ok(), "Should skip distance map when GI present");
// Multiple Generic Initiators - should skip FDT NUMA
let mut numa_nodes = BTreeMap::new();
numa_nodes.insert(0, create_test_numa_node(vec![0, 1], None));
numa_nodes.insert(1, create_test_numa_node(vec![], Some("vfio0".to_string())));
numa_nodes.insert(2, create_test_numa_node(vec![], Some("vfio1".to_string())));
let has_gi = numa_nodes.values().any(|node| node.device_id.is_some());
assert!(has_gi, "Should detect multiple Generic Initiators");
}
#[test]
fn test_fdt_distance_map() {
// Single NUMA node - should skip distance map
let mut numa_nodes = BTreeMap::new();
numa_nodes.insert(0, create_test_numa_node(vec![0, 1], None));
let mut fdt = FdtWriter::new().unwrap();
let result = create_distance_map_node(&mut fdt, &numa_nodes);
assert!(result.is_ok(), "Should skip distance map for single node");
// Empty NUMA nodes - should handle gracefully
let numa_nodes = BTreeMap::new();
let mut fdt = FdtWriter::new().unwrap();
let result = create_distance_map_node(&mut fdt, &numa_nodes);
assert!(result.is_ok(), "Should handle empty NUMA nodes");
// Non-contiguous NUMA IDs (0, 2, 5) with distance symmetry
let mut numa_nodes = BTreeMap::new();
let mut node0 = create_test_numa_node(vec![0], None);
node0.distances.insert(2, 20);
// node0 has no explicit distance to node5
let mut node2 = create_test_numa_node(vec![1], None);
node2.distances.insert(0, 20);
node2.distances.insert(5, 25);
let mut node5 = create_test_numa_node(vec![2], None);
node5.distances.insert(0, 30);
node5.distances.insert(2, 25);
// node5->node0 (should be used for node0->node5)
numa_nodes.insert(0, node0);
numa_nodes.insert(2, node2);
numa_nodes.insert(5, node5);
// Verify IDs are sorted lexicographically
let mut numa_ids: Vec<u32> = numa_nodes.keys().cloned().collect();
numa_ids.sort_unstable();
assert_eq!(numa_ids, vec![0, 2, 5]);
let mut fdt = FdtWriter::new().unwrap();
let result = create_distance_map_node(&mut fdt, &numa_nodes);
assert!(
result.is_ok(),
"Should handle non-contiguous IDs and symmetry"
);
// Default distance (20) when no distance specified in either direction
let mut numa_nodes = BTreeMap::new();
numa_nodes.insert(0, create_test_numa_node(vec![0], None));
numa_nodes.insert(1, create_test_numa_node(vec![1], None));
// Neither node has distance to the other
let mut fdt = FdtWriter::new().unwrap();
let result = create_distance_map_node(&mut fdt, &numa_nodes);
assert!(result.is_ok(), "Should default to 20 for missing distances");
}
}