From 8df05b72dc5f66a17ac66966c944016bda024810 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Wed, 29 May 2019 16:33:29 -0700 Subject: [PATCH] vmm: Add MSI-X support to virtio-pci devices In order to allow virtio-pci devices to use MSI-X messages instead of legacy pin based interrupts, this patch implements the MSI-X support for cloud-hypervisor. The VMM code and virtio-pci bits have been modified based on the "msix" module previously added to the pci crate. Fixes #12 Signed-off-by: Sebastien Boeuf --- pci/src/device.rs | 14 +++- pci/src/lib.rs | 4 +- vm-virtio/src/queue.rs | 4 + vm-virtio/src/transport/pci_common_config.rs | 8 +- vm-virtio/src/transport/pci_device.rs | 69 ++++++++++++++-- vmm/src/vm.rs | 85 ++++++++++++++------ 6 files changed, 148 insertions(+), 36 deletions(-) diff --git a/pci/src/device.rs b/pci/src/device.rs index b84f5a44d..19de0ebc5 100755 --- a/pci/src/device.rs +++ b/pci/src/device.rs @@ -3,6 +3,7 @@ // found in the LICENSE-BSD-3-Clause file. use crate::configuration::{self, PciConfiguration}; +use crate::msix::MsixTableEntry; use crate::PciInterruptPin; use devices::BusDevice; use std; @@ -13,6 +14,8 @@ use vm_memory::{GuestAddress, GuestUsize}; use vmm_sys_util::EventFd; pub type IrqClosure = Box std::result::Result<(), std::io::Error> + Send + Sync>; +pub type MsixClosure = + Box std::result::Result<(), std::io::Error> + Send + Sync>; #[derive(Debug)] pub enum Error { @@ -44,7 +47,16 @@ impl Display for Error { pub trait PciDevice: BusDevice { /// Assign a legacy PCI IRQ to this device. /// The device may write to `irq_evt` to trigger an interrupt. - fn assign_irq(&mut self, _irq_cb: Arc, _irq_num: u32, _irq_pin: PciInterruptPin) {} + fn assign_pin_irq( + &mut self, + _irq_cb: Arc, + _irq_num: u32, + _irq_pin: PciInterruptPin, + ) { + } + + /// Assign MSI-X to this device. + fn assign_msix(&mut self, _msi_cb: Arc) {} /// Allocates the needed PCI BARs space using the `allocate` function which takes a size and /// returns an address. Returns a Vec of (GuestAddress, GuestUsize) tuples. diff --git a/pci/src/lib.rs b/pci/src/lib.rs index b5eb07797..c237527f1 100644 --- a/pci/src/lib.rs +++ b/pci/src/lib.rs @@ -12,6 +12,7 @@ extern crate vmm_sys_util; mod configuration; mod device; +mod msix; mod root; pub use self::configuration::{ @@ -20,7 +21,8 @@ pub use self::configuration::{ PciSubclass, }; pub use self::device::Error as PciDeviceError; -pub use self::device::{IrqClosure, PciDevice}; +pub use self::device::{IrqClosure, MsixClosure, PciDevice}; +pub use self::msix::{MsixCap, MsixConfig, MsixTableEntry}; pub use self::root::{PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; /// PCI has four interrupt pins A->D. diff --git a/vm-virtio/src/queue.rs b/vm-virtio/src/queue.rs index 9deab2ae3..3325e4c62 100644 --- a/vm-virtio/src/queue.rs +++ b/vm-virtio/src/queue.rs @@ -218,6 +218,9 @@ pub struct Queue { /// Inidcates if the queue is finished with configuration pub ready: bool, + /// Interrupt vector index of the queue + pub vector: u16, + /// Guest physical address of the descriptor table pub desc_table: GuestAddress, @@ -238,6 +241,7 @@ impl Queue { max_size, size: max_size, ready: false, + vector: 0, desc_table: GuestAddress(0), avail_ring: GuestAddress(0), used_ring: GuestAddress(0), diff --git a/vm-virtio/src/transport/pci_common_config.rs b/vm-virtio/src/transport/pci_common_config.rs index 83807efbe..a14761d72 100644 --- a/vm-virtio/src/transport/pci_common_config.rs +++ b/vm-virtio/src/transport/pci_common_config.rs @@ -40,6 +40,7 @@ pub struct VirtioPciCommonConfig { pub device_feature_select: u32, pub driver_feature_select: u32, pub queue_select: u16, + pub msix_config: u16, } impl VirtioPciCommonConfig { @@ -119,10 +120,11 @@ impl VirtioPciCommonConfig { fn read_common_config_word(&self, offset: u64, queues: &[Queue]) -> u16 { debug!("read_common_config_word: offset 0x{:x}", offset); match offset { - 0x10 => 0, // TODO msi-x (crbug/854765): self.msix_config, + 0x10 => self.msix_config, 0x12 => queues.len() as u16, // num_queues 0x16 => self.queue_select, 0x18 => self.with_queue(queues, |q| q.size).unwrap_or(0), + 0x1a => self.with_queue(queues, |q| q.vector).unwrap_or(0), 0x1c => { if self.with_queue(queues, |q| q.ready).unwrap_or(false) { 1 @@ -141,10 +143,10 @@ impl VirtioPciCommonConfig { fn write_common_config_word(&mut self, offset: u64, value: u16, queues: &mut Vec) { debug!("write_common_config_word: offset 0x{:x}", offset); match offset { - 0x10 => (), // TODO msi-x (crbug/854765): self.msix_config = value, + 0x10 => self.msix_config = value, 0x16 => self.queue_select = value, 0x18 => self.with_queue_mut(queues, |q| q.size = value), - 0x1a => (), // TODO msi-x (crbug/854765): self.with_queue_mut(queues, |q| q.msix_vector = v), + 0x1a => self.with_queue_mut(queues, |q| q.vector = value), 0x1c => self.with_queue_mut(queues, |q| q.ready = value == 1), _ => { warn!("invalid virtio register word write: 0x{:x}", offset); diff --git a/vm-virtio/src/transport/pci_device.rs b/vm-virtio/src/transport/pci_device.rs index 6c2187d4a..80f52e8a2 100755 --- a/vm-virtio/src/transport/pci_device.rs +++ b/vm-virtio/src/transport/pci_device.rs @@ -16,11 +16,13 @@ use byteorder::{ByteOrder, LittleEndian}; use libc::EFD_NONBLOCK; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; +use std::sync::Mutex; use devices::BusDevice; use pci::{ - IrqClosure, PciBarConfiguration, PciCapability, PciCapabilityID, PciClassCode, - PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, PciInterruptPin, PciSubclass, + IrqClosure, MsixCap, MsixClosure, MsixConfig, PciBarConfiguration, PciCapability, + PciCapabilityID, PciClassCode, PciConfiguration, PciDevice, PciDeviceError, PciHeaderType, + PciInterruptPin, PciSubclass, }; use vm_allocator::SystemAllocator; use vm_memory::{Address, ByteValued, GuestAddress, GuestMemoryMmap, GuestUsize, Le32}; @@ -143,7 +145,11 @@ const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000; const DEVICE_CONFIG_SIZE: u64 = 0x1000; const NOTIFICATION_BAR_OFFSET: u64 = 0x3000; const NOTIFICATION_SIZE: u64 = 0x1000; -const CAPABILITY_BAR_SIZE: u64 = 0x4000; +const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000; +const MSIX_TABLE_SIZE: u64 = 0x1000; +const MSIX_PBA_BAR_OFFSET: u64 = 0x7000; +const MSIX_PBA_SIZE: u64 = 0x1000; +const CAPABILITY_BAR_SIZE: u64 = 0x8000; const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address. @@ -157,6 +163,12 @@ pub struct VirtioPciDevice { // virtio PCI common configuration common_config: VirtioPciCommonConfig, + // MSI-X config + msix_config: Arc>, + + // Number of MSI-X vectors + msix_num: u16, + // Virtio device reference and status device: Box, device_activated: bool, @@ -178,7 +190,7 @@ pub struct VirtioPciDevice { impl VirtioPciDevice { /// Constructs a new PCI transport for the given virtio device. - pub fn new(memory: GuestMemoryMmap, device: Box) -> Result { + pub fn new(memory: GuestMemoryMmap, device: Box, msix_num: u16) -> Result { let mut queue_evts = Vec::new(); for _ in device.queue_max_sizes().iter() { queue_evts.push(EventFd::new(EFD_NONBLOCK)?) @@ -210,7 +222,10 @@ impl VirtioPciDevice { device_feature_select: 0, driver_feature_select: 0, queue_select: 0, + msix_config: 0, }, + msix_config: Arc::new(Mutex::new(MsixConfig::new(msix_num))), + msix_num, device, device_activated: false, interrupt_status: Arc::new(AtomicUsize::new(0)), @@ -302,13 +317,23 @@ impl VirtioPciDevice { .add_capability(&configuration_cap) .map_err(PciDeviceError::CapabilitiesSetup)?; + let msix_cap = MsixCap::new( + settings_bar, + self.msix_num, + MSIX_TABLE_BAR_OFFSET as u32, + MSIX_PBA_BAR_OFFSET as u32, + ); + self.configuration + .add_capability(&msix_cap) + .map_err(PciDeviceError::CapabilitiesSetup)?; + self.settings_bar = settings_bar; Ok(()) } } impl PciDevice for VirtioPciDevice { - fn assign_irq(&mut self, irq_cb: Arc, irq_num: u32, irq_pin: PciInterruptPin) { + fn assign_pin_irq(&mut self, irq_cb: Arc, irq_num: u32, irq_pin: PciInterruptPin) { self.configuration.set_irq(irq_num as u8, irq_pin); let cb = Arc::new(Box::new(move |_queue: &Queue| (irq_cb)()) as VirtioInterrupt); @@ -316,6 +341,16 @@ impl PciDevice for VirtioPciDevice { self.interrupt_cb = Some(cb); } + fn assign_msix(&mut self, msi_cb: Arc) { + let msix_config = self.msix_config.clone(); + + let cb = Arc::new(Box::new(move |queue: &Queue| { + (msi_cb)(msix_config.lock().unwrap().table_entries[queue.vector as usize].clone()) + }) as VirtioInterruptClosure); + + self.interrupt_cb = Some(cb); + } + fn config_registers(&self) -> &PciConfiguration { &self.configuration } @@ -406,6 +441,18 @@ impl PciDevice for VirtioPciDevice { { // Handled with ioeventfds. } + o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => { + self.msix_config + .lock() + .unwrap() + .read_table(o - MSIX_TABLE_BAR_OFFSET, data); + } + o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => { + self.msix_config + .lock() + .unwrap() + .read_pba(o - MSIX_PBA_BAR_OFFSET, data); + } _ => (), } } @@ -434,6 +481,18 @@ impl PciDevice for VirtioPciDevice { { // Handled with ioeventfds. } + o if MSIX_TABLE_BAR_OFFSET <= o && o < MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE => { + self.msix_config + .lock() + .unwrap() + .write_table(o - MSIX_TABLE_BAR_OFFSET, data); + } + o if MSIX_PBA_BAR_OFFSET <= o && o < MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE => { + self.msix_config + .lock() + .unwrap() + .write_pba(o - MSIX_PBA_BAR_OFFSET, data); + } _ => (), }; diff --git a/vmm/src/vm.rs b/vmm/src/vm.rs index db96f6cc2..aa2637622 100755 --- a/vmm/src/vm.rs +++ b/vmm/src/vm.rs @@ -22,12 +22,14 @@ extern crate vm_virtio; extern crate vmm_sys_util; use crate::config::VmConfig; -use kvm_bindings::{kvm_pit_config, kvm_userspace_memory_region, KVM_PIT_SPEAKER_DUMMY}; +use kvm_bindings::{kvm_msi, kvm_pit_config, kvm_userspace_memory_region, KVM_PIT_SPEAKER_DUMMY}; use kvm_ioctls::*; use libc::{c_void, siginfo_t, EFD_NONBLOCK}; use linux_loader::loader::KernelLoader; use net_util::Tap; -use pci::{IrqClosure, PciConfigIo, PciDevice, PciInterruptPin, PciRoot}; +use pci::{ + IrqClosure, MsixClosure, MsixTableEntry, PciConfigIo, PciDevice, PciInterruptPin, PciRoot, +}; use qcow::{self, ImageType, QcowFile}; use std::ffi::CString; use std::fs::{File, OpenOptions}; @@ -47,6 +49,7 @@ use vmm_sys_util::EventFd; const VCPU_RTSIG_OFFSET: i32 = 0; const X86_64_IRQ_BASE: u32 = 5; +const DEFAULT_MSIX_VEC_NUM: u16 = 2; // CPUID feature bits const ECX_HYPERVISOR_SHIFT: u32 = 31; // Hypervisor bit. @@ -303,8 +306,9 @@ impl DeviceManager { fn new( memory: GuestMemoryMmap, allocator: &mut SystemAllocator, - vm_fd: &VmFd, + vm_fd: &Arc, vm_cfg: &VmConfig, + msi_capable: bool, ) -> DeviceManagerResult { let io_bus = devices::Bus::new(); let mut mmio_bus = devices::Bus::new(); @@ -357,6 +361,7 @@ impl DeviceManager { vm_fd, &mut pci_root, &mut mmio_bus, + msi_capable, )?; } @@ -381,6 +386,7 @@ impl DeviceManager { vm_fd, &mut pci_root, &mut mmio_bus, + msi_capable, )?; } @@ -397,6 +403,7 @@ impl DeviceManager { vm_fd, &mut pci_root, &mut mmio_bus, + msi_capable, )?; } @@ -417,12 +424,14 @@ impl DeviceManager { virtio_device: Box, memory: GuestMemoryMmap, allocator: &mut SystemAllocator, - vm_fd: &VmFd, + vm_fd: &Arc, pci_root: &mut PciRoot, mmio_bus: &mut devices::Bus, + msi_capable: bool, ) -> DeviceManagerResult<()> { - let mut virtio_pci_device = VirtioPciDevice::new(memory, virtio_device) - .map_err(DeviceManagerError::VirtioDevice)?; + let mut virtio_pci_device = + VirtioPciDevice::new(memory, virtio_device, DEFAULT_MSIX_VEC_NUM) + .map_err(DeviceManagerError::VirtioDevice)?; let bars = virtio_pci_device .allocate_bars(allocator) @@ -435,19 +444,42 @@ impl DeviceManager { .map_err(DeviceManagerError::RegisterIoevent)?; } - // Assign IRQ to the virtio-pci device - let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; - let irq_num = allocator - .allocate_irq() - .ok_or(DeviceManagerError::AllocateIrq)?; - vm_fd - .register_irqfd(irqfd.as_raw_fd(), irq_num) - .map_err(DeviceManagerError::Irq)?; + if msi_capable { + let vm_fd_clone = vm_fd.clone(); - let irq_cb = Arc::new(Box::new(move || irqfd.write(1)) as IrqClosure); + let msi_cb = Arc::new(Box::new(move |entry: MsixTableEntry| { + let msi_queue = kvm_msi { + address_lo: entry.msg_addr_lo, + address_hi: entry.msg_addr_hi, + data: entry.msg_data, + flags: 0u32, + devid: 0u32, + pad: [0u8; 12], + }; - // Let's use irq line INTA for now. - virtio_pci_device.assign_irq(irq_cb, irq_num as u32, PciInterruptPin::IntA); + vm_fd_clone.signal_msi(msi_queue).map(|ret| { + if ret > 0 { + debug!("MSI message successfully delivered"); + } else if ret == 0 { + warn!("failed to deliver MSI message, blocked by guest"); + } + }) + }) as MsixClosure); + + virtio_pci_device.assign_msix(msi_cb); + } else { + let irqfd = EventFd::new(EFD_NONBLOCK).map_err(DeviceManagerError::EventFd)?; + + let irq_num = allocator + .allocate_irq() + .ok_or(DeviceManagerError::AllocateIrq)?; + vm_fd + .register_irqfd(irqfd.as_raw_fd(), irq_num) + .map_err(DeviceManagerError::Irq)?; + + let irq_cb = Arc::new(Box::new(move || irqfd.write(1)) as IrqClosure); + virtio_pci_device.assign_pin_irq(irq_cb, irq_num as u32, PciInterruptPin::IntA); + } let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device)); @@ -542,7 +574,7 @@ impl AsRawFd for EpollContext { } pub struct Vm<'a> { - fd: VmFd, + fd: Arc, kernel: File, memory: GuestMemoryMmap, vcpus: Vec>, @@ -557,6 +589,7 @@ impl<'a> Vm<'a> { pub fn new(kvm: &Kvm, config: VmConfig<'a>) -> Result { let kernel = File::open(&config.kernel.path).map_err(Error::KernelFile)?; let fd = kvm.create_vm().map_err(Error::VmCreate)?; + let fd = Arc::new(fd); // Init guest memory let arch_mem_regions = arch::arch_memory_regions(u64::from(&config.memory) << 20); @@ -607,8 +640,14 @@ impl<'a> Vm<'a> { ) .ok_or(Error::CreateSystemAllocator)?; - let device_manager = DeviceManager::new(guest_memory.clone(), &mut allocator, &fd, &config) - .map_err(Error::DeviceManager)?; + let device_manager = DeviceManager::new( + guest_memory.clone(), + &mut allocator, + &fd, + &config, + kvm.check_extension(Cap::SignalMsi), + ) + .map_err(Error::DeviceManager)?; fd.register_irqfd(device_manager.serial_evt.as_raw_fd(), 4) .map_err(Error::Irq)?; @@ -789,12 +828,6 @@ impl<'a> Vm<'a> { &self.memory } - /// Gets a reference to the kvm file descriptor owned by this VM. - /// - pub fn get_fd(&self) -> &VmFd { - &self.fd - } - fn patch_cpuid(cpuid: &mut CpuId) { let entries = cpuid.mut_entries_slice();